aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/garp.c14
-rw-r--r--net/802/mrp.c32
-rw-r--r--net/8021q/vlan.c3
-rw-r--r--net/8021q/vlan_core.c9
-rw-r--r--net/8021q/vlan_dev.c6
-rw-r--r--net/9p/client.c7
-rw-r--r--net/9p/protocol.c17
-rw-r--r--net/9p/trans_fd.c20
-rw-r--r--net/9p/trans_rdma.c15
-rw-r--r--net/9p/trans_virtio.c6
-rw-r--r--net/9p/trans_xen.c79
-rw-r--r--net/Kconfig2
-rw-r--r--net/Makefile3
-rw-r--r--net/appletalk/ddp.c42
-rw-r--r--net/atm/ioctl.c7
-rw-r--r--net/atm/mpoa_proc.c3
-rw-r--r--net/atm/resources.c2
-rw-r--r--net/ax25/af_ax25.c50
-rw-r--r--net/ax25/ax25_dev.c28
-rw-r--r--net/ax25/ax25_route.c13
-rw-r--r--net/ax25/ax25_subr.c20
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v_elp.c3
-rw-r--r--net/batman-adv/bat_v_ogm.c7
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c8
-rw-r--r--net/batman-adv/distributed-arp-table.c2
-rw-r--r--net/batman-adv/fragmentation.c37
-rw-r--r--net/batman-adv/hard-interface.c46
-rw-r--r--net/batman-adv/log.c1
-rw-r--r--net/batman-adv/main.c56
-rw-r--r--net/batman-adv/multicast.c31
-rw-r--r--net/batman-adv/multicast.h15
-rw-r--r--net/batman-adv/network-coding.c4
-rw-r--r--net/batman-adv/soft-interface.c8
-rw-r--r--net/batman-adv/translation-table.c7
-rw-r--r--net/batman-adv/types.h6
-rw-r--r--net/bluetooth/6lowpan.c1
-rw-r--r--net/bluetooth/a2mp.c23
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/amp.c3
-rw-r--r--net/bluetooth/cmtp/cmtp.h2
-rw-r--r--net/bluetooth/cmtp/core.c9
-rw-r--r--net/bluetooth/ecdh_helper.h2
-rw-r--r--net/bluetooth/hci_conn.c103
-rw-r--r--net/bluetooth/hci_core.c67
-rw-r--r--net/bluetooth/hci_event.c247
-rw-r--r--net/bluetooth/hci_request.c12
-rw-r--r--net/bluetooth/hci_sock.c100
-rw-r--r--net/bluetooth/hci_sysfs.c29
-rw-r--r--net/bluetooth/hidp/core.c4
-rw-r--r--net/bluetooth/l2cap_core.c229
-rw-r--r--net/bluetooth/l2cap_sock.c49
-rw-r--r--net/bluetooth/mgmt.c13
-rw-r--r--net/bluetooth/rfcomm/core.c50
-rw-r--r--net/bluetooth/rfcomm/sock.c46
-rw-r--r--net/bluetooth/sco.c123
-rw-r--r--net/bluetooth/smp.c9
-rw-r--r--net/bpf/test_run.c1
-rw-r--r--net/bridge/br_arp_nd_proxy.c4
-rw-r--r--net/bridge/br_device.c9
-rw-r--r--net/bridge/br_forward.c4
-rw-r--r--net/bridge/br_if.c24
-rw-r--r--net/bridge/br_input.c11
-rw-r--r--net/bridge/br_multicast.c2
-rw-r--r--net/bridge/br_netfilter_hooks.c37
-rw-r--r--net/bridge/br_netfilter_ipv6.c1
-rw-r--r--net/bridge/br_netlink.c2
-rw-r--r--net/bridge/br_private.h4
-rw-r--r--net/bridge/br_sysfs_if.c9
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/bridge/br_vlan_tunnel.c38
-rw-r--r--net/bridge/netfilter/ebtable_broute.c8
-rw-r--r--net/bridge/netfilter/ebtable_filter.c8
-rw-r--r--net/bridge/netfilter/ebtable_nat.c8
-rw-r--r--net/bridge/netfilter/ebtables.c10
-rw-r--r--net/caif/caif_dev.c13
-rw-r--r--net/caif/caif_socket.c4
-rw-r--r--net/caif/caif_usb.c17
-rw-r--r--net/caif/cfcnfg.c16
-rw-r--r--net/caif/cfctrl.c6
-rw-r--r--net/caif/cfserl.c5
-rw-r--r--net/caif/chnl_net.c22
-rw-r--r--net/can/af_can.c38
-rw-r--r--net/can/bcm.c270
-rw-r--r--net/can/gw.c3
-rw-r--r--net/can/proc.c6
-rw-r--r--net/can/raw.c84
-rw-r--r--net/ceph/messenger.c84
-rw-r--r--net/ceph/osd_client.c20
-rw-r--r--net/compat.c2
-rw-r--r--net/core/datagram.c15
-rw-r--r--net/core/dev.c86
-rw-r--r--net/core/devlink.c6
-rw-r--r--net/core/drop_monitor.c15
-rw-r--r--net/core/dst.c8
-rw-r--r--net/core/ethtool.c15
-rw-r--r--net/core/fib_rules.c2
-rw-r--r--net/core/filter.c55
-rw-r--r--net/core/flow_dissector.c13
-rw-r--r--net/core/gen_estimator.c11
-rw-r--r--net/core/lwt_bpf.c8
-rw-r--r--net/core/neighbour.c65
-rw-r--r--net/core/net-procfs.c38
-rw-r--r--net/core/net-sysfs.c70
-rw-r--r--net/core/net_namespace.c41
-rw-r--r--net/core/netpoll.c41
-rw-r--r--net/core/pktgen.c16
-rw-r--r--net/core/rtnetlink.c33
-rw-r--r--net/core/secure_seq.c16
-rw-r--r--net/core/skbuff.c126
-rw-r--r--net/core/sock.c77
-rw-r--r--net/core/sock_reuseport.c2
-rw-r--r--net/core/stream.c13
-rw-r--r--net/core/sysctl_net_core.c17
-rw-r--r--net/dcb/dcbnl.c46
-rw-r--r--net/dccp/dccp.h7
-rw-r--r--net/dccp/ipv4.c22
-rw-r--r--net/dccp/ipv6.c51
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/dccp/options.c2
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/proto.c48
-rw-r--r--net/decnet/Kconfig42
-rw-r--r--net/decnet/Makefile10
-rw-r--r--net/decnet/README8
-rw-r--r--net/decnet/af_decnet.c2411
-rw-r--r--net/decnet/dn_dev.c1438
-rw-r--r--net/decnet/dn_fib.c799
-rw-r--r--net/decnet/dn_neigh.c605
-rw-r--r--net/decnet/dn_nsp_in.c914
-rw-r--r--net/decnet/dn_nsp_out.c703
-rw-r--r--net/decnet/dn_route.c1929
-rw-r--r--net/decnet/dn_rules.c258
-rw-r--r--net/decnet/dn_table.c928
-rw-r--r--net/decnet/dn_timer.c104
-rw-r--r--net/decnet/netfilter/Kconfig16
-rw-r--r--net/decnet/netfilter/Makefile5
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c160
-rw-r--r--net/decnet/sysctl_net_decnet.c373
-rw-r--r--net/dsa/Kconfig1
-rw-r--r--net/dsa/dsa.c2
-rw-r--r--net/dsa/dsa2.c2
-rw-r--r--net/dsa/dsa_priv.h3
-rw-r--r--net/dsa/master.c5
-rw-r--r--net/dsa/slave.c39
-rw-r--r--net/dsa/tag_mtk.c19
-rw-r--r--net/hsr/hsr_forward.c5
-rw-r--r--net/hsr/hsr_framereg.c3
-rw-r--r--net/ieee802154/nl-mac.c11
-rw-r--r--net/ieee802154/nl-phy.c4
-rw-r--r--net/ieee802154/nl802154.c69
-rw-r--r--net/ieee802154/socket.c57
-rw-r--r--net/ife/ife.c1
-rw-r--r--net/ipv4/Kconfig10
-rw-r--r--net/ipv4/af_inet.c41
-rw-r--r--net/ipv4/arp.c9
-rw-r--r--net/ipv4/cipso_ipv4.c24
-rw-r--r--net/ipv4/devinet.c2
-rw-r--r--net/ipv4/esp4.c15
-rw-r--r--net/ipv4/esp4_offload.c3
-rw-r--r--net/ipv4/fib_frontend.c12
-rw-r--r--net/ipv4/gre_demux.c2
-rw-r--r--net/ipv4/icmp.c86
-rw-r--r--net/ipv4/igmp.c87
-rw-r--r--net/ipv4/inet_connection_sock.c27
-rw-r--r--net/ipv4/inet_diag.c4
-rw-r--r--net/ipv4/inet_hashtables.c121
-rw-r--r--net/ipv4/inetpeer.c12
-rw-r--r--net/ipv4/ip_gre.c59
-rw-r--r--net/ipv4/ip_input.c32
-rw-r--r--net/ipv4/ip_output.c68
-rw-r--r--net/ipv4/ip_sockglue.c12
-rw-r--r--net/ipv4/ip_tunnel.c56
-rw-r--r--net/ipv4/ip_vti.c4
-rw-r--r--net/ipv4/ipconfig.c13
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/metrics.c2
-rw-r--r--net/ipv4/netfilter.c12
-rw-r--r--net/ipv4/netfilter/arp_tables.c2
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c5
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c2
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c2
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c2
-rw-r--r--net/ipv4/netfilter/nf_log_arp.c19
-rw-r--r--net/ipv4/netfilter/nf_log_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_socket_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c18
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c3
-rw-r--r--net/ipv4/ping.c35
-rw-r--r--net/ipv4/raw.c12
-rw-r--r--net/ipv4/route.c125
-rw-r--r--net/ipv4/syncookies.c10
-rw-r--r--net/ipv4/sysctl_net_ipv4.c59
-rw-r--r--net/ipv4/tcp.c167
-rw-r--r--net/ipv4/tcp_bbr.c4
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_cong.c9
-rw-r--r--net/ipv4/tcp_cubic.c5
-rw-r--r--net/ipv4/tcp_diag.c5
-rw-r--r--net/ipv4/tcp_fastopen.c32
-rw-r--r--net/ipv4/tcp_input.c176
-rw-r--r--net/ipv4/tcp_ipv4.c62
-rw-r--r--net/ipv4/tcp_metrics.c90
-rw-r--r--net/ipv4/tcp_minisocks.c22
-rw-r--r--net/ipv4/tcp_output.c163
-rw-r--r--net/ipv4/tcp_recovery.c13
-rw-r--r--net/ipv4/tcp_timer.c81
-rw-r--r--net/ipv4/udp.c48
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv4/udp_tunnel.c1
-rw-r--r--net/ipv4/udplite.c10
-rw-r--r--net/ipv4/xfrm4_policy.c3
-rw-r--r--net/ipv4/xfrm4_protocol.c1
-rw-r--r--net/ipv6/addrconf.c58
-rw-r--r--net/ipv6/addrlabel.c27
-rw-r--r--net/ipv6/af_inet6.c15
-rw-r--r--net/ipv6/ah6.c3
-rw-r--r--net/ipv6/calipso.c14
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/esp6.c15
-rw-r--r--net/ipv6/esp6_offload.c3
-rw-r--r--net/ipv6/exthdrs.c31
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/icmp.c41
-rw-r--r--net/ipv6/ila/ila_xlat.c1
-rw-r--r--net/ipv6/inet6_hashtables.c4
-rw-r--r--net/ipv6/ip6_fib.c38
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c96
-rw-r--r--net/ipv6/ip6_icmp.c46
-rw-r--r--net/ipv6/ip6_input.c33
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c109
-rw-r--r--net/ipv6/ip6_tunnel.c61
-rw-r--r--net/ipv6/ip6_vti.c9
-rw-r--r--net/ipv6/ip6mr.c4
-rw-r--r--net/ipv6/ipv6_sockglue.c27
-rw-r--r--net/ipv6/mcast.c3
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/ipv6/netfilter.c6
-rw-r--r--net/ipv6/netfilter/ip6_tables.c3
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c2
-rw-r--r--net/ipv6/netfilter/nf_log_ipv6.c8
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c18
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c6
-rw-r--r--net/ipv6/output_core.c28
-rw-r--r--net/ipv6/ping.c3
-rw-r--r--net/ipv6/raw.c13
-rw-r--r--net/ipv6/reassembly.c8
-rw-r--r--net/ipv6/route.c102
-rw-r--r--net/ipv6/seg6.c5
-rw-r--r--net/ipv6/seg6_hmac.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c13
-rw-r--r--net/ipv6/seg6_local.c2
-rw-r--r--net/ipv6/sit.c43
-rw-r--r--net/ipv6/syncookies.c18
-rw-r--r--net/ipv6/tcp_ipv6.c98
-rw-r--r--net/ipv6/udp.c49
-rw-r--r--net/ipv6/udp_impl.h1
-rw-r--r--net/ipv6/udplite.c11
-rw-r--r--net/ipv6/xfrm6_output.c18
-rw-r--r--net/ipv6/xfrm6_policy.c10
-rw-r--r--net/iucv/af_iucv.c7
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/kcm/kcmsock.c115
-rw-r--r--net/key/af_key.c63
-rw-r--r--net/l2tp/l2tp_core.c4
-rw-r--r--net/l2tp/l2tp_ip6.c7
-rw-r--r--net/lapb/lapb_out.c3
-rw-r--r--net/llc/af_llc.c59
-rw-r--r--net/llc/llc_input.c13
-rw-r--r--net/llc/llc_s_ac.c5
-rw-r--r--net/llc/llc_station.c3
-rw-r--r--net/mac80211/agg-tx.c2
-rw-r--r--net/mac80211/cfg.c21
-rw-r--r--net/mac80211/chan.c7
-rw-r--r--net/mac80211/debugfs_sta.c1
-rw-r--r--net/mac80211/driver-ops.c5
-rw-r--r--net/mac80211/ibss.c6
-rw-r--r--net/mac80211/ieee80211_i.h68
-rw-r--r--net/mac80211/iface.c17
-rw-r--r--net/mac80211/key.c8
-rw-r--r--net/mac80211/key.h2
-rw-r--r--net/mac80211/main.c20
-rw-r--r--net/mac80211/mesh.h22
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c90
-rw-r--r--net/mac80211/mesh_plink.c10
-rw-r--r--net/mac80211/mlme.c20
-rw-r--r--net/mac80211/rate.c3
-rw-r--r--net/mac80211/rc80211_minstrel.c27
-rw-r--r--net/mac80211/rc80211_minstrel.h1
-rw-r--r--net/mac80211/rx.c171
-rw-r--r--net/mac80211/scan.c31
-rw-r--r--net/mac80211/sta_info.c52
-rw-r--r--net/mac80211/sta_info.h33
-rw-r--r--net/mac80211/tx.c68
-rw-r--r--net/mac80211/util.c2
-rw-r--r--net/mac80211/vht.c14
-rw-r--r--net/mac80211/wpa.c19
-rw-r--r--net/mac802154/iface.c1
-rw-r--r--net/mac802154/llsec.c2
-rw-r--r--net/mac802154/rx.c7
-rw-r--r--net/mpls/af_mpls.c72
-rw-r--r--net/mpls/mpls_gso.c3
-rw-r--r--net/ncsi/internal.h9
-rw-r--r--net/ncsi/ncsi-manage.c25
-rw-r--r--net/ncsi/ncsi-netlink.c26
-rw-r--r--net/ncsi/ncsi-netlink.h3
-rw-r--r--net/ncsi/ncsi-pkt.h7
-rw-r--r--net/ncsi/ncsi-rsp.c28
-rw-r--r--net/netfilter/Kconfig3
-rw-r--r--net/netfilter/core.c15
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c4
-rw-r--r--net/netfilter/ipset/ip_set_core.c34
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h24
-rw-r--r--net/netfilter/ipset/ip_set_hash_netportnet.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c10
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c36
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c88
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c10
-rw-r--r--net/netfilter/nf_conntrack_core.c81
-rw-r--r--net/netfilter/nf_conntrack_ftp.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c4
-rw-r--r--net/netfilter/nf_conntrack_irc.c39
-rw-r--r--net/netfilter/nf_conntrack_netlink.c98
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c40
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c46
-rw-r--r--net/netfilter/nf_conntrack_sip.c6
-rw-r--r--net/netfilter/nf_conntrack_standalone.c9
-rw-r--r--net/netfilter/nf_dup_netdev.c1
-rw-r--r--net/netfilter/nf_flow_table_core.c4
-rw-r--r--net/netfilter/nf_log_common.c12
-rw-r--r--net/netfilter/nf_nat_core.c1
-rw-r--r--net/netfilter/nf_nat_proto_common.c37
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c5
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c5
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c5
-rw-r--r--net/netfilter/nf_nat_proto_udp.c10
-rw-r--r--net/netfilter/nf_queue.c24
-rw-r--r--net/netfilter/nf_synproxy_core.c5
-rw-r--r--net/netfilter/nf_tables_api.c966
-rw-r--r--net/netfilter/nf_tables_core.c2
-rw-r--r--net/netfilter/nf_tables_trace.c9
-rw-r--r--net/netfilter/nfnetlink.c5
-rw-r--r--net/netfilter/nfnetlink_acct.c11
-rw-r--r--net/netfilter/nfnetlink_cthelper.c19
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c22
-rw-r--r--net/netfilter/nfnetlink_log.c16
-rw-r--r--net/netfilter/nfnetlink_osf.c11
-rw-r--r--net/netfilter/nfnetlink_queue.c38
-rw-r--r--net/netfilter/nft_bitwise.c14
-rw-r--r--net/netfilter/nft_byteorder.c28
-rw-r--r--net/netfilter/nft_chain_filter.c11
-rw-r--r--net/netfilter/nft_cmp.c8
-rw-r--r--net/netfilter/nft_compat.c11
-rw-r--r--net/netfilter/nft_ct.c12
-rw-r--r--net/netfilter/nft_dup_netdev.c6
-rw-r--r--net/netfilter/nft_dynset.c30
-rw-r--r--net/netfilter/nft_exthdr.c25
-rw-r--r--net/netfilter/nft_fib.c5
-rw-r--r--net/netfilter/nft_flow_offload.c26
-rw-r--r--net/netfilter/nft_fwd_netdev.c19
-rw-r--r--net/netfilter/nft_hash.c25
-rw-r--r--net/netfilter/nft_immediate.c6
-rw-r--r--net/netfilter/nft_limit.c4
-rw-r--r--net/netfilter/nft_lookup.c16
-rw-r--r--net/netfilter/nft_masq.c14
-rw-r--r--net/netfilter/nft_meta.c12
-rw-r--r--net/netfilter/nft_nat.c39
-rw-r--r--net/netfilter/nft_numgen.c15
-rw-r--r--net/netfilter/nft_objref.c16
-rw-r--r--net/netfilter/nft_osf.c31
-rw-r--r--net/netfilter/nft_payload.c44
-rw-r--r--net/netfilter/nft_queue.c12
-rw-r--r--net/netfilter/nft_range.c6
-rw-r--r--net/netfilter/nft_redir.c14
-rw-r--r--net/netfilter/nft_rt.c7
-rw-r--r--net/netfilter/nft_set_hash.c12
-rw-r--r--net/netfilter/nft_set_rbtree.c18
-rw-r--r--net/netfilter/nft_socket.c59
-rw-r--r--net/netfilter/nft_tproxy.c31
-rw-r--r--net/netfilter/nft_tunnel.c9
-rw-r--r--net/netfilter/x_tables.c18
-rw-r--r--net/netfilter/xt_RATEEST.c3
-rw-r--r--net/netfilter/xt_SECMARK.c88
-rw-r--r--net/netfilter/xt_osf.c1
-rw-r--r--net/netfilter/xt_owner.c37
-rw-r--r--net/netfilter/xt_recent.c14
-rw-r--r--net/netfilter/xt_sctp.c2
-rw-r--r--net/netfilter/xt_u32.c21
-rw-r--r--net/netlabel/netlabel_calipso.c52
-rw-r--r--net/netlabel/netlabel_cipso_v4.c19
-rw-r--r--net/netlabel/netlabel_kapi.c3
-rw-r--r--net/netlabel/netlabel_mgmt.c27
-rw-r--r--net/netlabel/netlabel_unlabeled.c27
-rw-r--r--net/netlabel/netlabel_user.h4
-rw-r--r--net/netlink/af_netlink.c166
-rw-r--r--net/netlink/diag.c7
-rw-r--r--net/netlink/genetlink.c35
-rw-r--r--net/netrom/af_netrom.c10
-rw-r--r--net/netrom/nr_subr.c7
-rw-r--r--net/netrom/nr_timer.c19
-rw-r--r--net/nfc/af_nfc.c3
-rw-r--r--net/nfc/core.c64
-rw-r--r--net/nfc/digital_core.c9
-rw-r--r--net/nfc/digital_dep.c2
-rw-r--r--net/nfc/digital_technology.c8
-rw-r--r--net/nfc/hci/llc_shdlc.c10
-rw-r--r--net/nfc/llcp.h8
-rw-r--r--net/nfc/llcp_commands.c47
-rw-r--r--net/nfc/llcp_core.c112
-rw-r--r--net/nfc/llcp_sock.c21
-rw-r--r--net/nfc/nci/core.c42
-rw-r--r--net/nfc/nci/data.c6
-rw-r--r--net/nfc/nci/hci.c9
-rw-r--r--net/nfc/nci/ntf.c6
-rw-r--r--net/nfc/nci/rsp.c2
-rw-r--r--net/nfc/nci/spi.c2
-rw-r--r--net/nfc/netlink.c75
-rw-r--r--net/nfc/nfc.h2
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/nsh/nsh.c8
-rw-r--r--net/openvswitch/actions.c62
-rw-r--r--net/openvswitch/conntrack.c3
-rw-r--r--net/openvswitch/datapath.c52
-rw-r--r--net/openvswitch/flow_netlink.c71
-rw-r--r--net/openvswitch/meter.c8
-rw-r--r--net/packet/af_packet.c143
-rw-r--r--net/packet/diag.c6
-rw-r--r--net/packet/internal.h28
-rw-r--r--net/phonet/pep.c3
-rw-r--r--net/psample/psample.c3
-rw-r--r--net/qrtr/qrtr.c17
-rw-r--r--net/qrtr/tun.c18
-rw-r--r--net/rds/connection.c24
-rw-r--r--net/rds/ib_frmr.c4
-rw-r--r--net/rds/ib_recv.c1
-rw-r--r--net/rds/message.c8
-rw-r--r--net/rds/rdma.c3
-rw-r--r--net/rds/recv.c2
-rw-r--r--net/rds/tcp.c8
-rw-r--r--net/rds/tcp.h3
-rw-r--r--net/rds/tcp_connect.c4
-rw-r--r--net/rds/tcp_listen.c8
-rw-r--r--net/rfkill/rfkill-gpio.c12
-rw-r--r--net/rose/af_rose.c62
-rw-r--r--net/rose/rose_link.c3
-rw-r--r--net/rose/rose_loopback.c18
-rw-r--r--net/rose/rose_route.c6
-rw-r--r--net/rose/rose_timer.c34
-rw-r--r--net/rxrpc/af_rxrpc.c6
-rw-r--r--net/rxrpc/call_accept.c1
-rw-r--r--net/rxrpc/call_event.c3
-rw-r--r--net/rxrpc/input.c2
-rw-r--r--net/rxrpc/key.c6
-rw-r--r--net/rxrpc/local_object.c3
-rw-r--r--net/rxrpc/net_ns.c2
-rw-r--r--net/rxrpc/peer_object.c14
-rw-r--r--net/rxrpc/sendmsg.c10
-rw-r--r--net/rxrpc/sysctl.c4
-rw-r--r--net/sched/Kconfig39
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_api.c16
-rw-r--r--net/sched/act_mirred.c2
-rw-r--r--net/sched/act_pedit.c31
-rw-r--r--net/sched/act_skbmod.c12
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/cls_api.c86
-rw-r--r--net/sched/cls_flow.c2
-rw-r--r--net/sched/cls_flower.c21
-rw-r--r--net/sched/cls_fw.c11
-rw-r--r--net/sched/cls_route.c13
-rw-r--r--net/sched/cls_rsvp.c28
-rw-r--r--net/sched/cls_rsvp.h775
-rw-r--r--net/sched/cls_rsvp6.c28
-rw-r--r--net/sched/cls_tcindex.c694
-rw-r--r--net/sched/cls_u32.c103
-rw-r--r--net/sched/em_text.c4
-rw-r--r--net/sched/ematch.c2
-rw-r--r--net/sched/sch_api.c125
-rw-r--r--net/sched/sch_atm.c7
-rw-r--r--net/sched/sch_cake.c18
-rw-r--r--net/sched/sch_cbq.c10
-rw-r--r--net/sched/sch_cbs.c2
-rw-r--r--net/sched/sch_choke.c7
-rw-r--r--net/sched/sch_drr.c4
-rw-r--r--net/sched/sch_dsmark.c5
-rw-r--r--net/sched/sch_fifo.c5
-rw-r--r--net/sched/sch_fq_codel.c14
-rw-r--r--net/sched/sch_generic.c68
-rw-r--r--net/sched/sch_gred.c2
-rw-r--r--net/sched/sch_hfsc.c16
-rw-r--r--net/sched/sch_htb.c4
-rw-r--r--net/sched/sch_ingress.c16
-rw-r--r--net/sched/sch_mq.c28
-rw-r--r--net/sched/sch_mqprio.c201
-rw-r--r--net/sched/sch_multiq.c6
-rw-r--r--net/sched/sch_netem.c76
-rw-r--r--net/sched/sch_plug.c2
-rw-r--r--net/sched/sch_prio.c6
-rw-r--r--net/sched/sch_qfq.c60
-rw-r--r--net/sched/sch_red.c15
-rw-r--r--net/sched/sch_sfb.c17
-rw-r--r--net/sched/sch_sfq.c4
-rw-r--r--net/sched/sch_tbf.c4
-rw-r--r--net/sched/sch_teql.c3
-rw-r--r--net/sctp/associola.c3
-rw-r--r--net/sctp/auth.c24
-rw-r--r--net/sctp/bind_addr.c25
-rw-r--r--net/sctp/diag.c21
-rw-r--r--net/sctp/endpointola.c23
-rw-r--r--net/sctp/input.c21
-rw-r--r--net/sctp/ipv6.c14
-rw-r--r--net/sctp/proc.c16
-rw-r--r--net/sctp/protocol.c12
-rw-r--r--net/sctp/sm_make_chunk.c46
-rw-r--r--net/sctp/sm_sideeffect.c17
-rw-r--r--net/sctp/sm_statefuns.c168
-rw-r--r--net/sctp/socket.c92
-rw-r--r--net/sctp/stream_interleave.c3
-rw-r--r--net/sctp/stream_sched.c2
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/smc/af_smc.c56
-rw-r--r--net/smc/smc.h5
-rw-r--r--net/smc/smc_cdc.c2
-rw-r--r--net/smc/smc_clc.c3
-rw-r--r--net/smc/smc_close.c36
-rw-r--r--net/smc/smc_close.h1
-rw-r--r--net/smc/smc_core.c9
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/socket.c341
-rw-r--r--net/sunrpc/addr.c2
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c49
-rw-r--r--net/sunrpc/auth_gss/auth_gss_internal.h45
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c31
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c49
-rw-r--r--net/sunrpc/backchannel_rqst.c14
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/sched.c17
-rw-r--r--net/sunrpc/svc.c33
-rw-r--r--net/sunrpc/svc_xprt.c4
-rw-r--r--net/sunrpc/svcsock.c27
-rw-r--r--net/sunrpc/xdr.c6
-rw-r--r--net/sunrpc/xprt.c72
-rw-r--r--net/sunrpc/xprtrdma/module.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c9
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c6
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c3
-rw-r--r--net/sunrpc/xprtrdma/transport.c5
-rw-r--r--net/sunrpc/xprtsock.c7
-rw-r--r--net/tipc/bearer.c101
-rw-r--r--net/tipc/core.c5
-rw-r--r--net/tipc/discover.c7
-rw-r--r--net/tipc/link.c18
-rw-r--r--net/tipc/monitor.c4
-rw-r--r--net/tipc/msg.c11
-rw-r--r--net/tipc/name_distr.c2
-rw-r--r--net/tipc/name_table.c2
-rw-r--r--net/tipc/netlink.c4
-rw-r--r--net/tipc/netlink_compat.c5
-rw-r--r--net/tipc/socket.c53
-rw-r--r--net/tipc/topsrv.c48
-rw-r--r--net/tls/tls_device.c24
-rw-r--r--net/tls/tls_sw.c6
-rw-r--r--net/unix/Kconfig5
-rw-r--r--net/unix/Makefile2
-rw-r--r--net/unix/af_unix.c182
-rw-r--r--net/unix/garbage.c82
-rw-r--r--net/unix/scm.c150
-rw-r--r--net/unix/scm.h10
-rw-r--r--net/vmw_vsock/af_vsock.c33
-rw-r--r--net/vmw_vsock/hyperv_transport.c4
-rw-r--r--net/vmw_vsock/virtio_transport.c7
-rw-r--r--net/vmw_vsock/virtio_transport_common.c8
-rw-r--r--net/vmw_vsock/vmci_transport.c9
-rw-r--r--net/wireless/Makefile2
-rw-r--r--net/wireless/certs/wens.hex87
-rw-r--r--net/wireless/core.c7
-rw-r--r--net/wireless/core.h2
-rw-r--r--net/wireless/debugfs.c3
-rw-r--r--net/wireless/nl80211.c32
-rw-r--r--net/wireless/reg.c19
-rw-r--r--net/wireless/scan.c4
-rw-r--r--net/wireless/sme.c31
-rw-r--r--net/wireless/util.c50
-rw-r--r--net/wireless/wext-core.c11
-rw-r--r--net/wireless/wext-spy.c14
-rw-r--r--net/x25/af_x25.c28
-rw-r--r--net/x25/x25_dev.c2
-rw-r--r--net/xdp/xsk.c8
-rw-r--r--net/xdp/xsk_queue.h7
-rw-r--r--net/xfrm/Makefile2
-rw-r--r--net/xfrm/xfrm_device.c6
-rw-r--r--net/xfrm/xfrm_input.c2
-rw-r--r--net/xfrm/xfrm_interface_core.c (renamed from net/xfrm/xfrm_interface.c)52
-rw-r--r--net/xfrm/xfrm_ipcomp.c1
-rw-r--r--net/xfrm/xfrm_policy.c73
-rw-r--r--net/xfrm/xfrm_state.c33
-rw-r--r--net/xfrm/xfrm_user.c67
609 files changed, 10334 insertions, 17589 deletions
diff --git a/net/802/garp.c b/net/802/garp.c
index 7f50d47470bd..8e19f51833d6 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -206,6 +206,19 @@ static void garp_attr_destroy(struct garp_applicant *app, struct garp_attr *attr
kfree(attr);
}
+static void garp_attr_destroy_all(struct garp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct garp_attr *attr;
+
+ for (node = rb_first(&app->gid);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct garp_attr, node);
+ garp_attr_destroy(app, attr);
+ }
+}
+
static int garp_pdu_init(struct garp_applicant *app)
{
struct sk_buff *skb;
@@ -612,6 +625,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
spin_lock_bh(&app->lock);
garp_gid_event(app, GARP_EVENT_TRANSMIT_PDU);
+ garp_attr_destroy_all(app);
garp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/802/mrp.c b/net/802/mrp.c
index a808dd5bbb27..ce6e4774d333 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -295,6 +295,19 @@ static void mrp_attr_destroy(struct mrp_applicant *app, struct mrp_attr *attr)
kfree(attr);
}
+static void mrp_attr_destroy_all(struct mrp_applicant *app)
+{
+ struct rb_node *node, *next;
+ struct mrp_attr *attr;
+
+ for (node = rb_first(&app->mad);
+ next = node ? rb_next(node) : NULL, node != NULL;
+ node = next) {
+ attr = rb_entry(node, struct mrp_attr, node);
+ mrp_attr_destroy(app, attr);
+ }
+}
+
static int mrp_pdu_init(struct mrp_applicant *app)
{
struct sk_buff *skb;
@@ -596,7 +609,10 @@ static void mrp_join_timer(struct timer_list *t)
spin_unlock(&app->lock);
mrp_queue_xmit(app);
- mrp_join_timer_arm(app);
+ spin_lock(&app->lock);
+ if (likely(app->active))
+ mrp_join_timer_arm(app);
+ spin_unlock(&app->lock);
}
static void mrp_periodic_timer_arm(struct mrp_applicant *app)
@@ -610,11 +626,12 @@ static void mrp_periodic_timer(struct timer_list *t)
struct mrp_applicant *app = from_timer(app, t, periodic_timer);
spin_lock(&app->lock);
- mrp_mad_event(app, MRP_EVENT_PERIODIC);
- mrp_pdu_queue(app);
+ if (likely(app->active)) {
+ mrp_mad_event(app, MRP_EVENT_PERIODIC);
+ mrp_pdu_queue(app);
+ mrp_periodic_timer_arm(app);
+ }
spin_unlock(&app->lock);
-
- mrp_periodic_timer_arm(app);
}
static int mrp_pdu_parse_end_mark(struct sk_buff *skb, int *offset)
@@ -862,6 +879,7 @@ int mrp_init_applicant(struct net_device *dev, struct mrp_application *appl)
app->dev = dev;
app->app = appl;
app->mad = RB_ROOT;
+ app->active = true;
spin_lock_init(&app->lock);
skb_queue_head_init(&app->queue);
rcu_assign_pointer(dev->mrp_port->applicants[appl->type], app);
@@ -890,6 +908,9 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
RCU_INIT_POINTER(port->applicants[appl->type], NULL);
+ spin_lock_bh(&app->lock);
+ app->active = false;
+ spin_unlock_bh(&app->lock);
/* Delete timer and generate a final TX event to flush out
* all pending messages before the applicant is gone.
*/
@@ -898,6 +919,7 @@ void mrp_uninit_applicant(struct net_device *dev, struct mrp_application *appl)
spin_lock_bh(&app->lock);
mrp_mad_event(app, MRP_EVENT_TX);
+ mrp_attr_destroy_all(app);
mrp_pdu_queue(app);
spin_unlock_bh(&app->lock);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5e9950453955..512ada90657b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -277,7 +277,8 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
return 0;
out_free_newdev:
- if (new_dev->reg_state == NETREG_UNINITIALIZED)
+ if (new_dev->reg_state == NETREG_UNINITIALIZED ||
+ new_dev->reg_state == NETREG_UNREGISTERED)
free_netdev(new_dev);
return err;
}
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 4f60e86f4b8d..e92c914316cb 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -380,6 +380,8 @@ int vlan_vids_add_by_dev(struct net_device *dev,
return 0;
list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ if (!vlan_hw_filter_capable(by_dev, vid_info->proto))
+ continue;
err = vlan_vid_add(dev, vid_info->proto, vid_info->vid);
if (err)
goto unwind;
@@ -390,6 +392,8 @@ unwind:
list_for_each_entry_continue_reverse(vid_info,
&vlan_info->vid_list,
list) {
+ if (!vlan_hw_filter_capable(by_dev, vid_info->proto))
+ continue;
vlan_vid_del(dev, vid_info->proto, vid_info->vid);
}
@@ -409,8 +413,11 @@ void vlan_vids_del_by_dev(struct net_device *dev,
if (!vlan_info)
return;
- list_for_each_entry(vid_info, &vlan_info->vid_list, list)
+ list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+ if (!vlan_hw_filter_capable(by_dev, vid_info->proto))
+ continue;
vlan_vid_del(dev, vid_info->proto, vid_info->vid);
+ }
}
EXPORT_SYMBOL(vlan_vids_del_by_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 84ef83772114..ba9b8980f100 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -115,8 +115,8 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
* NOTE: THIS ASSUMES DIX ETHERNET, SPECIFICALLY NOT SUPPORTING
* OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
*/
- if (veth->h_vlan_proto != vlan->vlan_proto ||
- vlan->flags & VLAN_FLAG_REORDER_HDR) {
+ if (vlan->flags & VLAN_FLAG_REORDER_HDR ||
+ veth->h_vlan_proto != vlan->vlan_proto) {
u16 vlan_tci;
vlan_tci = vlan->vlan_id;
vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb->priority);
@@ -369,7 +369,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCSHWTSTAMP:
- if (!net_eq(dev_net(dev), &init_net))
+ if (!net_eq(dev_net(dev), dev_net(real_dev)))
break;
case SIOCGMIIPHY:
case SIOCGMIIREG:
diff --git a/net/9p/client.c b/net/9p/client.c
index d62f83f93d7b..98301add20f4 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -553,6 +553,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)
kfree(ename);
} else {
err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode);
+ if (err)
+ goto out_err;
err = -ecode;
p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode);
@@ -906,16 +908,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
struct p9_fid *fid;
p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
- fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);
+ fid = kzalloc(sizeof(struct p9_fid), GFP_KERNEL);
if (!fid)
return NULL;
- memset(&fid->qid, 0, sizeof(struct p9_qid));
fid->mode = -1;
fid->uid = current_fsuid();
fid->clnt = clnt;
- fid->rdir = NULL;
- fid->fid = 0;
idr_preload(GFP_KERNEL);
spin_lock_irq(&clnt->lock);
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 462ba144cb39..9104a2fce015 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -243,6 +243,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
uint16_t *nwname = va_arg(ap, uint16_t *);
char ***wnames = va_arg(ap, char ***);
+ *wnames = NULL;
+
errcode = p9pdu_readf(pdu, proto_version,
"w", nwname);
if (!errcode) {
@@ -252,6 +254,8 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
GFP_NOFS);
if (!*wnames)
errcode = -ENOMEM;
+ else
+ (*wnames)[0] = NULL;
}
if (!errcode) {
@@ -263,8 +267,10 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
proto_version,
"s",
&(*wnames)[i]);
- if (errcode)
+ if (errcode) {
+ (*wnames)[i] = NULL;
break;
+ }
}
}
@@ -272,11 +278,14 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
if (*wnames) {
int i;
- for (i = 0; i < *nwname; i++)
+ for (i = 0; i < *nwname; i++) {
+ if (!(*wnames)[i])
+ break;
kfree((*wnames)[i]);
+ }
+ kfree(*wnames);
+ *wnames = NULL;
}
- kfree(*wnames);
- *wnames = NULL;
}
}
break;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index b6dcb40fa8a7..0ef3d2ede6e6 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -133,7 +133,7 @@ struct p9_conn {
struct list_head unsent_req_list;
struct p9_req_t *rreq;
struct p9_req_t *wreq;
- char tmp_buf[7];
+ char tmp_buf[P9_HDRSZ];
struct p9_fcall rc;
int wpos;
int wsize;
@@ -215,11 +215,15 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
list_move(&req->req_list, &cancel_list);
+ req->status = REQ_STATUS_ERROR;
}
list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
list_move(&req->req_list, &cancel_list);
+ req->status = REQ_STATUS_ERROR;
}
+ spin_unlock(&m->client->lock);
+
list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
list_del(&req->req_list);
@@ -227,7 +231,6 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
req->t_err = err;
p9_client_cb(m->client, req, REQ_STATUS_ERROR);
}
- spin_unlock(&m->client->lock);
}
static __poll_t
@@ -303,7 +306,7 @@ static void p9_read_work(struct work_struct *work)
if (!m->rc.sdata) {
m->rc.sdata = m->tmp_buf;
m->rc.offset = 0;
- m->rc.capacity = 7; /* start by reading header */
+ m->rc.capacity = P9_HDRSZ; /* start by reading header */
}
clear_bit(Rpending, &m->wsched);
@@ -326,7 +329,7 @@ static void p9_read_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "got new header\n");
/* Header size */
- m->rc.size = 7;
+ m->rc.size = P9_HDRSZ;
err = p9_parse_header(&m->rc, &m->rc.size, NULL, NULL, 0);
if (err) {
p9_debug(P9_DEBUG_ERROR,
@@ -835,11 +838,14 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
goto out_free_ts;
if (!(ts->rd->f_mode & FMODE_READ))
goto out_put_rd;
+ /* prevent workers from hanging on IO when fd is a pipe */
+ ts->rd->f_flags |= O_NONBLOCK;
ts->wr = fget(wfd);
if (!ts->wr)
goto out_put_rd;
if (!(ts->wr->f_mode & FMODE_WRITE))
goto out_put_wr;
+ ts->wr->f_flags |= O_NONBLOCK;
client->trans = ts;
client->status = Connected;
@@ -861,8 +867,10 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
struct file *file;
p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
- if (!p)
+ if (!p) {
+ sock_release(csocket);
return -ENOMEM;
+ }
csocket->sk->sk_allocation = GFP_NOIO;
file = sock_alloc_file(csocket, 0, NULL);
@@ -1038,7 +1046,7 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
csocket = NULL;
- if (addr == NULL)
+ if (!addr || !strlen(addr))
return -EINVAL;
if (strlen(addr) >= UNIX_PATH_MAX) {
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 119103bfa82e..4bbb8683d451 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -400,6 +400,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
struct p9_trans_rdma *rdma = client->trans;
struct ib_recv_wr wr;
struct ib_sge sge;
+ int ret;
c->busa = ib_dma_map_single(rdma->cm_id->device,
c->rc.sdata, client->msize,
@@ -417,7 +418,12 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)
wr.wr_cqe = &c->cqe;
wr.sg_list = &sge;
wr.num_sge = 1;
- return ib_post_recv(rdma->qp, &wr, NULL);
+
+ ret = ib_post_recv(rdma->qp, &wr, NULL);
+ if (ret)
+ ib_dma_unmap_single(rdma->cm_id->device, c->busa,
+ client->msize, DMA_FROM_DEVICE);
+ return ret;
error:
p9_debug(P9_DEBUG_ERROR, "EIO\n");
@@ -514,7 +520,7 @@ dont_need_post_recv:
if (down_interruptible(&rdma->sq_sem)) {
err = -EINTR;
- goto send_error;
+ goto dma_unmap;
}
/* Mark request as `sent' *before* we actually send it,
@@ -524,11 +530,14 @@ dont_need_post_recv:
req->status = REQ_STATUS_SENT;
err = ib_post_send(rdma->qp, &wr, NULL);
if (err)
- goto send_error;
+ goto dma_unmap;
/* Success */
return 0;
+dma_unmap:
+ ib_dma_unmap_single(rdma->cm_id->device, c->busa,
+ c->req->tc.size, DMA_TO_DEVICE);
/* Handle errors that happened during or while preparing the send: */
send_error:
req->status = REQ_STATUS_ERROR;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 849336211c79..6b3357a77d99 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -409,7 +409,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
struct scatterlist *sgs[4];
- size_t offs;
+ size_t offs = 0;
int need_drop = 0;
int kicked = 0;
@@ -620,7 +620,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
if (!chan->vc_wq) {
err = -ENOMEM;
- goto out_free_tag;
+ goto out_remove_file;
}
init_waitqueue_head(chan->vc_wq);
chan->ring_bufs_avail = 1;
@@ -638,6 +638,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
return 0;
+out_remove_file:
+ sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr);
out_free_tag:
kfree(tag);
out_free_vq:
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 9daab0dd833b..c87146a49636 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -138,7 +138,7 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
{
- struct xen_9pfs_front_priv *priv = NULL;
+ struct xen_9pfs_front_priv *priv;
RING_IDX cons, prod, masked_cons, masked_prod;
unsigned long flags;
u32 size = p9_req->tc.size;
@@ -151,7 +151,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
break;
}
read_unlock(&xen_9pfs_lock);
- if (!priv || priv->client != client)
+ if (list_entry_is_head(priv, &xen_9pfs_devs, list))
return -EINVAL;
num = p9_req->tc.tag % priv->num_rings;
@@ -230,6 +230,14 @@ static void p9_xen_response(struct work_struct *work)
continue;
}
+ if (h.size > req->rc.capacity) {
+ dev_warn(&priv->dev->dev,
+ "requested packet size too big: %d for tag %d with capacity %zd\n",
+ h.size, h.tag, req->rc.capacity);
+ req->status = REQ_STATUS_ERROR;
+ goto recv_error;
+ }
+
memcpy(&req->rc, &h, sizeof(h));
req->rc.offset = 0;
@@ -239,6 +247,7 @@ static void p9_xen_response(struct work_struct *work)
masked_prod, &masked_cons,
XEN_9PFS_RING_SIZE);
+recv_error:
virt_mb();
cons += h.size;
ring->intf->in_cons = cons;
@@ -290,6 +299,10 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
write_unlock(&xen_9pfs_lock);
for (i = 0; i < priv->num_rings; i++) {
+ struct xen_9pfs_dataring *ring = &priv->rings[i];
+
+ cancel_work_sync(&ring->work);
+
if (!priv->rings[i].intf)
break;
if (priv->rings[i].irq > 0)
@@ -301,9 +314,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
ref = priv->rings[i].intf->ref[j];
gnttab_end_foreign_access(ref, 0, 0);
}
- free_pages((unsigned long)priv->rings[i].data.in,
- XEN_9PFS_RING_ORDER -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(priv->rings[i].data.in,
+ 1UL << (XEN_9PFS_RING_ORDER +
+ XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(priv->rings[i].ref, 0, 0);
free_page((unsigned long)priv->rings[i].intf);
@@ -341,8 +354,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
if (ret < 0)
goto out;
ring->ref = ret;
- bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ bytes = alloc_pages_exact(1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT),
+ GFP_KERNEL | __GFP_ZERO);
if (!bytes) {
ret = -ENOMEM;
goto out;
@@ -373,28 +386,31 @@ out:
if (bytes) {
for (i--; i >= 0; i--)
gnttab_end_foreign_access(ring->intf->ref[i], 0, 0);
- free_pages((unsigned long)bytes,
- XEN_9PFS_RING_ORDER -
- (PAGE_SHIFT - XEN_PAGE_SHIFT));
+ free_pages_exact(bytes, 1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT));
}
gnttab_end_foreign_access(ring->ref, 0, 0);
free_page((unsigned long)ring->intf);
return ret;
}
-static int xen_9pfs_front_probe(struct xenbus_device *dev,
- const struct xenbus_device_id *id)
+static int xen_9pfs_front_init(struct xenbus_device *dev)
{
int ret, i;
struct xenbus_transaction xbt;
- struct xen_9pfs_front_priv *priv = NULL;
- char *versions;
+ struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
+ char *versions, *v;
unsigned int max_rings, max_ring_order, len = 0;
versions = xenbus_read(XBT_NIL, dev->otherend, "versions", &len);
if (IS_ERR(versions))
return PTR_ERR(versions);
- if (strcmp(versions, "1")) {
+ for (v = versions; *v; v++) {
+ if (simple_strtoul(v, &v, 10) == 1) {
+ v = NULL;
+ break;
+ }
+ }
+ if (v) {
kfree(versions);
return -EINVAL;
}
@@ -407,11 +423,6 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
if (max_ring_order < XEN_9PFS_RING_ORDER)
return -EINVAL;
- priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- priv->dev = dev;
priv->num_rings = XEN_9PFS_NUM_RINGS;
priv->rings = kcalloc(priv->num_rings, sizeof(*priv->rings),
GFP_KERNEL);
@@ -469,23 +480,35 @@ static int xen_9pfs_front_probe(struct xenbus_device *dev,
goto error;
}
- write_lock(&xen_9pfs_lock);
- list_add_tail(&priv->list, &xen_9pfs_devs);
- write_unlock(&xen_9pfs_lock);
- dev_set_drvdata(&dev->dev, priv);
- xenbus_switch_state(dev, XenbusStateInitialised);
-
return 0;
error_xenbus:
xenbus_transaction_end(xbt, 1);
xenbus_dev_fatal(dev, ret, "writing xenstore");
error:
- dev_set_drvdata(&dev->dev, NULL);
xen_9pfs_front_free(priv);
return ret;
}
+static int xen_9pfs_front_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ struct xen_9pfs_front_priv *priv = NULL;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ dev_set_drvdata(&dev->dev, priv);
+
+ write_lock(&xen_9pfs_lock);
+ list_add_tail(&priv->list, &xen_9pfs_devs);
+ write_unlock(&xen_9pfs_lock);
+
+ return 0;
+}
+
static int xen_9pfs_front_resume(struct xenbus_device *dev)
{
dev_warn(&dev->dev, "suspend/resume unsupported\n");
@@ -504,6 +527,8 @@ static void xen_9pfs_front_changed(struct xenbus_device *dev,
break;
case XenbusStateInitWait:
+ if (!xen_9pfs_front_init(dev))
+ xenbus_switch_state(dev, XenbusStateInitialised);
break;
case XenbusStateConnected:
diff --git a/net/Kconfig b/net/Kconfig
index 228dfa382eec..4bef62b4c806 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -197,7 +197,6 @@ config BRIDGE_NETFILTER
source "net/netfilter/Kconfig"
source "net/ipv4/netfilter/Kconfig"
source "net/ipv6/netfilter/Kconfig"
-source "net/decnet/netfilter/Kconfig"
source "net/bridge/netfilter/Kconfig"
endif
@@ -214,7 +213,6 @@ source "net/802/Kconfig"
source "net/bridge/Kconfig"
source "net/dsa/Kconfig"
source "net/8021q/Kconfig"
-source "net/decnet/Kconfig"
source "net/llc/Kconfig"
source "drivers/net/appletalk/Kconfig"
source "net/x25/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index bdaf53925acd..177b6fbac29c 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
-obj-$(CONFIG_UNIX) += unix/
+obj-$(CONFIG_UNIX_SCM) += unix/
obj-$(CONFIG_NET) += ipv6/
obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
@@ -39,7 +39,6 @@ obj-$(CONFIG_AF_KCM) += kcm/
obj-$(CONFIG_STREAM_PARSER) += strparser/
obj-$(CONFIG_ATM) += atm/
obj-$(CONFIG_L2TP) += l2tp/
-obj-$(CONFIG_DECNET) += decnet/
obj-$(CONFIG_PHONET) += phonet/
ifneq ($(CONFIG_VLAN_8021Q),)
obj-y += 8021q/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 2880ac470379..c4f1bfe6e040 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1573,8 +1573,8 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
struct sk_buff *skb;
struct net_device *dev;
struct ddpehdr *ddp;
- int size;
- struct atalk_route *rt;
+ int size, hard_header_len;
+ struct atalk_route *rt, *rt_lo = NULL;
int err;
if (flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
@@ -1637,7 +1637,22 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
sk, size, dev->name);
- size += dev->hard_header_len;
+ hard_header_len = dev->hard_header_len;
+ /* Leave room for loopback hardware header if necessary */
+ if (usat->sat_addr.s_node == ATADDR_BCAST &&
+ (dev->flags & IFF_LOOPBACK || !(rt->flags & RTF_GATEWAY))) {
+ struct atalk_addr at_lo;
+
+ at_lo.s_node = 0;
+ at_lo.s_net = 0;
+
+ rt_lo = atrtr_find(&at_lo);
+
+ if (rt_lo && rt_lo->dev->hard_header_len > hard_header_len)
+ hard_header_len = rt_lo->dev->hard_header_len;
+ }
+
+ size += hard_header_len;
release_sock(sk);
skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err);
lock_sock(sk);
@@ -1645,7 +1660,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
skb_reserve(skb, ddp_dl->header_length);
- skb_reserve(skb, dev->hard_header_len);
+ skb_reserve(skb, hard_header_len);
skb->dev = dev;
SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
@@ -1696,18 +1711,12 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/* loop back */
skb_orphan(skb);
if (ddp->deh_dnode == ATADDR_BCAST) {
- struct atalk_addr at_lo;
-
- at_lo.s_node = 0;
- at_lo.s_net = 0;
-
- rt = atrtr_find(&at_lo);
- if (!rt) {
+ if (!rt_lo) {
kfree_skb(skb);
err = -ENETUNREACH;
goto out;
}
- dev = rt->dev;
+ dev = rt_lo->dev;
skb->dev = dev;
}
ddp_dl->request(ddp_dl, skb, dev->dev_addr);
@@ -1799,15 +1808,14 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
}
case TIOCINQ: {
- /*
- * These two are safe on a single CPU system as only
- * user tasks fiddle here
- */
- struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
+ struct sk_buff *skb;
long amount = 0;
+ spin_lock_irq(&sk->sk_receive_queue.lock);
+ skb = skb_peek(&sk->sk_receive_queue);
if (skb)
amount = skb->len - sizeof(struct ddpehdr);
+ spin_unlock_irq(&sk->sk_receive_queue.lock);
rc = put_user(amount, (int __user *)argp);
break;
}
diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 2ff0e5e470e3..38f7f164e484 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -71,14 +71,17 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
case SIOCINQ:
{
struct sk_buff *skb;
+ int amount;
if (sock->state != SS_CONNECTED) {
error = -EINVAL;
goto done;
}
+ spin_lock_irq(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
- error = put_user(skb ? skb->len : 0,
- (int __user *)argp) ? -EFAULT : 0;
+ amount = skb ? skb->len : 0;
+ spin_unlock_irq(&sk->sk_receive_queue.lock);
+ error = put_user(amount, (int __user *)argp) ? -EFAULT : 0;
goto done;
}
case SIOCGSTAMP: /* borrowed from IP */
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c
index 46d6cd9a36ae..c4e9538ac144 100644
--- a/net/atm/mpoa_proc.c
+++ b/net/atm/mpoa_proc.c
@@ -222,11 +222,12 @@ static ssize_t proc_mpc_write(struct file *file, const char __user *buff,
if (!page)
return -ENOMEM;
- for (p = page, len = 0; len < nbytes; p++, len++) {
+ for (p = page, len = 0; len < nbytes; p++) {
if (get_user(*p, buff++)) {
free_page((unsigned long)page);
return -EFAULT;
}
+ len += 1;
if (*p == '\0' || *p == '\n')
break;
}
diff --git a/net/atm/resources.c b/net/atm/resources.c
index bada395ecdb1..9389080224f8 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -447,6 +447,7 @@ done:
return error;
}
+#ifdef CONFIG_PROC_FS
void *atm_dev_seq_start(struct seq_file *seq, loff_t *pos)
{
mutex_lock(&atm_dev_mutex);
@@ -462,3 +463,4 @@ void *atm_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
return seq_list_next(v, &atm_devs, pos);
}
+#endif
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index a45db78eaf00..7861f2747f84 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -80,6 +80,7 @@ static void ax25_kill_by_device(struct net_device *dev)
{
ax25_dev *ax25_dev;
ax25_cb *s;
+ struct sock *sk;
if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL)
return;
@@ -88,11 +89,26 @@ static void ax25_kill_by_device(struct net_device *dev)
again:
ax25_for_each(s, &ax25_list) {
if (s->ax25_dev == ax25_dev) {
- s->ax25_dev = NULL;
+ sk = s->sk;
+ if (!sk) {
+ spin_unlock_bh(&ax25_list_lock);
+ ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ spin_lock_bh(&ax25_list_lock);
+ goto again;
+ }
+ sock_hold(sk);
spin_unlock_bh(&ax25_list_lock);
+ lock_sock(sk);
ax25_disconnect(s, ENETUNREACH);
+ s->ax25_dev = NULL;
+ if (sk->sk_socket) {
+ dev_put(ax25_dev->dev);
+ ax25_dev_put(ax25_dev);
+ }
+ release_sock(sk);
spin_lock_bh(&ax25_list_lock);
-
+ sock_put(sk);
/* The entry could have been deleted from the
* list meanwhile and thus the next pointer is
* no longer valid. Play it safe and restart
@@ -356,21 +372,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl)))
return -EFAULT;
- if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL)
- return -ENODEV;
-
if (ax25_ctl.digi_count > AX25_MAX_DIGIS)
return -EINVAL;
if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr);
+ if (!ax25_dev)
+ return -ENODEV;
+
digi.ndigi = ax25_ctl.digi_count;
for (k = 0; k < digi.ndigi; k++)
digi.calls[k] = ax25_ctl.digi_addr[k];
- if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL)
+ ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev);
+ if (!ax25) {
+ ax25_dev_put(ax25_dev);
return -ENOTCONN;
+ }
switch (ax25_ctl.cmd) {
case AX25_KILL:
@@ -437,6 +457,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg)
}
out_put:
+ ax25_dev_put(ax25_dev);
ax25_cb_put(ax25);
return ret;
@@ -962,14 +983,16 @@ static int ax25_release(struct socket *sock)
{
struct sock *sk = sock->sk;
ax25_cb *ax25;
+ ax25_dev *ax25_dev;
if (sk == NULL)
return 0;
sock_hold(sk);
- sock_orphan(sk);
lock_sock(sk);
+ sock_orphan(sk);
ax25 = sk_to_ax25(sk);
+ ax25_dev = ax25->ax25_dev;
if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
@@ -1031,6 +1054,15 @@ static int ax25_release(struct socket *sock)
sk->sk_state_change(sk);
ax25_destroy_socket(ax25);
}
+ if (ax25_dev) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ dev_put(ax25_dev->dev);
+ ax25_dev_put(ax25_dev);
+ }
sock->sk = NULL;
release_sock(sk);
@@ -1107,8 +1139,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
}
}
- if (ax25_dev != NULL)
+ if (ax25_dev) {
ax25_fillin_cb(ax25, ax25_dev);
+ dev_hold(ax25_dev->dev);
+ }
done:
ax25_cb_add(ax25);
diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c
index d92195cd7834..55a611f7239b 100644
--- a/net/ax25/ax25_dev.c
+++ b/net/ax25/ax25_dev.c
@@ -40,6 +40,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr)
for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next)
if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) {
res = ax25_dev;
+ ax25_dev_hold(ax25_dev);
}
spin_unlock_bh(&ax25_dev_lock);
@@ -59,6 +60,7 @@ void ax25_dev_device_up(struct net_device *dev)
return;
}
+ refcount_set(&ax25_dev->refcount, 1);
dev->ax25_ptr = ax25_dev;
ax25_dev->dev = dev;
dev_hold(dev);
@@ -87,6 +89,7 @@ void ax25_dev_device_up(struct net_device *dev)
ax25_dev->next = ax25_dev_list;
ax25_dev_list = ax25_dev;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_hold(ax25_dev);
ax25_register_dev_sysctl(ax25_dev);
}
@@ -116,9 +119,10 @@ void ax25_dev_device_down(struct net_device *dev)
if ((s = ax25_dev_list) == ax25_dev) {
ax25_dev_list = s->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put(dev);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -126,9 +130,10 @@ void ax25_dev_device_down(struct net_device *dev)
if (s->next == ax25_dev) {
s->next = ax25_dev->next;
spin_unlock_bh(&ax25_dev_lock);
+ ax25_dev_put(ax25_dev);
dev->ax25_ptr = NULL;
dev_put(dev);
- kfree(ax25_dev);
+ ax25_dev_put(ax25_dev);
return;
}
@@ -136,6 +141,7 @@ void ax25_dev_device_down(struct net_device *dev)
}
spin_unlock_bh(&ax25_dev_lock);
dev->ax25_ptr = NULL;
+ ax25_dev_put(ax25_dev);
}
int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
@@ -147,20 +153,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd)
switch (cmd) {
case SIOCAX25ADDFWD:
- if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL)
+ fwd_dev = ax25_addr_ax25dev(&fwd->port_to);
+ if (!fwd_dev) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
- if (ax25_dev->forward != NULL)
+ }
+ if (ax25_dev->forward) {
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = fwd_dev->dev;
+ ax25_dev_put(fwd_dev);
+ ax25_dev_put(ax25_dev);
break;
case SIOCAX25DELFWD:
- if (ax25_dev->forward == NULL)
+ if (!ax25_dev->forward) {
+ ax25_dev_put(ax25_dev);
return -EINVAL;
+ }
ax25_dev->forward = NULL;
+ ax25_dev_put(ax25_dev);
break;
default:
+ ax25_dev_put(ax25_dev);
return -EINVAL;
}
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 66d54fc11831..8f81de88f006 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -78,11 +78,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_dev *ax25_dev;
int i;
- if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL)
- return -EINVAL;
if (route->digi_count > AX25_MAX_DIGIS)
return -EINVAL;
+ ax25_dev = ax25_addr_ax25dev(&route->port_addr);
+ if (!ax25_dev)
+ return -EINVAL;
+
write_lock_bh(&ax25_route_lock);
ax25_rt = ax25_route_list;
@@ -94,6 +96,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if (route->digi_count != 0) {
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -104,6 +107,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
ax25_rt = ax25_rt->next;
@@ -111,6 +115,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
@@ -123,6 +128,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) {
write_unlock_bh(&ax25_route_lock);
kfree(ax25_rt);
+ ax25_dev_put(ax25_dev);
return -ENOMEM;
}
ax25_rt->digipeat->lastrepeat = -1;
@@ -135,6 +141,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route)
ax25_rt->next = ax25_route_list;
ax25_route_list = ax25_rt;
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -176,6 +183,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route)
}
}
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return 0;
}
@@ -218,6 +226,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option)
out:
write_unlock_bh(&ax25_route_lock);
+ ax25_dev_put(ax25_dev);
return err;
}
diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c
index 038b109b2be7..c129865cad9f 100644
--- a/net/ax25/ax25_subr.c
+++ b/net/ax25/ax25_subr.c
@@ -264,12 +264,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason)
{
ax25_clear_queues(ax25);
- if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
- ax25_stop_heartbeat(ax25);
- ax25_stop_t1timer(ax25);
- ax25_stop_t2timer(ax25);
- ax25_stop_t3timer(ax25);
- ax25_stop_idletimer(ax25);
+ if (reason == ENETUNREACH) {
+ del_timer_sync(&ax25->timer);
+ del_timer_sync(&ax25->t1timer);
+ del_timer_sync(&ax25->t2timer);
+ del_timer_sync(&ax25->t3timer);
+ del_timer_sync(&ax25->idletimer);
+ } else {
+ if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY))
+ ax25_stop_heartbeat(ax25);
+ ax25_stop_t1timer(ax25);
+ ax25_stop_t2timer(ax25);
+ ax25_stop_t3timer(ax25);
+ ax25_stop_idletimer(ax25);
+ }
ax25->state = AX25_STATE_0;
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 0b052ff51bde..cede6826e5b3 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -594,8 +594,10 @@ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet)
if (WARN_ON(!forw_packet->if_outgoing))
return;
- if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface))
+ if (forw_packet->if_outgoing->soft_iface != soft_iface) {
+ pr_warn("%s: soft interface switch for queued OGM\n", __func__);
return;
+ }
if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE)
return;
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index af3da6cdfc79..17100d9ceaf0 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -513,7 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_elp_packet *elp_packet;
struct batadv_hard_iface *primary_if;
- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *ethhdr;
bool res;
int ret = NET_RX_DROP;
@@ -521,6 +521,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
if (!res)
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 04a620fd1301..5d4232d8d651 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -119,8 +119,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- if (hard_iface->if_status != BATADV_IF_ACTIVE)
+ if (hard_iface->if_status != BATADV_IF_ACTIVE) {
+ kfree_skb(skb);
return;
+ }
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
@@ -832,7 +834,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_ogm2_packet *ogm_packet;
- struct ethhdr *ethhdr = eth_hdr(skb);
+ struct ethhdr *ethhdr;
int ogm_offset;
u8 *packet_pos;
int ret = NET_RX_DROP;
@@ -846,6 +848,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 1401031f4bb4..b9e61fc3928a 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -1574,10 +1574,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv)
return 0;
bat_priv->bla.claim_hash = batadv_hash_new(128);
- bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.claim_hash)
+ return -ENOMEM;
- if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash)
+ bat_priv->bla.backbone_hash = batadv_hash_new(32);
+ if (!bat_priv->bla.backbone_hash) {
+ batadv_hash_destroy(bat_priv->bla.claim_hash);
return -ENOMEM;
+ }
batadv_hash_set_lock_class(bat_priv->bla.claim_hash,
&batadv_claim_hash_lock_class_key);
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index f2dc7499d266..af380dc877e3 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -68,7 +68,6 @@ static void batadv_dat_purge(struct work_struct *work);
*/
static void batadv_dat_start_timer(struct batadv_priv *bat_priv)
{
- INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
queue_delayed_work(batadv_event_workqueue, &bat_priv->dat.work,
msecs_to_jiffies(10000));
}
@@ -783,6 +782,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
if (!bat_priv->dat.hash)
return -ENOMEM;
+ INIT_DELAYED_WORK(&bat_priv->dat.work, batadv_dat_purge);
batadv_dat_start_timer(bat_priv);
batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1,
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index 5b71a289d04f..a62eedf889eb 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -403,6 +403,7 @@ out:
/**
* batadv_frag_create() - create a fragment from skb
+ * @net_dev: outgoing device for fragment
* @skb: skb to create fragment from
* @frag_head: header to use in new fragment
* @fragment_size: size of new fragment
@@ -413,22 +414,25 @@ out:
*
* Return: the new fragment, NULL on error.
*/
-static struct sk_buff *batadv_frag_create(struct sk_buff *skb,
+static struct sk_buff *batadv_frag_create(struct net_device *net_dev,
+ struct sk_buff *skb,
struct batadv_frag_packet *frag_head,
unsigned int fragment_size)
{
+ unsigned int ll_reserved = LL_RESERVED_SPACE(net_dev);
+ unsigned int tailroom = net_dev->needed_tailroom;
struct sk_buff *skb_fragment;
unsigned int header_size = sizeof(*frag_head);
unsigned int mtu = fragment_size + header_size;
- skb_fragment = netdev_alloc_skb(NULL, mtu + ETH_HLEN);
+ skb_fragment = dev_alloc_skb(ll_reserved + mtu + tailroom);
if (!skb_fragment)
goto err;
skb_fragment->priority = skb->priority;
/* Eat the last mtu-bytes of the skb */
- skb_reserve(skb_fragment, header_size + ETH_HLEN);
+ skb_reserve(skb_fragment, ll_reserved + header_size);
skb_split(skb, skb_fragment, skb->len - fragment_size);
/* Add the header */
@@ -451,11 +455,12 @@ int batadv_frag_send_packet(struct sk_buff *skb,
struct batadv_orig_node *orig_node,
struct batadv_neigh_node *neigh_node)
{
+ struct net_device *net_dev = neigh_node->if_incoming->net_dev;
struct batadv_priv *bat_priv;
struct batadv_hard_iface *primary_if = NULL;
struct batadv_frag_packet frag_header;
struct sk_buff *skb_fragment;
- unsigned int mtu = neigh_node->if_incoming->net_dev->mtu;
+ unsigned int mtu = net_dev->mtu;
unsigned int header_size = sizeof(frag_header);
unsigned int max_fragment_size, num_fragments;
int ret;
@@ -485,6 +490,17 @@ int batadv_frag_send_packet(struct sk_buff *skb,
goto free_skb;
}
+ /* GRO might have added fragments to the fragment list instead of
+ * frags[]. But this is not handled by skb_split and must be
+ * linearized to avoid incorrect length information after all
+ * batman-adv fragments were created and submitted to the
+ * hard-interface
+ */
+ if (skb_has_frag_list(skb) && __skb_linearize(skb)) {
+ ret = -ENOMEM;
+ goto free_skb;
+ }
+
/* Create one header to be copied to all fragments */
frag_header.packet_type = BATADV_UNICAST_FRAG;
frag_header.version = BATADV_COMPAT_VERSION;
@@ -515,7 +531,7 @@ int batadv_frag_send_packet(struct sk_buff *skb,
goto put_primary_if;
}
- skb_fragment = batadv_frag_create(skb, &frag_header,
+ skb_fragment = batadv_frag_create(net_dev, skb, &frag_header,
max_fragment_size);
if (!skb_fragment) {
ret = -ENOMEM;
@@ -534,13 +550,14 @@ int batadv_frag_send_packet(struct sk_buff *skb,
frag_header.no++;
}
- /* Make room for the fragment header. */
- if (batadv_skb_head_push(skb, header_size) < 0 ||
- pskb_expand_head(skb, header_size + ETH_HLEN, 0, GFP_ATOMIC) < 0) {
- ret = -ENOMEM;
+ /* make sure that there is at least enough head for the fragmentation
+ * and ethernet headers
+ */
+ ret = skb_cow_head(skb, ETH_HLEN + header_size);
+ if (ret < 0)
goto put_primary_if;
- }
+ skb_push(skb, header_size);
memcpy(skb->data, &frag_header, header_size);
/* Send the last fragment */
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c4e0435c952d..0d5519fcb438 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -162,22 +162,25 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev)
struct net *net = dev_net(net_dev);
struct net_device *parent_dev;
struct net *parent_net;
+ int iflink;
bool ret;
/* check if this is a batman-adv mesh interface */
if (batadv_softif_is_valid(net_dev))
return true;
- /* no more parents..stop recursion */
- if (dev_get_iflink(net_dev) == 0 ||
- dev_get_iflink(net_dev) == net_dev->ifindex)
+ iflink = dev_get_iflink(net_dev);
+ if (iflink == 0)
return false;
parent_net = batadv_getlink_net(net_dev, net);
+ /* iflink to itself, most likely physical device */
+ if (net == parent_net && iflink == net_dev->ifindex)
+ return false;
+
/* recurse over the parent device */
- parent_dev = __dev_get_by_index((struct net *)parent_net,
- dev_get_iflink(net_dev));
+ parent_dev = __dev_get_by_index((struct net *)parent_net, iflink);
/* if we got a NULL parent_dev there is something broken.. */
if (!parent_dev) {
pr_err("Cannot find parent device\n");
@@ -227,14 +230,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
struct net_device *real_netdev = NULL;
struct net *real_net;
struct net *net;
- int ifindex;
+ int iflink;
ASSERT_RTNL();
if (!netdev)
return NULL;
- if (netdev->ifindex == dev_get_iflink(netdev)) {
+ iflink = dev_get_iflink(netdev);
+ if (iflink == 0) {
dev_hold(netdev);
return netdev;
}
@@ -244,9 +248,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev)
goto out;
net = dev_net(hard_iface->soft_iface);
- ifindex = dev_get_iflink(netdev);
real_net = batadv_getlink_net(netdev, net);
- real_netdev = dev_get_by_index(real_net, ifindex);
+
+ /* iflink to itself, most likely physical device */
+ if (net == real_net && netdev->ifindex == iflink) {
+ real_netdev = netdev;
+ dev_hold(real_netdev);
+ goto out;
+ }
+
+ real_netdev = dev_get_by_index(real_net, iflink);
out:
if (hard_iface)
@@ -565,6 +576,9 @@ static void batadv_hardif_recalc_extra_skbroom(struct net_device *soft_iface)
needed_headroom = lower_headroom + (lower_header_len - ETH_HLEN);
needed_headroom += batadv_max_header_len();
+ /* fragmentation headers don't strip the unicast/... header */
+ needed_headroom += sizeof(struct batadv_frag_packet);
+
soft_iface->needed_headroom = needed_headroom;
soft_iface->needed_tailroom = lower_tailroom;
}
@@ -629,7 +643,19 @@ out:
*/
void batadv_update_min_mtu(struct net_device *soft_iface)
{
- soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ int limit_mtu;
+ int mtu;
+
+ mtu = batadv_hardif_min_mtu(soft_iface);
+
+ if (bat_priv->mtu_set_by_user)
+ limit_mtu = bat_priv->mtu_set_by_user;
+ else
+ limit_mtu = ETH_DATA_LEN;
+
+ mtu = min(mtu, limit_mtu);
+ dev_set_mtu(soft_iface, mtu);
/* Check if the local translate table should be cleaned up to match a
* new (and smaller) MTU.
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index 853773e45f79..837f67c9fad3 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -205,6 +205,7 @@ static const struct file_operations batadv_log_fops = {
.read = batadv_log_read,
.poll = batadv_log_poll,
.llseek = no_llseek,
+ .owner = THIS_MODULE,
};
/**
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 79b8a2d8793e..bba64b9b3668 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -187,29 +187,41 @@ int batadv_mesh_init(struct net_device *soft_iface)
INIT_HLIST_HEAD(&bat_priv->softif_vlan_list);
INIT_HLIST_HEAD(&bat_priv->tp_list);
- ret = batadv_v_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
-
ret = batadv_originator_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_orig;
+ }
ret = batadv_tt_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_tt;
+ }
+
+ ret = batadv_v_mesh_init(bat_priv);
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_v;
+ }
ret = batadv_bla_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_bla;
+ }
ret = batadv_dat_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_dat;
+ }
ret = batadv_nc_mesh_init(bat_priv);
- if (ret < 0)
- goto err;
+ if (ret < 0) {
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING);
+ goto err_nc;
+ }
batadv_gw_init(bat_priv);
batadv_mcast_init(bat_priv);
@@ -219,8 +231,20 @@ int batadv_mesh_init(struct net_device *soft_iface)
return 0;
-err:
- batadv_mesh_free(soft_iface);
+err_nc:
+ batadv_dat_free(bat_priv);
+err_dat:
+ batadv_bla_free(bat_priv);
+err_bla:
+ batadv_v_mesh_free(bat_priv);
+err_v:
+ batadv_tt_free(bat_priv);
+err_tt:
+ batadv_originator_free(bat_priv);
+err_orig:
+ batadv_purge_outstanding_packets(bat_priv, NULL);
+ atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE);
+
return ret;
}
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index b90fe25d6b0b..9b3311dae2e4 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -62,10 +62,12 @@
#include <uapi/linux/batadv_packet.h>
#include <uapi/linux/batman_adv.h>
+#include "bridge_loop_avoidance.h"
#include "hard-interface.h"
#include "hash.h"
#include "log.h"
#include "netlink.h"
+#include "send.h"
#include "soft-interface.h"
#include "translation-table.h"
#include "tvlv.h"
@@ -1025,6 +1027,35 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
}
/**
+ * batadv_mcast_forw_send_orig() - send a multicast packet to an originator
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to send
+ * @vid: the vlan identifier
+ * @orig_node: the originator to send the packet to
+ *
+ * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
+ */
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
+{
+ /* Avoid sending multicast-in-unicast packets to other BLA
+ * gateways - they already got the frame from the LAN side
+ * we share with them.
+ * TODO: Refactor to take BLA into account earlier, to avoid
+ * reducing the mcast_fanout count.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) {
+ dev_kfree_skb(skb);
+ return NET_XMIT_SUCCESS;
+ }
+
+ return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST, 0,
+ orig_node, vid);
+}
+
+/**
* batadv_mcast_want_unsnoop_update() - update unsnoop counter and list
* @bat_priv: the bat priv with all the soft interface information
* @orig: the orig_node which multicast state might have changed of
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 3b04ab13f0eb..6f9f3813fc59 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -51,6 +51,11 @@ enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
struct batadv_orig_node **mcast_single_orig);
+int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node);
+
void batadv_mcast_init(struct batadv_priv *bat_priv);
int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset);
@@ -79,6 +84,16 @@ static inline int batadv_mcast_init(struct batadv_priv *bat_priv)
}
static inline int
+batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+
+static inline int
batadv_mcast_mesh_info_put(struct sk_buff *msg, struct batadv_priv *bat_priv)
{
return 0;
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 7f1be5a28757..d6749fc22236 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -167,8 +167,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
&batadv_nc_coding_hash_lock_class_key);
bat_priv->nc.decoding_hash = batadv_hash_new(128);
- if (!bat_priv->nc.decoding_hash)
+ if (!bat_priv->nc.decoding_hash) {
+ batadv_hash_destroy(bat_priv->nc.coding_hash);
goto err;
+ }
batadv_hash_set_lock_class(bat_priv->nc.decoding_hash,
&batadv_nc_decoding_hash_lock_class_key);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 6ff78080ec7f..7447dbd305fc 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -167,11 +167,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+
/* check ranges */
if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
+ bat_priv->mtu_set_by_user = new_mtu;
return 0;
}
@@ -367,9 +370,8 @@ send:
goto dropped;
ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
} else if (mcast_single_orig) {
- ret = batadv_send_skb_unicast(bat_priv, skb,
- BATADV_UNICAST, 0,
- mcast_single_orig, vid);
+ ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid,
+ mcast_single_orig);
} else {
if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
skb))
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 9fa5389ea244..c64d58c1b724 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -793,7 +793,6 @@ check_roaming:
if (roamed_back) {
batadv_tt_global_free(bat_priv, tt_global,
"Roaming canceled");
- tt_global = NULL;
} else {
/* The global entry has to be marked as ROAMING and
* has to be kept for consistency purpose
@@ -904,6 +903,7 @@ batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node,
hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) {
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
+ tt_vlan->reserved = 0;
tt_vlan++;
}
@@ -987,6 +987,7 @@ batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv,
tt_vlan->vid = htons(vlan->vid);
tt_vlan->crc = htonl(vlan->tt.crc);
+ tt_vlan->reserved = 0;
tt_vlan++;
}
@@ -4411,8 +4412,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
return ret;
ret = batadv_tt_global_init(bat_priv);
- if (ret < 0)
+ if (ret < 0) {
+ batadv_tt_local_table_free(bat_priv);
return ret;
+ }
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
batadv_tt_tvlv_unicast_handler_v1,
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 37598ae1d3f7..34c18f72a41b 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1515,6 +1515,12 @@ struct batadv_priv {
struct net_device *soft_iface;
/**
+ * @mtu_set_by_user: MTU was set once by user
+ * protected by rtnl_lock
+ */
+ int mtu_set_by_user;
+
+ /**
* @bat_counters: mesh internal traffic statistic counters (see
* batadv_counters)
*/
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 9a75f9b00b51..4530ffb2481a 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -1014,6 +1014,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type);
hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
if (!hcon)
return -ENOENT;
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 51c2cf2d8923..e09ea78356c3 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -233,6 +233,9 @@ static int a2mp_discover_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_info_req req;
found = true;
+
+ memset(&req, 0, sizeof(req));
+
req.id = cl->id;
a2mp_send(mgr, A2MP_GETINFO_REQ, __next_ident(mgr),
sizeof(req), &req);
@@ -312,6 +315,8 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
if (!hdev || hdev->dev_type != HCI_AMP) {
struct a2mp_info_rsp rsp;
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.id = req->id;
rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
@@ -355,6 +360,8 @@ static int a2mp_getinfo_rsp(struct amp_mgr *mgr, struct sk_buff *skb,
if (!ctrl)
return -ENOMEM;
+ memset(&req, 0, sizeof(req));
+
req.id = rsp->id;
a2mp_send(mgr, A2MP_GETAMPASSOC_REQ, __next_ident(mgr), sizeof(req),
&req);
@@ -381,6 +388,8 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb,
hdev = hci_dev_get(req->id);
if (!hdev || hdev->amp_type == AMP_TYPE_BREDR || tmp) {
struct a2mp_amp_assoc_rsp rsp;
+
+ memset(&rsp, 0, sizeof(rsp));
rsp.id = req->id;
if (tmp) {
@@ -471,7 +480,6 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cmd *hdr)
{
struct a2mp_physlink_req *req = (void *) skb->data;
-
struct a2mp_physlink_rsp rsp;
struct hci_dev *hdev;
struct hci_conn *hcon;
@@ -482,6 +490,8 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
BT_DBG("local_id %d, remote_id %d", req->local_id, req->remote_id);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.local_id = req->remote_id;
rsp.remote_id = req->local_id;
@@ -509,6 +519,7 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL);
if (!assoc) {
amp_ctrl_put(ctrl);
+ hci_dev_put(hdev);
return -ENOMEM;
}
@@ -560,6 +571,8 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb,
BT_DBG("local_id %d remote_id %d", req->local_id, req->remote_id);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.local_id = req->remote_id;
rsp.remote_id = req->local_id;
rsp.status = A2MP_STATUS_SUCCESS;
@@ -682,6 +695,8 @@ static int a2mp_chan_recv_cb(struct l2cap_chan *chan, struct sk_buff *skb)
if (err) {
struct a2mp_cmd_rej rej;
+ memset(&rej, 0, sizeof(rej));
+
rej.reason = cpu_to_le16(0);
hdr = (void *) skb->data;
@@ -905,6 +920,8 @@ void a2mp_send_getinfo_rsp(struct hci_dev *hdev)
BT_DBG("%s mgr %p", hdev->name, mgr);
+ memset(&rsp, 0, sizeof(rsp));
+
rsp.id = hdev->id;
rsp.status = A2MP_STATUS_INVALID_CTRL_ID;
@@ -1002,6 +1019,8 @@ void a2mp_send_create_phy_link_rsp(struct hci_dev *hdev, u8 status)
if (!mgr)
return;
+ memset(&rsp, 0, sizeof(rsp));
+
hs_hcon = hci_conn_hash_lookup_state(hdev, AMP_LINK, BT_CONNECT);
if (!hs_hcon) {
rsp.status = A2MP_STATUS_UNABLE_START_LINK_CREATION;
@@ -1034,6 +1053,8 @@ void a2mp_discover_amp(struct l2cap_chan *chan)
mgr->bredr_chan = chan;
+ memset(&req, 0, sizeof(req));
+
req.mtu = cpu_to_le16(L2CAP_A2MP_DEFAULT_MTU);
req.ext_feat = 0;
a2mp_send(mgr, A2MP_DISCOVER_REQ, 1, sizeof(req), &req);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index ee60c30f3be2..798f8f485e5a 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -743,7 +743,7 @@ static int __init bt_init(void)
err = bt_sysfs_init();
if (err < 0)
- return err;
+ goto cleanup_led;
err = sock_register(&bt_sock_family_ops);
if (err)
@@ -779,6 +779,8 @@ unregister_socket:
sock_unregister(PF_BLUETOOTH);
cleanup_sysfs:
bt_sysfs_cleanup();
+cleanup_led:
+ bt_leds_cleanup();
return err;
}
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index 78bec8df8525..72ef967c5663 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -305,6 +305,9 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
struct hci_request req;
int err;
+ if (!mgr)
+ return;
+
cp.phy_handle = hcon->handle;
cp.len_so_far = cpu_to_le16(0);
cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
index c32638dddbf9..f6b9dc4e408f 100644
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -26,7 +26,7 @@
#include <linux/types.h>
#include <net/bluetooth/bluetooth.h>
-#define BTNAMSIZ 18
+#define BTNAMSIZ 21
/* CMTP ioctl defines */
#define CMTPCONNADD _IOW('C', 200, int)
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 7f26a5a19ff6..4764ed73f33b 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -391,6 +391,11 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
if (!(session->flags & BIT(CMTP_LOOPBACK))) {
err = cmtp_attach_device(session);
if (err < 0) {
+ /* Caller will call fput in case of failure, and so
+ * will cmtp_session kthread.
+ */
+ get_file(session->sock->file);
+
atomic_inc(&session->terminate);
wake_up_interruptible(sk_sleep(session->sock->sk));
up_write(&cmtp_session_sem);
@@ -494,9 +499,7 @@ static int __init cmtp_init(void)
{
BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION);
- cmtp_init_sockets();
-
- return 0;
+ return cmtp_init_sockets();
}
static void __exit cmtp_exit(void)
diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h
index a6f8d03d4aaf..830723971cf8 100644
--- a/net/bluetooth/ecdh_helper.h
+++ b/net/bluetooth/ecdh_helper.h
@@ -25,6 +25,6 @@
int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64],
u8 secret[32]);
-int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key);
+int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32]);
int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]);
int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]);
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index db735d0d931e..b8730c5f1cac 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -125,13 +125,11 @@ static void hci_conn_cleanup(struct hci_conn *conn)
if (hdev->notify)
hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
- hci_conn_del_sysfs(conn);
-
debugfs_remove_recursive(conn->debugfs);
- hci_dev_put(hdev);
+ hci_conn_del_sysfs(conn);
- hci_conn_put(conn);
+ hci_dev_put(hdev);
}
static void le_scan_cleanup(struct work_struct *work)
@@ -1204,6 +1202,15 @@ struct hci_conn *hci_connect_acl(struct hci_dev *hdev, bdaddr_t *dst,
return ERR_PTR(-EOPNOTSUPP);
}
+ /* Reject outgoing connection to device with same BD ADDR against
+ * CVE-2020-26555
+ */
+ if (!bacmp(&hdev->bdaddr, dst)) {
+ bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n",
+ dst);
+ return ERR_PTR(-ECONNREFUSED);
+ }
+
acl = hci_conn_hash_lookup_ba(hdev, ACL_LINK, dst);
if (!acl) {
acl = hci_conn_add(hdev, ACL_LINK, dst, HCI_ROLE_MASTER);
@@ -1282,6 +1289,23 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
return 0;
}
+ /* AES encryption is required for Level 4:
+ *
+ * BLUETOOTH CORE SPECIFICATION Version 5.2 | Vol 3, Part C
+ * page 1319:
+ *
+ * 128-bit equivalent strength for link and encryption keys
+ * required using FIPS approved algorithms (E0 not allowed,
+ * SAFER+ not allowed, and P-192 not allowed; encryption key
+ * not shortened)
+ */
+ if (conn->sec_level == BT_SECURITY_FIPS &&
+ !test_bit(HCI_CONN_AES_CCM, &conn->flags)) {
+ bt_dev_err(conn->hdev,
+ "Invalid security: Missing AES-CCM usage");
+ return 0;
+ }
+
if (hci_conn_ssp_enabled(conn) &&
!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
return 0;
@@ -1314,12 +1338,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
sizeof(cp), &cp);
- /* If we're already encrypted set the REAUTH_PEND flag,
- * otherwise set the ENCRYPT_PEND.
+ /* Set the ENCRYPT_PEND to trigger encryption after
+ * authentication.
*/
- if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
- set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
- else
+ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
}
@@ -1362,34 +1384,41 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type,
if (!test_bit(HCI_CONN_AUTH, &conn->flags))
goto auth;
- /* An authenticated FIPS approved combination key has sufficient
- * security for security level 4. */
- if (conn->key_type == HCI_LK_AUTH_COMBINATION_P256 &&
- sec_level == BT_SECURITY_FIPS)
- goto encrypt;
-
- /* An authenticated combination key has sufficient security for
- security level 3. */
- if ((conn->key_type == HCI_LK_AUTH_COMBINATION_P192 ||
- conn->key_type == HCI_LK_AUTH_COMBINATION_P256) &&
- sec_level == BT_SECURITY_HIGH)
- goto encrypt;
-
- /* An unauthenticated combination key has sufficient security for
- security level 1 and 2. */
- if ((conn->key_type == HCI_LK_UNAUTH_COMBINATION_P192 ||
- conn->key_type == HCI_LK_UNAUTH_COMBINATION_P256) &&
- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW))
- goto encrypt;
-
- /* A combination key has always sufficient security for the security
- levels 1 or 2. High security level requires the combination key
- is generated using maximum PIN code length (16).
- For pre 2.1 units. */
- if (conn->key_type == HCI_LK_COMBINATION &&
- (sec_level == BT_SECURITY_MEDIUM || sec_level == BT_SECURITY_LOW ||
- conn->pin_length == 16))
- goto encrypt;
+ switch (conn->key_type) {
+ case HCI_LK_AUTH_COMBINATION_P256:
+ /* An authenticated FIPS approved combination key has
+ * sufficient security for security level 4 or lower.
+ */
+ if (sec_level <= BT_SECURITY_FIPS)
+ goto encrypt;
+ break;
+ case HCI_LK_AUTH_COMBINATION_P192:
+ /* An authenticated combination key has sufficient security for
+ * security level 3 or lower.
+ */
+ if (sec_level <= BT_SECURITY_HIGH)
+ goto encrypt;
+ break;
+ case HCI_LK_UNAUTH_COMBINATION_P192:
+ case HCI_LK_UNAUTH_COMBINATION_P256:
+ /* An unauthenticated combination key has sufficient security
+ * for security level 2 or lower.
+ */
+ if (sec_level <= BT_SECURITY_MEDIUM)
+ goto encrypt;
+ break;
+ case HCI_LK_COMBINATION:
+ /* A combination key has always sufficient security for the
+ * security levels 2 or lower. High security level requires the
+ * combination key is generated using maximum PIN code length
+ * (16). For pre 2.1 units.
+ */
+ if (sec_level <= BT_SECURITY_MEDIUM || conn->pin_length == 16)
+ goto encrypt;
+ break;
+ default:
+ break;
+ }
auth:
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags))
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e03faca84919..4d89e38dceec 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1296,6 +1296,12 @@ int hci_inquiry(void __user *arg)
goto done;
}
+ /* Restrict maximum inquiry length to 60 seconds */
+ if (ir.length > 60) {
+ err = -EINVAL;
+ goto done;
+ }
+
hci_dev_lock(hdev);
if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) {
@@ -1316,8 +1322,10 @@ int hci_inquiry(void __user *arg)
* cleared). If it is interrupted by a signal, return -EINTR.
*/
if (wait_on_bit(&hdev->flags, HCI_INQUIRY,
- TASK_INTERRUPTIBLE))
- return -EINTR;
+ TASK_INTERRUPTIBLE)) {
+ err = -EINTR;
+ goto done;
+ }
}
/* for unlimited number of responses we will use buffer with
@@ -1496,8 +1504,13 @@ static int hci_dev_do_open(struct hci_dev *hdev)
} else {
/* Init failed, cleanup */
flush_work(&hdev->tx_work);
- flush_work(&hdev->cmd_work);
+
+ /* Since hci_rx_work() is possible to awake new cmd_work
+ * it should be flushed first to avoid unexpected call of
+ * hci_cmd_work()
+ */
flush_work(&hdev->rx_work);
+ flush_work(&hdev->cmd_work);
skb_queue_purge(&hdev->cmd_q);
skb_queue_purge(&hdev->rx_q);
@@ -1506,6 +1519,7 @@ static int hci_dev_do_open(struct hci_dev *hdev)
hdev->flush(hdev);
if (hdev->sent_cmd) {
+ cancel_delayed_work_sync(&hdev->cmd_timer);
kfree_skb(hdev->sent_cmd);
hdev->sent_cmd = NULL;
}
@@ -1615,6 +1629,14 @@ int hci_dev_do_close(struct hci_dev *hdev)
hci_request_cancel_all(hdev);
hci_req_sync_lock(hdev);
+ if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
+ !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
+ test_bit(HCI_UP, &hdev->flags)) {
+ /* Execute vendor specific shutdown routine */
+ if (hdev->shutdown)
+ hdev->shutdown(hdev);
+ }
+
if (!test_and_clear_bit(HCI_UP, &hdev->flags)) {
cancel_delayed_work_sync(&hdev->cmd_timer);
hci_req_sync_unlock(hdev);
@@ -2495,10 +2517,10 @@ int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr)
int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
{
- struct smp_ltk *k;
+ struct smp_ltk *k, *tmp;
int removed = 0;
- list_for_each_entry_rcu(k, &hdev->long_term_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) {
if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type)
continue;
@@ -2514,9 +2536,9 @@ int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type)
void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type)
{
- struct smp_irk *k;
+ struct smp_irk *k, *tmp;
- list_for_each_entry_rcu(k, &hdev->identity_resolving_keys, list) {
+ list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) {
if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type)
continue;
@@ -3159,10 +3181,10 @@ int hci_register_dev(struct hci_dev *hdev)
*/
switch (hdev->dev_type) {
case HCI_PRIMARY:
- id = ida_simple_get(&hci_index_ida, 0, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 0, HCI_MAX_ID, GFP_KERNEL);
break;
case HCI_AMP:
- id = ida_simple_get(&hci_index_ida, 1, 0, GFP_KERNEL);
+ id = ida_simple_get(&hci_index_ida, 1, HCI_MAX_ID, GFP_KERNEL);
break;
default:
return -EINVAL;
@@ -3171,7 +3193,11 @@ int hci_register_dev(struct hci_dev *hdev)
if (id < 0)
return id;
- sprintf(hdev->name, "hci%d", id);
+ error = dev_set_name(&hdev->dev, "hci%u", id);
+ if (error)
+ return error;
+
+ hdev->name = dev_name(&hdev->dev);
hdev->id = id;
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
@@ -3193,8 +3219,6 @@ int hci_register_dev(struct hci_dev *hdev)
if (!IS_ERR_OR_NULL(bt_debugfs))
hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs);
- dev_set_name(&hdev->dev, "%s", hdev->name);
-
error = device_add(&hdev->dev);
if (error < 0)
goto err_wqueue;
@@ -3242,6 +3266,7 @@ int hci_register_dev(struct hci_dev *hdev)
return id;
err_wqueue:
+ debugfs_remove_recursive(hdev->debugfs);
destroy_workqueue(hdev->workqueue);
destroy_workqueue(hdev->req_workqueue);
err:
@@ -3254,14 +3279,10 @@ EXPORT_SYMBOL(hci_register_dev);
/* Unregister HCI device */
void hci_unregister_dev(struct hci_dev *hdev)
{
- int id;
-
BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus);
hci_dev_set_flag(hdev, HCI_UNREGISTER);
- id = hdev->id;
-
write_lock(&hci_dev_list_lock);
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
@@ -3290,7 +3311,14 @@ void hci_unregister_dev(struct hci_dev *hdev)
}
device_del(&hdev->dev);
+ /* Actual cleanup is deferred until hci_cleanup_dev(). */
+ hci_dev_put(hdev);
+}
+EXPORT_SYMBOL(hci_unregister_dev);
+/* Cleanup HCI device */
+void hci_cleanup_dev(struct hci_dev *hdev)
+{
debugfs_remove_recursive(hdev->debugfs);
kfree_const(hdev->hw_info);
kfree_const(hdev->fw_info);
@@ -3313,11 +3341,8 @@ void hci_unregister_dev(struct hci_dev *hdev)
hci_discovery_filter_clear(hdev);
hci_dev_unlock(hdev);
- hci_dev_put(hdev);
-
- ida_simple_remove(&hci_index_ida, id);
+ ida_simple_remove(&hci_index_ida, hdev->id);
}
-EXPORT_SYMBOL(hci_unregister_dev);
/* Suspend HCI device */
int hci_suspend_dev(struct hci_dev *hdev)
@@ -4309,7 +4334,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
*req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
else
*req_complete = bt_cb(skb)->hci.req_complete;
- kfree_skb(skb);
+ dev_kfree_skb_irq(skb);
}
spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 310622086f74..9d01874e6b93 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -25,6 +25,8 @@
/* Bluetooth HCI event handling. */
#include <asm/unaligned.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
@@ -1699,7 +1701,8 @@ static void hci_cs_inquiry(struct hci_dev *hdev, __u8 status)
return;
}
- set_bit(HCI_INQUIRY, &hdev->flags);
+ if (hci_sent_cmd_data(hdev, HCI_OP_INQUIRY))
+ set_bit(HCI_INQUIRY, &hdev->flags);
}
static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status)
@@ -2510,6 +2513,16 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s bdaddr %pMR type 0x%x", hdev->name, &ev->bdaddr,
ev->link_type);
+ /* Reject incoming connection from device with same BD ADDR against
+ * CVE-2020-26555
+ */
+ if (hdev && !bacmp(&hdev->bdaddr, &ev->bdaddr)) {
+ bt_dev_dbg(hdev, "Reject connection with same BD_ADDR %pMR\n",
+ &ev->bdaddr);
+ hci_reject_conn(hdev, &ev->bdaddr);
+ return;
+ }
+
mask |= hci_proto_connect_ind(hdev, &ev->bdaddr, ev->link_type,
&flags);
@@ -2709,14 +2722,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!ev->status) {
clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
-
- if (!hci_conn_ssp_enabled(conn) &&
- test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
- bt_dev_info(hdev, "re-auth of legacy device is not possible.");
- } else {
- set_bit(HCI_CONN_AUTH, &conn->flags);
- conn->sec_level = conn->pending_sec_level;
- }
+ set_bit(HCI_CONN_AUTH, &conn->flags);
+ conn->sec_level = conn->pending_sec_level;
} else {
if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
@@ -2725,7 +2732,6 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
clear_bit(HCI_CONN_AUTH_PEND, &conn->flags);
- clear_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
if (conn->state == BT_CONFIG) {
if (!ev->status && hci_conn_ssp_enabled(conn)) {
@@ -2756,7 +2762,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
&cp);
} else {
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
- hci_encrypt_cfm(conn, ev->status, 0x00);
+ hci_encrypt_cfm(conn, ev->status);
}
}
@@ -2841,22 +2847,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
conn->enc_key_size = rp->key_size;
}
- if (conn->state == BT_CONFIG) {
- conn->state = BT_CONNECTED;
- hci_connect_cfm(conn, 0);
- hci_conn_drop(conn);
- } else {
- u8 encrypt;
-
- if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
- encrypt = 0x00;
- else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
- encrypt = 0x02;
- else
- encrypt = 0x01;
-
- hci_encrypt_cfm(conn, 0, encrypt);
- }
+ hci_encrypt_cfm(conn, 0);
unlock:
hci_dev_unlock(hdev);
@@ -2905,27 +2896,23 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
+ /* Check link security requirements are met */
+ if (!hci_conn_check_link_mode(conn))
+ ev->status = HCI_ERROR_AUTH_FAILURE;
+
if (ev->status && conn->state == BT_CONNECTED) {
if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
+ /* Notify upper layers so they can cleanup before
+ * disconnecting.
+ */
+ hci_encrypt_cfm(conn, ev->status);
hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
hci_conn_drop(conn);
goto unlock;
}
- /* In Secure Connections Only mode, do not allow any connections
- * that are not encrypted with AES-CCM using a P-256 authenticated
- * combination key.
- */
- if (hci_dev_test_flag(hdev, HCI_SC_ONLY) &&
- (!test_bit(HCI_CONN_AES_CCM, &conn->flags) ||
- conn->key_type != HCI_LK_AUTH_COMBINATION_P256)) {
- hci_connect_cfm(conn, HCI_ERROR_AUTH_FAILURE);
- hci_conn_drop(conn);
- goto unlock;
- }
-
/* Try reading the encryption key size for encrypted ACL links */
if (!ev->status && ev->encrypt && conn->type == ACL_LINK) {
struct hci_cp_read_enc_key_size cp;
@@ -2955,14 +2942,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
notify:
- if (conn->state == BT_CONFIG) {
- if (!ev->status)
- conn->state = BT_CONNECTED;
-
- hci_connect_cfm(conn, ev->status);
- hci_conn_drop(conn);
- } else
- hci_encrypt_cfm(conn, ev->status, ev->encrypt);
+ hci_encrypt_cfm(conn, ev->status);
unlock:
hci_dev_unlock(hdev);
@@ -3842,6 +3822,15 @@ static void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (!conn)
goto unlock;
+ /* Ignore NULL link key against CVE-2020-26555 */
+ if (!crypto_memneq(ev->link_key, ZERO_KEY, HCI_LINK_KEY_SIZE)) {
+ bt_dev_dbg(hdev, "Ignore NULL link key (ZERO KEY) for %pMR",
+ &ev->bdaddr);
+ hci_disconnect(conn, HCI_ERROR_AUTH_FAILURE);
+ hci_conn_drop(conn);
+ goto unlock;
+ }
+
hci_conn_hold(conn);
conn->disc_timeout = HCI_DISCONN_TIMEOUT;
hci_conn_drop(conn);
@@ -4109,6 +4098,21 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
switch (ev->status) {
case 0x00:
+ /* The synchronous connection complete event should only be
+ * sent once per new connection. Receiving a successful
+ * complete event when the connection status is already
+ * BT_CONNECTED means that the device is misbehaving and sent
+ * multiple complete event packets for the same new connection.
+ *
+ * Registering the device more than once can corrupt kernel
+ * memory, hence upon detecting this invalid event, we report
+ * an error and ignore the packet.
+ */
+ if (conn->state == BT_CONNECTED) {
+ bt_dev_err(hdev, "Ignoring connect complete event for existing connection");
+ goto unlock;
+ }
+
conn->handle = __le16_to_cpu(ev->handle);
conn->state = BT_CONNECTED;
conn->type = ev->link_type;
@@ -4305,8 +4309,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn)
* available, then do not declare that OOB data is
* present.
*/
- if (!memcmp(data->rand256, ZERO_KEY, 16) ||
- !memcmp(data->hash256, ZERO_KEY, 16))
+ if (!crypto_memneq(data->rand256, ZERO_KEY, 16) ||
+ !crypto_memneq(data->hash256, ZERO_KEY, 16))
return 0x00;
return 0x02;
@@ -4316,8 +4320,8 @@ static u8 bredr_oob_data_present(struct hci_conn *conn)
* not supported by the hardware, then check that if
* P-192 data values are present.
*/
- if (!memcmp(data->rand192, ZERO_KEY, 16) ||
- !memcmp(data->hash192, ZERO_KEY, 16))
+ if (!crypto_memneq(data->rand192, ZERO_KEY, 16) ||
+ !crypto_memneq(data->hash192, ZERO_KEY, 16))
return 0x00;
return 0x01;
@@ -4333,7 +4337,7 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn)
+ if (!conn || !hci_conn_ssp_enabled(conn))
goto unlock;
hci_conn_hold(conn);
@@ -4568,7 +4572,7 @@ static void hci_simple_pair_complete_evt(struct hci_dev *hdev,
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn)
+ if (!conn || !hci_conn_ssp_enabled(conn))
goto unlock;
/* Reset the authentication requirement to unknown */
@@ -4698,6 +4702,11 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev,
return;
}
+ if (!hcon->amp_mgr) {
+ hci_dev_unlock(hdev);
+ return;
+ }
+
if (ev->status) {
hci_conn_del(hcon);
hci_dev_unlock(hdev);
@@ -4742,6 +4751,7 @@ static void hci_loglink_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
return;
hchan->handle = le16_to_cpu(ev->handle);
+ hchan->amp = true;
BT_DBG("hcon %p mgr %p hchan %p", hcon, hcon->amp_mgr, hchan);
@@ -4774,7 +4784,7 @@ static void hci_disconn_loglink_complete_evt(struct hci_dev *hdev,
hci_dev_lock(hdev);
hchan = hci_chan_lookup_handle(hdev, le16_to_cpu(ev->handle));
- if (!hchan)
+ if (!hchan || !hchan->amp)
goto unlock;
amp_destroy_logical_link(hchan, ev->reason);
@@ -4797,8 +4807,9 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
hci_dev_lock(hdev);
hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (hcon) {
+ if (hcon && hcon->type == AMP_LINK) {
hcon->state = BT_CLOSED;
+ hci_disconn_cfm(hcon, ev->reason);
hci_conn_del(hcon);
}
@@ -4806,9 +4817,64 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
}
#endif
+static void le_conn_update_addr(struct hci_conn *conn, bdaddr_t *bdaddr,
+ u8 bdaddr_type, bdaddr_t *local_rpa)
+{
+ if (conn->out) {
+ conn->dst_type = bdaddr_type;
+ conn->resp_addr_type = bdaddr_type;
+ bacpy(&conn->resp_addr, bdaddr);
+
+ /* Check if the controller has set a Local RPA then it must be
+ * used instead or hdev->rpa.
+ */
+ if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+ conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->init_addr, local_rpa);
+ } else if (hci_dev_test_flag(conn->hdev, HCI_PRIVACY)) {
+ conn->init_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->init_addr, &conn->hdev->rpa);
+ } else {
+ hci_copy_identity_address(conn->hdev, &conn->init_addr,
+ &conn->init_addr_type);
+ }
+ } else {
+ conn->resp_addr_type = conn->hdev->adv_addr_type;
+ /* Check if the controller has set a Local RPA then it must be
+ * used instead or hdev->rpa.
+ */
+ if (local_rpa && bacmp(local_rpa, BDADDR_ANY)) {
+ conn->resp_addr_type = ADDR_LE_DEV_RANDOM;
+ bacpy(&conn->resp_addr, local_rpa);
+ } else if (conn->hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
+ /* In case of ext adv, resp_addr will be updated in
+ * Adv Terminated event.
+ */
+ if (!ext_adv_capable(conn->hdev))
+ bacpy(&conn->resp_addr,
+ &conn->hdev->random_addr);
+ } else {
+ bacpy(&conn->resp_addr, &conn->hdev->bdaddr);
+ }
+
+ conn->init_addr_type = bdaddr_type;
+ bacpy(&conn->init_addr, bdaddr);
+
+ /* For incoming connections, set the default minimum
+ * and maximum connection interval. They will be used
+ * to check if the parameters are in range and if not
+ * trigger the connection update procedure.
+ */
+ conn->le_conn_min_interval = conn->hdev->le_conn_min_interval;
+ conn->le_conn_max_interval = conn->hdev->le_conn_max_interval;
+ }
+}
+
static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
- bdaddr_t *bdaddr, u8 bdaddr_type, u8 role, u16 handle,
- u16 interval, u16 latency, u16 supervision_timeout)
+ bdaddr_t *bdaddr, u8 bdaddr_type,
+ bdaddr_t *local_rpa, u8 role, u16 handle,
+ u16 interval, u16 latency,
+ u16 supervision_timeout)
{
struct hci_conn_params *params;
struct hci_conn *conn;
@@ -4856,32 +4922,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
cancel_delayed_work(&conn->le_conn_timeout);
}
- if (!conn->out) {
- /* Set the responder (our side) address type based on
- * the advertising address type.
- */
- conn->resp_addr_type = hdev->adv_addr_type;
- if (hdev->adv_addr_type == ADDR_LE_DEV_RANDOM) {
- /* In case of ext adv, resp_addr will be updated in
- * Adv Terminated event.
- */
- if (!ext_adv_capable(hdev))
- bacpy(&conn->resp_addr, &hdev->random_addr);
- } else {
- bacpy(&conn->resp_addr, &hdev->bdaddr);
- }
-
- conn->init_addr_type = bdaddr_type;
- bacpy(&conn->init_addr, bdaddr);
-
- /* For incoming connections, set the default minimum
- * and maximum connection interval. They will be used
- * to check if the parameters are in range and if not
- * trigger the connection update procedure.
- */
- conn->le_conn_min_interval = hdev->le_conn_min_interval;
- conn->le_conn_max_interval = hdev->le_conn_max_interval;
- }
+ le_conn_update_addr(conn, bdaddr, bdaddr_type, local_rpa);
/* Lookup the identity address from the stored connection
* address and address type.
@@ -4979,7 +5020,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
- ev->role, le16_to_cpu(ev->handle),
+ NULL, ev->role, le16_to_cpu(ev->handle),
le16_to_cpu(ev->interval),
le16_to_cpu(ev->latency),
le16_to_cpu(ev->supervision_timeout));
@@ -4993,7 +5034,7 @@ static void hci_le_enh_conn_complete_evt(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
le_conn_complete_evt(hdev, ev->status, &ev->bdaddr, ev->bdaddr_type,
- ev->role, le16_to_cpu(ev->handle),
+ &ev->local_rpa, ev->role, le16_to_cpu(ev->handle),
le16_to_cpu(ev->interval),
le16_to_cpu(ev->latency),
le16_to_cpu(ev->supervision_timeout));
@@ -5006,14 +5047,26 @@ static void hci_le_ext_adv_term_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s status 0x%2.2x", hdev->name, ev->status);
- if (ev->status)
+ if (ev->status) {
+ struct adv_info *adv;
+
+ adv = hci_find_adv_instance(hdev, ev->handle);
+ if (!adv)
+ return;
+
+ /* Remove advertising as it has been terminated */
+ hci_remove_adv_instance(hdev, ev->handle);
+ mgmt_advertising_removed(NULL, hdev, ev->handle);
+
return;
+ }
conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->conn_handle));
if (conn) {
struct adv_info *adv_instance;
- if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM)
+ if (hdev->adv_addr_type != ADDR_LE_DEV_RANDOM ||
+ bacmp(&conn->resp_addr, BDADDR_ANY))
return;
if (!hdev->cur_adv_instance) {
@@ -5354,7 +5407,13 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb)
struct hci_ev_le_advertising_info *ev = ptr;
s8 rssi;
- if (ev->length <= HCI_MAX_AD_LENGTH) {
+ if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) {
+ bt_dev_err(hdev, "Malicious advertising data.");
+ break;
+ }
+
+ if (ev->length <= HCI_MAX_AD_LENGTH &&
+ ev->data + ev->length <= skb_tail_pointer(skb)) {
rssi = ev->data[ev->length];
process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
ev->bdaddr_type, NULL, 0, rssi,
@@ -5622,21 +5681,19 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev,
struct sk_buff *skb)
{
u8 num_reports = skb->data[0];
- void *ptr = &skb->data[1];
+ struct hci_ev_le_direct_adv_info *ev = (void *)&skb->data[1];
- hci_dev_lock(hdev);
+ if (!num_reports || skb->len < num_reports * sizeof(*ev) + 1)
+ return;
- while (num_reports--) {
- struct hci_ev_le_direct_adv_info *ev = ptr;
+ hci_dev_lock(hdev);
+ for (; num_reports; num_reports--, ev++)
process_adv_report(hdev, ev->evt_type, &ev->bdaddr,
ev->bdaddr_type, &ev->direct_addr,
ev->direct_addr_type, ev->rssi, NULL, 0,
false);
- ptr += sizeof(*ev);
- }
-
hci_dev_unlock(hdev);
}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index a8ddd211e94c..76bd6b122724 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -271,12 +271,16 @@ int hci_req_sync(struct hci_dev *hdev, int (*req)(struct hci_request *req,
{
int ret;
- if (!test_bit(HCI_UP, &hdev->flags))
- return -ENETDOWN;
-
/* Serialize all requests */
hci_req_sync_lock(hdev);
- ret = __hci_req_sync(hdev, req, opt, timeout, hci_status);
+ /* check the state after obtaing the lock to protect the HCI_UP
+ * against any races from hci_dev_do_close when the controller
+ * gets removed.
+ */
+ if (test_bit(HCI_UP, &hdev->flags))
+ ret = __hci_req_sync(hdev, req, opt, timeout, hci_status);
+ else
+ ret = -ENETDOWN;
hci_req_sync_unlock(hdev);
return ret;
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index e506c51ff765..a7e5bca9f7e4 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -59,6 +59,17 @@ struct hci_pinfo {
char comm[TASK_COMM_LEN];
};
+static struct hci_dev *hci_hdev_from_sock(struct sock *sk)
+{
+ struct hci_dev *hdev = hci_pi(sk)->hdev;
+
+ if (!hdev)
+ return ERR_PTR(-EBADFD);
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ return ERR_PTR(-EPIPE);
+ return hdev;
+}
+
void hci_sock_set_flag(struct sock *sk, int nr)
{
set_bit(nr, &hci_pi(sk)->flags);
@@ -419,7 +430,8 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
ni->type = hdev->dev_type;
ni->bus = hdev->bus;
bacpy(&ni->bdaddr, &hdev->bdaddr);
- memcpy(ni->name, hdev->name, 8);
+ memcpy_and_pad(ni->name, sizeof(ni->name), hdev->name,
+ strnlen(hdev->name, sizeof(ni->name)), '\0');
opcode = cpu_to_le16(HCI_MON_NEW_INDEX);
break;
@@ -752,19 +764,13 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
if (event == HCI_DEV_UNREG) {
struct sock *sk;
- /* Detach sockets from device */
+ /* Wake up sockets using this dead device */
read_lock(&hci_sk_list.lock);
sk_for_each(sk, &hci_sk_list.head) {
- bh_lock_sock_nested(sk);
if (hci_pi(sk)->hdev == hdev) {
- hci_pi(sk)->hdev = NULL;
sk->sk_err = EPIPE;
- sk->sk_state = BT_OPEN;
sk->sk_state_change(sk);
-
- hci_dev_put(hdev);
}
- bh_unlock_sock(sk);
}
read_unlock(&hci_sk_list.lock);
}
@@ -876,10 +882,6 @@ static int hci_sock_release(struct socket *sock)
}
sock_orphan(sk);
-
- skb_queue_purge(&sk->sk_receive_queue);
- skb_queue_purge(&sk->sk_write_queue);
-
release_sock(sk);
sock_put(sk);
return 0;
@@ -923,10 +925,10 @@ static int hci_sock_blacklist_del(struct hci_dev *hdev, void __user *arg)
static int hci_sock_bound_ioctl(struct sock *sk, unsigned int cmd,
unsigned long arg)
{
- struct hci_dev *hdev = hci_pi(sk)->hdev;
+ struct hci_dev *hdev = hci_hdev_from_sock(sk);
- if (!hdev)
- return -EBADFD;
+ if (IS_ERR(hdev))
+ return PTR_ERR(hdev);
if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
return -EBUSY;
@@ -972,6 +974,34 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
BT_DBG("cmd %x arg %lx", cmd, arg);
+ /* Make sure the cmd is valid before doing anything */
+ switch (cmd) {
+ case HCIGETDEVLIST:
+ case HCIGETDEVINFO:
+ case HCIGETCONNLIST:
+ case HCIDEVUP:
+ case HCIDEVDOWN:
+ case HCIDEVRESET:
+ case HCIDEVRESTAT:
+ case HCISETSCAN:
+ case HCISETAUTH:
+ case HCISETENCRYPT:
+ case HCISETPTYPE:
+ case HCISETLINKPOL:
+ case HCISETLINKMODE:
+ case HCISETACLMTU:
+ case HCISETSCOMTU:
+ case HCIINQUIRY:
+ case HCISETRAW:
+ case HCIGETCONNINFO:
+ case HCIGETAUTHINFO:
+ case HCIBLOCKADDR:
+ case HCIUNBLOCKADDR:
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
lock_sock(sk);
if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) {
@@ -988,7 +1018,14 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd,
if (hci_sock_gen_cookie(sk)) {
struct sk_buff *skb;
- if (capable(CAP_NET_ADMIN))
+ /* Perform careful checks before setting the HCI_SOCK_TRUSTED
+ * flag. Make sure that not only the current task but also
+ * the socket opener has the required capability, since
+ * privileged programs can be tricked into making ioctl calls
+ * on HCI sockets, and the socket should not be marked as
+ * trusted simply because the ioctl caller is privileged.
+ */
+ if (sk_capable(sk, CAP_NET_ADMIN))
hci_sock_set_flag(sk, HCI_SOCK_TRUSTED);
/* Send event to monitor */
@@ -1080,6 +1117,18 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
+ /* Allow detaching from dead device and attaching to alive device, if
+ * the caller wants to re-bind (instead of close) this socket in
+ * response to hci_sock_dev_event(HCI_DEV_UNREG) notification.
+ */
+ hdev = hci_pi(sk)->hdev;
+ if (hdev && hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
+ hci_pi(sk)->hdev = NULL;
+ sk->sk_state = BT_OPEN;
+ hci_dev_put(hdev);
+ }
+ hdev = NULL;
+
if (sk->sk_state == BT_BOUND) {
err = -EALREADY;
goto done;
@@ -1356,9 +1405,9 @@ static int hci_sock_getname(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
@@ -1718,9 +1767,9 @@ static int hci_sock_sendmsg(struct socket *sock, struct msghdr *msg,
goto done;
}
- hdev = hci_pi(sk)->hdev;
- if (!hdev) {
- err = -EBADFD;
+ hdev = hci_hdev_from_sock(sk);
+ if (IS_ERR(hdev)) {
+ err = PTR_ERR(hdev);
goto done;
}
@@ -1968,6 +2017,12 @@ done:
return err;
}
+static void hci_sock_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+ skb_queue_purge(&sk->sk_write_queue);
+}
+
static const struct proto_ops hci_sock_ops = {
.family = PF_BLUETOOTH,
.owner = THIS_MODULE,
@@ -2018,6 +2073,7 @@ static int hci_sock_create(struct net *net, struct socket *sock, int protocol,
sock->state = SS_UNCONNECTED;
sk->sk_state = BT_OPEN;
+ sk->sk_destruct = hci_sock_destruct;
bt_sock_link(&hci_sk_list, sk);
return 0;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 9874844a95a9..266112c960ee 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -33,7 +33,7 @@ void hci_conn_init_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
- BT_DBG("conn %p", conn);
+ bt_dev_dbg(hdev, "conn %p", conn);
conn->dev.type = &bt_link;
conn->dev.class = bt_class;
@@ -46,24 +46,30 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
- BT_DBG("conn %p", conn);
+ bt_dev_dbg(hdev, "conn %p", conn);
+
+ if (device_is_registered(&conn->dev))
+ return;
dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle);
- if (device_add(&conn->dev) < 0) {
+ if (device_add(&conn->dev) < 0)
bt_dev_err(hdev, "failed to register connection device");
- return;
- }
-
- hci_dev_hold(hdev);
}
void hci_conn_del_sysfs(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
- if (!device_is_registered(&conn->dev))
+ bt_dev_dbg(hdev, "conn %p", conn);
+
+ if (!device_is_registered(&conn->dev)) {
+ /* If device_add() has *not* succeeded, use *only* put_device()
+ * to drop the reference count.
+ */
+ put_device(&conn->dev);
return;
+ }
while (1) {
struct device *dev;
@@ -75,14 +81,15 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
put_device(dev);
}
- device_del(&conn->dev);
-
- hci_dev_put(hdev);
+ device_unregister(&conn->dev);
}
static void bt_host_release(struct device *dev)
{
struct hci_dev *hdev = to_hci_dev(dev);
+
+ if (hci_dev_test_flag(hdev, HCI_UNREGISTER))
+ hci_cleanup_dev(hdev);
kfree(hdev);
module_put(THIS_MODULE);
}
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 253975cce943..00dae8e875a2 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -428,7 +428,7 @@ static void hidp_set_timer(struct hidp_session *session)
static void hidp_del_timer(struct hidp_session *session)
{
if (session->idle_to > 0)
- del_timer(&session->timer);
+ del_timer_sync(&session->timer);
}
static void hidp_process_report(struct hidp_session *session, int type,
@@ -1282,7 +1282,7 @@ static int hidp_session_thread(void *arg)
/* cleanup runtime environment */
remove_wait_queue(sk_sleep(session->intr_sock->sk), &intr_wait);
- remove_wait_queue(sk_sleep(session->intr_sock->sk), &ctrl_wait);
+ remove_wait_queue(sk_sleep(session->ctrl_sock->sk), &ctrl_wait);
wake_up_interruptible(&session->report_queue);
hidp_del_timer(session);
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index c04107d44601..9346fae5d664 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -63,6 +63,9 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err);
static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
struct sk_buff_head *skbs, u8 event);
+static void l2cap_retrans_timeout(struct work_struct *work);
+static void l2cap_monitor_timeout(struct work_struct *work);
+static void l2cap_ack_timeout(struct work_struct *work);
static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type)
{
@@ -113,7 +116,8 @@ static struct l2cap_chan *__l2cap_get_chan_by_scid(struct l2cap_conn *conn,
}
/* Find channel with given SCID.
- * Returns locked channel. */
+ * Returns a reference locked channel.
+ */
static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
u16 cid)
{
@@ -121,15 +125,19 @@ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_scid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
}
/* Find channel with given DCID.
- * Returns locked channel.
+ * Returns a reference locked channel.
*/
static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
u16 cid)
@@ -138,8 +146,12 @@ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_dcid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -164,8 +176,12 @@ static struct l2cap_chan *l2cap_get_chan_by_ident(struct l2cap_conn *conn,
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_ident(conn, ident);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
@@ -445,6 +461,8 @@ struct l2cap_chan *l2cap_chan_create(void)
if (!chan)
return NULL;
+ skb_queue_head_init(&chan->tx_q);
+ skb_queue_head_init(&chan->srej_q);
mutex_init(&chan->lock);
/* Set default lock nesting level */
@@ -455,6 +473,9 @@ struct l2cap_chan *l2cap_chan_create(void)
write_unlock(&chan_list_lock);
INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout);
+ INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
+ INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
+ INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
chan->state = BT_OPEN;
@@ -489,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *c)
kref_get(&c->kref);
}
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+
+ if (!kref_get_unless_zero(&c->kref))
+ return NULL;
+
+ return c;
+}
+
void l2cap_chan_put(struct l2cap_chan *c)
{
BT_DBG("chan %p orig refcnt %d", c, kref_read(&c->kref));
@@ -510,7 +541,9 @@ void l2cap_chan_set_defaults(struct l2cap_chan *chan)
chan->flush_to = L2CAP_DEFAULT_FLUSH_TO;
chan->retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
chan->monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+
chan->conf_state = 0;
+ set_bit(CONF_NOT_COMPLETE, &chan->conf_state);
set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
}
@@ -1777,11 +1810,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
bdaddr_t *dst,
u8 link_type)
{
- struct l2cap_chan *c, *c1 = NULL;
+ struct l2cap_chan *c, *tmp, *c1 = NULL;
read_lock(&chan_list_lock);
- list_for_each_entry(c, &chan_list, global_l) {
+ list_for_each_entry_safe(c, tmp, &chan_list, global_l) {
if (state && c->state != state)
continue;
@@ -1791,7 +1824,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
if (link_type == LE_LINK && c->src_type == BDADDR_BREDR)
continue;
- if (c->psm == psm) {
+ if (c->chan_type != L2CAP_CHAN_FIXED && c->psm == psm) {
int src_match, dst_match;
int src_any, dst_any;
@@ -1799,7 +1832,9 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
src_match = !bacmp(&c->src, src);
dst_match = !bacmp(&c->dst, dst);
if (src_match && dst_match) {
- l2cap_chan_hold(c);
+ if (!l2cap_chan_hold_unless_zero(c))
+ continue;
+
read_unlock(&chan_list_lock);
return c;
}
@@ -1814,7 +1849,7 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm,
}
if (c1)
- l2cap_chan_hold(c1);
+ c1 = l2cap_chan_hold_unless_zero(c1);
read_unlock(&chan_list_lock);
@@ -2482,14 +2517,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
if (IS_ERR(skb))
return PTR_ERR(skb);
- /* Channel lock is released before requesting new skb and then
- * reacquired thus we need to recheck channel state.
- */
- if (chan->state != BT_CONNECTED) {
- kfree_skb(skb);
- return -ENOTCONN;
- }
-
l2cap_do_send(chan, skb);
return len;
}
@@ -2533,14 +2560,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
if (IS_ERR(skb))
return PTR_ERR(skb);
- /* Channel lock is released before requesting new skb and then
- * reacquired thus we need to recheck channel state.
- */
- if (chan->state != BT_CONNECTED) {
- kfree_skb(skb);
- return -ENOTCONN;
- }
-
l2cap_do_send(chan, skb);
err = len;
break;
@@ -2561,14 +2580,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
*/
err = l2cap_segment_sdu(chan, &seg_queue, msg, len);
- /* The channel could have been closed while segmenting,
- * check that it is still connected.
- */
- if (chan->state != BT_CONNECTED) {
- __skb_queue_purge(&seg_queue);
- err = -ENOTCONN;
- }
-
if (err)
break;
@@ -3125,10 +3136,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan)
chan->rx_state = L2CAP_RX_STATE_RECV;
chan->tx_state = L2CAP_TX_STATE_XMIT;
- INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout);
- INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout);
- INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout);
-
skb_queue_head_init(&chan->srej_q);
err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win);
@@ -3520,7 +3527,8 @@ done:
l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
sizeof(rfc), (unsigned long) &rfc, endptr - ptr);
- if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
+ if (remote_efs &&
+ test_bit(FLAG_EFS_ENABLE, &chan->flags)) {
chan->remote_id = efs.id;
chan->remote_stype = efs.stype;
chan->remote_msdu = le16_to_cpu(efs.msdu);
@@ -3999,6 +4007,10 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
result = __le16_to_cpu(rsp->result);
status = __le16_to_cpu(rsp->status);
+ if (result == L2CAP_CR_SUCCESS && (dcid < L2CAP_CID_DYN_START ||
+ dcid > L2CAP_CID_DYN_END))
+ return -EPROTO;
+
BT_DBG("dcid 0x%4.4x scid 0x%4.4x result 0x%2.2x status 0x%2.2x",
dcid, scid, result, status);
@@ -4018,12 +4030,23 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
}
}
+ chan = l2cap_chan_hold_unless_zero(chan);
+ if (!chan) {
+ err = -EBADSLT;
+ goto unlock;
+ }
+
err = 0;
l2cap_chan_lock(chan);
switch (result) {
case L2CAP_CR_SUCCESS:
+ if (__l2cap_get_chan_by_dcid(conn, dcid)) {
+ err = -EBADSLT;
+ break;
+ }
+
l2cap_state_change(chan, BT_CONFIG);
chan->ident = 0;
chan->dcid = dcid;
@@ -4047,6 +4070,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn,
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
unlock:
mutex_unlock(&conn->chan_lock);
@@ -4154,7 +4178,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
chan->ident = cmd->ident;
l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp);
- chan->num_conf_rsp++;
+ if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP)
+ chan->num_conf_rsp++;
/* Reset config buffer. */
chan->conf_len = 0;
@@ -4200,6 +4225,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn,
unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -4312,6 +4338,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn,
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
@@ -4332,33 +4359,29 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn,
BT_DBG("scid 0x%4.4x dcid 0x%4.4x", scid, dcid);
- mutex_lock(&conn->chan_lock);
-
- chan = __l2cap_get_chan_by_scid(conn, dcid);
+ chan = l2cap_get_chan_by_scid(conn, dcid);
if (!chan) {
- mutex_unlock(&conn->chan_lock);
cmd_reject_invalid_cid(conn, cmd->ident, dcid, scid);
return 0;
}
- l2cap_chan_hold(chan);
- l2cap_chan_lock(chan);
-
rsp.dcid = cpu_to_le16(chan->scid);
rsp.scid = cpu_to_le16(chan->dcid);
l2cap_send_cmd(conn, cmd->ident, L2CAP_DISCONN_RSP, sizeof(rsp), &rsp);
chan->ops->set_shutdown(chan);
+ l2cap_chan_unlock(chan);
+ mutex_lock(&conn->chan_lock);
+ l2cap_chan_lock(chan);
l2cap_chan_del(chan, ECONNRESET);
+ mutex_unlock(&conn->chan_lock);
chan->ops->close(chan);
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
- mutex_unlock(&conn->chan_lock);
-
return 0;
}
@@ -4378,33 +4401,28 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn,
BT_DBG("dcid 0x%4.4x scid 0x%4.4x", dcid, scid);
- mutex_lock(&conn->chan_lock);
-
- chan = __l2cap_get_chan_by_scid(conn, scid);
+ chan = l2cap_get_chan_by_scid(conn, scid);
if (!chan) {
- mutex_unlock(&conn->chan_lock);
return 0;
}
- l2cap_chan_hold(chan);
- l2cap_chan_lock(chan);
-
if (chan->state != BT_DISCONN) {
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
- mutex_unlock(&conn->chan_lock);
return 0;
}
+ l2cap_chan_unlock(chan);
+ mutex_lock(&conn->chan_lock);
+ l2cap_chan_lock(chan);
l2cap_chan_del(chan, 0);
+ mutex_unlock(&conn->chan_lock);
chan->ops->close(chan);
l2cap_chan_unlock(chan);
l2cap_chan_put(chan);
- mutex_unlock(&conn->chan_lock);
-
return 0;
}
@@ -5040,6 +5058,7 @@ send_move_response:
l2cap_send_move_chan_rsp(chan, result);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5132,6 +5151,7 @@ static void l2cap_move_continue(struct l2cap_conn *conn, u16 icid, u16 result)
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
@@ -5161,6 +5181,7 @@ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
@@ -5224,6 +5245,7 @@ static int l2cap_move_channel_confirm(struct l2cap_conn *conn,
l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5259,6 +5281,7 @@ static inline int l2cap_move_channel_confirm_rsp(struct l2cap_conn *conn,
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5507,6 +5530,19 @@ static int l2cap_le_connect_req(struct l2cap_conn *conn,
BT_DBG("psm 0x%2.2x scid 0x%4.4x mtu %u mps %u", __le16_to_cpu(psm),
scid, mtu, mps);
+ /* BLUETOOTH CORE SPECIFICATION Version 5.3 | Vol 3, Part A
+ * page 1059:
+ *
+ * Valid range: 0x0001-0x00ff
+ *
+ * Table 4.15: L2CAP_LE_CREDIT_BASED_CONNECTION_REQ SPSM ranges
+ */
+ if (!psm || __le16_to_cpu(psm) > L2CAP_PSM_LE_DYN_END) {
+ result = L2CAP_CR_BAD_PSM;
+ chan = NULL;
+ goto response;
+ }
+
/* Check if we have socket listening on psm */
pchan = l2cap_global_chan_by_psm(BT_LISTEN, psm, &conn->hcon->src,
&conn->hcon->dst, LE_LINK);
@@ -5631,12 +5667,11 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (credits > max_credits) {
BT_ERR("LE credits overflow");
l2cap_send_disconn_req(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
/* Return 0 so that we don't trigger an unnecessary
* command reject packet.
*/
- return 0;
+ goto unlock;
}
chan->tx_credits += credits;
@@ -5647,7 +5682,9 @@ static inline int l2cap_le_credits(struct l2cap_conn *conn,
if (chan->tx_credits)
chan->ops->resume(chan);
+unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
@@ -5668,9 +5705,14 @@ static inline int l2cap_le_command_rej(struct l2cap_conn *conn,
if (!chan)
goto done;
+ chan = l2cap_chan_hold_unless_zero(chan);
+ if (!chan)
+ goto done;
+
l2cap_chan_lock(chan);
l2cap_chan_del(chan, ECONNREFUSED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
done:
mutex_unlock(&conn->chan_lock);
@@ -6209,6 +6251,7 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan,
struct l2cap_ctrl *control,
struct sk_buff *skb, u8 event)
{
+ struct l2cap_ctrl local_control;
int err = 0;
bool skb_in_use = false;
@@ -6233,15 +6276,32 @@ static int l2cap_rx_state_recv(struct l2cap_chan *chan,
chan->buffer_seq = chan->expected_tx_seq;
skb_in_use = true;
+ /* l2cap_reassemble_sdu may free skb, hence invalidate
+ * control, so make a copy in advance to use it after
+ * l2cap_reassemble_sdu returns and to avoid the race
+ * condition, for example:
+ *
+ * The current thread calls:
+ * l2cap_reassemble_sdu
+ * chan->ops->recv == l2cap_sock_recv_cb
+ * __sock_queue_rcv_skb
+ * Another thread calls:
+ * bt_sock_recvmsg
+ * skb_recv_datagram
+ * skb_free_datagram
+ * Then the current thread tries to access control, but
+ * it was freed by skb_free_datagram.
+ */
+ local_control = *control;
err = l2cap_reassemble_sdu(chan, skb, control);
if (err)
break;
- if (control->final) {
+ if (local_control.final) {
if (!test_and_clear_bit(CONN_REJ_ACT,
&chan->conn_state)) {
- control->final = 0;
- l2cap_retransmit_all(chan, control);
+ local_control.final = 0;
+ l2cap_retransmit_all(chan, &local_control);
l2cap_ertm_send(chan);
}
}
@@ -6621,11 +6681,27 @@ static int l2cap_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
struct sk_buff *skb)
{
+ /* l2cap_reassemble_sdu may free skb, hence invalidate control, so store
+ * the txseq field in advance to use it after l2cap_reassemble_sdu
+ * returns and to avoid the race condition, for example:
+ *
+ * The current thread calls:
+ * l2cap_reassemble_sdu
+ * chan->ops->recv == l2cap_sock_recv_cb
+ * __sock_queue_rcv_skb
+ * Another thread calls:
+ * bt_sock_recvmsg
+ * skb_recv_datagram
+ * skb_free_datagram
+ * Then the current thread tries to access control, but it was freed by
+ * skb_free_datagram.
+ */
+ u16 txseq = control->txseq;
+
BT_DBG("chan %p, control %p, skb %p, state %d", chan, control, skb,
chan->rx_state);
- if (l2cap_classify_txseq(chan, control->txseq) ==
- L2CAP_TXSEQ_EXPECTED) {
+ if (l2cap_classify_txseq(chan, txseq) == L2CAP_TXSEQ_EXPECTED) {
l2cap_pass_to_tx(chan, control);
BT_DBG("buffer_seq %d->%d", chan->buffer_seq,
@@ -6648,8 +6724,8 @@ static int l2cap_stream_rx(struct l2cap_chan *chan, struct l2cap_ctrl *control,
}
}
- chan->last_acked_seq = control->txseq;
- chan->expected_tx_seq = __next_seq(chan, control->txseq);
+ chan->last_acked_seq = txseq;
+ chan->expected_tx_seq = __next_seq(chan, txseq);
return 0;
}
@@ -6683,9 +6759,10 @@ static int l2cap_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb)
goto drop;
}
- if ((chan->mode == L2CAP_MODE_ERTM ||
- chan->mode == L2CAP_MODE_STREAMING) && sk_filter(chan->data, skb))
- goto drop;
+ if (chan->ops->filter) {
+ if (chan->ops->filter(chan, skb))
+ goto drop;
+ }
if (!control->sframe) {
int err;
@@ -6886,6 +6963,7 @@ static void l2cap_data_channel(struct l2cap_conn *conn, u16 cid,
return;
}
+ l2cap_chan_hold(chan);
l2cap_chan_lock(chan);
} else {
BT_DBG("unknown cid 0x%4.4x", cid);
@@ -6944,6 +7022,7 @@ drop:
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
@@ -7348,7 +7427,7 @@ static struct l2cap_chan *l2cap_global_fixed_chan(struct l2cap_chan *c,
if (src_type != c->src_type)
continue;
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
read_unlock(&chan_list_lock);
return c;
}
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 5572042f0453..b831e5fe3ebc 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -45,6 +45,7 @@ static const struct proto_ops l2cap_sock_ops;
static void l2cap_sock_init(struct sock *sk, struct sock *parent);
static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock,
int proto, gfp_t prio, int kern);
+static void l2cap_sock_cleanup_listen(struct sock *parent);
bool l2cap_is_socket(struct socket *sock)
{
@@ -179,9 +180,17 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr,
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct sockaddr_l2 la;
int len, err = 0;
+ bool zapped;
BT_DBG("sk %p", sk);
+ lock_sock(sk);
+ zapped = sock_flag(sk, SOCK_ZAPPED);
+ release_sock(sk);
+
+ if (zapped)
+ return -EINVAL;
+
if (!addr || alen < offsetofend(struct sockaddr, sa_family) ||
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
@@ -1197,6 +1206,7 @@ static int l2cap_sock_release(struct socket *sock)
if (!sk)
return 0;
+ l2cap_sock_cleanup_listen(sk);
bt_sock_unlink(&l2cap_sk_list, sk);
err = l2cap_sock_shutdown(sock, 2);
@@ -1320,6 +1330,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan)
{
struct sock *sk = chan->data;
+ if (!sk)
+ return;
+
l2cap_sock_kill(sk);
}
@@ -1328,6 +1341,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
struct sock *sk = chan->data;
struct sock *parent;
+ if (!sk)
+ return;
+
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
/* This callback can be called both for server (BT_LISTEN)
@@ -1341,8 +1357,6 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
parent = bt_sk(sk)->parent;
- sock_set_flag(sk, SOCK_ZAPPED);
-
switch (chan->state) {
case BT_OPEN:
case BT_BOUND:
@@ -1369,8 +1383,11 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
break;
}
-
release_sock(sk);
+
+ /* Only zap after cleanup to avoid use after free race */
+ sock_set_flag(sk, SOCK_ZAPPED);
+
}
static void l2cap_sock_state_change_cb(struct l2cap_chan *chan, int state,
@@ -1399,6 +1416,14 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
if (!skb)
return ERR_PTR(err);
+ /* Channel lock is released before requesting new skb and then
+ * reacquired thus we need to recheck channel state.
+ */
+ if (chan->state != BT_CONNECTED) {
+ kfree_skb(skb);
+ return ERR_PTR(-ENOTCONN);
+ }
+
skb->priority = sk->sk_priority;
bt_cb(skb)->l2cap.chan = chan;
@@ -1476,6 +1501,19 @@ static void l2cap_sock_suspend_cb(struct l2cap_chan *chan)
sk->sk_state_change(sk);
}
+static int l2cap_sock_filter(struct l2cap_chan *chan, struct sk_buff *skb)
+{
+ struct sock *sk = chan->data;
+
+ switch (chan->mode) {
+ case L2CAP_MODE_ERTM:
+ case L2CAP_MODE_STREAMING:
+ return sk_filter(sk, skb);
+ }
+
+ return 0;
+}
+
static const struct l2cap_ops l2cap_chan_ops = {
.name = "L2CAP Socket Interface",
.new_connection = l2cap_sock_new_connection_cb,
@@ -1490,14 +1528,17 @@ static const struct l2cap_ops l2cap_chan_ops = {
.set_shutdown = l2cap_sock_set_shutdown_cb,
.get_sndtimeo = l2cap_sock_get_sndtimeo_cb,
.alloc_skb = l2cap_sock_alloc_skb_cb,
+ .filter = l2cap_sock_filter,
};
static void l2cap_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
- if (l2cap_pi(sk)->chan)
+ if (l2cap_pi(sk)->chan) {
+ l2cap_pi(sk)->chan->data = NULL;
l2cap_chan_put(l2cap_pi(sk)->chan);
+ }
if (l2cap_pi(sk)->rx_busy_skb) {
kfree_skb(l2cap_pi(sk)->rx_busy_skb);
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index ccce954f8146..d0ec0e336909 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -219,12 +219,15 @@ static u8 mgmt_status_table[] = {
MGMT_STATUS_TIMEOUT, /* Instant Passed */
MGMT_STATUS_NOT_SUPPORTED, /* Pairing Not Supported */
MGMT_STATUS_FAILED, /* Transaction Collision */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_INVALID_PARAMS, /* Unacceptable Parameter */
MGMT_STATUS_REJECTED, /* QoS Rejected */
MGMT_STATUS_NOT_SUPPORTED, /* Classification Not Supported */
MGMT_STATUS_REJECTED, /* Insufficient Security */
MGMT_STATUS_INVALID_PARAMS, /* Parameter Out Of Range */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_BUSY, /* Role Switch Pending */
+ MGMT_STATUS_FAILED, /* Reserved for future use */
MGMT_STATUS_FAILED, /* Slot Violation */
MGMT_STATUS_FAILED, /* Role Switch Failed */
MGMT_STATUS_INVALID_PARAMS, /* EIR Too Large */
@@ -756,7 +759,8 @@ static u32 get_supported_settings(struct hci_dev *hdev)
if (lmp_ssp_capable(hdev)) {
settings |= MGMT_SETTING_SSP;
- settings |= MGMT_SETTING_HS;
+ if (IS_ENABLED(CONFIG_BT_HS))
+ settings |= MGMT_SETTING_HS;
}
if (lmp_sc_capable(hdev))
@@ -1771,6 +1775,10 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
BT_DBG("request for %s", hdev->name);
+ if (!IS_ENABLED(CONFIG_BT_HS))
+ return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS,
+ MGMT_STATUS_NOT_SUPPORTED);
+
status = mgmt_bredr_support(hdev);
if (status)
return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_HS, status);
@@ -6463,6 +6471,9 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data,
for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) {
cur_len = data[i];
+ if (!cur_len)
+ continue;
+
if (data[i + 1] == EIR_FLAGS &&
(!is_adv_data || flags_managed(adv_flags)))
return false;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index b98225d65e87..86edf512d497 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -552,22 +552,58 @@ struct rfcomm_dlc *rfcomm_dlc_exists(bdaddr_t *src, bdaddr_t *dst, u8 channel)
return dlc;
}
+static int rfcomm_dlc_send_frag(struct rfcomm_dlc *d, struct sk_buff *frag)
+{
+ int len = frag->len;
+
+ BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
+
+ if (len > d->mtu)
+ return -EINVAL;
+
+ rfcomm_make_uih(frag, d->addr);
+ __skb_queue_tail(&d->tx_queue, frag);
+
+ return len;
+}
+
int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
{
- int len = skb->len;
+ unsigned long flags;
+ struct sk_buff *frag, *next;
+ int len;
if (d->state != BT_CONNECTED)
return -ENOTCONN;
- BT_DBG("dlc %p mtu %d len %d", d, d->mtu, len);
+ frag = skb_shinfo(skb)->frag_list;
+ skb_shinfo(skb)->frag_list = NULL;
- if (len > d->mtu)
- return -EINVAL;
+ /* Queue all fragments atomically. */
+ spin_lock_irqsave(&d->tx_queue.lock, flags);
- rfcomm_make_uih(skb, d->addr);
- skb_queue_tail(&d->tx_queue, skb);
+ len = rfcomm_dlc_send_frag(d, skb);
+ if (len < 0 || !frag)
+ goto unlock;
+
+ for (; frag; frag = next) {
+ int ret;
+
+ next = frag->next;
+
+ ret = rfcomm_dlc_send_frag(d, frag);
+ if (ret < 0) {
+ dev_kfree_skb_irq(frag);
+ goto unlock;
+ }
+
+ len += ret;
+ }
+
+unlock:
+ spin_unlock_irqrestore(&d->tx_queue.lock, flags);
- if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
+ if (len > 0 && !test_bit(RFCOMM_TX_THROTTLED, &d->flags))
rfcomm_schedule();
return len;
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c044ff2f73e6..78830efe89d7 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -578,46 +578,20 @@ static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
sent = bt_sock_wait_ready(sk, msg->msg_flags);
- if (sent)
- goto done;
-
- while (len) {
- size_t size = min_t(size_t, len, d->mtu);
- int err;
-
- skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE,
- msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb) {
- if (sent == 0)
- sent = err;
- break;
- }
- skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
-
- err = memcpy_from_msg(skb_put(skb, size), msg, size);
- if (err) {
- kfree_skb(skb);
- if (sent == 0)
- sent = err;
- break;
- }
- skb->priority = sk->sk_priority;
+ release_sock(sk);
- err = rfcomm_dlc_send(d, skb);
- if (err < 0) {
- kfree_skb(skb);
- if (sent == 0)
- sent = err;
- break;
- }
+ if (sent)
+ return sent;
- sent += size;
- len -= size;
- }
+ skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE,
+ RFCOMM_SKB_TAIL_RESERVE);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
-done:
- release_sock(sk);
+ sent = rfcomm_dlc_send(d, skb);
+ if (sent < 0)
+ kfree_skb(skb);
return sent;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index a4ca55df7390..ee321d62b7d6 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -48,6 +48,8 @@ struct sco_conn {
spinlock_t lock;
struct sock *sk;
+ struct delayed_work timeout_work;
+
unsigned int mtu;
};
@@ -73,9 +75,20 @@ struct sco_pinfo {
#define SCO_CONN_TIMEOUT (HZ * 40)
#define SCO_DISCONN_TIMEOUT (HZ * 2)
-static void sco_sock_timeout(struct timer_list *t)
+static void sco_sock_timeout(struct work_struct *work)
{
- struct sock *sk = from_timer(sk, t, sk_timer);
+ struct sco_conn *conn = container_of(work, struct sco_conn,
+ timeout_work.work);
+ struct sock *sk;
+
+ sco_conn_lock(conn);
+ sk = conn->sk;
+ if (sk)
+ sock_hold(sk);
+ sco_conn_unlock(conn);
+
+ if (!sk)
+ return;
BT_DBG("sock %p state %d", sk, sk->sk_state);
@@ -84,20 +97,26 @@ static void sco_sock_timeout(struct timer_list *t)
sk->sk_state_change(sk);
bh_unlock_sock(sk);
- sco_sock_kill(sk);
sock_put(sk);
}
static void sco_sock_set_timer(struct sock *sk, long timeout)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
+ schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout);
}
static void sco_sock_clear_timer(struct sock *sk)
{
+ if (!sco_pi(sk)->conn)
+ return;
+
BT_DBG("sock %p state %d", sk, sk->sk_state);
- sk_stop_timer(sk, &sk->sk_timer);
+ cancel_delayed_work(&sco_pi(sk)->conn->timeout_work);
}
/* ---- SCO connections ---- */
@@ -114,6 +133,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
return NULL;
spin_lock_init(&conn->lock);
+ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout);
hcon->sco_data = conn;
conn->hcon = hcon;
@@ -176,10 +196,12 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
sco_sock_clear_timer(sk);
sco_chan_del(sk, err);
bh_unlock_sock(sk);
- sco_sock_kill(sk);
sock_put(sk);
}
+ /* Ensure no more work items will run before freeing conn. */
+ cancel_delayed_work_sync(&conn->timeout_work);
+
hcon->sco_data = NULL;
kfree(conn);
}
@@ -211,44 +233,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk,
return err;
}
-static int sco_connect(struct sock *sk)
+static int sco_connect(struct hci_dev *hdev, struct sock *sk)
{
struct sco_conn *conn;
struct hci_conn *hcon;
- struct hci_dev *hdev;
int err, type;
BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
- hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR);
- if (!hdev)
- return -EHOSTUNREACH;
-
- hci_dev_lock(hdev);
-
if (lmp_esco_capable(hdev) && !disable_esco)
type = ESCO_LINK;
else
type = SCO_LINK;
if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT &&
- (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) {
- err = -EOPNOTSUPP;
- goto done;
- }
+ (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev)))
+ return -EOPNOTSUPP;
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst,
sco_pi(sk)->setting);
- if (IS_ERR(hcon)) {
- err = PTR_ERR(hcon);
- goto done;
- }
+ if (IS_ERR(hcon))
+ return PTR_ERR(hcon);
conn = sco_conn_add(hcon);
if (!conn) {
hci_conn_drop(hcon);
- err = -ENOMEM;
- goto done;
+ return -ENOMEM;
}
/* Update source addr of the socket */
@@ -256,7 +266,7 @@ static int sco_connect(struct sock *sk)
err = sco_chan_add(conn, sk, NULL);
if (err)
- goto done;
+ return err;
if (hcon->state == BT_CONNECTED) {
sco_sock_clear_timer(sk);
@@ -266,17 +276,13 @@ static int sco_connect(struct sock *sk)
sco_sock_set_timer(sk, sk->sk_sndtimeo);
}
-done:
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
return err;
}
-static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
+static int sco_send_frame(struct sock *sk, struct sk_buff *skb)
{
struct sco_conn *conn = sco_pi(sk)->conn;
- struct sk_buff *skb;
- int err;
+ int len = skb->len;
/* Check outgoing MTU */
if (len > conn->mtu)
@@ -284,15 +290,6 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
BT_DBG("sk %p len %d", sk, len);
- skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
- if (!skb)
- return err;
-
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- kfree_skb(skb);
- return -EFAULT;
- }
-
hci_send_sco(conn->hcon, skb);
return len;
@@ -393,8 +390,7 @@ static void sco_sock_cleanup_listen(struct sock *parent)
*/
static void sco_sock_kill(struct sock *sk)
{
- if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket ||
- sock_flag(sk, SOCK_DEAD))
+ if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket)
return;
BT_DBG("sk %p state %d", sk, sk->sk_state);
@@ -446,7 +442,6 @@ static void sco_sock_close(struct sock *sk)
lock_sock(sk);
__sco_sock_close(sk);
release_sock(sk);
- sco_sock_kill(sk);
}
static void sco_sock_init(struct sock *sk, struct sock *parent)
@@ -488,8 +483,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
- timer_setup(&sk->sk_timer, sco_sock_timeout, 0);
-
bt_sock_link(&sco_sk_list, sk);
return sk;
}
@@ -554,6 +547,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
{
struct sockaddr_sco *sa = (struct sockaddr_sco *) addr;
struct sock *sk = sock->sk;
+ struct hci_dev *hdev;
int err;
BT_DBG("sk %p", sk);
@@ -562,18 +556,30 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
- if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND)
- return -EBADFD;
+ lock_sock(sk);
+ if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
+ err = -EBADFD;
+ goto done;
+ }
- if (sk->sk_type != SOCK_SEQPACKET)
- return -EINVAL;
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
- lock_sock(sk);
+ hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR);
+ if (!hdev) {
+ err = -EHOSTUNREACH;
+ goto done;
+ }
+ hci_dev_lock(hdev);
/* Set destination address and psm */
bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
- err = sco_connect(sk);
+ err = sco_connect(hdev, sk);
+ hci_dev_unlock(hdev);
+ hci_dev_put(hdev);
if (err)
goto done;
@@ -702,6 +708,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
+ struct sk_buff *skb;
int err;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -713,14 +720,21 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
+ skb = bt_skb_sendmsg(sk, msg, len, len, 0, 0);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
lock_sock(sk);
if (sk->sk_state == BT_CONNECTED)
- err = sco_send_frame(sk, msg, len);
+ err = sco_send_frame(sk, skb);
else
err = -ENOTCONN;
release_sock(sk);
+
+ if (err < 0)
+ kfree_skb(skb);
return err;
}
@@ -761,6 +775,11 @@ static void sco_conn_defer_accept(struct hci_conn *conn, u16 setting)
cp.max_latency = cpu_to_le16(0xffff);
cp.retrans_effort = 0xff;
break;
+ default:
+ /* use CVSD settings as fallback */
+ cp.max_latency = cpu_to_le16(0xffff);
+ cp.retrans_effort = 0xff;
+ break;
}
hci_send_cmd(hdev, HCI_OP_ACCEPT_SYNC_CONN_REQ,
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index cc2f7ca91ccd..719ae1dff7b4 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2703,6 +2703,15 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb)
if (skb->len < sizeof(*key))
return SMP_INVALID_PARAMS;
+ /* Check if remote and local public keys are the same and debug key is
+ * not in use.
+ */
+ if (!test_bit(SMP_FLAG_DEBUG_KEY, &smp->flags) &&
+ !crypto_memneq(key, smp->local_pk, 64)) {
+ bt_dev_err(hdev, "Remote and local public keys are identical");
+ return SMP_UNSPECIFIED;
+ }
+
memcpy(smp->remote_pk, key, 64);
if (test_bit(SMP_FLAG_REMOTE_OOB, &smp->flags)) {
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f4078830ea50..e0c6dfae42d8 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -87,6 +87,7 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
return ERR_PTR(-EINVAL);
+ size = SKB_DATA_ALIGN(size);
data = kzalloc(size + headroom + tailroom, GFP_USER);
if (!data)
return ERR_PTR(-ENOMEM);
diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index eb44ae05abaa..b52e70362268 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -158,7 +158,9 @@ void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br,
if (br->neigh_suppress_enabled) {
if (p && (p->flags & BR_NEIGH_SUPPRESS))
return;
- if (ipv4_is_zeronet(sip) || sip == tip) {
+ if (parp->ar_op != htons(ARPOP_RREQUEST) &&
+ parp->ar_op != htons(ARPOP_RREPLY) &&
+ (ipv4_is_zeronet(sip) || sip == tip)) {
/* prevent flooding to neigh suppress ports */
BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
return;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 9ce661e2590d..9475e0443ff9 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,9 +39,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
const struct nf_br_ops *nf_ops;
const unsigned char *dest;
- struct ethhdr *eth;
u16 vid = 0;
+ memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+
rcu_read_lock();
nf_ops = rcu_dereference(nf_br_ops);
if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) {
@@ -58,15 +59,14 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
- eth = eth_hdr(skb);
skb_pull(skb, ETH_HLEN);
if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
if (IS_ENABLED(CONFIG_INET) &&
- (eth->h_proto == htons(ETH_P_ARP) ||
- eth->h_proto == htons(ETH_P_RARP)) &&
+ (eth_hdr(skb)->h_proto == htons(ETH_P_ARP) ||
+ eth_hdr(skb)->h_proto == htons(ETH_P_RARP)) &&
br->neigh_suppress_enabled) {
br_do_proxy_suppress_arp(skb, br, vid, NULL);
} else if (IS_ENABLED(CONFIG_IPV6) &&
@@ -215,6 +215,7 @@ static void br_get_stats64(struct net_device *dev,
sum.rx_packets += tmp.rx_packets;
}
+ netdev_stats_to_stats64(stats, &dev->stats);
stats->tx_bytes = sum.tx_bytes;
stats->tx_packets = sum.tx_packets;
stats->rx_bytes = sum.rx_bytes;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 48ddc60b4fbd..c07a47d65c39 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -122,7 +122,7 @@ static int deliver_clone(const struct net_bridge_port *prev,
skb = skb_clone(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return -ENOMEM;
}
@@ -261,7 +261,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
skb = skb_copy(skb, GFP_ATOMIC);
if (!skb) {
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return;
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ed2b6002ae53..ab539551b7d3 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -161,8 +161,9 @@ void br_manage_promisc(struct net_bridge *br)
* This lets us disable promiscuous mode and write
* this config to hw.
*/
- if (br->auto_cnt == 0 ||
- (br->auto_cnt == 1 && br_auto_port(p)))
+ if ((p->dev->priv_flags & IFF_UNICAST_FLT) &&
+ (br->auto_cnt == 0 ||
+ (br->auto_cnt == 1 && br_auto_port(p))))
br_port_clear_promisc(p);
else
br_port_set_promisc(p);
@@ -564,7 +565,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
struct net_bridge_port *p;
int err = 0;
unsigned br_hr, dev_hr;
- bool changed_addr;
+ bool changed_addr, fdb_synced = false;
/* Don't allow bridging non-ethernet like devices, or DSA-enabled
* master network devices since the bridge layer rx_handler prevents
@@ -604,6 +605,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
err = dev_set_allmulti(dev, 1);
if (err) {
+ br_multicast_del_port(p);
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}
@@ -640,6 +642,19 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
list_add_rcu(&p->list, &br->port_list);
nbp_update_port_count(br);
+ if (!br_promisc_port(p) && (p->dev->priv_flags & IFF_UNICAST_FLT)) {
+ /* When updating the port count we also update all ports'
+ * promiscuous mode.
+ * A port leaving promiscuous mode normally gets the bridge's
+ * fdb synced to the unicast filter (if supported), however,
+ * `br_port_clear_promisc` does not distinguish between
+ * non-promiscuous ports and *new* ports, so we need to
+ * sync explicitly here.
+ */
+ fdb_synced = br_fdb_sync_static(br, p) == 0;
+ if (!fdb_synced)
+ netdev_err(dev, "failed to sync bridge static fdb addresses to this port\n");
+ }
netdev_update_features(br->dev);
@@ -680,6 +695,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
return 0;
err7:
+ if (fdb_synced)
+ br_fdb_unsync_static(br, p);
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
@@ -693,6 +710,7 @@ err4:
err3:
sysfs_remove_link(br->ifobj, p->dev->name);
err2:
+ br_multicast_del_port(p);
kobject_put(&p->kobj);
dev_set_allmulti(dev, -1);
err1:
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 2532c1a19645..f3938337ff87 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -47,6 +47,13 @@ static int br_pass_frame_up(struct sk_buff *skb)
u64_stats_update_end(&brstats->syncp);
vg = br_vlan_group_rcu(br);
+
+ /* Reset the offload_fwd_mark because there could be a stacked
+ * bridge above, and it should not think this bridge it doing
+ * that bridge's work forwarding out its ports.
+ */
+ br_switchdev_frame_unmark(skb);
+
/* Bridge is just like any other port. Make sure the
* packet is allowed except in promisc modue when someone
* may be running packet capture.
@@ -139,12 +146,12 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
if ((mdst && mdst->host_joined) ||
br_multicast_is_router(br)) {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
mcast_hit = true;
} else {
local_rcv = true;
- br->dev->stats.multicast++;
+ DEV_STATS_INC(br->dev, multicast);
}
break;
case BR_PKT_UNICAST:
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6a362da211e1..3504c3acd38f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1791,7 +1791,9 @@ static void br_multicast_pim(struct net_bridge *br,
pim_hdr_type(pimhdr) != PIM_TYPE_HELLO)
return;
+ spin_lock(&br->multicast_lock);
br_multicast_mark_router(br, port);
+ spin_unlock(&br->multicast_lock);
}
static int br_multicast_ipv4_rcv(struct net_bridge *br,
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index ccab290c14d4..35642dc96852 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -385,6 +385,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
/* - Bridged-and-DNAT'ed traffic doesn't
* require ip_forwarding. */
if (rt->dst.dev == dev) {
+ skb_dst_drop(skb);
skb_dst_set(skb, &rt->dst);
goto bridged_dnat;
}
@@ -414,6 +415,7 @@ bridged_dnat:
kfree_skb(skb);
return 0;
}
+ skb_dst_drop(skb);
skb_dst_set_noref(skb, &rt->dst);
}
@@ -719,9 +721,17 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
mtu_reserved = nf_bridge_mtu_reduction(skb);
mtu = skb->dev->mtu;
+ if (nf_bridge->pkt_otherhost) {
+ skb->pkt_type = PACKET_OTHERHOST;
+ nf_bridge->pkt_otherhost = false;
+ }
+
if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu)
mtu = nf_bridge->frag_max_size;
+ nf_bridge_update_protocol(skb);
+ nf_bridge_push_encap_header(skb);
+
if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) {
nf_bridge_info_free(skb);
return br_dev_queue_push_xmit(net, sk, skb);
@@ -739,8 +749,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
- nf_bridge_update_protocol(skb);
-
data = this_cpu_ptr(&brnf_frag_data_storage);
data->vlan_tci = skb->vlan_tci;
@@ -763,8 +771,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
- nf_bridge_update_protocol(skb);
-
data = this_cpu_ptr(&brnf_frag_data_storage);
data->encap_size = nf_bridge_encap_header_len(skb);
data->size = ETH_HLEN + data->encap_size;
@@ -812,8 +818,6 @@ static unsigned int br_nf_post_routing(void *priv,
else
return NF_ACCEPT;
- /* We assume any code from br_dev_queue_push_xmit onwards doesn't care
- * about the value of skb->pkt_type. */
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
nf_bridge->pkt_otherhost = true;
@@ -999,9 +1003,24 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e);
- for (i = 0; i < e->num_hook_entries &&
- ops[i]->priority <= NF_BR_PRI_BRNF; i++)
- ;
+ for (i = 0; i < e->num_hook_entries; i++) {
+ /* These hooks have already been called */
+ if (ops[i]->priority < NF_BR_PRI_BRNF)
+ continue;
+
+ /* These hooks have not been called yet, run them. */
+ if (ops[i]->priority > NF_BR_PRI_BRNF)
+ break;
+
+ /* take a closer look at NF_BR_PRI_BRNF. */
+ if (ops[i]->hook == br_nf_pre_routing) {
+ /* This hook diverted the skb to this function,
+ * hooks after this have not been run yet.
+ */
+ i++;
+ break;
+ }
+ }
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 09d5e0c7b3ba..995d86777e7c 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -201,6 +201,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
kfree_skb(skb);
return 0;
}
+ skb_dst_drop(skb);
skb_dst_set_noref(skb, &rt->dst);
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ec2b58a09f76..c00cb376263a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1511,7 +1511,7 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr)
}
return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) +
- nla_total_size(sizeof(struct br_mcast_stats)) +
+ nla_total_size_64bit(sizeof(struct br_mcast_stats)) +
nla_total_size(0);
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 33b8222db75c..7ca3b469242e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -100,8 +100,8 @@ struct br_vlan_stats {
};
struct br_tunnel_info {
- __be64 tunnel_id;
- struct metadata_dst *tunnel_dst;
+ __be64 tunnel_id;
+ struct metadata_dst __rcu *tunnel_dst;
};
/**
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 7c87a2fe5248..e9e2a3b1f477 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -59,9 +59,8 @@ static BRPORT_ATTR(_name, 0644, \
static int store_flag(struct net_bridge_port *p, unsigned long v,
unsigned long mask)
{
- unsigned long flags;
-
- flags = p->flags;
+ unsigned long flags = p->flags;
+ int err;
if (v)
flags |= mask;
@@ -69,6 +68,10 @@ static int store_flag(struct net_bridge_port *p, unsigned long v,
flags &= ~mask;
if (flags != p->flags) {
+ err = br_switchdev_set_port_flag(p, flags, mask);
+ if (err)
+ return err;
+
p->flags = flags;
br_port_flags_change(p, mask);
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 5f3950f00f73..a82d0021d461 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -242,8 +242,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
}
masterv = br_vlan_get_master(br, v->vid);
- if (!masterv)
+ if (!masterv) {
+ err = -ENOMEM;
goto out_filt;
+ }
v->brvlan = masterv;
v->stats = masterv->stats;
} else {
diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c
index 6d2c4eed2dc8..adb6845ceba4 100644
--- a/net/bridge/br_vlan_tunnel.c
+++ b/net/bridge/br_vlan_tunnel.c
@@ -46,26 +46,33 @@ static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl,
br_vlan_tunnel_rht_params);
}
+static void vlan_tunnel_info_release(struct net_bridge_vlan *vlan)
+{
+ struct metadata_dst *tdst = rtnl_dereference(vlan->tinfo.tunnel_dst);
+
+ WRITE_ONCE(vlan->tinfo.tunnel_id, 0);
+ RCU_INIT_POINTER(vlan->tinfo.tunnel_dst, NULL);
+ dst_release(&tdst->dst);
+}
+
void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan)
{
- if (!vlan->tinfo.tunnel_dst)
+ if (!rcu_access_pointer(vlan->tinfo.tunnel_dst))
return;
rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode,
br_vlan_tunnel_rht_params);
- vlan->tinfo.tunnel_id = 0;
- dst_release(&vlan->tinfo.tunnel_dst->dst);
- vlan->tinfo.tunnel_dst = NULL;
+ vlan_tunnel_info_release(vlan);
}
static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
struct net_bridge_vlan *vlan, u32 tun_id)
{
- struct metadata_dst *metadata = NULL;
+ struct metadata_dst *metadata = rtnl_dereference(vlan->tinfo.tunnel_dst);
__be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id));
int err;
- if (vlan->tinfo.tunnel_dst)
+ if (metadata)
return -EEXIST;
metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY,
@@ -74,8 +81,8 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
return -EINVAL;
metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE;
- vlan->tinfo.tunnel_dst = metadata;
- vlan->tinfo.tunnel_id = key;
+ rcu_assign_pointer(vlan->tinfo.tunnel_dst, metadata);
+ WRITE_ONCE(vlan->tinfo.tunnel_id, key);
err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode,
br_vlan_tunnel_rht_params);
@@ -84,9 +91,7 @@ static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg,
return 0;
out:
- dst_release(&vlan->tinfo.tunnel_dst->dst);
- vlan->tinfo.tunnel_dst = NULL;
- vlan->tinfo.tunnel_id = 0;
+ vlan_tunnel_info_release(vlan);
return err;
}
@@ -186,12 +191,15 @@ int br_handle_ingress_vlan_tunnel(struct sk_buff *skb,
int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
struct net_bridge_vlan *vlan)
{
+ struct metadata_dst *tunnel_dst;
+ __be64 tunnel_id;
int err;
- if (!vlan || !vlan->tinfo.tunnel_id)
+ if (!vlan)
return 0;
- if (unlikely(!skb_vlan_tag_present(skb)))
+ tunnel_id = READ_ONCE(vlan->tinfo.tunnel_id);
+ if (!tunnel_id || unlikely(!skb_vlan_tag_present(skb)))
return 0;
skb_dst_drop(skb);
@@ -199,7 +207,9 @@ int br_handle_egress_vlan_tunnel(struct sk_buff *skb,
if (err)
return err;
- skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst));
+ tunnel_dst = rcu_dereference(vlan->tinfo.tunnel_dst);
+ if (tunnel_dst && dst_hold_safe(&tunnel_dst->dst))
+ skb_dst_set(skb, &tunnel_dst->dst);
return 0;
}
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 276b60262981..b21c8a317be7 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -33,18 +33,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)&initial_chain,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~(1 << NF_BR_BROUTING))
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table broute_table = {
.name = "broute",
.table = &initial_table,
.valid_hooks = 1 << NF_BR_BROUTING,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 550324c516ee..c71795e4c18c 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -42,18 +42,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~FILTER_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_filter = {
.name = "filter",
.table = &initial_table,
.valid_hooks = FILTER_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index c0fb3ca518af..44dde9e635e2 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -42,18 +42,10 @@ static struct ebt_replace_kernel initial_table = {
.entries = (char *)initial_chains,
};
-static int check(const struct ebt_table_info *info, unsigned int valid_hooks)
-{
- if (valid_hooks & ~NAT_VALID_HOOKS)
- return -EINVAL;
- return 0;
-}
-
static const struct ebt_table frame_nat = {
.name = "nat",
.table = &initial_table,
.valid_hooks = NAT_VALID_HOOKS,
- .check = check,
.me = THIS_MODULE,
};
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f59230e4fc29..59d8974ee92b 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1003,9 +1003,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
goto free_iterate;
}
- /* the table doesn't like it */
- if (t->check && (ret = t->check(newinfo, repl->valid_hooks)))
+ if (repl->valid_hooks != t->valid_hooks) {
+ ret = -EINVAL;
goto free_unlock;
+ }
if (repl->num_counters && repl->num_counters != t->private->nentries) {
ret = -EINVAL;
@@ -1197,11 +1198,6 @@ int ebt_register_table(struct net *net, const struct ebt_table *input_table,
if (ret != 0)
goto free_chainstack;
- if (table->check && table->check(newinfo, table->valid_hooks)) {
- ret = -EINVAL;
- goto free_chainstack;
- }
-
table->private = newinfo;
rwlock_init(&table->lock);
mutex_lock(&ebt_mutex);
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 711d7156efd8..cc305d84168f 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -303,7 +303,7 @@ static void dev_flowctrl(struct net_device *dev, int on)
caifd_put(caifd);
}
-void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
+int caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
struct cflayer *link_support, int head_room,
struct cflayer **layer,
int (**rcv_func)(struct sk_buff *, struct net_device *,
@@ -314,11 +314,12 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
enum cfcnfg_phy_preference pref;
struct cfcnfg *cfg = get_cfcnfg(dev_net(dev));
struct caif_device_entry_list *caifdevs;
+ int res;
caifdevs = caif_device_list(dev_net(dev));
caifd = caif_device_alloc(dev);
if (!caifd)
- return;
+ return -ENOMEM;
*layer = &caifd->layer;
spin_lock_init(&caifd->flow_lock);
@@ -339,7 +340,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
strlcpy(caifd->layer.name, dev->name,
sizeof(caifd->layer.name));
caifd->layer.transmit = transmit;
- cfcnfg_add_phy_layer(cfg,
+ res = cfcnfg_add_phy_layer(cfg,
dev,
&caifd->layer,
pref,
@@ -349,6 +350,7 @@ void caif_enroll_dev(struct net_device *dev, struct caif_dev_common *caifdev,
mutex_unlock(&caifdevs->lock);
if (rcv_func)
*rcv_func = receive;
+ return res;
}
EXPORT_SYMBOL(caif_enroll_dev);
@@ -363,6 +365,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
struct cflayer *layer, *link_support;
int head_room = 0;
struct caif_device_entry_list *caifdevs;
+ int res;
cfg = get_cfcnfg(dev_net(dev));
caifdevs = caif_device_list(dev_net(dev));
@@ -388,8 +391,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
break;
}
}
- caif_enroll_dev(dev, caifdev, link_support, head_room,
+ res = caif_enroll_dev(dev, caifdev, link_support, head_room,
&layer, NULL);
+ if (res)
+ cfserl_release(link_support);
caifdev->flowctrl = dev_flowctrl;
break;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 4b31f0aaa96d..cab0b239f96a 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -539,7 +539,8 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
goto err;
ret = -EINVAL;
- if (unlikely(msg->msg_iter.iov->iov_base == NULL))
+ if (unlikely(msg->msg_iter.nr_segs == 0) ||
+ unlikely(msg->msg_iter.iov->iov_base == NULL))
goto err;
noblock = msg->msg_flags & MSG_DONTWAIT;
@@ -1021,6 +1022,7 @@ static void caif_sock_destructor(struct sock *sk)
return;
}
sk_stream_kill_queues(&cf_sk->sk);
+ WARN_ON(sk->sk_forward_alloc);
caif_free_client(&cf_sk->layer);
}
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index 1a082a946045..609c5793f45a 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -116,6 +116,11 @@ static struct cflayer *cfusbl_create(int phyid, u8 ethaddr[ETH_ALEN],
return (struct cflayer *) this;
}
+static void cfusbl_release(struct cflayer *layer)
+{
+ kfree(layer);
+}
+
static struct packet_type caif_usb_type __read_mostly = {
.type = cpu_to_be16(ETH_P_802_EX1),
};
@@ -128,6 +133,10 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
struct cflayer *layer, *link_support;
struct usbnet *usbnet;
struct usb_device *usbdev;
+ int res;
+
+ if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED)
+ return 0;
/* Check whether we have a NCM device, and find its VID/PID. */
if (!(dev->dev.parent && dev->dev.parent->driver &&
@@ -170,8 +179,11 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
if (dev->num_tx_queues > 1)
pr_warn("USB device uses more than one tx queue\n");
- caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
+ res = caif_enroll_dev(dev, &common, link_support, CFUSB_MAX_HEADLEN,
&layer, &caif_usb_type.func);
+ if (res)
+ goto err;
+
if (!pack_added)
dev_add_pack(&caif_usb_type);
pack_added = true;
@@ -179,6 +191,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
strlcpy(layer->name, dev->name, sizeof(layer->name));
return 0;
+err:
+ cfusbl_release(link_support);
+ return res;
}
static struct notifier_block caif_device_notifier = {
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 8f00bea093b9..b456b79abd3b 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -450,7 +450,7 @@ unlock:
rcu_read_unlock();
}
-void
+int
cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
struct net_device *dev, struct cflayer *phy_layer,
enum cfcnfg_phy_preference pref,
@@ -459,7 +459,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
{
struct cflayer *frml;
struct cfcnfg_phyinfo *phyinfo = NULL;
- int i;
+ int i, res = 0;
u8 phyid;
mutex_lock(&cnfg->lock);
@@ -473,12 +473,15 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg,
goto got_phyid;
}
pr_warn("Too many CAIF Link Layers (max 6)\n");
+ res = -EEXIST;
goto out;
got_phyid:
phyinfo = kzalloc(sizeof(struct cfcnfg_phyinfo), GFP_ATOMIC);
- if (!phyinfo)
+ if (!phyinfo) {
+ res = -ENOMEM;
goto out_err;
+ }
phy_layer->id = phyid;
phyinfo->pref = pref;
@@ -492,8 +495,10 @@ got_phyid:
frml = cffrml_create(phyid, fcs);
- if (!frml)
+ if (!frml) {
+ res = -ENOMEM;
goto out_err;
+ }
phyinfo->frm_layer = frml;
layer_set_up(frml, cnfg->mux);
@@ -511,11 +516,12 @@ got_phyid:
list_add_rcu(&phyinfo->node, &cnfg->phys);
out:
mutex_unlock(&cnfg->lock);
- return;
+ return res;
out_err:
kfree(phyinfo);
mutex_unlock(&cnfg->lock);
+ return res;
}
EXPORT_SYMBOL(cfcnfg_add_phy_layer);
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index a1e85f032108..330cb2b087bb 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
default:
pr_warn("Request setup of bad link type = %d\n",
param->linktype);
+ cfpkt_destroy(pkt);
return -EINVAL;
}
req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
+ if (!req) {
+ cfpkt_destroy(pkt);
return -ENOMEM;
+ }
+
req->client_layer = user_layer;
req->cmd = CFCTRL_CMD_LINK_SETUP;
req->param = *param;
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index ce60f06d76de..af1e1e36dc90 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -31,6 +31,11 @@ static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
int phyid);
+void cfserl_release(struct cflayer *layer)
+{
+ kfree(layer);
+}
+
struct cflayer *cfserl_create(int instance, bool use_stx)
{
struct cfserl *this = kzalloc(sizeof(struct cfserl), GFP_ATOMIC);
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 13e2ae6be620..ece140ad0ac1 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -53,20 +53,6 @@ struct chnl_net {
enum caif_states state;
};
-static void robust_list_del(struct list_head *delete_node)
-{
- struct list_head *list_node;
- struct list_head *n;
- ASSERT_RTNL();
- list_for_each_safe(list_node, n, &chnl_net_list) {
- if (list_node == delete_node) {
- list_del(list_node);
- return;
- }
- }
- WARN_ON(1);
-}
-
static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
{
struct sk_buff *skb;
@@ -328,9 +314,6 @@ static int chnl_net_open(struct net_device *dev)
if (result == 0) {
pr_debug("connect timeout\n");
- caif_disconnect_client(dev_net(dev), &priv->chnl);
- priv->state = CAIF_DISCONNECTED;
- pr_debug("state disconnected\n");
result = -ETIMEDOUT;
goto error;
}
@@ -368,6 +351,7 @@ static int chnl_net_init(struct net_device *dev)
ASSERT_RTNL();
priv = netdev_priv(dev);
strncpy(priv->name, dev->name, sizeof(priv->name));
+ INIT_LIST_HEAD(&priv->list_field);
return 0;
}
@@ -376,7 +360,7 @@ static void chnl_net_uninit(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- robust_list_del(&priv->list_field);
+ list_del_init(&priv->list_field);
}
static const struct net_device_ops netdev_ops = {
@@ -541,7 +525,7 @@ static void __exit chnl_exit_module(void)
rtnl_lock();
list_for_each_safe(list_node, _tmp, &chnl_net_list) {
dev = list_entry(list_node, struct chnl_net, list_field);
- list_del(list_node);
+ list_del_init(list_node);
delete_device(dev);
}
rtnl_unlock();
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 04132b0b5d36..b3edb8092124 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -722,16 +722,25 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU ||
- cfd->len > CAN_MAX_DLEN)) {
- pr_warn_once("PF_CAN: dropped non conform CAN skbuf: dev type %d, len %d, datalen %d\n",
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n",
+ dev->type, skb->len);
+ goto free_skb;
+ }
+
+ /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */
+ if (unlikely(cfd->len > CAN_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n",
dev->type, skb->len, cfd->len);
- kfree_skb(skb);
- return NET_RX_DROP;
+ goto free_skb;
}
can_receive(skb, dev);
return NET_RX_SUCCESS;
+
+free_skb:
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -739,16 +748,25 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev,
{
struct canfd_frame *cfd = (struct canfd_frame *)skb->data;
- if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU ||
- cfd->len > CANFD_MAX_DLEN)) {
- pr_warn_once("PF_CAN: dropped non conform CAN FD skbuf: dev type %d, len %d, datalen %d\n",
+ if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n",
+ dev->type, skb->len);
+ goto free_skb;
+ }
+
+ /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */
+ if (unlikely(cfd->len > CANFD_MAX_DLEN)) {
+ pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n",
dev->type, skb->len, cfd->len);
- kfree_skb(skb);
- return NET_RX_DROP;
+ goto free_skb;
}
can_receive(skb, dev);
return NET_RX_SUCCESS;
+
+free_skb:
+ kfree_skb(skb);
+ return NET_RX_DROP;
}
/*
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 79bb8afa9c0c..1c9953c68f09 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -99,13 +99,13 @@ static inline u64 get_u64(const struct canfd_frame *cp, int offset)
struct bcm_op {
struct list_head list;
+ struct rcu_head rcu;
int ifindex;
canid_t can_id;
u32 flags;
unsigned long frames_abs, frames_filtered;
struct bcm_timeval ival1, ival2;
struct hrtimer timer, thrtimer;
- struct tasklet_struct tsklet, thrtsklet;
ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
int rx_ifindex;
int cfsiz;
@@ -125,7 +125,7 @@ struct bcm_sock {
struct sock sk;
int bound;
int ifindex;
- struct notifier_block notifier;
+ struct list_head notifier;
struct list_head rx_ops;
struct list_head tx_ops;
unsigned long dropped_usr_msgs;
@@ -133,6 +133,10 @@ struct bcm_sock {
char procname [32]; /* inode number in decimal with \0 */
};
+static LIST_HEAD(bcm_notifier_list);
+static DEFINE_SPINLOCK(bcm_notifier_lock);
+static struct bcm_sock *bcm_busy_notifier;
+
static inline struct bcm_sock *bcm_sk(const struct sock *sk)
{
return (struct bcm_sock *)sk;
@@ -269,6 +273,7 @@ static void bcm_can_tx(struct bcm_op *op)
struct sk_buff *skb;
struct net_device *dev;
struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe;
+ int err;
/* no target device? => exit */
if (!op->ifindex)
@@ -293,11 +298,11 @@ static void bcm_can_tx(struct bcm_op *op)
/* send with loopback */
skb->dev = dev;
can_skb_set_owner(skb, op->sk);
- can_send(skb, 1);
+ err = can_send(skb, 1);
+ if (!err)
+ op->frames_abs++;
- /* update statistics */
op->currframe++;
- op->frames_abs++;
/* reached last frame? */
if (op->currframe >= op->nframes)
@@ -370,29 +375,39 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head,
}
}
-static void bcm_tx_start_timer(struct bcm_op *op)
+static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt)
{
+ ktime_t ival;
+
if (op->kt_ival1 && op->count)
- hrtimer_start(&op->timer,
- ktime_add(ktime_get(), op->kt_ival1),
- HRTIMER_MODE_ABS);
+ ival = op->kt_ival1;
else if (op->kt_ival2)
- hrtimer_start(&op->timer,
- ktime_add(ktime_get(), op->kt_ival2),
- HRTIMER_MODE_ABS);
+ ival = op->kt_ival2;
+ else
+ return false;
+
+ hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival));
+ return true;
}
-static void bcm_tx_timeout_tsklet(unsigned long data)
+static void bcm_tx_start_timer(struct bcm_op *op)
{
- struct bcm_op *op = (struct bcm_op *)data;
+ if (bcm_tx_set_expiry(op, &op->timer))
+ hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT);
+}
+
+/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */
+static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
+{
+ struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
struct bcm_msg_head msg_head;
if (op->kt_ival1 && (op->count > 0)) {
-
op->count--;
if (!op->count && (op->flags & TX_COUNTEVT)) {
/* create notification to user */
+ memset(&msg_head, 0, sizeof(msg_head));
msg_head.opcode = TX_EXPIRED;
msg_head.flags = op->flags;
msg_head.count = op->count;
@@ -405,22 +420,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data)
}
bcm_can_tx(op);
- } else if (op->kt_ival2)
+ } else if (op->kt_ival2) {
bcm_can_tx(op);
+ }
- bcm_tx_start_timer(op);
-}
-
-/*
- * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions
- */
-static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer)
-{
- struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
- tasklet_schedule(&op->tsklet);
-
- return HRTIMER_NORESTART;
+ return bcm_tx_set_expiry(op, &op->timer) ?
+ HRTIMER_RESTART : HRTIMER_NORESTART;
}
/*
@@ -440,6 +445,7 @@ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data)
/* this element is not throttled anymore */
data->flags &= (BCM_CAN_FLAGS_MASK|RX_RECV);
+ memset(&head, 0, sizeof(head));
head.opcode = RX_CHANGED;
head.flags = op->flags;
head.count = op->count;
@@ -486,7 +492,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op,
/* do not send the saved data - only start throttle timer */
hrtimer_start(&op->thrtimer,
ktime_add(op->kt_lastmsg, op->kt_ival2),
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_SOFT);
return;
}
@@ -545,15 +551,23 @@ static void bcm_rx_starttimer(struct bcm_op *op)
return;
if (op->kt_ival1)
- hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL);
+ hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT);
}
-static void bcm_rx_timeout_tsklet(unsigned long data)
+/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */
+static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
{
- struct bcm_op *op = (struct bcm_op *)data;
+ struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
struct bcm_msg_head msg_head;
+ /* if user wants to be informed, when cyclic CAN-Messages come back */
+ if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
+ /* clear received CAN frames to indicate 'nothing received' */
+ memset(op->last_frames, 0, op->nframes * op->cfsiz);
+ }
+
/* create notification to user */
+ memset(&msg_head, 0, sizeof(msg_head));
msg_head.opcode = RX_TIMEOUT;
msg_head.flags = op->flags;
msg_head.count = op->count;
@@ -563,25 +577,6 @@ static void bcm_rx_timeout_tsklet(unsigned long data)
msg_head.nframes = 0;
bcm_send_to_user(op, &msg_head, NULL, 0);
-}
-
-/*
- * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out
- */
-static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
-{
- struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer);
-
- /* schedule before NET_RX_SOFTIRQ */
- tasklet_hi_schedule(&op->tsklet);
-
- /* no restart of the timer is done here! */
-
- /* if user wants to be informed, when cyclic CAN-Messages come back */
- if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) {
- /* clear received CAN frames to indicate 'nothing received' */
- memset(op->last_frames, 0, op->nframes * op->cfsiz);
- }
return HRTIMER_NORESTART;
}
@@ -589,14 +584,12 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer)
/*
* bcm_rx_do_flush - helper for bcm_rx_thr_flush
*/
-static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
- unsigned int index)
+static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index)
{
struct canfd_frame *lcf = op->last_frames + op->cfsiz * index;
if ((op->last_frames) && (lcf->flags & RX_THR)) {
- if (update)
- bcm_rx_changed(op, lcf);
+ bcm_rx_changed(op, lcf);
return 1;
}
return 0;
@@ -604,11 +597,8 @@ static inline int bcm_rx_do_flush(struct bcm_op *op, int update,
/*
* bcm_rx_thr_flush - Check for throttled data and send it to the userspace
- *
- * update == 0 : just check if throttled data is available (any irq context)
- * update == 1 : check and send throttled data to userspace (soft_irq context)
*/
-static int bcm_rx_thr_flush(struct bcm_op *op, int update)
+static int bcm_rx_thr_flush(struct bcm_op *op)
{
int updated = 0;
@@ -617,24 +607,16 @@ static int bcm_rx_thr_flush(struct bcm_op *op, int update)
/* for MUX filter we start at index 1 */
for (i = 1; i < op->nframes; i++)
- updated += bcm_rx_do_flush(op, update, i);
+ updated += bcm_rx_do_flush(op, i);
} else {
/* for RX_FILTER_ID and simple filter */
- updated += bcm_rx_do_flush(op, update, 0);
+ updated += bcm_rx_do_flush(op, 0);
}
return updated;
}
-static void bcm_rx_thr_tsklet(unsigned long data)
-{
- struct bcm_op *op = (struct bcm_op *)data;
-
- /* push the changed data to the userspace */
- bcm_rx_thr_flush(op, 1);
-}
-
/*
* bcm_rx_thr_handler - the time for blocked content updates is over now:
* Check for throttled data and send it to the userspace
@@ -643,9 +625,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer)
{
struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer);
- tasklet_schedule(&op->thrtsklet);
-
- if (bcm_rx_thr_flush(op, 0)) {
+ if (bcm_rx_thr_flush(op)) {
hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2);
return HRTIMER_RESTART;
} else {
@@ -739,25 +719,9 @@ static struct bcm_op *bcm_find_op(struct list_head *ops,
return NULL;
}
-static void bcm_remove_op(struct bcm_op *op)
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
{
- if (op->tsklet.func) {
- while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) ||
- test_bit(TASKLET_STATE_RUN, &op->tsklet.state) ||
- hrtimer_active(&op->timer)) {
- hrtimer_cancel(&op->timer);
- tasklet_kill(&op->tsklet);
- }
- }
-
- if (op->thrtsklet.func) {
- while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) ||
- test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) ||
- hrtimer_active(&op->thrtimer)) {
- hrtimer_cancel(&op->thrtimer);
- tasklet_kill(&op->thrtsklet);
- }
- }
+ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
@@ -768,6 +732,14 @@ static void bcm_remove_op(struct bcm_op *op)
kfree(op);
}
+static void bcm_remove_op(struct bcm_op *op)
+{
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
+
+ call_rcu(&op->rcu, bcm_free_op_rcu);
+}
+
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
@@ -793,6 +765,9 @@ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh,
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
+ /* disable automatic timer on frame reception */
+ op->flags |= RX_NO_AUTOTIMER;
+
/*
* Don't care if we're bound or not (due to netdev
* problems) can_rx_unregister() is always a save
@@ -960,6 +935,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
cf = op->frames + op->cfsiz * i;
err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz);
+ if (err < 0)
+ goto free_op;
if (op->flags & CAN_FD_FRAME) {
if (cf->len > 64)
@@ -969,12 +946,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
err = -EINVAL;
}
- if (err < 0) {
- if (op->frames != &op->sframe)
- kfree(op->frames);
- kfree(op);
- return err;
- }
+ if (err < 0)
+ goto free_op;
if (msg_head->flags & TX_CP_CAN_ID) {
/* copy can_id into frame */
@@ -990,15 +963,13 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->ifindex = ifindex;
/* initialize uninitialized (kzalloc) structure */
- hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
op->timer.function = bcm_tx_timeout_handler;
- /* initialize tasklet for tx countevent notification */
- tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet,
- (unsigned long) op);
-
/* currently unused in tx_ops */
- hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
/* add this bcm_op to the list of the tx_ops */
list_add(&op->list, &bo->tx_ops);
@@ -1047,6 +1018,12 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
bcm_tx_start_timer(op);
return msg_head->nframes * op->cfsiz + MHSIZ;
+
+free_op:
+ if (op->frames != &op->sframe)
+ kfree(op->frames);
+ kfree(op);
+ return err;
}
/*
@@ -1167,20 +1144,14 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->rx_ifindex = ifindex;
/* initialize uninitialized (kzalloc) structure */
- hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->timer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
op->timer.function = bcm_rx_timeout_handler;
- /* initialize tasklet for rx timeout notification */
- tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet,
- (unsigned long) op);
-
- hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC,
+ HRTIMER_MODE_REL_SOFT);
op->thrtimer.function = bcm_rx_thr_handler;
- /* initialize tasklet for rx throttle handling */
- tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet,
- (unsigned long) op);
-
/* add this bcm_op to the list of the rx_ops */
list_add(&op->list, &bo->rx_ops);
@@ -1226,12 +1197,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
*/
op->kt_lastmsg = 0;
hrtimer_cancel(&op->thrtimer);
- bcm_rx_thr_flush(op, 1);
+ bcm_rx_thr_flush(op);
}
if ((op->flags & STARTTIMER) && op->kt_ival1)
hrtimer_start(&op->timer, op->kt_ival1,
- HRTIMER_MODE_REL);
+ HRTIMER_MODE_REL_SOFT);
}
/* now we can register for can_ids, if we added a new bcm_op */
@@ -1429,20 +1400,15 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
/*
* notification handler for netdevice status changes
*/
-static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
- void *ptr)
+static void bcm_notify(struct bcm_sock *bo, unsigned long msg,
+ struct net_device *dev)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct bcm_sock *bo = container_of(nb, struct bcm_sock, notifier);
struct sock *sk = &bo->sk;
struct bcm_op *op;
int notify_enodev = 0;
if (!net_eq(dev_net(dev), sock_net(sk)))
- return NOTIFY_DONE;
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
+ return;
switch (msg) {
@@ -1477,7 +1443,28 @@ static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
sk->sk_error_report(sk);
}
}
+}
+
+static int bcm_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+ if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+ return NOTIFY_DONE;
+ if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */
+ return NOTIFY_DONE;
+ spin_lock(&bcm_notifier_lock);
+ list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) {
+ spin_unlock(&bcm_notifier_lock);
+ bcm_notify(bcm_busy_notifier, msg, dev);
+ spin_lock(&bcm_notifier_lock);
+ }
+ bcm_busy_notifier = NULL;
+ spin_unlock(&bcm_notifier_lock);
return NOTIFY_DONE;
}
@@ -1497,9 +1484,9 @@ static int bcm_init(struct sock *sk)
INIT_LIST_HEAD(&bo->rx_ops);
/* set notifier */
- bo->notifier.notifier_call = bcm_notifier;
-
- register_netdevice_notifier(&bo->notifier);
+ spin_lock(&bcm_notifier_lock);
+ list_add_tail(&bo->notifier, &bcm_notifier_list);
+ spin_unlock(&bcm_notifier_lock);
return 0;
}
@@ -1522,10 +1509,23 @@ static int bcm_release(struct socket *sock)
/* remove bcm_ops, timer, rx_unregister(), etc. */
- unregister_netdevice_notifier(&bo->notifier);
+ spin_lock(&bcm_notifier_lock);
+ while (bcm_busy_notifier == bo) {
+ spin_unlock(&bcm_notifier_lock);
+ schedule_timeout_uninterruptible(1);
+ spin_lock(&bcm_notifier_lock);
+ }
+ list_del(&bo->notifier);
+ spin_unlock(&bcm_notifier_lock);
lock_sock(sk);
+#if IS_ENABLED(CONFIG_PROC_FS)
+ /* remove procfs entry */
+ if (net->can.bcmproc_dir && bo->bcm_proc_read)
+ remove_proc_entry(bo->procname, net->can.bcmproc_dir);
+#endif /* CONFIG_PROC_FS */
+
list_for_each_entry_safe(op, next, &bo->tx_ops, list)
bcm_remove_op(op);
@@ -1554,14 +1554,12 @@ static int bcm_release(struct socket *sock)
REGMASK(op->can_id),
bcm_rx_handler, op);
- bcm_remove_op(op);
}
-#if IS_ENABLED(CONFIG_PROC_FS)
- /* remove procfs entry */
- if (net->can.bcmproc_dir && bo->bcm_proc_read)
- remove_proc_entry(bo->procname, net->can.bcmproc_dir);
-#endif /* CONFIG_PROC_FS */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(op, next, &bo->rx_ops, list)
+ bcm_remove_op(op);
/* remove device reference */
if (bo->bound) {
@@ -1737,6 +1735,10 @@ static struct pernet_operations canbcm_pernet_ops __read_mostly = {
.exit = canbcm_pernet_exit,
};
+static struct notifier_block canbcm_notifier = {
+ .notifier_call = bcm_notifier
+};
+
static int __init bcm_module_init(void)
{
int err;
@@ -1750,12 +1752,14 @@ static int __init bcm_module_init(void)
}
register_pernet_subsys(&canbcm_pernet_ops);
+ register_netdevice_notifier(&canbcm_notifier);
return 0;
}
static void __exit bcm_module_exit(void)
{
can_proto_unregister(&bcm_can_proto);
+ unregister_netdevice_notifier(&canbcm_notifier);
unregister_pernet_subsys(&canbcm_pernet_ops);
}
diff --git a/net/can/gw.c b/net/can/gw.c
index bd2161470e45..9c2066323529 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -494,6 +494,7 @@ static int cgw_notifier(struct notifier_block *nb,
if (gwj->src.dev == dev || gwj->dst.dev == dev) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -941,6 +942,7 @@ static void cgw_remove_all_jobs(struct net *net)
hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) {
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
}
}
@@ -1010,6 +1012,7 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh,
hlist_del(&gwj->list);
cgw_unregister_filter(net, gwj);
+ synchronize_rcu();
kmem_cache_free(cgw_cache, gwj);
err = 0;
break;
diff --git a/net/can/proc.c b/net/can/proc.c
index 70fea17bb04c..a3071f43acd7 100644
--- a/net/can/proc.c
+++ b/net/can/proc.c
@@ -467,6 +467,9 @@ void can_init_proc(struct net *net)
*/
void can_remove_proc(struct net *net)
{
+ if (!net->can.proc_dir)
+ return;
+
if (net->can.pde_version)
remove_proc_entry(CAN_PROC_VERSION, net->can.proc_dir);
@@ -494,6 +497,5 @@ void can_remove_proc(struct net *net)
if (net->can.pde_rcvlist_sff)
remove_proc_entry(CAN_PROC_RCVLIST_SFF, net->can.proc_dir);
- if (net->can.proc_dir)
- remove_proc_entry("can", net->proc_net);
+ remove_proc_entry("can", net->proc_net);
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 3aab7664933f..2a6db8752b61 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -84,7 +84,7 @@ struct raw_sock {
struct sock sk;
int bound;
int ifindex;
- struct notifier_block notifier;
+ struct list_head notifier;
int loopback;
int recv_own_msgs;
int fd_frames;
@@ -96,6 +96,10 @@ struct raw_sock {
struct uniqframe __percpu *uniq;
};
+static LIST_HEAD(raw_notifier_list);
+static DEFINE_SPINLOCK(raw_notifier_lock);
+static struct raw_sock *raw_busy_notifier;
+
/*
* Return pointer to store the extra msg flags for raw_recvmsg().
* We use the space of one unsigned int beyond the 'struct sockaddr_can'
@@ -266,21 +270,16 @@ static int raw_enable_allfilters(struct net *net, struct net_device *dev,
return err;
}
-static int raw_notifier(struct notifier_block *nb,
- unsigned long msg, void *ptr)
+static void raw_notify(struct raw_sock *ro, unsigned long msg,
+ struct net_device *dev)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct raw_sock *ro = container_of(nb, struct raw_sock, notifier);
struct sock *sk = &ro->sk;
if (!net_eq(dev_net(dev), sock_net(sk)))
- return NOTIFY_DONE;
-
- if (dev->type != ARPHRD_CAN)
- return NOTIFY_DONE;
+ return;
if (ro->ifindex != dev->ifindex)
- return NOTIFY_DONE;
+ return;
switch (msg) {
@@ -309,7 +308,28 @@ static int raw_notifier(struct notifier_block *nb,
sk->sk_error_report(sk);
break;
}
+}
+
+static int raw_notifier(struct notifier_block *nb, unsigned long msg,
+ void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ if (dev->type != ARPHRD_CAN)
+ return NOTIFY_DONE;
+ if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN)
+ return NOTIFY_DONE;
+ if (unlikely(raw_busy_notifier)) /* Check for reentrant bug. */
+ return NOTIFY_DONE;
+ spin_lock(&raw_notifier_lock);
+ list_for_each_entry(raw_busy_notifier, &raw_notifier_list, notifier) {
+ spin_unlock(&raw_notifier_lock);
+ raw_notify(raw_busy_notifier, msg, dev);
+ spin_lock(&raw_notifier_lock);
+ }
+ raw_busy_notifier = NULL;
+ spin_unlock(&raw_notifier_lock);
return NOTIFY_DONE;
}
@@ -338,9 +358,9 @@ static int raw_init(struct sock *sk)
return -ENOMEM;
/* set notifier */
- ro->notifier.notifier_call = raw_notifier;
-
- register_netdevice_notifier(&ro->notifier);
+ spin_lock(&raw_notifier_lock);
+ list_add_tail(&ro->notifier, &raw_notifier_list);
+ spin_unlock(&raw_notifier_lock);
return 0;
}
@@ -355,7 +375,14 @@ static int raw_release(struct socket *sock)
ro = raw_sk(sk);
- unregister_netdevice_notifier(&ro->notifier);
+ spin_lock(&raw_notifier_lock);
+ while (raw_busy_notifier == ro) {
+ spin_unlock(&raw_notifier_lock);
+ schedule_timeout_uninterruptible(1);
+ spin_lock(&raw_notifier_lock);
+ }
+ list_del(&ro->notifier);
+ spin_unlock(&raw_notifier_lock);
lock_sock(sk);
@@ -522,10 +549,18 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
}
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+ if (!dev) {
+ if (count > 1)
+ kfree(filter);
+ err = -ENODEV;
+ goto out_fil;
+ }
+ }
if (ro->bound) {
/* (try to) register the new filters */
@@ -564,6 +599,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
@@ -576,10 +612,16 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
err_mask &= CAN_ERR_MASK;
+ rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex)
+ if (ro->bound && ro->ifindex) {
dev = dev_get_by_index(sock_net(sk), ro->ifindex);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_err;
+ }
+ }
/* remove current error mask */
if (ro->bound) {
@@ -603,6 +645,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
dev_put(dev);
release_sock(sk);
+ rtnl_unlock();
break;
@@ -771,7 +814,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
if (err < 0)
goto free_skb;
- sock_tx_timestamp(sk, sk->sk_tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sk->sk_tsflags);
skb->dev = dev;
skb->sk = sk;
@@ -870,6 +913,10 @@ static const struct can_proto raw_can_proto = {
.prot = &raw_proto,
};
+static struct notifier_block canraw_notifier = {
+ .notifier_call = raw_notifier
+};
+
static __init int raw_module_init(void)
{
int err;
@@ -879,6 +926,8 @@ static __init int raw_module_init(void)
err = can_proto_register(&raw_can_proto);
if (err < 0)
printk(KERN_ERR "can: registration of raw protocol failed\n");
+ else
+ register_netdevice_notifier(&canraw_notifier);
return err;
}
@@ -886,6 +935,7 @@ static __init int raw_module_init(void)
static __exit void raw_module_exit(void)
{
can_proto_unregister(&raw_can_proto);
+ unregister_netdevice_notifier(&canraw_notifier);
}
module_init(raw_module_init);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index f7d7f32ac673..7fd18e10755e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -462,7 +462,7 @@ static void set_sock_callbacks(struct socket *sock,
*/
static int ceph_tcp_connect(struct ceph_connection *con)
{
- struct sockaddr_storage *paddr = &con->peer_addr.in_addr;
+ struct sockaddr_storage ss = con->peer_addr.in_addr; /* align */
struct socket *sock;
unsigned int noio_flag;
int ret;
@@ -471,7 +471,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
/* sock_create_kern() allocates with GFP_KERNEL */
noio_flag = memalloc_noio_save();
- ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family,
+ ret = sock_create_kern(read_pnet(&con->msgr->net), ss.ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
memalloc_noio_restore(noio_flag);
if (ret)
@@ -487,8 +487,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
dout("connect %s\n", ceph_pr_addr(&con->peer_addr.in_addr));
con_sock_state_connecting(con);
- ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr),
- O_NONBLOCK);
+ ret = kernel_connect(sock, (struct sockaddr *)&ss, sizeof(ss),
+ O_NONBLOCK);
if (ret == -EINPROGRESS) {
dout("connect %s EINPROGRESS sk_state = %u\n",
ceph_pr_addr(&con->peer_addr.in_addr),
@@ -1824,14 +1824,15 @@ static int verify_hello(struct ceph_connection *con)
return 0;
}
-static bool addr_is_blank(struct sockaddr_storage *ss)
+static bool addr_is_blank(struct ceph_entity_addr *addr)
{
- struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr;
- struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr;
+ struct sockaddr_storage ss = addr->in_addr; /* align */
+ struct in_addr *addr4 = &((struct sockaddr_in *)&ss)->sin_addr;
+ struct in6_addr *addr6 = &((struct sockaddr_in6 *)&ss)->sin6_addr;
- switch (ss->ss_family) {
+ switch (ss.ss_family) {
case AF_INET:
- return addr->s_addr == htonl(INADDR_ANY);
+ return addr4->s_addr == htonl(INADDR_ANY);
case AF_INET6:
return ipv6_addr_any(addr6);
default:
@@ -1839,25 +1840,25 @@ static bool addr_is_blank(struct sockaddr_storage *ss)
}
}
-static int addr_port(struct sockaddr_storage *ss)
+static int addr_port(struct ceph_entity_addr *addr)
{
- switch (ss->ss_family) {
+ switch (get_unaligned(&addr->in_addr.ss_family)) {
case AF_INET:
- return ntohs(((struct sockaddr_in *)ss)->sin_port);
+ return ntohs(get_unaligned(&((struct sockaddr_in *)&addr->in_addr)->sin_port));
case AF_INET6:
- return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
+ return ntohs(get_unaligned(&((struct sockaddr_in6 *)&addr->in_addr)->sin6_port));
}
return 0;
}
-static void addr_set_port(struct sockaddr_storage *ss, int p)
+static void addr_set_port(struct ceph_entity_addr *addr, int p)
{
- switch (ss->ss_family) {
+ switch (get_unaligned(&addr->in_addr.ss_family)) {
case AF_INET:
- ((struct sockaddr_in *)ss)->sin_port = htons(p);
+ put_unaligned(htons(p), &((struct sockaddr_in *)&addr->in_addr)->sin_port);
break;
case AF_INET6:
- ((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
+ put_unaligned(htons(p), &((struct sockaddr_in6 *)&addr->in_addr)->sin6_port);
break;
}
}
@@ -1865,21 +1866,18 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
/*
* Unlike other *_pton function semantics, zero indicates success.
*/
-static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
+static int ceph_pton(const char *str, size_t len, struct ceph_entity_addr *addr,
char delim, const char **ipend)
{
- struct sockaddr_in *in4 = (struct sockaddr_in *) ss;
- struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) ss;
-
- memset(ss, 0, sizeof(*ss));
+ memset(&addr->in_addr, 0, sizeof(addr->in_addr));
- if (in4_pton(str, len, (u8 *)&in4->sin_addr.s_addr, delim, ipend)) {
- ss->ss_family = AF_INET;
+ if (in4_pton(str, len, (u8 *)&((struct sockaddr_in *)&addr->in_addr)->sin_addr.s_addr, delim, ipend)) {
+ put_unaligned(AF_INET, &addr->in_addr.ss_family);
return 0;
}
- if (in6_pton(str, len, (u8 *)&in6->sin6_addr.s6_addr, delim, ipend)) {
- ss->ss_family = AF_INET6;
+ if (in6_pton(str, len, (u8 *)&((struct sockaddr_in6 *)&addr->in_addr)->sin6_addr.s6_addr, delim, ipend)) {
+ put_unaligned(AF_INET6, &addr->in_addr.ss_family);
return 0;
}
@@ -1891,7 +1889,7 @@ static int ceph_pton(const char *str, size_t len, struct sockaddr_storage *ss,
*/
#ifdef CONFIG_CEPH_LIB_USE_DNS_RESOLVER
static int ceph_dns_resolve_name(const char *name, size_t namelen,
- struct sockaddr_storage *ss, char delim, const char **ipend)
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
{
const char *end, *delim_p;
char *colon_p, *ip_addr = NULL;
@@ -1920,7 +1918,7 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
/* do dns_resolve upcall */
ip_len = dns_query(NULL, name, end - name, NULL, &ip_addr, NULL);
if (ip_len > 0)
- ret = ceph_pton(ip_addr, ip_len, ss, -1, NULL);
+ ret = ceph_pton(ip_addr, ip_len, addr, -1, NULL);
else
ret = -ESRCH;
@@ -1929,13 +1927,13 @@ static int ceph_dns_resolve_name(const char *name, size_t namelen,
*ipend = end;
pr_info("resolve '%.*s' (ret=%d): %s\n", (int)(end - name), name,
- ret, ret ? "failed" : ceph_pr_addr(ss));
+ ret, ret ? "failed" : ceph_pr_addr(&addr->in_addr));
return ret;
}
#else
static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
- struct sockaddr_storage *ss, char delim, const char **ipend)
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
{
return -EINVAL;
}
@@ -1946,13 +1944,13 @@ static inline int ceph_dns_resolve_name(const char *name, size_t namelen,
* then try to extract a hostname to resolve using userspace DNS upcall.
*/
static int ceph_parse_server_name(const char *name, size_t namelen,
- struct sockaddr_storage *ss, char delim, const char **ipend)
+ struct ceph_entity_addr *addr, char delim, const char **ipend)
{
int ret;
- ret = ceph_pton(name, namelen, ss, delim, ipend);
+ ret = ceph_pton(name, namelen, addr, delim, ipend);
if (ret)
- ret = ceph_dns_resolve_name(name, namelen, ss, delim, ipend);
+ ret = ceph_dns_resolve_name(name, namelen, addr, delim, ipend);
return ret;
}
@@ -1971,7 +1969,6 @@ int ceph_parse_ips(const char *c, const char *end,
dout("parse_ips on '%.*s'\n", (int)(end-c), c);
for (i = 0; i < max_count; i++) {
const char *ipend;
- struct sockaddr_storage *ss = &addr[i].in_addr;
int port;
char delim = ',';
@@ -1980,7 +1977,7 @@ int ceph_parse_ips(const char *c, const char *end,
p++;
}
- ret = ceph_parse_server_name(p, end - p, ss, delim, &ipend);
+ ret = ceph_parse_server_name(p, end - p, &addr[i], delim, &ipend);
if (ret)
goto bad;
ret = -EINVAL;
@@ -2011,9 +2008,9 @@ int ceph_parse_ips(const char *c, const char *end,
port = CEPH_MON_PORT;
}
- addr_set_port(ss, port);
+ addr_set_port(&addr[i], port);
- dout("parse_ips got %s\n", ceph_pr_addr(ss));
+ dout("parse_ips got %s\n", ceph_pr_addr(&addr[i].in_addr));
if (p == end)
break;
@@ -2052,7 +2049,7 @@ static int process_banner(struct ceph_connection *con)
*/
if (memcmp(&con->peer_addr, &con->actual_peer_addr,
sizeof(con->peer_addr)) != 0 &&
- !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
+ !(addr_is_blank(&con->actual_peer_addr) &&
con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
pr_warn("wrong peer, want %s/%d, got %s/%d\n",
ceph_pr_addr(&con->peer_addr.in_addr),
@@ -2066,13 +2063,13 @@ static int process_banner(struct ceph_connection *con)
/*
* did we learn our address?
*/
- if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
- int port = addr_port(&con->msgr->inst.addr.in_addr);
+ if (addr_is_blank(&con->msgr->inst.addr)) {
+ int port = addr_port(&con->msgr->inst.addr);
memcpy(&con->msgr->inst.addr.in_addr,
&con->peer_addr_for_me.in_addr,
sizeof(con->peer_addr_for_me.in_addr));
- addr_set_port(&con->msgr->inst.addr.in_addr, port);
+ addr_set_port(&con->msgr->inst.addr, port);
encode_my_addr(con->msgr);
dout("process_banner learned my addr is %s\n",
ceph_pr_addr(&con->msgr->inst.addr.in_addr));
@@ -3037,6 +3034,11 @@ static void con_fault(struct ceph_connection *con)
ceph_msg_put(con->in_msg);
con->in_msg = NULL;
}
+ if (con->out_msg) {
+ BUG_ON(con->out_msg->con != con);
+ ceph_msg_put(con->out_msg);
+ con->out_msg = NULL;
+ }
/* Requeue anything that hasn't been acked */
list_splice_init(&con->out_sent, &con->out_queue);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 713fe1fbcb18..90ebb0ba927c 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3137,17 +3137,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
int ret;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+ ret = wait_for_completion_killable(&lreq->reg_commit_wait);
return ret ?: lreq->reg_commit_error;
}
-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+ unsigned long timeout)
{
- int ret;
+ long left;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
- return ret ?: lreq->notify_finish_error;
+ left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+ ceph_timeout_jiffies(timeout));
+ if (left <= 0)
+ left = left ?: -ETIMEDOUT;
+ else
+ left = lreq->notify_finish_error; /* completed */
+
+ return left;
}
/*
@@ -4760,7 +4767,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
ret = linger_reg_commit_wait(lreq);
if (!ret)
- ret = linger_notify_finish_wait(lreq);
+ ret = linger_notify_finish_wait(lreq,
+ msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
else
dout("lreq %p failed to initiate notify %d\n", lreq, ret);
diff --git a/net/compat.c b/net/compat.c
index 2a8c7cb5f06a..2778a236e091 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -158,7 +158,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
if (kcmlen > stackbuf_size)
kcmsg_base = kcmsg = sock_kmalloc(sk, kcmlen, GFP_KERNEL);
if (kcmsg == NULL)
- return -ENOBUFS;
+ return -ENOMEM;
/* Now copy them over neatly. */
memset(kcmsg, 0, kcmlen);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 865a8cb7b0bd..6ba82eb14b46 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -837,18 +837,21 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
/* exceptional events? */
- if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ if (READ_ONCE(sk->sk_err) ||
+ !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
/* readable? */
@@ -857,10 +860,12 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
- if (sk->sk_state == TCP_CLOSE)
+ int state = READ_ONCE(sk->sk_state);
+
+ if (state == TCP_CLOSE)
mask |= EPOLLHUP;
/* connection hasn't started yet? */
- if (sk->sk_state == TCP_SYN_SENT)
+ if (state == TCP_SYN_SENT)
return mask;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index c77d12a35f92..b5c9648c2192 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2303,6 +2303,8 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
bool active = false;
unsigned int nr_ids;
+ WARN_ON_ONCE(index >= dev->num_tx_queues);
+
if (dev->num_tc) {
/* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
@@ -2648,6 +2650,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
if (dev->num_tc)
netif_setup_tc(dev, txq);
+ dev_qdisc_change_real_num_tx(dev, txq);
+
dev->real_num_tx_queues = txq;
if (disabling) {
@@ -2792,8 +2796,10 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
{
if (in_irq() || irqs_disabled())
__dev_kfree_skb_irq(skb, reason);
+ else if (unlikely(reason == SKB_REASON_DROPPED))
+ kfree_skb(skb);
else
- dev_kfree_skb(skb);
+ consume_skb(skb);
}
EXPORT_SYMBOL(__dev_kfree_skb_any);
@@ -2846,6 +2852,12 @@ static u16 skb_tx_hash(const struct net_device *dev,
qoffset = sb_dev->tc_to_txq[tc].offset;
qcount = sb_dev->tc_to_txq[tc].count;
+ if (unlikely(!qcount)) {
+ net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n",
+ sb_dev->name, qoffset, tc);
+ qoffset = 0;
+ qcount = dev->real_num_tx_queues;
+ }
}
if (skb_rx_queue_recorded(skb)) {
@@ -3188,6 +3200,14 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
if (gso_segs > dev->gso_max_segs)
return features & ~NETIF_F_GSO_MASK;
+ if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size)))
+ return features & ~NETIF_F_GSO_MASK;
+
+ if (!skb_shinfo(skb)->gso_type) {
+ skb_warn_bad_offload(skb);
+ return features & ~NETIF_F_GSO_MASK;
+ }
+
/* Support for GSO partial features requires software
* intervention before we can actually process the packets
* so we need to strip support for any partial features now
@@ -3823,7 +3843,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
if (dev->flags & IFF_UP) {
int cpu = smp_processor_id(); /* ok because BHs are off */
- if (txq->xmit_lock_owner != cpu) {
+ /* Other cpus might concurrently change txq->xmit_lock_owner
+ * to -1 or to their cpu id, but not to our id.
+ */
+ if (READ_ONCE(txq->xmit_lock_owner) != cpu) {
if (dev_xmit_recursion())
goto recursion_alert;
@@ -4049,8 +4072,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
u32 next_cpu;
u32 ident;
- /* First check into global flow table if there is a match */
- ident = sock_flow_table->ents[hash & sock_flow_table->mask];
+ /* First check into global flow table if there is a match.
+ * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow().
+ */
+ ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]);
if ((ident ^ hash) & ~rps_cpu_mask)
goto try_rps;
@@ -4463,7 +4488,7 @@ static int netif_rx_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_rx(skb);
@@ -4783,7 +4808,7 @@ static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc,
int ret = NET_RX_DROP;
__be16 type;
- net_timestamp_check(!netdev_tstamp_prequeue, skb);
+ net_timestamp_check(!READ_ONCE(netdev_tstamp_prequeue), skb);
trace_netif_receive_skb(skb);
@@ -5135,7 +5160,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
{
int ret;
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
@@ -5165,7 +5190,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
- net_timestamp_check(netdev_tstamp_prequeue, skb);
+ net_timestamp_check(READ_ONCE(netdev_tstamp_prequeue), skb);
skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
@@ -5400,7 +5425,8 @@ static void skb_gro_reset_offset(struct sk_buff *skb)
if (skb_mac_header(skb) == skb_tail_pointer(skb) &&
pinfo->nr_frags &&
- !PageHighMem(skb_frag_page(frag0))) {
+ !PageHighMem(skb_frag_page(frag0)) &&
+ (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) {
NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
skb_frag_size(frag0),
@@ -5839,7 +5865,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
net_rps_action_and_irq_enable(sd);
}
- napi->weight = dev_rx_weight;
+ napi->weight = READ_ONCE(dev_rx_weight);
while (again) {
struct sk_buff *skb;
@@ -5931,11 +5957,18 @@ EXPORT_SYMBOL(napi_schedule_prep);
* __napi_schedule_irqoff - schedule for receive
* @n: entry to schedule
*
- * Variant of __napi_schedule() assuming hard irqs are masked
+ * Variant of __napi_schedule() assuming hard irqs are masked.
+ *
+ * On PREEMPT_RT enabled kernels this maps to __napi_schedule()
+ * because the interrupt disabled assumption might not be true
+ * due to force-threaded interrupts and spinlock substitution.
*/
void __napi_schedule_irqoff(struct napi_struct *n)
{
- ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ ____napi_schedule(this_cpu_ptr(&softnet_data), n);
+ else
+ __napi_schedule(n);
}
EXPORT_SYMBOL(__napi_schedule_irqoff);
@@ -6316,8 +6349,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
unsigned long time_limit = jiffies +
- usecs_to_jiffies(netdev_budget_usecs);
- int budget = netdev_budget;
+ usecs_to_jiffies(READ_ONCE(netdev_budget_usecs));
+ int budget = READ_ONCE(netdev_budget);
LIST_HEAD(list);
LIST_HEAD(repoll);
@@ -8351,6 +8384,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
}
}
+ if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) {
+ netdev_dbg(dev, "Dropping TLS RX HW offload feature since no RXCSUM feature.\n");
+ features &= ~NETIF_F_HW_TLS_RX;
+ }
+
return features;
}
@@ -8980,9 +9018,7 @@ void netdev_run_todo(void)
BUG_ON(!list_empty(&dev->ptype_specific));
WARN_ON(rcu_access_pointer(dev->ip_ptr));
WARN_ON(rcu_access_pointer(dev->ip6_ptr));
-#if IS_ENABLED(CONFIG_DECNET)
- WARN_ON(dev->dn_ptr);
-#endif
+
if (dev->priv_destructor)
dev->priv_destructor(dev);
if (dev->needs_free_netdev)
@@ -9007,24 +9043,16 @@ void netdev_run_todo(void)
void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
const struct net_device_stats *netdev_stats)
{
-#if BITS_PER_LONG == 64
- BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats));
- memcpy(stats64, netdev_stats, sizeof(*netdev_stats));
- /* zero out counters that only exist in rtnl_link_stats64 */
- memset((char *)stats64 + sizeof(*netdev_stats), 0,
- sizeof(*stats64) - sizeof(*netdev_stats));
-#else
- size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long);
- const unsigned long *src = (const unsigned long *)netdev_stats;
+ size_t i, n = sizeof(*netdev_stats) / sizeof(atomic_long_t);
+ const atomic_long_t *src = (atomic_long_t *)netdev_stats;
u64 *dst = (u64 *)stats64;
BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
- dst[i] = src[i];
+ dst[i] = (unsigned long)atomic_long_read(&src[i]);
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + n * sizeof(u64), 0,
sizeof(*stats64) - n * sizeof(u64));
-#endif
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
@@ -9703,7 +9731,7 @@ static void __net_exit default_device_exit(struct net *net)
continue;
/* Leave virtual devices for the generic cleanup */
- if (dev->rtnl_link_ops)
+ if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund)
continue;
/* Push remaining network devices to init_net */
diff --git a/net/core/devlink.c b/net/core/devlink.c
index a77e3777c8dd..6ad095264896 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1113,7 +1113,7 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
err = ops->sb_occ_port_pool_get(devlink_port, devlink_sb->index,
pool_index, &cur, &max);
if (err && err != -EOPNOTSUPP)
- return err;
+ goto sb_occ_get_failure;
if (!err) {
if (nla_put_u32(msg, DEVLINK_ATTR_SB_OCC_CUR, cur))
goto nla_put_failure;
@@ -1126,8 +1126,10 @@ static int devlink_nl_sb_port_pool_fill(struct sk_buff *msg,
return 0;
nla_put_failure:
+ err = -EMSGSIZE;
+sb_occ_get_failure:
genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
+ return err;
}
static int devlink_nl_cmd_sb_port_pool_get_doit(struct sk_buff *skb,
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 3978a5e8d261..52e559252a9e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -122,7 +122,7 @@ out:
}
static const struct genl_multicast_group dropmon_mcgrps[] = {
- { .name = "events", },
+ { .name = "events", .cap_sys_admin = 1 },
};
static void send_dm_alert(struct work_struct *work)
@@ -219,13 +219,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_lock();
list_for_each_entry_rcu(new_stat, &hw_stats_list, list) {
+ struct net_device *dev;
+
/*
* only add a note to our monitor buffer if:
* 1) this is the dev we received on
* 2) its after the last_rx delta
* 3) our rx_dropped count has gone up
*/
- if ((new_stat->dev == napi->dev) &&
+ /* Paired with WRITE_ONCE() in dropmon_net_event() */
+ dev = READ_ONCE(new_stat->dev);
+ if ((dev == napi->dev) &&
(time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) &&
(napi->dev->stats.rx_dropped != new_stat->last_drop_val)) {
trace_drop_common(NULL, NULL);
@@ -340,7 +344,10 @@ static int dropmon_net_event(struct notifier_block *ev_block,
mutex_lock(&trace_state_mutex);
list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) {
if (new_stat->dev == dev) {
- new_stat->dev = NULL;
+
+ /* Paired with READ_ONCE() in trace_napi_poll_hit() */
+ WRITE_ONCE(new_stat->dev, NULL);
+
if (trace_state == TRACE_OFF) {
list_del_rcu(&new_stat->list);
kfree_rcu(new_stat, rcu);
@@ -363,10 +370,12 @@ static const struct genl_ops dropmon_ops[] = {
{
.cmd = NET_DM_CMD_START,
.doit = net_dm_cmd_trace,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NET_DM_CMD_STOP,
.doit = net_dm_cmd_trace,
+ .flags = GENL_ADMIN_PERM,
},
};
diff --git a/net/core/dst.c b/net/core/dst.c
index 81ccf20e2826..1b1677683b97 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -97,10 +97,10 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
{
struct dst_entry *dst;
- if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
- if (ops->gc(ops))
- return NULL;
- }
+ if (ops->gc &&
+ !(flags & DST_NOCOUNT) &&
+ dst_entries_get_fast(ops) > ops->gc_thresh)
+ ops->gc(ops);
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 1011625a0ca4..d007f1cca64c 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -618,7 +618,7 @@ store_link_ksettings_for_user(void __user *to,
{
struct ethtool_link_usettings link_usettings;
- memcpy(&link_usettings.base, &from->base, sizeof(link_usettings));
+ memcpy(&link_usettings, from, sizeof(link_usettings));
bitmap_to_arr32(link_usettings.link_modes.supported,
from->link_modes.supported,
__ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -1594,7 +1594,7 @@ static int ethtool_get_any_eeprom(struct net_device *dev, void __user *useraddr,
if (eeprom.offset + eeprom.len > total_len)
return -EINVAL;
- data = kmalloc(PAGE_SIZE, GFP_USER);
+ data = kzalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;
@@ -1659,7 +1659,7 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev))
return -EINVAL;
- data = kmalloc(PAGE_SIZE, GFP_USER);
+ data = kzalloc(PAGE_SIZE, GFP_USER);
if (!data)
return -ENOMEM;
@@ -1840,7 +1840,7 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
return -EFAULT;
test.len = test_len;
- data = kmalloc_array(test_len, sizeof(u64), GFP_USER);
+ data = kcalloc(test_len, sizeof(u64), GFP_USER);
if (!data)
return -ENOMEM;
@@ -2023,7 +2023,8 @@ static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
return n_stats;
if (n_stats > S32_MAX / sizeof(u64))
return -ENOMEM;
- WARN_ON_ONCE(!n_stats);
+ if (WARN_ON_ONCE(!n_stats))
+ return -EOPNOTSUPP;
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
@@ -2372,7 +2373,7 @@ static int ethtool_get_tunable(struct net_device *dev, void __user *useraddr)
ret = ethtool_tunable_valid(&tuna);
if (ret)
return ret;
- data = kmalloc(tuna.len, GFP_USER);
+ data = kzalloc(tuna.len, GFP_USER);
if (!data)
return -ENOMEM;
ret = ops->get_tunable(dev, &tuna, data);
@@ -2552,7 +2553,7 @@ static int get_phy_tunable(struct net_device *dev, void __user *useraddr)
ret = ethtool_phy_tunable_valid(&tuna);
if (ret)
return ret;
- data = kmalloc(tuna.len, GFP_USER);
+ data = kzalloc(tuna.len, GFP_USER);
if (!data)
return -ENOMEM;
mutex_lock(&phydev->lock);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 8916c5d9b3b3..46a13ed15c4e 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -1105,7 +1105,7 @@ static void notify_rule_change(int event, struct fib_rule *rule,
{
struct net *net;
struct sk_buff *skb;
- int err = -ENOBUFS;
+ int err = -ENOMEM;
net = ops->fro_net;
skb = nlmsg_new(fib_rule_nlmsg_size(ops, rule), GFP_KERNEL);
diff --git a/net/core/filter.c b/net/core/filter.c
index 557bd5cc8f94..dea7132f3813 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1666,7 +1666,7 @@ BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
return -EINVAL;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
return -EFAULT;
if (unlikely(bpf_try_make_writable(skb, offset + len)))
return -EFAULT;
@@ -1701,7 +1701,7 @@ BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
{
void *ptr;
- if (unlikely(offset > 0xffff))
+ if (unlikely(offset > INT_MAX))
goto err_clear;
ptr = skb_header_pointer(skb, offset, len, to);
@@ -2025,6 +2025,10 @@ static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
if (mlen) {
__skb_pull(skb, mlen);
+ if (unlikely(!skb->len)) {
+ kfree_skb(skb);
+ return -ERANGE;
+ }
/* At ingress, the mac header has already been pulled once.
* At egress, skb_pospull_rcsum has to be done in case that
@@ -2561,15 +2565,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
+ void *old_data;
+
/* skb_ensure_writable() is not needed here, as we're
* already working on an uncloned skb.
*/
if (unlikely(!pskb_may_pull(skb, off + len)))
return -ENOMEM;
- skb_postpull_rcsum(skb, skb->data + off, len);
- memmove(skb->data + len, skb->data, off);
+ old_data = skb->data;
__skb_pull(skb, len);
+ skb_postpull_rcsum(skb, old_data + off, len);
+ memmove(skb->data, old_data, off);
return 0;
}
@@ -2639,8 +2646,6 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
shinfo->gso_type |= SKB_GSO_TCPV6;
}
- /* Due to IPv6 header, MSS needs to be downgraded. */
- skb_decrease_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_segs = 0;
@@ -2680,8 +2685,6 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
shinfo->gso_type |= SKB_GSO_TCPV4;
}
- /* Due to IPv4 header, MSS can be upgraded. */
- skb_increase_gso_size(shinfo, len_diff);
/* Header must be checked, and gso_segs recomputed. */
shinfo->gso_type |= SKB_GSO_DODGY;
shinfo->gso_segs = 0;
@@ -2836,18 +2839,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
return 0;
}
-static u32 __bpf_skb_max_len(const struct sk_buff *skb)
-{
- return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
- SKB_MAX_ALLOC;
-}
+#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
{
bool trans_same = skb->transport_header == skb->network_header;
u32 len_cur, len_diff_abs = abs(len_diff);
u32 len_min = bpf_skb_net_base_len(skb);
- u32 len_max = __bpf_skb_max_len(skb);
+ u32 len_max = BPF_SKB_MAX_LEN;
__be16 proto = skb_protocol(skb, true);
bool shrink = len_diff < 0;
int ret;
@@ -2926,7 +2925,7 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
u64 flags)
{
- u32 max_len = __bpf_skb_max_len(skb);
+ u32 max_len = BPF_SKB_MAX_LEN;
u32 min_len = __bpf_skb_min_len(skb);
int ret;
@@ -3002,7 +3001,7 @@ static const struct bpf_func_proto sk_skb_change_tail_proto = {
static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
u64 flags)
{
- u32 max_len = __bpf_skb_max_len(skb);
+ u32 max_len = BPF_SKB_MAX_LEN;
u32 new_len = skb->len + head_room;
int ret;
@@ -3024,6 +3023,7 @@ static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
__skb_push(skb, head_room);
memset(skb->data, 0, head_room);
skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
}
return ret;
@@ -4235,7 +4235,6 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
- params->ifindex = dev->ifindex;
return 0;
}
@@ -4333,6 +4332,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->ipv4_dst = nh->nh_gw;
params->rt_metric = res.fi->fib_priority;
+ params->ifindex = dev->ifindex;
/* xdp and cls_bpf programs are run in RCU-bh so
* rcu_read_lock_bh is not needed here
@@ -4447,6 +4447,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
dev = f6i->fib6_nh.nh_dev;
params->rt_metric = f6i->fib6_metric;
+ params->ifindex = dev->ifindex;
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here. Can not use __ipv6_neigh_lookup_noref here
@@ -4500,6 +4501,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
{
struct net *net = dev_net(skb->dev);
int rc = -EAFNOSUPPORT;
+ bool check_mtu = false;
if (plen < sizeof(*params))
return -EINVAL;
@@ -4507,22 +4509,28 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
return -EINVAL;
+ if (params->tot_len)
+ check_mtu = true;
+
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
- rc = bpf_ipv4_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu);
break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- rc = bpf_ipv6_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu);
break;
#endif
}
- if (!rc) {
+ if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) {
struct net_device *dev;
+ /* When tot_len isn't provided by user, check skb
+ * against MTU of FIB lookup resulting net_device
+ */
dev = dev_get_by_index_rcu(net, params->ifindex);
if (!is_skb_forwardable(dev, skb))
rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
@@ -4570,7 +4578,6 @@ static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len
if (err)
return err;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return seg6_lookup_nexthop(skb, NULL, 0);
@@ -5534,9 +5541,9 @@ void bpf_warn_invalid_xdp_action(u32 act)
{
const u32 act_max = XDP_REDIRECT;
- WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
- act > act_max ? "Illegal" : "Driver unsupported",
- act);
+ pr_warn_once("%s XDP return value %u, expect packet loss!\n",
+ act > act_max ? "Illegal" : "Driver unsupported",
+ act);
}
EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 994dd1520f07..da860a680256 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -694,8 +694,10 @@ proto_again:
FLOW_DISSECTOR_KEY_IPV4_ADDRS,
target_container);
- memcpy(&key_addrs->v4addrs, &iph->saddr,
- sizeof(key_addrs->v4addrs));
+ memcpy(&key_addrs->v4addrs.src, &iph->saddr,
+ sizeof(key_addrs->v4addrs.src));
+ memcpy(&key_addrs->v4addrs.dst, &iph->daddr,
+ sizeof(key_addrs->v4addrs.dst));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
}
@@ -744,8 +746,10 @@ proto_again:
FLOW_DISSECTOR_KEY_IPV6_ADDRS,
target_container);
- memcpy(&key_addrs->v6addrs, &iph->saddr,
- sizeof(key_addrs->v6addrs));
+ memcpy(&key_addrs->v6addrs.src, &iph->saddr,
+ sizeof(key_addrs->v6addrs.src));
+ memcpy(&key_addrs->v6addrs.dst, &iph->daddr,
+ sizeof(key_addrs->v6addrs.dst));
key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
}
@@ -823,6 +827,7 @@ proto_again:
VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
}
key_vlan->vlan_tpid = saved_vlan_tpid;
+ key_vlan->vlan_eth_type = proto;
}
fdret = FLOW_DISSECT_RET_PROTO_AGAIN;
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index e4e442d70c2d..752744db11ff 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -84,11 +84,11 @@ static void est_timer(struct timer_list *t)
u64 rate, brate;
est_fetch_counters(est, &b);
- brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log);
- brate -= (est->avbps >> est->ewma_log);
+ brate = (b.bytes - est->last_bytes) << (10 - est->intvl_log);
+ brate = (brate >> est->ewma_log) - (est->avbps >> est->ewma_log);
- rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log);
- rate -= (est->avpps >> est->ewma_log);
+ rate = (u64)(b.packets - est->last_packets) << (10 - est->intvl_log);
+ rate = (rate >> est->ewma_log) - (est->avpps >> est->ewma_log);
write_seqcount_begin(&est->seq);
est->avbps += brate;
@@ -147,6 +147,9 @@ int gen_new_estimator(struct gnet_stats_basic_packed *bstats,
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
+ if (parm->ewma_log == 0 || parm->ewma_log >= 31)
+ return -EINVAL;
+
est = kzalloc(sizeof(*est), GFP_KERNEL);
if (!est)
return -ENOBUFS;
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index a648568c5e8f..4a5f4fbffd83 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -44,12 +44,11 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
{
int ret;
- /* Preempt disable is needed to protect per-cpu redirect_info between
- * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and
- * access to maps strictly require a rcu_read_lock() for protection,
- * mixing with BH RCU lock doesn't work.
+ /* Preempt disable and BH disable are needed to protect per-cpu
+ * redirect_info between BPF prog and skb_do_redirect().
*/
preempt_disable();
+ local_bh_disable();
bpf_compute_data_pointers(skb);
ret = bpf_prog_run_save_cb(lwt->prog, skb);
@@ -82,6 +81,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
break;
}
+ local_bh_enable();
preempt_enable();
return ret;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 6e890f51b7d8..5b6f3175d55b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -224,11 +224,26 @@ static int neigh_del_timer(struct neighbour *n)
return 0;
}
-static void pneigh_queue_purge(struct sk_buff_head *list)
+static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
+ struct sk_buff_head tmp;
+ unsigned long flags;
struct sk_buff *skb;
- while ((skb = skb_dequeue(list)) != NULL) {
+ skb_queue_head_init(&tmp);
+ spin_lock_irqsave(&list->lock, flags);
+ skb = skb_peek(list);
+ while (skb != NULL) {
+ struct sk_buff *skb_next = skb_peek_next(skb, list);
+ if (net == NULL || net_eq(dev_net(skb->dev), net)) {
+ __skb_unlink(skb, list);
+ __skb_queue_tail(&tmp, skb);
+ }
+ skb = skb_next;
+ }
+ spin_unlock_irqrestore(&list->lock, flags);
+
+ while ((skb = __skb_dequeue(&tmp))) {
dev_put(skb->dev);
kfree_skb(skb);
}
@@ -297,9 +312,9 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
write_lock_bh(&tbl->lock);
neigh_flush_dev(tbl, dev);
pneigh_ifdown_and_unlock(tbl, dev);
-
- del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
+ if (skb_queue_empty_lockless(&tbl->proxy_queue))
+ del_timer_sync(&tbl->proxy_timer);
return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
@@ -461,37 +476,6 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
}
EXPORT_SYMBOL(neigh_lookup);
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
- const void *pkey)
-{
- struct neighbour *n;
- unsigned int key_len = tbl->key_len;
- u32 hash_val;
- struct neigh_hash_table *nht;
-
- NEIGH_CACHE_STAT_INC(tbl, lookups);
-
- rcu_read_lock_bh();
- nht = rcu_dereference_bh(tbl->nht);
- hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
-
- for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
- n != NULL;
- n = rcu_dereference_bh(n->next)) {
- if (!memcmp(n->primary_key, pkey, key_len) &&
- net_eq(dev_net(n->dev), net)) {
- if (!refcount_inc_not_zero(&n->refcnt))
- n = NULL;
- NEIGH_CACHE_STAT_INC(tbl, hits);
- break;
- }
- }
-
- rcu_read_unlock_bh();
- return n;
-}
-EXPORT_SYMBOL(neigh_lookup_nodev);
-
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
struct net_device *dev, bool want_ref)
{
@@ -635,7 +619,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
ASSERT_RTNL();
- n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
+ n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
if (!n)
goto out;
@@ -1271,7 +1255,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
* we can reinject the packet there.
*/
n2 = NULL;
- if (dst) {
+ if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
n2 = dst_neigh_lookup_skb(dst, skb);
if (n2)
n1 = n2;
@@ -1614,7 +1598,7 @@ int neigh_table_clear(int index, struct neigh_table *tbl)
/* It is not clean... Fix it to unload IPv6 module safely */
cancel_delayed_work_sync(&tbl->gc_work);
del_timer_sync(&tbl->proxy_timer);
- pneigh_queue_purge(&tbl->proxy_queue);
+ pneigh_queue_purge(&tbl->proxy_queue, NULL);
neigh_ifdown(tbl, NULL);
if (atomic_read(&tbl->entries))
pr_crit("neighbour leakage\n");
@@ -1646,9 +1630,6 @@ static struct neigh_table *neigh_find_table(int family)
case AF_INET6:
tbl = neigh_tables[NEIGH_ND_TABLE];
break;
- case AF_DECnet:
- tbl = neigh_tables[NEIGH_DN_TABLE];
- break;
}
return tbl;
diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
index 63881f72ef71..2808c5f9c1f0 100644
--- a/net/core/net-procfs.c
+++ b/net/core/net-procfs.c
@@ -182,12 +182,23 @@ static const struct seq_operations softnet_seq_ops = {
.show = softnet_seq_show,
};
-static void *ptype_get_idx(loff_t pos)
+static void *ptype_get_idx(struct seq_file *seq, loff_t pos)
{
+ struct list_head *ptype_list = NULL;
struct packet_type *pt = NULL;
+ struct net_device *dev;
loff_t i = 0;
int t;
+ for_each_netdev_rcu(seq_file_net(seq), dev) {
+ ptype_list = &dev->ptype_all;
+ list_for_each_entry_rcu(pt, ptype_list, list) {
+ if (i == pos)
+ return pt;
+ ++i;
+ }
+ }
+
list_for_each_entry_rcu(pt, &ptype_all, list) {
if (i == pos)
return pt;
@@ -208,22 +219,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
rcu_read_lock();
- return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ struct net_device *dev;
struct packet_type *pt;
struct list_head *nxt;
int hash;
++*pos;
if (v == SEQ_START_TOKEN)
- return ptype_get_idx(0);
+ return ptype_get_idx(seq, 0);
pt = v;
nxt = pt->list.next;
+ if (pt->dev) {
+ if (nxt != &pt->dev->ptype_all)
+ goto found;
+
+ dev = pt->dev;
+ for_each_netdev_continue_rcu(seq_file_net(seq), dev) {
+ if (!list_empty(&dev->ptype_all)) {
+ nxt = dev->ptype_all.next;
+ goto found;
+ }
+ }
+
+ nxt = ptype_all.next;
+ goto ptype_all;
+ }
+
if (pt->type == htons(ETH_P_ALL)) {
+ptype_all:
if (nxt != &ptype_all)
goto found;
hash = 0;
@@ -252,7 +281,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
if (v == SEQ_START_TOKEN)
seq_puts(seq, "Type Device Function\n");
- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
+ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
if (pt->type == htons(ETH_P_ALL))
seq_puts(seq, "ALL ");
else
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 001d7f07e780..7a11b2d90975 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -203,7 +203,7 @@ static ssize_t speed_show(struct device *dev,
if (!rtnl_trylock())
return restart_syscall();
- if (netif_running(netdev)) {
+ if (netif_running(netdev) && netif_device_present(netdev)) {
struct ethtool_link_ksettings cmd;
if (!__ethtool_get_link_ksettings(netdev, &cmd))
@@ -1244,8 +1244,8 @@ static const struct attribute_group dql_group = {
static ssize_t xps_cpus_show(struct netdev_queue *queue,
char *buf)
{
+ int cpu, len, ret, num_tc = 1, tc = 0;
struct net_device *dev = queue->dev;
- int cpu, len, num_tc = 1, tc = 0;
struct xps_dev_maps *dev_maps;
cpumask_var_t mask;
unsigned long index;
@@ -1255,22 +1255,31 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
index = get_netdev_queue_index(queue);
+ if (!rtnl_trylock())
+ return restart_syscall();
+
if (dev->num_tc) {
/* Do not allow XPS on subordinate device directly */
num_tc = dev->num_tc;
- if (num_tc < 0)
- return -EINVAL;
+ if (num_tc < 0) {
+ ret = -EINVAL;
+ goto err_rtnl_unlock;
+ }
/* If queue belongs to subordinate dev use its map */
dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev;
tc = netdev_txq_to_tc(dev, index);
- if (tc < 0)
- return -EINVAL;
+ if (tc < 0) {
+ ret = -EINVAL;
+ goto err_rtnl_unlock;
+ }
}
- if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto err_rtnl_unlock;
+ }
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_cpus_map);
@@ -1293,9 +1302,15 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
}
rcu_read_unlock();
+ rtnl_unlock();
+
len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask));
free_cpumask_var(mask);
return len < PAGE_SIZE ? len : -EINVAL;
+
+err_rtnl_unlock:
+ rtnl_unlock();
+ return ret;
}
static ssize_t xps_cpus_store(struct netdev_queue *queue,
@@ -1323,7 +1338,13 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue,
return err;
}
+ if (!rtnl_trylock()) {
+ free_cpumask_var(mask);
+ return restart_syscall();
+ }
+
err = netif_set_xps_queue(dev, mask, index);
+ rtnl_unlock();
free_cpumask_var(mask);
@@ -1335,23 +1356,30 @@ static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init
static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
{
+ int j, len, ret, num_tc = 1, tc = 0;
struct net_device *dev = queue->dev;
struct xps_dev_maps *dev_maps;
unsigned long *mask, index;
- int j, len, num_tc = 1, tc = 0;
index = get_netdev_queue_index(queue);
+ if (!rtnl_trylock())
+ return restart_syscall();
+
if (dev->num_tc) {
num_tc = dev->num_tc;
tc = netdev_txq_to_tc(dev, index);
- if (tc < 0)
- return -EINVAL;
+ if (tc < 0) {
+ ret = -EINVAL;
+ goto err_rtnl_unlock;
+ }
}
mask = kcalloc(BITS_TO_LONGS(dev->num_rx_queues), sizeof(long),
GFP_KERNEL);
- if (!mask)
- return -ENOMEM;
+ if (!mask) {
+ ret = -ENOMEM;
+ goto err_rtnl_unlock;
+ }
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_rxqs_map);
@@ -1377,10 +1405,16 @@ static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf)
out_no_maps:
rcu_read_unlock();
+ rtnl_unlock();
+
len = bitmap_print_to_pagebuf(false, buf, mask, dev->num_rx_queues);
kfree(mask);
return len < PAGE_SIZE ? len : -EINVAL;
+
+err_rtnl_unlock:
+ rtnl_unlock();
+ return ret;
}
static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
@@ -1407,10 +1441,17 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf,
return err;
}
+ if (!rtnl_trylock()) {
+ bitmap_free(mask);
+ return restart_syscall();
+ }
+
cpus_read_lock();
err = __netif_set_xps_queue(dev, mask, index, true);
cpus_read_unlock();
+ rtnl_unlock();
+
kfree(mask);
return err ? : len;
}
@@ -1575,6 +1616,9 @@ static void remove_queue_kobjects(struct net_device *dev)
net_rx_queue_update_kobjects(dev, real_rx, 0);
netdev_queue_update_kobjects(dev, real_tx, 0);
+
+ dev->real_num_rx_queues = 0;
+ dev->real_num_tx_queues = 0;
#ifdef CONFIG_SYSFS
kset_unregister(dev->queues_kset);
#endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index c60123dff803..a87774424829 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -112,6 +112,7 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
+ struct net_generic *ng;
int err = -ENOMEM;
void *data = NULL;
@@ -130,6 +131,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
if (!err)
return 0;
+ if (ops->id && ops->size) {
+ ng = rcu_dereference_protected(net->gen,
+ lockdep_is_held(&pernet_ops_rwsem));
+ ng->ptr[*ops->id] = NULL;
+ }
+
cleanup:
kfree(data);
@@ -149,8 +156,10 @@ static void ops_exit_list(const struct pernet_operations *ops,
{
struct net *net;
if (ops->exit) {
- list_for_each_entry(net, net_exit_list, exit_list)
+ list_for_each_entry(net, net_exit_list, exit_list) {
ops->exit(net);
+ cond_resched();
+ }
}
if (ops->exit_batch)
ops->exit_batch(net_exit_list);
@@ -192,9 +201,9 @@ static int net_eq_idr(int id, void *net, void *peer)
return 0;
}
-/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
- * is set to true, thus the caller knows that the new id must be notified via
- * rtnl.
+/* Must be called from RCU-critical section or with nsid_lock held. If
+ * a new id is assigned, the bool alloc is set to true, thus the
+ * caller knows that the new id must be notified via rtnl.
*/
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
@@ -218,7 +227,7 @@ static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
return NETNSA_NSID_NOT_ASSIGNED;
}
-/* should be called with nsid_lock held */
+/* Must be called from RCU-critical section or with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
bool no = false;
@@ -261,9 +270,10 @@ int peernet2id(struct net *net, struct net *peer)
{
int id;
- spin_lock_bh(&net->nsid_lock);
+ rcu_read_lock();
id = __peernet2id(net, peer);
- spin_unlock_bh(&net->nsid_lock);
+ rcu_read_unlock();
+
return id;
}
EXPORT_SYMBOL(peernet2id);
@@ -598,6 +608,18 @@ void __put_net(struct net *net)
}
EXPORT_SYMBOL_GPL(__put_net);
+/**
+ * get_net_ns - increment the refcount of the network namespace
+ * @ns: common namespace (net)
+ *
+ * Returns the net's common namespace.
+ */
+struct ns_common *get_net_ns(struct ns_common *ns)
+{
+ return &get_net(container_of(ns, struct net, ns))->ns;
+}
+EXPORT_SYMBOL_GPL(get_net_ns);
+
struct net *get_net_ns_by_fd(int fd)
{
struct file *file;
@@ -825,6 +847,7 @@ struct rtnl_net_dump_cb {
int s_idx;
};
+/* Runs in RCU-critical section. */
static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
@@ -855,9 +878,9 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
.s_idx = cb->args[0],
};
- spin_lock_bh(&net->nsid_lock);
+ rcu_read_lock();
idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
- spin_unlock_bh(&net->nsid_lock);
+ rcu_read_unlock();
cb->args[0] = net_cb.idx;
return skb->len;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 023ce0fbb496..08f0da9e6a80 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
+#include <net/dsa.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
@@ -135,6 +136,20 @@ static void queue_process(struct work_struct *work)
}
}
+static int netif_local_xmit_active(struct net_device *dev)
+{
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+ if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id())
+ return 1;
+ }
+
+ return 0;
+}
+
static void poll_one_napi(struct napi_struct *napi)
{
int work;
@@ -181,7 +196,10 @@ void netpoll_poll_dev(struct net_device *dev)
if (!ni || down_trylock(&ni->dev_lock))
return;
- if (!netif_running(dev)) {
+ /* Some drivers will take the same locks in poll and xmit,
+ * we can't poll if local CPU is already in xmit.
+ */
+ if (!netif_running(dev) || netif_local_xmit_active(dev)) {
up(&ni->dev_lock);
return;
}
@@ -638,15 +656,15 @@ EXPORT_SYMBOL_GPL(__netpoll_setup);
int netpoll_setup(struct netpoll *np)
{
- struct net_device *ndev = NULL;
+ struct net_device *ndev = NULL, *dev = NULL;
+ struct net *net = current->nsproxy->net_ns;
struct in_device *in_dev;
int err;
rtnl_lock();
- if (np->dev_name[0]) {
- struct net *net = current->nsproxy->net_ns;
+ if (np->dev_name[0])
ndev = __dev_get_by_name(net, np->dev_name);
- }
+
if (!ndev) {
np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
err = -ENODEV;
@@ -654,6 +672,19 @@ int netpoll_setup(struct netpoll *np)
}
dev_hold(ndev);
+ /* bring up DSA management network devices up first */
+ for_each_netdev(net, dev) {
+ if (!netdev_uses_dsa(dev))
+ continue;
+
+ err = dev_change_flags(dev, dev->flags | IFF_UP);
+ if (err < 0) {
+ np_err(np, "%s failed to open %s\n",
+ np->dev_name, dev->name);
+ goto put;
+ }
+ }
+
if (netdev_master_upper_dev_get(ndev)) {
np_err(np, "%s is a slave device, aborting\n", np->dev_name);
err = -EBUSY;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 092fa3d75b32..3ade60ec4512 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -651,19 +651,19 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
seq_puts(seq, " Flags: ");
for (i = 0; i < NR_PKT_FLAGS; i++) {
- if (i == F_FLOW_SEQ)
+ if (i == FLOW_SEQ_SHIFT)
if (!pkt_dev->cflows)
continue;
- if (pkt_dev->flags & (1 << i))
+ if (pkt_dev->flags & (1 << i)) {
seq_printf(seq, "%s ", pkt_flag_names[i]);
- else if (i == F_FLOW_SEQ)
- seq_puts(seq, "FLOW_RND ");
-
#ifdef CONFIG_XFRM
- if (i == F_IPSEC && pkt_dev->spi)
- seq_printf(seq, "spi:%u", pkt_dev->spi);
+ if (i == IPSEC_SHIFT && pkt_dev->spi)
+ seq_printf(seq, "spi:%u ", pkt_dev->spi);
#endif
+ } else if (i == FLOW_SEQ_SHIFT) {
+ seq_puts(seq, "FLOW_RND ");
+ }
}
seq_puts(seq, "\n");
@@ -3471,7 +3471,7 @@ static int pktgen_thread_worker(void *arg)
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
- BUG_ON(smp_processor_id() != cpu);
+ WARN_ON(smp_processor_id() != cpu);
init_waitqueue_head(&t->queue);
complete(&t->start_done);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 935053ee7765..0d3f724da78b 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -130,6 +130,12 @@ int rtnl_is_locked(void)
}
EXPORT_SYMBOL(rtnl_is_locked);
+bool refcount_dec_and_rtnl_lock(refcount_t *r)
+{
+ return refcount_dec_and_mutex_lock(r, &rtnl_mutex);
+}
+EXPORT_SYMBOL(refcount_dec_and_rtnl_lock);
+
#ifdef CONFIG_PROVE_LOCKING
bool lockdep_rtnl_is_held(void)
{
@@ -2936,9 +2942,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
{
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
- const struct rtnl_link_ops *m_ops = NULL;
+ const struct rtnl_link_ops *m_ops;
struct net_device *dev;
- struct net_device *master_dev = NULL;
+ struct net_device *master_dev;
struct ifinfomsg *ifm;
char kind[MODULE_NAME_LEN];
char ifname[IFNAMSIZ];
@@ -2964,15 +2970,20 @@ replay:
ifname[0] = '\0';
ifm = nlmsg_data(nlh);
- if (ifm->ifi_index > 0)
+ if (ifm->ifi_index > 0) {
dev = __dev_get_by_index(net, ifm->ifi_index);
- else {
+ } else if (ifm->ifi_index < 0) {
+ NL_SET_ERR_MSG(extack, "ifindex can't be negative");
+ return -EINVAL;
+ } else {
if (ifname[0])
dev = __dev_get_by_name(net, ifname);
else
dev = NULL;
}
+ master_dev = NULL;
+ m_ops = NULL;
if (dev) {
master_dev = netdev_master_upper_dev_get(dev);
if (master_dev)
@@ -3428,7 +3439,7 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
ndm->ndm_ifindex = dev->ifindex;
ndm->ndm_state = ndm_state;
- if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
+ if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr))
goto nla_put_failure;
if (vid)
if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid))
@@ -3442,10 +3453,10 @@ nla_put_failure:
return -EMSGSIZE;
}
-static inline size_t rtnl_fdb_nlmsg_size(void)
+static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev)
{
return NLMSG_ALIGN(sizeof(struct ndmsg)) +
- nla_total_size(ETH_ALEN) + /* NDA_LLADDR */
+ nla_total_size(dev->addr_len) + /* NDA_LLADDR */
nla_total_size(sizeof(u16)) + /* NDA_VLAN */
0;
}
@@ -3457,7 +3468,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type,
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(rtnl_fdb_nlmsg_size(), GFP_ATOMIC);
+ skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC);
if (!skb)
goto errout;
@@ -4102,6 +4113,10 @@ static int rtnl_bridge_notify(struct net_device *dev)
if (err < 0)
goto errout;
+ /* Notification info is only filled for bridge ports, not the bridge
+ * device itself. Therefore, a zero notification length is valid and
+ * should not result in an error.
+ */
if (!skb->len)
goto errout;
@@ -4508,7 +4523,7 @@ nla_put_failure:
static size_t if_nlmsg_stats_size(const struct net_device *dev,
u32 filter_mask)
{
- size_t size = 0;
+ size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg));
if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0))
size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64));
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index af6ad467ed61..6fd25279bee9 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -22,6 +22,8 @@
static siphash_key_t net_secret __read_mostly;
static siphash_key_t ts_secret __read_mostly;
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
}
EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
+ unsigned int timeseed;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
- .dport = dport
+ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ .dport = dport,
};
net_secret_init();
return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
}
EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
net_secret_init();
- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u16)dport, &net_secret);
+ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport,
+ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index be4bc833c28a..e03cd719b86b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -398,7 +398,11 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
len += NET_SKB_PAD;
- if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+ */
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
@@ -459,13 +463,17 @@ EXPORT_SYMBOL(__netdev_alloc_skb);
struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
gfp_t gfp_mask)
{
- struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+ struct napi_alloc_cache *nc;
struct sk_buff *skb;
void *data;
len += NET_SKB_PAD + NET_IP_ALIGN;
- if ((len > SKB_WITH_OVERHEAD(PAGE_SIZE)) ||
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+ */
+ if (len <= SKB_WITH_OVERHEAD(1024) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
(gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
if (!skb)
@@ -473,6 +481,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
goto skb_success;
}
+ nc = this_cpu_ptr(&napi_alloc_cache);
len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
len = SKB_DATA_ALIGN(len);
@@ -1853,6 +1862,12 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len)
skb->csum = csum_block_sub(skb->csum,
skb_checksum(skb, len, delta, 0),
len);
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ int hdlen = (len > skb_headlen(skb)) ? skb_headlen(skb) : len;
+ int offset = skb_checksum_start_offset(skb) + skb->csum_offset;
+
+ if (offset + sizeof(__sum16) > hdlen)
+ return -EINVAL;
}
return __pskb_trim(skb, len);
}
@@ -1938,6 +1953,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
insp = list;
} else {
/* Eaten partially. */
+ if (skb_is_gso(skb) && !list->head_frag &&
+ skb_headlen(list))
+ skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
if (skb_shared(list)) {
/* Sucks! We need to fork list. :-( */
@@ -1962,7 +1980,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
/* Free pulled out fragments. */
while ((list = skb_shinfo(skb)->frag_list) != insp) {
skb_shinfo(skb)->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
@@ -2690,8 +2708,11 @@ skb_zerocopy_headlen(const struct sk_buff *from)
if (!from->head_frag ||
skb_headlen(from) < L1_CACHE_BYTES ||
- skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS)
+ skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) {
hlen = skb_headlen(from);
+ if (!hlen)
+ hlen = from->len;
+ }
if (skb_has_frag_list(from))
hlen = from->len;
@@ -3077,7 +3098,19 @@ EXPORT_SYMBOL(skb_split);
*/
static int skb_prepare_for_shift(struct sk_buff *skb)
{
- return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ int ret = 0;
+
+ if (skb_cloned(skb)) {
+ /* Save and restore truesize: pskb_expand_head() may reallocate
+ * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we
+ * cannot change truesize at this point.
+ */
+ unsigned int save_truesize = skb->truesize;
+
+ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+ skb->truesize = save_truesize;
+ }
+ return ret;
}
/**
@@ -3513,40 +3546,41 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
- struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int partial_segs = 0;
unsigned int headroom;
unsigned int len = head_skb->len;
+ struct sk_buff *frag_skb;
+ skb_frag_t *frag;
__be16 proto;
bool csum, sg;
- int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
- int pos;
+ int nfrags, pos;
int dummy;
- if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) &&
- (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) {
- /* gso_size is untrusted, and we have a frag_list with a linear
- * non head_frag head.
- *
- * (we assume checking the first list_skb member suffices;
- * i.e if either of the list_skb members have non head_frag
- * head, then the first one has too).
- *
- * If head_skb's headlen does not fit requested gso_size, it
- * means that the frag_list members do NOT terminate on exact
- * gso_size boundaries. Hence we cannot perform skb_frag_t page
- * sharing. Therefore we must fallback to copying the frag_list
- * skbs; we do so by disabling SG.
- */
- if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb))
- features &= ~NETIF_F_SG;
+ if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
+ mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
+ struct sk_buff *check_skb;
+
+ for (check_skb = list_skb; check_skb; check_skb = check_skb->next) {
+ if (skb_headlen(check_skb) && !check_skb->head_frag) {
+ /* gso_size is untrusted, and we have a frag_list with
+ * a linear non head_frag item.
+ *
+ * If head_skb's headlen does not fit requested gso_size,
+ * it means that the frag_list members do NOT terminate
+ * on exact gso_size boundaries. Hence we cannot perform
+ * skb_frag_t page sharing. Therefore we must fallback to
+ * copying the frag_list skbs; we do so by disabling SG.
+ */
+ features &= ~NETIF_F_SG;
+ break;
+ }
+ }
}
__skb_push(head_skb, doffset);
@@ -3603,6 +3637,13 @@ normal:
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+ return ERR_PTR(-ENOMEM);
+
+ nfrags = skb_shinfo(head_skb)->nr_frags;
+ frag = skb_shinfo(head_skb)->frags;
+ frag_skb = head_skb;
+
do {
struct sk_buff *nskb;
skb_frag_t *nskb_frag;
@@ -3627,6 +3668,10 @@ normal:
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -3645,12 +3690,8 @@ normal:
frag++;
}
- nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
- if (unlikely(!nskb))
- goto err;
-
if (unlikely(pskb_trim(nskb, len))) {
kfree_skb(nskb);
goto err;
@@ -3715,12 +3756,16 @@ normal:
skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags &
SKBTX_SHARED_FRAG;
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
goto err;
while (pos < offset + len) {
if (i >= nfrags) {
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, list_skb,
+ GFP_ATOMIC))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
@@ -3734,10 +3779,6 @@ normal:
i--;
frag--;
}
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
list_skb = list_skb->next;
}
@@ -4265,7 +4306,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk)
if (skb && (skb_next = skb_peek(q))) {
icmp_next = is_icmp_err_skb(skb_next);
if (icmp_next)
- sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin;
+ sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno;
}
spin_unlock_irqrestore(&q->lock, flags);
@@ -4347,7 +4388,7 @@ static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly)
{
bool ret;
- if (likely(sysctl_tstamp_allow_data || tsonly))
+ if (likely(READ_ONCE(sysctl_tstamp_allow_data) || tsonly))
return true;
read_lock_bh(&sk->sk_callback_lock);
@@ -4410,6 +4451,11 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
skb = alloc_skb(0, GFP_ATOMIC);
} else {
skb = skb_clone(orig_skb, GFP_ATOMIC);
+
+ if (skb_orphan_frags_rx(skb, GFP_ATOMIC)) {
+ kfree_skb(skb);
+ return;
+ }
}
if (!skb)
return;
@@ -5452,7 +5498,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb,
/* Free pulled out fragments. */
while ((list = shinfo->frag_list) != insp) {
shinfo->frag_list = list->next;
- kfree_skb(list);
+ consume_skb(list);
}
/* And insert new clone at head. */
if (clone) {
diff --git a/net/core/sock.c b/net/core/sock.c
index e6cbe137cb6f..62d169bcfcfa 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -496,7 +496,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
- sk->sk_dst_pending_confirm = 0;
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
@@ -632,7 +632,8 @@ bool sk_mc_loop(struct sock *sk)
return false;
if (!sk)
return true;
- switch (sk->sk_family) {
+ /* IPV6_ADDRFORM can change sk->sk_family under us. */
+ switch (READ_ONCE(sk->sk_family)) {
case AF_INET:
return inet_sk(sk)->mc_loop;
#if IS_ENABLED(CONFIG_IPV6)
@@ -989,7 +990,7 @@ set_rcvbuf:
if (val < 0)
ret = -EINVAL;
else
- sk->sk_ll_usec = val;
+ WRITE_ONCE(sk->sk_ll_usec, val);
}
break;
#endif
@@ -1057,6 +1058,16 @@ set_rcvbuf:
}
EXPORT_SYMBOL(sock_setsockopt);
+static const struct cred *sk_get_peer_cred(struct sock *sk)
+{
+ const struct cred *cred;
+
+ spin_lock(&sk->sk_peer_lock);
+ cred = get_cred(sk->sk_peer_cred);
+ spin_unlock(&sk->sk_peer_lock);
+
+ return cred;
+}
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
struct ucred *ucred)
@@ -1231,7 +1242,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
struct ucred peercred;
if (len > sizeof(peercred))
len = sizeof(peercred);
+
+ spin_lock(&sk->sk_peer_lock);
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
+ spin_unlock(&sk->sk_peer_lock);
+
if (copy_to_user(optval, &peercred, len))
return -EFAULT;
goto lenout;
@@ -1239,20 +1254,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
case SO_PEERGROUPS:
{
+ const struct cred *cred;
int ret, n;
- if (!sk->sk_peer_cred)
+ cred = sk_get_peer_cred(sk);
+ if (!cred)
return -ENODATA;
- n = sk->sk_peer_cred->group_info->ngroups;
+ n = cred->group_info->ngroups;
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
+ put_cred(cred);
return put_user(len, optlen) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
- ret = groups_to_user((gid_t __user *)optval,
- sk->sk_peer_cred->group_info);
+ ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+ put_cred(cred);
if (ret)
return ret;
goto lenout;
@@ -1302,7 +1320,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
if (!sock->ops->set_peek_off)
return -EOPNOTSUPP;
- v.val = sk->sk_peek_off;
+ v.val = READ_ONCE(sk->sk_peek_off);
break;
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
@@ -1332,7 +1350,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
- v.val = sk->sk_ll_usec;
+ v.val = READ_ONCE(sk->sk_ll_usec);
break;
#endif
@@ -1576,9 +1594,10 @@ static void __sk_destruct(struct rcu_head *head)
sk->sk_frag.page = NULL;
}
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
+ put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
+
if (likely(sk->sk_net_refcnt))
put_net(sock_net(sk));
sk_prot_free(sk->sk_prot_creator, sk);
@@ -1777,7 +1796,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
- sk_dst_set(sk, dst);
sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
if (sk->sk_route_caps & NETIF_F_GSO)
sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
@@ -1792,6 +1810,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
}
}
sk->sk_gso_max_segs = max_segs;
+ sk_dst_set(sk, dst);
}
EXPORT_SYMBOL_GPL(sk_setup_caps);
@@ -1921,13 +1940,24 @@ kuid_t sock_i_uid(struct sock *sk)
}
EXPORT_SYMBOL(sock_i_uid);
-unsigned long sock_i_ino(struct sock *sk)
+unsigned long __sock_i_ino(struct sock *sk)
{
unsigned long ino;
- read_lock_bh(&sk->sk_callback_lock);
+ read_lock(&sk->sk_callback_lock);
ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
- read_unlock_bh(&sk->sk_callback_lock);
+ read_unlock(&sk->sk_callback_lock);
+ return ino;
+}
+EXPORT_SYMBOL(__sock_i_ino);
+
+unsigned long sock_i_ino(struct sock *sk)
+{
+ unsigned long ino;
+
+ local_bh_disable();
+ ino = __sock_i_ino(sk);
+ local_bh_enable();
return ino;
}
EXPORT_SYMBOL(sock_i_ino);
@@ -2042,9 +2072,9 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (refcount_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
break;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
break;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
break;
timeo = schedule_timeout(timeo);
}
@@ -2072,7 +2102,7 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
goto failure;
err = -EPIPE;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto failure;
if (sk_wmem_alloc_get(sk) < sk->sk_sndbuf)
@@ -2189,9 +2219,6 @@ static void sk_leave_memory_pressure(struct sock *sk)
}
}
-/* On 32bit arches, an skb frag is limited to 2^15 */
-#define SKB_FRAG_PAGE_ORDER get_order(32768)
-
/**
* skb_page_frag_refill - check that a page_frag contains enough room
* @sz: minimum size of the fragment we want to get
@@ -2512,7 +2539,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
- if (sk_under_memory_pressure(sk) &&
+ if (sk_under_global_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
}
@@ -2533,7 +2560,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
@@ -2826,6 +2853,8 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_peer_pid = NULL;
sk->sk_peer_cred = NULL;
+ spin_lock_init(&sk->sk_peer_lock);
+
sk->sk_write_pending = 0;
sk->sk_rcvlowat = 1;
sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
@@ -2839,7 +2868,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
#ifdef CONFIG_NET_RX_BUSY_POLL
sk->sk_napi_id = 0;
- sk->sk_ll_usec = sysctl_net_busy_read;
+ sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
#endif
sk->sk_max_pacing_rate = ~0U;
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9c85ef2b7e1d..375a3bbe6485 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -299,7 +299,7 @@ select_by_hash:
i = j = reciprocal_scale(hash, socks);
while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
i++;
- if (i >= reuse->num_socks)
+ if (i >= socks)
i = 0;
if (i == j)
goto out;
diff --git a/net/core/stream.c b/net/core/stream.c
index 7f5eaa95a675..cd60746877b1 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -32,7 +32,7 @@ void sk_stream_write_space(struct sock *sk)
struct socket *sock = sk->sk_socket;
struct socket_wq *wq;
- if (sk_stream_is_writeable(sk) && sock) {
+ if (__sk_stream_is_writeable(sk, 1) && sock) {
clear_bit(SOCK_NOSPACE, &sock->flags);
rcu_read_lock();
@@ -159,7 +159,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
*timeo_p = current_timeo;
}
out:
- remove_wait_queue(sk_sleep(sk), &wait);
+ if (!sock_flag(sk, SOCK_DEAD))
+ remove_wait_queue(sk_sleep(sk), &wait);
return err;
do_error:
@@ -195,8 +196,11 @@ void sk_stream_kill_queues(struct sock *sk)
/* First the read buffer. */
__skb_queue_purge(&sk->sk_receive_queue);
- /* Next, the error queue. */
- __skb_queue_purge(&sk->sk_error_queue);
+ /* Next, the error queue.
+ * We need to use queue lock, because other threads might
+ * add packets to the queue without socket lock being held.
+ */
+ skb_queue_purge(&sk->sk_error_queue);
/* Next, the write queue. */
WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
@@ -205,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk)
sk_mem_reclaim(sk);
WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 2597449ae9b4..d7e39167ceca 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -231,14 +231,17 @@ static int set_default_qdisc(struct ctl_table *table, int write,
static int proc_do_dev_weight(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- int ret;
+ static DEFINE_MUTEX(dev_weight_mutex);
+ int ret, weight;
+ mutex_lock(&dev_weight_mutex);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
- if (ret != 0)
- return ret;
-
- dev_rx_weight = weight_p * dev_weight_rx_bias;
- dev_tx_weight = weight_p * dev_weight_tx_bias;
+ if (!ret && write) {
+ weight = READ_ONCE(weight_p);
+ WRITE_ONCE(dev_rx_weight, weight * dev_weight_rx_bias);
+ WRITE_ONCE(dev_tx_weight, weight * dev_weight_tx_bias);
+ }
+ mutex_unlock(&dev_weight_mutex);
return ret;
}
@@ -417,7 +420,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
.extra1 = &long_one,
- .extra2 = &long_max,
+ .extra2 = &bpf_jit_limit_max,
},
#endif
{
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 5ee6b94131b2..1ceeba2429f7 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -1756,6 +1756,8 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
fn = &reply_funcs[dcb->cmd];
if (!fn->cb)
return -EOPNOTSUPP;
+ if (fn->type == RTM_SETDCB && !netlink_capable(skb, CAP_NET_ADMIN))
+ return -EPERM;
if (!tb[DCB_ATTR_IFNAME])
return -EINVAL;
@@ -2052,10 +2054,54 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev)
}
EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask);
+static void dcbnl_flush_dev(struct net_device *dev)
+{
+ struct dcb_app_type *itr, *tmp;
+
+ spin_lock_bh(&dcb_lock);
+
+ list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) {
+ if (itr->ifindex == dev->ifindex) {
+ list_del(&itr->list);
+ kfree(itr);
+ }
+ }
+
+ spin_unlock_bh(&dcb_lock);
+}
+
+static int dcbnl_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ if (!dev->dcbnl_ops)
+ return NOTIFY_DONE;
+
+ dcbnl_flush_dev(dev);
+
+ return NOTIFY_OK;
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block dcbnl_nb __read_mostly = {
+ .notifier_call = dcbnl_netdevice_event,
+};
+
static int __init dcbnl_init(void)
{
+ int err;
+
INIT_LIST_HEAD(&dcb_app_list);
+ err = register_netdevice_notifier(&dcbnl_nb);
+ if (err)
+ return err;
+
rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f91e3816806b..579f39e0d02e 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -44,9 +44,9 @@ extern bool dccp_debug;
#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
#else
-#define dccp_pr_debug(format, a...)
-#define dccp_pr_debug_cat(format, a...)
-#define dccp_debug(format, a...)
+#define dccp_pr_debug(format, a...) do {} while (0)
+#define dccp_pr_debug_cat(format, a...) do {} while (0)
+#define dccp_debug(format, a...) do {} while (0)
#endif
extern struct inet_hashinfo dccp_hashinfo;
@@ -291,6 +291,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
const struct dccp_hdr *dh, const unsigned int len);
+void dccp_destruct_common(struct sock *sk);
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
void dccp_destroy_sock(struct sock *sk);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 176bddacc16e..5281ac3260f6 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -134,6 +134,8 @@ failure:
* This unhashes the socket and releases the local port, if necessary.
*/
dccp_set_state(sk, DCCP_CLOSED);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
@@ -245,12 +247,12 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info)
int err;
struct net *net = dev_net(skb->dev);
- /* Only need dccph_dport & dccph_sport which are the first
- * 4 bytes in dccp header.
- * Our caller (icmp_socket_deliver()) already pulled 8 bytes for us.
- */
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return;
+ dh = (struct dccp_hdr *)(skb->data + offset);
+ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+ return;
+ iph = (struct iphdr *)skb->data;
dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet_lookup_established(net, &dccp_hashinfo,
@@ -428,7 +430,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
if (*own_req)
ireq->ireq_opt = NULL;
else
@@ -610,9 +612,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_parse_options(sk, dreq, skb))
goto drop_and_free;
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
ireq = inet_rsk(req);
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
@@ -620,6 +619,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
ireq->ireq_family = AF_INET;
ireq->ir_iif = sk->sk_bound_dev_if;
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
/*
* Step 3: Process LISTEN state
*
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index b438bed6749d..72ceefbf2312 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -71,7 +71,7 @@ static inline __u64 dccp_v6_init_sequence(struct sk_buff *skb)
static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info)
{
- const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
+ const struct ipv6hdr *hdr;
const struct dccp_hdr *dh;
struct dccp_sock *dp;
struct ipv6_pinfo *np;
@@ -80,12 +80,12 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
__u64 seq;
struct net *net = dev_net(skb->dev);
- /* Only need dccph_dport & dccph_sport which are the first
- * 4 bytes in dccp header.
- * Our caller (icmpv6_notify()) already pulled 8 bytes for us.
- */
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_sport) > 8);
- BUILD_BUG_ON(offsetofend(struct dccp_hdr, dccph_dport) > 8);
+ if (!pskb_may_pull(skb, offset + sizeof(*dh)))
+ return;
+ dh = (struct dccp_hdr *)(skb->data + offset);
+ if (!pskb_may_pull(skb, offset + __dccp_basic_hdr_len(dh)))
+ return;
+ hdr = (const struct ipv6hdr *)skb->data;
dh = (struct dccp_hdr *)(skb->data + offset);
sk = __inet6_lookup_established(net, &dccp_hashinfo,
@@ -319,6 +319,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
return 0; /* discard, don't send a reset here */
+ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+ __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+ return 0;
+ }
+
if (dccp_bad_service_code(sk, service)) {
dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
@@ -344,15 +349,15 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_parse_options(sk, dreq, skb))
goto drop_and_free;
- if (security_inet_conn_request(sk, skb, req))
- goto drop_and_free;
-
ireq = inet_rsk(req);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
ireq->ireq_family = AF_INET6;
ireq->ir_mark = inet_request_mark(sk, skb);
+ if (security_inet_conn_request(sk, skb, req))
+ goto drop_and_free;
+
if (ipv6_opt_accepted(sk, skb, IP6CB(skb)) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -533,14 +538,12 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
dccp_done(newsk);
goto out;
}
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
/* Clone pktoptions received with SYN, if we own the req */
if (*own_req && ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
}
return newsk;
@@ -600,7 +603,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
@@ -664,7 +667,6 @@ ipv6_pktoptions:
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb,
&DCCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
memmove(IP6CB(opt_skb),
&DCCP_SKB_CB(opt_skb)->header.h6,
sizeof(struct inet6_skb_parm));
@@ -952,6 +954,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
late_failure:
dccp_set_state(sk, DCCP_CLOSED);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
__sk_dst_reset(sk);
failure:
inet->inet_dport = 0;
@@ -996,6 +1000,12 @@ static const struct inet_connection_sock_af_ops dccp_ipv6_mapped = {
#endif
};
+static void dccp_v6_sk_destruct(struct sock *sk)
+{
+ dccp_destruct_common(sk);
+ inet6_sock_destruct(sk);
+}
+
/* NOTE: A lot of things set to zero explicitly by call to
* sk_alloc() so need not be done here.
*/
@@ -1008,17 +1018,12 @@ static int dccp_v6_init_sock(struct sock *sk)
if (unlikely(!dccp_v6_ctl_sock_initialized))
dccp_v6_ctl_sock_initialized = 1;
inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
+ sk->sk_destruct = dccp_v6_sk_destruct;
}
return err;
}
-static void dccp_v6_destroy_sock(struct sock *sk)
-{
- dccp_destroy_sock(sk);
- inet6_destroy_sock(sk);
-}
-
static struct timewait_sock_ops dccp6_timewait_sock_ops = {
.twsk_obj_size = sizeof(struct dccp6_timewait_sock),
};
@@ -1041,7 +1046,7 @@ static struct proto dccp_v6_prot = {
.accept = inet_csk_accept,
.get_port = inet_csk_get_port,
.shutdown = dccp_shutdown,
- .destroy = dccp_v6_destroy_sock,
+ .destroy = dccp_destroy_sock,
.orphan_count = &dccp_orphan_count,
.max_header = MAX_DCCP_HEADER,
.obj_size = sizeof(struct dccp6_sock),
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ba6fc3c1186b..e91838a7b849 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -98,6 +98,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackvec = NULL;
newdp->dccps_service_list = NULL;
+ newdp->dccps_hc_rx_ccid = NULL;
+ newdp->dccps_hc_tx_ccid = NULL;
newdp->dccps_service = dreq->dreq_service;
newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 4e40db017e19..3c464d63b0bb 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -60,7 +60,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
(dh->dccph_doff * 4);
struct dccp_options_received *opt_recv = &dp->dccps_options_received;
unsigned char opt, len;
- unsigned char *uninitialized_var(value);
+ unsigned char *value;
u32 elapsed_time;
__be32 opt_val;
int rc;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 91a15b3c4915..d872dd1cfb5e 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -189,7 +189,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
- dp->dccps_mss_cache = cur_mps;
+ WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
return cur_mps;
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 43733accf58e..c4ea0159ce2e 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -174,12 +174,18 @@ const char *dccp_packet_name(const int type)
EXPORT_SYMBOL_GPL(dccp_packet_name);
-static void dccp_sk_destruct(struct sock *sk)
+void dccp_destruct_common(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
dp->dccps_hc_tx_ccid = NULL;
+}
+EXPORT_SYMBOL_GPL(dccp_destruct_common);
+
+static void dccp_sk_destruct(struct sock *sk)
+{
+ dccp_destruct_common(sk);
inet_sock_destruct(sk);
}
@@ -322,11 +328,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- __poll_t mask;
struct sock *sk = sock->sk;
+ __poll_t mask;
+ u8 shutdown;
+ int state;
sock_poll_wait(file, sock, wait);
- if (sk->sk_state == DCCP_LISTEN)
+
+ state = inet_sk_state_load(sk);
+ if (state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
@@ -335,20 +345,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
*/
mask = 0;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask = EPOLLERR;
+ shutdown = READ_ONCE(sk->sk_shutdown);
- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+ if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected? */
- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+ if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
if (atomic_read(&sk->sk_rmem_alloc) > 0)
mask |= EPOLLIN | EPOLLRDNORM;
- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (!(shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
@@ -366,7 +377,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
}
return mask;
}
-
EXPORT_SYMBOL_GPL(dccp_poll);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
@@ -642,7 +652,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
case DCCP_SOCKOPT_GET_CUR_MPS:
- val = dp->dccps_mss_cache;
+ val = READ_ONCE(dp->dccps_mss_cache);
break;
case DCCP_SOCKOPT_AVAILABLE_CCIDS:
return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -764,16 +774,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
trace_dccp_probe(sk, len);
- if (len > dp->dccps_mss_cache)
+ if (len > READ_ONCE(dp->dccps_mss_cache))
return -EMSGSIZE;
lock_sock(sk);
- if (dccp_qpolicy_full(sk)) {
- rc = -EAGAIN;
- goto out_release;
- }
-
timeo = sock_sndtimeo(sk, noblock);
/*
@@ -792,11 +797,22 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (skb == NULL)
goto out_release;
+ if (dccp_qpolicy_full(sk)) {
+ rc = -EAGAIN;
+ goto out_discard;
+ }
+
if (sk->sk_state == DCCP_CLOSED) {
rc = -ENOTCONN;
goto out_discard;
}
+ /* We need to check dccps_mss_cache after socket is locked. */
+ if (len > dp->dccps_mss_cache) {
+ rc = -EMSGSIZE;
+ goto out_discard;
+ }
+
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc != 0)
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
deleted file mode 100644
index dcc74956badd..000000000000
--- a/net/decnet/Kconfig
+++ /dev/null
@@ -1,42 +0,0 @@
-#
-# DECnet configuration
-#
-config DECNET
- tristate "DECnet Support"
- ---help---
- The DECnet networking protocol was used in many products made by
- Digital (now Compaq). It provides reliable stream and sequenced
- packet communications over which run a variety of services similar
- to those which run over TCP/IP.
-
- To find some tools to use with the kernel layer support, please
- look at Patrick Caulfield's web site:
- <http://linux-decnet.sourceforge.net/>.
-
- More detailed documentation is available in
- <file:Documentation/networking/decnet.txt>.
-
- Be sure to say Y to "/proc file system support" and "Sysctl support"
- below when using DECnet, since you will need sysctl support to aid
- in configuration at run time.
-
- The DECnet code is also available as a module ( = code which can be
- inserted in and removed from the running kernel whenever you want).
- The module is called decnet.
-
-config DECNET_ROUTER
- bool "DECnet: router support"
- depends on DECNET
- select FIB_RULES
- ---help---
- Add support for turning your DECnet Endnode into a level 1 or 2
- router. This is an experimental, but functional option. If you
- do say Y here, then make sure that you also say Y to "Kernel/User
- network link driver", "Routing messages" and "Network packet
- filtering". The first two are required to allow configuration via
- rtnetlink (you will need Alexey Kuznetsov's iproute2 package
- from <ftp://ftp.tux.org/pub/net/ip-routing/>). The "Network packet
- filtering" option will be required for the forthcoming routing daemon
- to work.
-
- See <file:Documentation/networking/decnet.txt> for more information.
diff --git a/net/decnet/Makefile b/net/decnet/Makefile
deleted file mode 100644
index 07b38e441b2d..000000000000
--- a/net/decnet/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_DECNET) += decnet.o
-
-decnet-y := af_decnet.o dn_nsp_in.o dn_nsp_out.o \
- dn_route.o dn_dev.o dn_neigh.o dn_timer.o
-decnet-$(CONFIG_DECNET_ROUTER) += dn_fib.o dn_rules.o dn_table.o
-decnet-y += sysctl_net_decnet.o
-
-obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/decnet/README b/net/decnet/README
deleted file mode 100644
index 60e7ec88c81f..000000000000
--- a/net/decnet/README
+++ /dev/null
@@ -1,8 +0,0 @@
- Linux DECnet Project
- ======================
-
-The documentation for this kernel subsystem is available in the
-Documentation/networking subdirectory of this distribution and also
-on line at http://www.chygwyn.com/DECnet/
-
-Steve Whitehouse <SteveW@ACM.org>
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
deleted file mode 100644
index 0e6f32defd67..000000000000
--- a/net/decnet/af_decnet.c
+++ /dev/null
@@ -1,2411 +0,0 @@
-
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Socket Layer Interface
- *
- * Authors: Eduardo Marcelo Serrat <emserrat@geocities.com>
- * Patrick Caulfield <patrick@pandh.demon.co.uk>
- *
- * Changes:
- * Steve Whitehouse: Copied from Eduardo Serrat and Patrick Caulfield's
- * version of the code. Original copyright preserved
- * below.
- * Steve Whitehouse: Some bug fixes, cleaning up some code to make it
- * compatible with my routing layer.
- * Steve Whitehouse: Merging changes from Eduardo Serrat and Patrick
- * Caulfield.
- * Steve Whitehouse: Further bug fixes, checking module code still works
- * with new routing layer.
- * Steve Whitehouse: Additional set/get_sockopt() calls.
- * Steve Whitehouse: Fixed TIOCINQ ioctl to be same as Eduardo's new
- * code.
- * Steve Whitehouse: recvmsg() changed to try and behave in a POSIX like
- * way. Didn't manage it entirely, but its better.
- * Steve Whitehouse: ditto for sendmsg().
- * Steve Whitehouse: A selection of bug fixes to various things.
- * Steve Whitehouse: Added TIOCOUTQ ioctl.
- * Steve Whitehouse: Fixes to username2sockaddr & sockaddr2username.
- * Steve Whitehouse: Fixes to connect() error returns.
- * Patrick Caulfield: Fixes to delayed acceptance logic.
- * David S. Miller: New socket locking
- * Steve Whitehouse: Socket list hashing/locking
- * Arnaldo C. Melo: use capable, not suser
- * Steve Whitehouse: Removed unused code. Fix to use sk->allocation
- * when required.
- * Patrick Caulfield: /proc/net/decnet now has object name/number
- * Steve Whitehouse: Fixed local port allocation, hashed sk list
- * Matthew Wilcox: Fixes for dn_ioctl()
- * Steve Whitehouse: New connect/accept logic to allow timeouts and
- * prepare for sendpage etc.
- */
-
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
-HISTORY:
-
-Version Kernel Date Author/Comments
-------- ------ ---- ---------------
-Version 0.0.1 2.0.30 01-dic-97 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
-
- First Development of DECnet Socket La-
- yer for Linux. Only supports outgoing
- connections.
-
-Version 0.0.2 2.1.105 20-jun-98 Patrick J. Caulfield
- (patrick@pandh.demon.co.uk)
-
- Port to new kernel development version.
-
-Version 0.0.3 2.1.106 25-jun-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- _
- Added support for incoming connections
- so we can start developing server apps
- on Linux.
- -
- Module Support
-Version 0.0.4 2.1.109 21-jul-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- _
- Added support for X11R6.4. Now we can
- use DECnet transport for X on Linux!!!
- -
-Version 0.0.5 2.1.110 01-aug-98 Eduardo Marcelo Serrat
- (emserrat@geocities.com)
- Removed bugs on flow control
- Removed bugs on incoming accessdata
- order
- -
-Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
- dn_recvmsg fixes
-
- Patrick J. Caulfield
- dn_bind fixes
-*******************************************************************************/
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sched/signal.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/netfilter.h>
-#include <linux/seq_file.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <net/flow.h>
-#include <asm/ioctls.h>
-#include <linux/capability.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/jiffies.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/fib_rules.h>
-#include <net/tcp.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-
-struct dn_sock {
- struct sock sk;
- struct dn_scp scp;
-};
-
-static void dn_keepalive(struct sock *sk);
-
-#define DN_SK_HASH_SHIFT 8
-#define DN_SK_HASH_SIZE (1 << DN_SK_HASH_SHIFT)
-#define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
-
-
-static const struct proto_ops dn_proto_ops;
-static DEFINE_RWLOCK(dn_hash_lock);
-static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
-static struct hlist_head dn_wild_sk;
-static atomic_long_t decnet_memory_allocated;
-
-static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen, int flags);
-static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
-
-static struct hlist_head *dn_find_list(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->addr.sdn_flags & SDF_WILD)
- return hlist_empty(&dn_wild_sk) ? &dn_wild_sk : NULL;
-
- return &dn_sk_hash[le16_to_cpu(scp->addrloc) & DN_SK_HASH_MASK];
-}
-
-/*
- * Valid ports are those greater than zero and not already in use.
- */
-static int check_port(__le16 port)
-{
- struct sock *sk;
-
- if (port == 0)
- return -1;
-
- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(port) & DN_SK_HASH_MASK]) {
- struct dn_scp *scp = DN_SK(sk);
- if (scp->addrloc == port)
- return -1;
- }
- return 0;
-}
-
-static unsigned short port_alloc(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-static unsigned short port = 0x2000;
- unsigned short i_port = port;
-
- while(check_port(cpu_to_le16(++port)) != 0) {
- if (port == i_port)
- return 0;
- }
-
- scp->addrloc = cpu_to_le16(port);
-
- return 1;
-}
-
-/*
- * Since this is only ever called from user
- * level, we don't need a write_lock() version
- * of this.
- */
-static int dn_hash_sock(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct hlist_head *list;
- int rv = -EUSERS;
-
- BUG_ON(sk_hashed(sk));
-
- write_lock_bh(&dn_hash_lock);
-
- if (!scp->addrloc && !port_alloc(sk))
- goto out;
-
- rv = -EADDRINUSE;
- if ((list = dn_find_list(sk)) == NULL)
- goto out;
-
- sk_add_node(sk, list);
- rv = 0;
-out:
- write_unlock_bh(&dn_hash_lock);
- return rv;
-}
-
-static void dn_unhash_sock(struct sock *sk)
-{
- write_lock(&dn_hash_lock);
- sk_del_node_init(sk);
- write_unlock(&dn_hash_lock);
-}
-
-static void dn_unhash_sock_bh(struct sock *sk)
-{
- write_lock_bh(&dn_hash_lock);
- sk_del_node_init(sk);
- write_unlock_bh(&dn_hash_lock);
-}
-
-static struct hlist_head *listen_hash(struct sockaddr_dn *addr)
-{
- int i;
- unsigned int hash = addr->sdn_objnum;
-
- if (hash == 0) {
- hash = addr->sdn_objnamel;
- for(i = 0; i < le16_to_cpu(addr->sdn_objnamel); i++) {
- hash ^= addr->sdn_objname[i];
- hash ^= (hash << 3);
- }
- }
-
- return &dn_sk_hash[hash & DN_SK_HASH_MASK];
-}
-
-/*
- * Called to transform a socket from bound (i.e. with a local address)
- * into a listening socket (doesn't need a local port number) and rehashes
- * based upon the object name/number.
- */
-static void dn_rehash_sock(struct sock *sk)
-{
- struct hlist_head *list;
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->addr.sdn_flags & SDF_WILD)
- return;
-
- write_lock_bh(&dn_hash_lock);
- sk_del_node_init(sk);
- DN_SK(sk)->addrloc = 0;
- list = listen_hash(&DN_SK(sk)->addr);
- sk_add_node(sk, list);
- write_unlock_bh(&dn_hash_lock);
-}
-
-int dn_sockaddr2username(struct sockaddr_dn *sdn, unsigned char *buf, unsigned char type)
-{
- int len = 2;
-
- *buf++ = type;
-
- switch (type) {
- case 0:
- *buf++ = sdn->sdn_objnum;
- break;
- case 1:
- *buf++ = 0;
- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
- len = 3 + le16_to_cpu(sdn->sdn_objnamel);
- break;
- case 2:
- memset(buf, 0, 5);
- buf += 5;
- *buf++ = le16_to_cpu(sdn->sdn_objnamel);
- memcpy(buf, sdn->sdn_objname, le16_to_cpu(sdn->sdn_objnamel));
- len = 7 + le16_to_cpu(sdn->sdn_objnamel);
- break;
- }
-
- return len;
-}
-
-/*
- * On reception of usernames, we handle types 1 and 0 for destination
- * addresses only. Types 2 and 4 are used for source addresses, but the
- * UIC, GIC are ignored and they are both treated the same way. Type 3
- * is never used as I've no idea what its purpose might be or what its
- * format is.
- */
-int dn_username2sockaddr(unsigned char *data, int len, struct sockaddr_dn *sdn, unsigned char *fmt)
-{
- unsigned char type;
- int size = len;
- int namel = 12;
-
- sdn->sdn_objnum = 0;
- sdn->sdn_objnamel = cpu_to_le16(0);
- memset(sdn->sdn_objname, 0, DN_MAXOBJL);
-
- if (len < 2)
- return -1;
-
- len -= 2;
- *fmt = *data++;
- type = *data++;
-
- switch (*fmt) {
- case 0:
- sdn->sdn_objnum = type;
- return 2;
- case 1:
- namel = 16;
- break;
- case 2:
- len -= 4;
- data += 4;
- break;
- case 4:
- len -= 8;
- data += 8;
- break;
- default:
- return -1;
- }
-
- len -= 1;
-
- if (len < 0)
- return -1;
-
- sdn->sdn_objnamel = cpu_to_le16(*data++);
- len -= le16_to_cpu(sdn->sdn_objnamel);
-
- if ((len < 0) || (le16_to_cpu(sdn->sdn_objnamel) > namel))
- return -1;
-
- memcpy(sdn->sdn_objname, data, le16_to_cpu(sdn->sdn_objnamel));
-
- return size - len;
-}
-
-struct sock *dn_sklist_find_listener(struct sockaddr_dn *addr)
-{
- struct hlist_head *list = listen_hash(addr);
- struct sock *sk;
-
- read_lock(&dn_hash_lock);
- sk_for_each(sk, list) {
- struct dn_scp *scp = DN_SK(sk);
- if (sk->sk_state != TCP_LISTEN)
- continue;
- if (scp->addr.sdn_objnum) {
- if (scp->addr.sdn_objnum != addr->sdn_objnum)
- continue;
- } else {
- if (addr->sdn_objnum)
- continue;
- if (scp->addr.sdn_objnamel != addr->sdn_objnamel)
- continue;
- if (memcmp(scp->addr.sdn_objname, addr->sdn_objname, le16_to_cpu(addr->sdn_objnamel)) != 0)
- continue;
- }
- sock_hold(sk);
- read_unlock(&dn_hash_lock);
- return sk;
- }
-
- sk = sk_head(&dn_wild_sk);
- if (sk) {
- if (sk->sk_state == TCP_LISTEN)
- sock_hold(sk);
- else
- sk = NULL;
- }
-
- read_unlock(&dn_hash_lock);
- return sk;
-}
-
-struct sock *dn_find_by_skb(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sock *sk;
- struct dn_scp *scp;
-
- read_lock(&dn_hash_lock);
- sk_for_each(sk, &dn_sk_hash[le16_to_cpu(cb->dst_port) & DN_SK_HASH_MASK]) {
- scp = DN_SK(sk);
- if (cb->src != dn_saddr2dn(&scp->peer))
- continue;
- if (cb->dst_port != scp->addrloc)
- continue;
- if (scp->addrrem && (cb->src_port != scp->addrrem))
- continue;
- sock_hold(sk);
- goto found;
- }
- sk = NULL;
-found:
- read_unlock(&dn_hash_lock);
- return sk;
-}
-
-
-
-static void dn_destruct(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- skb_queue_purge(&scp->data_xmit_queue);
- skb_queue_purge(&scp->other_xmit_queue);
- skb_queue_purge(&scp->other_receive_queue);
-
- dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
-}
-
-static unsigned long dn_memory_pressure;
-
-static void dn_enter_memory_pressure(struct sock *sk)
-{
- if (!dn_memory_pressure) {
- dn_memory_pressure = 1;
- }
-}
-
-static struct proto dn_proto = {
- .name = "NSP",
- .owner = THIS_MODULE,
- .enter_memory_pressure = dn_enter_memory_pressure,
- .memory_pressure = &dn_memory_pressure,
- .memory_allocated = &decnet_memory_allocated,
- .sysctl_mem = sysctl_decnet_mem,
- .sysctl_wmem = sysctl_decnet_wmem,
- .sysctl_rmem = sysctl_decnet_rmem,
- .max_header = DN_MAX_NSP_DATA_HEADER + 64,
- .obj_size = sizeof(struct dn_sock),
-};
-
-static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gfp, int kern)
-{
- struct dn_scp *scp;
- struct sock *sk = sk_alloc(net, PF_DECnet, gfp, &dn_proto, kern);
-
- if (!sk)
- goto out;
-
- if (sock)
- sock->ops = &dn_proto_ops;
- sock_init_data(sock, sk);
-
- sk->sk_backlog_rcv = dn_nsp_backlog_rcv;
- sk->sk_destruct = dn_destruct;
- sk->sk_no_check_tx = 1;
- sk->sk_family = PF_DECnet;
- sk->sk_protocol = 0;
- sk->sk_allocation = gfp;
- sk->sk_sndbuf = sysctl_decnet_wmem[1];
- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
-
- /* Initialization of DECnet Session Control Port */
- scp = DN_SK(sk);
- scp->state = DN_O; /* Open */
- scp->numdat = 1; /* Next data seg to tx */
- scp->numoth = 1; /* Next oth data to tx */
- scp->ackxmt_dat = 0; /* Last data seg ack'ed */
- scp->ackxmt_oth = 0; /* Last oth data ack'ed */
- scp->ackrcv_dat = 0; /* Highest data ack recv*/
- scp->ackrcv_oth = 0; /* Last oth data ack rec*/
- scp->flowrem_sw = DN_SEND;
- scp->flowloc_sw = DN_SEND;
- scp->flowrem_dat = 0;
- scp->flowrem_oth = 1;
- scp->flowloc_dat = 0;
- scp->flowloc_oth = 1;
- scp->services_rem = 0;
- scp->services_loc = 1 | NSP_FC_NONE;
- scp->info_rem = 0;
- scp->info_loc = 0x03; /* NSP version 4.1 */
- scp->segsize_rem = 230 - DN_MAX_NSP_DATA_HEADER; /* Default: Updated by remote segsize */
- scp->nonagle = 0;
- scp->multi_ireq = 1;
- scp->accept_mode = ACC_IMMED;
- scp->addr.sdn_family = AF_DECnet;
- scp->peer.sdn_family = AF_DECnet;
- scp->accessdata.acc_accl = 5;
- memcpy(scp->accessdata.acc_acc, "LINUX", 5);
-
- scp->max_window = NSP_MAX_WINDOW;
- scp->snd_window = NSP_MIN_WINDOW;
- scp->nsp_srtt = NSP_INITIAL_SRTT;
- scp->nsp_rttvar = NSP_INITIAL_RTTVAR;
- scp->nsp_rxtshift = 0;
-
- skb_queue_head_init(&scp->data_xmit_queue);
- skb_queue_head_init(&scp->other_xmit_queue);
- skb_queue_head_init(&scp->other_receive_queue);
-
- scp->persist = 0;
- scp->persist_fxn = NULL;
- scp->keepalive = 10 * HZ;
- scp->keepalive_fxn = dn_keepalive;
-
- dn_start_slow_timer(sk);
-out:
- return sk;
-}
-
-/*
- * Keepalive timer.
- * FIXME: Should respond to SO_KEEPALIVE etc.
- */
-static void dn_keepalive(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- /*
- * By checking the other_data transmit queue is empty
- * we are double checking that we are not sending too
- * many of these keepalive frames.
- */
- if (skb_queue_empty(&scp->other_xmit_queue))
- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
-}
-
-
-/*
- * Timer for shutdown/destroyed sockets.
- * When socket is dead & no packets have been sent for a
- * certain amount of time, they are removed by this
- * routine. Also takes care of sending out DI & DC
- * frames at correct times.
- */
-int dn_destroy_timer(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- scp->persist = dn_nsp_persist(sk);
-
- switch (scp->state) {
- case DN_DI:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
- if (scp->nsp_rxtshift >= decnet_di_count)
- scp->state = DN_CN;
- return 0;
-
- case DN_DR:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, GFP_ATOMIC);
- if (scp->nsp_rxtshift >= decnet_dr_count)
- scp->state = DN_DRC;
- return 0;
-
- case DN_DN:
- if (scp->nsp_rxtshift < decnet_dn_count) {
- /* printk(KERN_DEBUG "dn_destroy_timer: DN\n"); */
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
- GFP_ATOMIC);
- return 0;
- }
- }
-
- scp->persist = (HZ * decnet_time_wait);
-
- if (sk->sk_socket)
- return 0;
-
- if (time_after_eq(jiffies, scp->stamp + HZ * decnet_time_wait)) {
- dn_unhash_sock(sk);
- sock_put(sk);
- return 1;
- }
-
- return 0;
-}
-
-static void dn_destroy_sock(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- scp->nsp_rxtshift = 0; /* reset back off */
-
- if (sk->sk_socket) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- }
-
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_DN:
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC,
- sk->sk_allocation);
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
- break;
- case DN_CR:
- scp->state = DN_DR;
- goto disc_reject;
- case DN_RUN:
- scp->state = DN_DI;
- /* fall through */
- case DN_DI:
- case DN_DR:
-disc_reject:
- dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation);
- /* fall through */
- case DN_NC:
- case DN_NR:
- case DN_RJ:
- case DN_DIC:
- case DN_CN:
- case DN_DRC:
- case DN_CI:
- case DN_CD:
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
- break;
- default:
- printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n");
- /* fall through */
- case DN_O:
- dn_stop_slow_timer(sk);
-
- dn_unhash_sock_bh(sk);
- sock_put(sk);
-
- break;
- }
-}
-
-char *dn_addr2asc(__u16 addr, char *buf)
-{
- unsigned short node, area;
-
- node = addr & 0x03ff;
- area = addr >> 10;
- sprintf(buf, "%hd.%hd", area, node);
-
- return buf;
-}
-
-
-
-static int dn_create(struct net *net, struct socket *sock, int protocol,
- int kern)
-{
- struct sock *sk;
-
- if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
- return -EINVAL;
-
- if (!net_eq(net, &init_net))
- return -EAFNOSUPPORT;
-
- switch (sock->type) {
- case SOCK_SEQPACKET:
- if (protocol != DNPROTO_NSP)
- return -EPROTONOSUPPORT;
- break;
- case SOCK_STREAM:
- break;
- default:
- return -ESOCKTNOSUPPORT;
- }
-
-
- if ((sk = dn_alloc_sock(net, sock, GFP_KERNEL, kern)) == NULL)
- return -ENOBUFS;
-
- sk->sk_protocol = protocol;
-
- return 0;
-}
-
-
-static int
-dn_release(struct socket *sock)
-{
- struct sock *sk = sock->sk;
-
- if (sk) {
- sock_orphan(sk);
- sock_hold(sk);
- lock_sock(sk);
- dn_destroy_sock(sk);
- release_sock(sk);
- sock_put(sk);
- }
-
- return 0;
-}
-
-static int dn_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct sockaddr_dn *saddr = (struct sockaddr_dn *)uaddr;
- struct net_device *dev, *ldev;
- int rv;
-
- if (addr_len != sizeof(struct sockaddr_dn))
- return -EINVAL;
-
- if (saddr->sdn_family != AF_DECnet)
- return -EINVAL;
-
- if (le16_to_cpu(saddr->sdn_nodeaddrl) && (le16_to_cpu(saddr->sdn_nodeaddrl) != 2))
- return -EINVAL;
-
- if (le16_to_cpu(saddr->sdn_objnamel) > DN_MAXOBJL)
- return -EINVAL;
-
- if (saddr->sdn_flags & ~SDF_WILD)
- return -EINVAL;
-
- if (!capable(CAP_NET_BIND_SERVICE) && (saddr->sdn_objnum ||
- (saddr->sdn_flags & SDF_WILD)))
- return -EACCES;
-
- if (!(saddr->sdn_flags & SDF_WILD)) {
- if (le16_to_cpu(saddr->sdn_nodeaddrl)) {
- rcu_read_lock();
- ldev = NULL;
- for_each_netdev_rcu(&init_net, dev) {
- if (!dev->dn_ptr)
- continue;
- if (dn_dev_islocal(dev, dn_saddr2dn(saddr))) {
- ldev = dev;
- break;
- }
- }
- rcu_read_unlock();
- if (ldev == NULL)
- return -EADDRNOTAVAIL;
- }
- }
-
- rv = -EINVAL;
- lock_sock(sk);
- if (sock_flag(sk, SOCK_ZAPPED)) {
- memcpy(&scp->addr, saddr, addr_len);
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- rv = dn_hash_sock(sk);
- if (rv)
- sock_set_flag(sk, SOCK_ZAPPED);
- }
- release_sock(sk);
-
- return rv;
-}
-
-
-static int dn_auto_bind(struct socket *sock)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int rv;
-
- sock_reset_flag(sk, SOCK_ZAPPED);
-
- scp->addr.sdn_flags = 0;
- scp->addr.sdn_objnum = 0;
-
- /*
- * This stuff is to keep compatibility with Eduardo's
- * patch. I hope I can dispense with it shortly...
- */
- if ((scp->accessdata.acc_accl != 0) &&
- (scp->accessdata.acc_accl <= 12)) {
-
- scp->addr.sdn_objnamel = cpu_to_le16(scp->accessdata.acc_accl);
- memcpy(scp->addr.sdn_objname, scp->accessdata.acc_acc, le16_to_cpu(scp->addr.sdn_objnamel));
-
- scp->accessdata.acc_accl = 0;
- memset(scp->accessdata.acc_acc, 0, 40);
- }
- /* End of compatibility stuff */
-
- scp->addr.sdn_add.a_len = cpu_to_le16(2);
- rv = dn_dev_bind_default((__le16 *)scp->addr.sdn_add.a_addr);
- if (rv == 0) {
- rv = dn_hash_sock(sk);
- if (rv)
- sock_set_flag(sk, SOCK_ZAPPED);
- }
-
- return rv;
-}
-
-static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
-{
- struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
- int err;
-
- if (scp->state != DN_CR)
- return -EINVAL;
-
- scp->state = DN_CC;
- scp->segsize_loc = dst_metric_advmss(__sk_dst_get(sk));
- dn_send_conn_conf(sk, allocation);
-
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- for(;;) {
- release_sock(sk);
- if (scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
- lock_sock(sk);
- err = 0;
- if (scp->state == DN_RUN)
- break;
- err = sock_error(sk);
- if (err)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -EAGAIN;
- if (!*timeo)
- break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- }
- finish_wait(sk_sleep(sk), &wait);
- if (err == 0) {
- sk->sk_socket->state = SS_CONNECTED;
- } else if (scp->state != DN_CC) {
- sk->sk_socket->state = SS_UNCONNECTED;
- }
- return err;
-}
-
-static int dn_wait_run(struct sock *sk, long *timeo)
-{
- struct dn_scp *scp = DN_SK(sk);
- DEFINE_WAIT(wait);
- int err = 0;
-
- if (scp->state == DN_RUN)
- goto out;
-
- if (!*timeo)
- return -EALREADY;
-
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- for(;;) {
- release_sock(sk);
- if (scp->state == DN_CI || scp->state == DN_CC)
- *timeo = schedule_timeout(*timeo);
- lock_sock(sk);
- err = 0;
- if (scp->state == DN_RUN)
- break;
- err = sock_error(sk);
- if (err)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -ETIMEDOUT;
- if (!*timeo)
- break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- }
- finish_wait(sk_sleep(sk), &wait);
-out:
- if (err == 0) {
- sk->sk_socket->state = SS_CONNECTED;
- } else if (scp->state != DN_CI && scp->state != DN_CC) {
- sk->sk_socket->state = SS_UNCONNECTED;
- }
- return err;
-}
-
-static int __dn_connect(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
-{
- struct socket *sock = sk->sk_socket;
- struct dn_scp *scp = DN_SK(sk);
- int err = -EISCONN;
- struct flowidn fld;
- struct dst_entry *dst;
-
- if (sock->state == SS_CONNECTED)
- goto out;
-
- if (sock->state == SS_CONNECTING) {
- err = 0;
- if (scp->state == DN_RUN) {
- sock->state = SS_CONNECTED;
- goto out;
- }
- err = -ECONNREFUSED;
- if (scp->state != DN_CI && scp->state != DN_CC) {
- sock->state = SS_UNCONNECTED;
- goto out;
- }
- return dn_wait_run(sk, timeo);
- }
-
- err = -EINVAL;
- if (scp->state != DN_O)
- goto out;
-
- if (addr == NULL || addrlen != sizeof(struct sockaddr_dn))
- goto out;
- if (addr->sdn_family != AF_DECnet)
- goto out;
- if (addr->sdn_flags & SDF_WILD)
- goto out;
-
- if (sock_flag(sk, SOCK_ZAPPED)) {
- err = dn_auto_bind(sk->sk_socket);
- if (err)
- goto out;
- }
-
- memcpy(&scp->peer, addr, sizeof(struct sockaddr_dn));
-
- err = -EHOSTUNREACH;
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_oif = sk->sk_bound_dev_if;
- fld.daddr = dn_saddr2dn(&scp->peer);
- fld.saddr = dn_saddr2dn(&scp->addr);
- dn_sk_ports_copy(&fld, scp);
- fld.flowidn_proto = DNPROTO_NSP;
- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, flags) < 0)
- goto out;
- dst = __sk_dst_get(sk);
- sk->sk_route_caps = dst->dev->features;
- sock->state = SS_CONNECTING;
- scp->state = DN_CI;
- scp->segsize_loc = dst_metric_advmss(dst);
-
- dn_nsp_send_conninit(sk, NSP_CI);
- err = -EINPROGRESS;
- if (*timeo) {
- err = dn_wait_run(sk, timeo);
- }
-out:
- return err;
-}
-
-static int dn_connect(struct socket *sock, struct sockaddr *uaddr, int addrlen, int flags)
-{
- struct sockaddr_dn *addr = (struct sockaddr_dn *)uaddr;
- struct sock *sk = sock->sk;
- int err;
- long timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
-
- lock_sock(sk);
- err = __dn_connect(sk, addr, addrlen, &timeo, 0);
- release_sock(sk);
-
- return err;
-}
-
-static inline int dn_check_state(struct sock *sk, struct sockaddr_dn *addr, int addrlen, long *timeo, int flags)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- switch (scp->state) {
- case DN_RUN:
- return 0;
- case DN_CR:
- return dn_confirm_accept(sk, timeo, sk->sk_allocation);
- case DN_CI:
- case DN_CC:
- return dn_wait_run(sk, timeo);
- case DN_O:
- return __dn_connect(sk, addr, addrlen, timeo, flags);
- }
-
- return -EINVAL;
-}
-
-
-static void dn_access_copy(struct sk_buff *skb, struct accessdata_dn *acc)
-{
- unsigned char *ptr = skb->data;
-
- acc->acc_userl = *ptr++;
- memcpy(&acc->acc_user, ptr, acc->acc_userl);
- ptr += acc->acc_userl;
-
- acc->acc_passl = *ptr++;
- memcpy(&acc->acc_pass, ptr, acc->acc_passl);
- ptr += acc->acc_passl;
-
- acc->acc_accl = *ptr++;
- memcpy(&acc->acc_acc, ptr, acc->acc_accl);
-
- skb_pull(skb, acc->acc_accl + acc->acc_passl + acc->acc_userl + 3);
-
-}
-
-static void dn_user_copy(struct sk_buff *skb, struct optdata_dn *opt)
-{
- unsigned char *ptr = skb->data;
- u16 len = *ptr++; /* yes, it's 8bit on the wire */
-
- BUG_ON(len > 16); /* we've checked the contents earlier */
- opt->opt_optl = cpu_to_le16(len);
- opt->opt_status = 0;
- memcpy(opt->opt_data, ptr, len);
- skb_pull(skb, len + 1);
-}
-
-static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
-{
- DEFINE_WAIT(wait);
- struct sk_buff *skb = NULL;
- int err = 0;
-
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- for(;;) {
- release_sock(sk);
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- *timeo = schedule_timeout(*timeo);
- skb = skb_dequeue(&sk->sk_receive_queue);
- }
- lock_sock(sk);
- if (skb != NULL)
- break;
- err = -EINVAL;
- if (sk->sk_state != TCP_LISTEN)
- break;
- err = sock_intr_errno(*timeo);
- if (signal_pending(current))
- break;
- err = -EAGAIN;
- if (!*timeo)
- break;
- prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- }
- finish_wait(sk_sleep(sk), &wait);
-
- return skb == NULL ? ERR_PTR(err) : skb;
-}
-
-static int dn_accept(struct socket *sock, struct socket *newsock, int flags,
- bool kern)
-{
- struct sock *sk = sock->sk, *newsk;
- struct sk_buff *skb = NULL;
- struct dn_skb_cb *cb;
- unsigned char menuver;
- int err = 0;
- unsigned char type;
- long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
- struct dst_entry *dst;
-
- lock_sock(sk);
-
- if (sk->sk_state != TCP_LISTEN || DN_SK(sk)->state != DN_O) {
- release_sock(sk);
- return -EINVAL;
- }
-
- skb = skb_dequeue(&sk->sk_receive_queue);
- if (skb == NULL) {
- skb = dn_wait_for_connect(sk, &timeo);
- if (IS_ERR(skb)) {
- release_sock(sk);
- return PTR_ERR(skb);
- }
- }
-
- cb = DN_SKB_CB(skb);
- sk->sk_ack_backlog--;
- newsk = dn_alloc_sock(sock_net(sk), newsock, sk->sk_allocation, kern);
- if (newsk == NULL) {
- release_sock(sk);
- kfree_skb(skb);
- return -ENOBUFS;
- }
- release_sock(sk);
-
- dst = skb_dst(skb);
- sk_dst_set(newsk, dst);
- skb_dst_set(skb, NULL);
-
- DN_SK(newsk)->state = DN_CR;
- DN_SK(newsk)->addrrem = cb->src_port;
- DN_SK(newsk)->services_rem = cb->services;
- DN_SK(newsk)->info_rem = cb->info;
- DN_SK(newsk)->segsize_rem = cb->segsize;
- DN_SK(newsk)->accept_mode = DN_SK(sk)->accept_mode;
-
- if (DN_SK(newsk)->segsize_rem < 230)
- DN_SK(newsk)->segsize_rem = 230;
-
- if ((DN_SK(newsk)->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
- DN_SK(newsk)->max_window = decnet_no_fc_max_cwnd;
-
- newsk->sk_state = TCP_LISTEN;
- memcpy(&(DN_SK(newsk)->addr), &(DN_SK(sk)->addr), sizeof(struct sockaddr_dn));
-
- /*
- * If we are listening on a wild socket, we don't want
- * the newly created socket on the wrong hash queue.
- */
- DN_SK(newsk)->addr.sdn_flags &= ~SDF_WILD;
-
- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->addr), &type));
- skb_pull(skb, dn_username2sockaddr(skb->data, skb->len, &(DN_SK(newsk)->peer), &type));
- *(__le16 *)(DN_SK(newsk)->peer.sdn_add.a_addr) = cb->src;
- *(__le16 *)(DN_SK(newsk)->addr.sdn_add.a_addr) = cb->dst;
-
- menuver = *skb->data;
- skb_pull(skb, 1);
-
- if (menuver & DN_MENUVER_ACC)
- dn_access_copy(skb, &(DN_SK(newsk)->accessdata));
-
- if (menuver & DN_MENUVER_USR)
- dn_user_copy(skb, &(DN_SK(newsk)->conndata_in));
-
- if (menuver & DN_MENUVER_PRX)
- DN_SK(newsk)->peer.sdn_flags |= SDF_PROXY;
-
- if (menuver & DN_MENUVER_UIC)
- DN_SK(newsk)->peer.sdn_flags |= SDF_UICPROXY;
-
- kfree_skb(skb);
-
- memcpy(&(DN_SK(newsk)->conndata_out), &(DN_SK(sk)->conndata_out),
- sizeof(struct optdata_dn));
- memcpy(&(DN_SK(newsk)->discdata_out), &(DN_SK(sk)->discdata_out),
- sizeof(struct optdata_dn));
-
- lock_sock(newsk);
- err = dn_hash_sock(newsk);
- if (err == 0) {
- sock_reset_flag(newsk, SOCK_ZAPPED);
- dn_send_conn_ack(newsk);
-
- /*
- * Here we use sk->sk_allocation since although the conn conf is
- * for the newsk, the context is the old socket.
- */
- if (DN_SK(newsk)->accept_mode == ACC_IMMED)
- err = dn_confirm_accept(newsk, &timeo,
- sk->sk_allocation);
- }
- release_sock(newsk);
- return err;
-}
-
-
-static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
-{
- struct sockaddr_dn *sa = (struct sockaddr_dn *)uaddr;
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
-
- lock_sock(sk);
-
- if (peer) {
- if ((sock->state != SS_CONNECTED &&
- sock->state != SS_CONNECTING) &&
- scp->accept_mode == ACC_IMMED) {
- release_sock(sk);
- return -ENOTCONN;
- }
-
- memcpy(sa, &scp->peer, sizeof(struct sockaddr_dn));
- } else {
- memcpy(sa, &scp->addr, sizeof(struct sockaddr_dn));
- }
-
- release_sock(sk);
-
- return sizeof(struct sockaddr_dn);
-}
-
-
-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- __poll_t mask = datagram_poll(file, sock, wait);
-
- if (!skb_queue_empty_lockless(&scp->other_receive_queue))
- mask |= EPOLLRDBAND;
-
- return mask;
-}
-
-static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int err = -EOPNOTSUPP;
- long amount = 0;
- struct sk_buff *skb;
- int val;
-
- switch(cmd)
- {
- case SIOCGIFADDR:
- case SIOCSIFADDR:
- return dn_dev_ioctl(cmd, (void __user *)arg);
-
- case SIOCATMARK:
- lock_sock(sk);
- val = !skb_queue_empty(&scp->other_receive_queue);
- if (scp->state != DN_RUN)
- val = -ENOTCONN;
- release_sock(sk);
- return val;
-
- case TIOCOUTQ:
- amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk);
- if (amount < 0)
- amount = 0;
- err = put_user(amount, (int __user *)arg);
- break;
-
- case TIOCINQ:
- lock_sock(sk);
- skb = skb_peek(&scp->other_receive_queue);
- if (skb) {
- amount = skb->len;
- } else {
- skb_queue_walk(&sk->sk_receive_queue, skb)
- amount += skb->len;
- }
- release_sock(sk);
- err = put_user(amount, (int __user *)arg);
- break;
-
- default:
- err = -ENOIOCTLCMD;
- break;
- }
-
- return err;
-}
-
-static int dn_listen(struct socket *sock, int backlog)
-{
- struct sock *sk = sock->sk;
- int err = -EINVAL;
-
- lock_sock(sk);
-
- if (sock_flag(sk, SOCK_ZAPPED))
- goto out;
-
- if ((DN_SK(sk)->state != DN_O) || (sk->sk_state == TCP_LISTEN))
- goto out;
-
- sk->sk_max_ack_backlog = backlog;
- sk->sk_ack_backlog = 0;
- sk->sk_state = TCP_LISTEN;
- err = 0;
- dn_rehash_sock(sk);
-
-out:
- release_sock(sk);
-
- return err;
-}
-
-
-static int dn_shutdown(struct socket *sock, int how)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- int err = -ENOTCONN;
-
- lock_sock(sk);
-
- if (sock->state == SS_UNCONNECTED)
- goto out;
-
- err = 0;
- if (sock->state == SS_DISCONNECTING)
- goto out;
-
- err = -EINVAL;
- if (scp->state == DN_O)
- goto out;
-
- if (how != SHUT_RDWR)
- goto out;
-
- sk->sk_shutdown = SHUTDOWN_MASK;
- dn_destroy_sock(sk);
- err = 0;
-
-out:
- release_sock(sk);
-
- return err;
-}
-
-static int dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
-{
- struct sock *sk = sock->sk;
- int err;
-
- lock_sock(sk);
- err = __dn_setsockopt(sock, level, optname, optval, optlen, 0);
- release_sock(sk);
-#ifdef CONFIG_NETFILTER
- /* we need to exclude all possible ENOPROTOOPTs except default case */
- if (err == -ENOPROTOOPT && optname != DSO_LINKINFO &&
- optname != DSO_STREAM && optname != DSO_SEQPACKET)
- err = nf_setsockopt(sk, PF_DECnet, optname, optval, optlen);
-#endif
-
- return err;
-}
-
-static int __dn_setsockopt(struct socket *sock, int level,int optname, char __user *optval, unsigned int optlen, int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- long timeo;
- union {
- struct optdata_dn opt;
- struct accessdata_dn acc;
- int mode;
- unsigned long win;
- int val;
- unsigned char services;
- unsigned char info;
- } u;
- int err;
-
- if (optlen && !optval)
- return -EINVAL;
-
- if (optlen > sizeof(u))
- return -EINVAL;
-
- if (copy_from_user(&u, optval, optlen))
- return -EFAULT;
-
- switch (optname) {
- case DSO_CONDATA:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if ((scp->state != DN_O) && (scp->state != DN_CR))
- return -EINVAL;
-
- if (optlen != sizeof(struct optdata_dn))
- return -EINVAL;
-
- if (le16_to_cpu(u.opt.opt_optl) > 16)
- return -EINVAL;
-
- memcpy(&scp->conndata_out, &u.opt, optlen);
- break;
-
- case DSO_DISDATA:
- if (sock->state != SS_CONNECTED &&
- scp->accept_mode == ACC_IMMED)
- return -ENOTCONN;
-
- if (optlen != sizeof(struct optdata_dn))
- return -EINVAL;
-
- if (le16_to_cpu(u.opt.opt_optl) > 16)
- return -EINVAL;
-
- memcpy(&scp->discdata_out, &u.opt, optlen);
- break;
-
- case DSO_CONACCESS:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if (scp->state != DN_O)
- return -EINVAL;
-
- if (optlen != sizeof(struct accessdata_dn))
- return -EINVAL;
-
- if ((u.acc.acc_accl > DN_MAXACCL) ||
- (u.acc.acc_passl > DN_MAXACCL) ||
- (u.acc.acc_userl > DN_MAXACCL))
- return -EINVAL;
-
- memcpy(&scp->accessdata, &u.acc, optlen);
- break;
-
- case DSO_ACCEPTMODE:
- if (sock->state == SS_CONNECTED)
- return -EISCONN;
- if (scp->state != DN_O)
- return -EINVAL;
-
- if (optlen != sizeof(int))
- return -EINVAL;
-
- if ((u.mode != ACC_IMMED) && (u.mode != ACC_DEFER))
- return -EINVAL;
-
- scp->accept_mode = (unsigned char)u.mode;
- break;
-
- case DSO_CONACCEPT:
- if (scp->state != DN_CR)
- return -EINVAL;
- timeo = sock_rcvtimeo(sk, 0);
- err = dn_confirm_accept(sk, &timeo, sk->sk_allocation);
- return err;
-
- case DSO_CONREJECT:
- if (scp->state != DN_CR)
- return -EINVAL;
-
- scp->state = DN_DR;
- sk->sk_shutdown = SHUTDOWN_MASK;
- dn_nsp_send_disc(sk, 0x38, 0, sk->sk_allocation);
- break;
-
- case DSO_MAXWINDOW:
- if (optlen != sizeof(unsigned long))
- return -EINVAL;
- if (u.win > NSP_MAX_WINDOW)
- u.win = NSP_MAX_WINDOW;
- if (u.win == 0)
- return -EINVAL;
- scp->max_window = u.win;
- if (scp->snd_window > u.win)
- scp->snd_window = u.win;
- break;
-
- case DSO_NODELAY:
- if (optlen != sizeof(int))
- return -EINVAL;
- if (scp->nonagle == TCP_NAGLE_CORK)
- return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_OFF;
- /* if (scp->nonagle == 1) { Push pending frames } */
- break;
-
- case DSO_CORK:
- if (optlen != sizeof(int))
- return -EINVAL;
- if (scp->nonagle == TCP_NAGLE_OFF)
- return -EINVAL;
- scp->nonagle = (u.val == 0) ? 0 : TCP_NAGLE_CORK;
- /* if (scp->nonagle == 0) { Push pending frames } */
- break;
-
- case DSO_SERVICES:
- if (optlen != sizeof(unsigned char))
- return -EINVAL;
- if ((u.services & ~NSP_FC_MASK) != 0x01)
- return -EINVAL;
- if ((u.services & NSP_FC_MASK) == NSP_FC_MASK)
- return -EINVAL;
- scp->services_loc = u.services;
- break;
-
- case DSO_INFO:
- if (optlen != sizeof(unsigned char))
- return -EINVAL;
- if (u.info & 0xfc)
- return -EINVAL;
- scp->info_loc = u.info;
- break;
-
- case DSO_LINKINFO:
- case DSO_STREAM:
- case DSO_SEQPACKET:
- default:
- return -ENOPROTOOPT;
- }
-
- return 0;
-}
-
-static int dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
-{
- struct sock *sk = sock->sk;
- int err;
-
- lock_sock(sk);
- err = __dn_getsockopt(sock, level, optname, optval, optlen, 0);
- release_sock(sk);
-#ifdef CONFIG_NETFILTER
- if (err == -ENOPROTOOPT && optname != DSO_STREAM &&
- optname != DSO_SEQPACKET && optname != DSO_CONACCEPT &&
- optname != DSO_CONREJECT) {
- int len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- err = nf_getsockopt(sk, PF_DECnet, optname, optval, &len);
- if (err >= 0)
- err = put_user(len, optlen);
- }
-#endif
-
- return err;
-}
-
-static int __dn_getsockopt(struct socket *sock, int level,int optname, char __user *optval,int __user *optlen, int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct linkinfo_dn link;
- unsigned int r_len;
- void *r_data = NULL;
- unsigned int val;
-
- if(get_user(r_len , optlen))
- return -EFAULT;
-
- switch (optname) {
- case DSO_CONDATA:
- if (r_len > sizeof(struct optdata_dn))
- r_len = sizeof(struct optdata_dn);
- r_data = &scp->conndata_in;
- break;
-
- case DSO_DISDATA:
- if (r_len > sizeof(struct optdata_dn))
- r_len = sizeof(struct optdata_dn);
- r_data = &scp->discdata_in;
- break;
-
- case DSO_CONACCESS:
- if (r_len > sizeof(struct accessdata_dn))
- r_len = sizeof(struct accessdata_dn);
- r_data = &scp->accessdata;
- break;
-
- case DSO_ACCEPTMODE:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->accept_mode;
- break;
-
- case DSO_LINKINFO:
- if (r_len > sizeof(struct linkinfo_dn))
- r_len = sizeof(struct linkinfo_dn);
-
- memset(&link, 0, sizeof(link));
-
- switch (sock->state) {
- case SS_CONNECTING:
- link.idn_linkstate = LL_CONNECTING;
- break;
- case SS_DISCONNECTING:
- link.idn_linkstate = LL_DISCONNECTING;
- break;
- case SS_CONNECTED:
- link.idn_linkstate = LL_RUNNING;
- break;
- default:
- link.idn_linkstate = LL_INACTIVE;
- }
-
- link.idn_segsize = scp->segsize_rem;
- r_data = &link;
- break;
-
- case DSO_MAXWINDOW:
- if (r_len > sizeof(unsigned long))
- r_len = sizeof(unsigned long);
- r_data = &scp->max_window;
- break;
-
- case DSO_NODELAY:
- if (r_len > sizeof(int))
- r_len = sizeof(int);
- val = (scp->nonagle == TCP_NAGLE_OFF);
- r_data = &val;
- break;
-
- case DSO_CORK:
- if (r_len > sizeof(int))
- r_len = sizeof(int);
- val = (scp->nonagle == TCP_NAGLE_CORK);
- r_data = &val;
- break;
-
- case DSO_SERVICES:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->services_rem;
- break;
-
- case DSO_INFO:
- if (r_len > sizeof(unsigned char))
- r_len = sizeof(unsigned char);
- r_data = &scp->info_rem;
- break;
-
- case DSO_STREAM:
- case DSO_SEQPACKET:
- case DSO_CONACCEPT:
- case DSO_CONREJECT:
- default:
- return -ENOPROTOOPT;
- }
-
- if (r_data) {
- if (copy_to_user(optval, r_data, r_len))
- return -EFAULT;
- if (put_user(r_len, optlen))
- return -EFAULT;
- }
-
- return 0;
-}
-
-
-static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int target)
-{
- struct sk_buff *skb;
- int len = 0;
-
- if (flags & MSG_OOB)
- return !skb_queue_empty(q) ? 1 : 0;
-
- skb_queue_walk(q, skb) {
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- len += skb->len;
-
- if (cb->nsp_flags & 0x40) {
- /* SOCK_SEQPACKET reads to EOM */
- if (sk->sk_type == SOCK_SEQPACKET)
- return 1;
- /* so does SOCK_STREAM unless WAITALL is specified */
- if (!(flags & MSG_WAITALL))
- return 1;
- }
-
- /* minimum data length for read exceeded */
- if (len >= target)
- return 1;
- }
-
- return 0;
-}
-
-
-static int dn_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
- int flags)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff_head *queue = &sk->sk_receive_queue;
- size_t target = size > 1 ? 1 : 0;
- size_t copied = 0;
- int rv = 0;
- struct sk_buff *skb, *n;
- struct dn_skb_cb *cb = NULL;
- unsigned char eor = 0;
- long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- lock_sock(sk);
-
- if (sock_flag(sk, SOCK_ZAPPED)) {
- rv = -EADDRNOTAVAIL;
- goto out;
- }
-
- if (sk->sk_shutdown & RCV_SHUTDOWN) {
- rv = 0;
- goto out;
- }
-
- rv = dn_check_state(sk, NULL, 0, &timeo, flags);
- if (rv)
- goto out;
-
- if (flags & ~(MSG_CMSG_COMPAT|MSG_PEEK|MSG_OOB|MSG_WAITALL|MSG_DONTWAIT|MSG_NOSIGNAL)) {
- rv = -EOPNOTSUPP;
- goto out;
- }
-
- if (flags & MSG_OOB)
- queue = &scp->other_receive_queue;
-
- if (flags & MSG_WAITALL)
- target = size;
-
-
- /*
- * See if there is data ready to read, sleep if there isn't
- */
- for(;;) {
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- if (sk->sk_err)
- goto out;
-
- if (!skb_queue_empty(&scp->other_receive_queue)) {
- if (!(flags & MSG_OOB)) {
- msg->msg_flags |= MSG_OOB;
- if (!scp->other_report) {
- scp->other_report = 1;
- goto out;
- }
- }
- }
-
- if (scp->state != DN_RUN)
- goto out;
-
- if (signal_pending(current)) {
- rv = sock_intr_errno(timeo);
- goto out;
- }
-
- if (dn_data_ready(sk, queue, flags, target))
- break;
-
- if (flags & MSG_DONTWAIT) {
- rv = -EWOULDBLOCK;
- goto out;
- }
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- }
-
- skb_queue_walk_safe(queue, skb, n) {
- unsigned int chunk = skb->len;
- cb = DN_SKB_CB(skb);
-
- if ((chunk + copied) > size)
- chunk = size - copied;
-
- if (memcpy_to_msg(msg, skb->data, chunk)) {
- rv = -EFAULT;
- break;
- }
- copied += chunk;
-
- if (!(flags & MSG_PEEK))
- skb_pull(skb, chunk);
-
- eor = cb->nsp_flags & 0x40;
-
- if (skb->len == 0) {
- skb_unlink(skb, queue);
- kfree_skb(skb);
- /*
- * N.B. Don't refer to skb or cb after this point
- * in loop.
- */
- if ((scp->flowloc_sw == DN_DONTSEND) && !dn_congested(sk)) {
- scp->flowloc_sw = DN_SEND;
- dn_nsp_send_link(sk, DN_SEND, 0);
- }
- }
-
- if (eor) {
- if (sk->sk_type == SOCK_SEQPACKET)
- break;
- if (!(flags & MSG_WAITALL))
- break;
- }
-
- if (flags & MSG_OOB)
- break;
-
- if (copied >= target)
- break;
- }
-
- rv = copied;
-
-
- if (eor && (sk->sk_type == SOCK_SEQPACKET))
- msg->msg_flags |= MSG_EOR;
-
-out:
- if (rv == 0)
- rv = (flags & MSG_PEEK) ? -sk->sk_err : sock_error(sk);
-
- if ((rv >= 0) && msg->msg_name) {
- __sockaddr_check_size(sizeof(struct sockaddr_dn));
- memcpy(msg->msg_name, &scp->peer, sizeof(struct sockaddr_dn));
- msg->msg_namelen = sizeof(struct sockaddr_dn);
- }
-
- release_sock(sk);
-
- return rv;
-}
-
-
-static inline int dn_queue_too_long(struct dn_scp *scp, struct sk_buff_head *queue, int flags)
-{
- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
- if (skb_queue_len(queue) >= scp->snd_window)
- return 1;
- if (fctype != NSP_FC_NONE) {
- if (flags & MSG_OOB) {
- if (scp->flowrem_oth == 0)
- return 1;
- } else {
- if (scp->flowrem_dat == 0)
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * The DECnet spec requires that the "routing layer" accepts packets which
- * are at least 230 bytes in size. This excludes any headers which the NSP
- * layer might add, so we always assume that we'll be using the maximal
- * length header on data packets. The variation in length is due to the
- * inclusion (or not) of the two 16 bit acknowledgement fields so it doesn't
- * make much practical difference.
- */
-unsigned int dn_mss_from_pmtu(struct net_device *dev, int mtu)
-{
- unsigned int mss = 230 - DN_MAX_NSP_DATA_HEADER;
- if (dev) {
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- mtu -= LL_RESERVED_SPACE(dev);
- if (dn_db->use_long)
- mtu -= 21;
- else
- mtu -= 6;
- mtu -= DN_MAX_NSP_DATA_HEADER;
- } else {
- /*
- * 21 = long header, 16 = guess at MAC header length
- */
- mtu -= (21 + DN_MAX_NSP_DATA_HEADER + 16);
- }
- if (mtu > mss)
- mss = mtu;
- return mss;
-}
-
-static inline unsigned int dn_current_mss(struct sock *sk, int flags)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- struct dn_scp *scp = DN_SK(sk);
- int mss_now = min_t(int, scp->segsize_loc, scp->segsize_rem);
-
- /* Other data messages are limited to 16 bytes per packet */
- if (flags & MSG_OOB)
- return 16;
-
- /* This works out the maximum size of segment we can send out */
- if (dst) {
- u32 mtu = dst_mtu(dst);
- mss_now = min_t(int, dn_mss_from_pmtu(dst->dev, mtu), mss_now);
- }
-
- return mss_now;
-}
-
-/*
- * N.B. We get the timeout wrong here, but then we always did get it
- * wrong before and this is another step along the road to correcting
- * it. It ought to get updated each time we pass through the routine,
- * but in practise it probably doesn't matter too much for now.
- */
-static inline struct sk_buff *dn_alloc_send_pskb(struct sock *sk,
- unsigned long datalen, int noblock,
- int *errcode)
-{
- struct sk_buff *skb = sock_alloc_send_skb(sk, datalen,
- noblock, errcode);
- if (skb) {
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->pkt_type = PACKET_OUTGOING;
- }
- return skb;
-}
-
-static int dn_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
-{
- struct sock *sk = sock->sk;
- struct dn_scp *scp = DN_SK(sk);
- size_t mss;
- struct sk_buff_head *queue = &scp->data_xmit_queue;
- int flags = msg->msg_flags;
- int err = 0;
- size_t sent = 0;
- int addr_len = msg->msg_namelen;
- DECLARE_SOCKADDR(struct sockaddr_dn *, addr, msg->msg_name);
- struct sk_buff *skb = NULL;
- struct dn_skb_cb *cb;
- size_t len;
- unsigned char fctype;
- long timeo;
-
- if (flags & ~(MSG_TRYHARD|MSG_OOB|MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL|MSG_MORE|MSG_CMSG_COMPAT))
- return -EOPNOTSUPP;
-
- if (addr_len && (addr_len != sizeof(struct sockaddr_dn)))
- return -EINVAL;
-
- lock_sock(sk);
- timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- /*
- * The only difference between stream sockets and sequenced packet
- * sockets is that the stream sockets always behave as if MSG_EOR
- * has been set.
- */
- if (sock->type == SOCK_STREAM) {
- if (flags & MSG_EOR) {
- err = -EINVAL;
- goto out;
- }
- flags |= MSG_EOR;
- }
-
-
- err = dn_check_state(sk, addr, addr_len, &timeo, flags);
- if (err)
- goto out_err;
-
- if (sk->sk_shutdown & SEND_SHUTDOWN) {
- err = -EPIPE;
- if (!(flags & MSG_NOSIGNAL))
- send_sig(SIGPIPE, current, 0);
- goto out_err;
- }
-
- if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
- dst_negative_advice(sk);
-
- mss = scp->segsize_rem;
- fctype = scp->services_rem & NSP_FC_MASK;
-
- mss = dn_current_mss(sk, flags);
-
- if (flags & MSG_OOB) {
- queue = &scp->other_xmit_queue;
- if (size > mss) {
- err = -EMSGSIZE;
- goto out;
- }
- }
-
- scp->persist_fxn = dn_nsp_xmit_timeout;
-
- while(sent < size) {
- err = sock_error(sk);
- if (err)
- goto out;
-
- if (signal_pending(current)) {
- err = sock_intr_errno(timeo);
- goto out;
- }
-
- /*
- * Calculate size that we wish to send.
- */
- len = size - sent;
-
- if (len > mss)
- len = mss;
-
- /*
- * Wait for queue size to go down below the window
- * size.
- */
- if (dn_queue_too_long(scp, queue, flags)) {
- DEFINE_WAIT_FUNC(wait, woken_wake_function);
-
- if (flags & MSG_DONTWAIT) {
- err = -EWOULDBLOCK;
- goto out;
- }
-
- add_wait_queue(sk_sleep(sk), &wait);
- sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- sk_wait_event(sk, &timeo,
- !dn_queue_too_long(scp, queue, flags), &wait);
- sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
- remove_wait_queue(sk_sleep(sk), &wait);
- continue;
- }
-
- /*
- * Get a suitably sized skb.
- * 64 is a bit of a hack really, but its larger than any
- * link-layer headers and has served us well as a good
- * guess as to their real length.
- */
- skb = dn_alloc_send_pskb(sk, len + 64 + DN_MAX_NSP_DATA_HEADER,
- flags & MSG_DONTWAIT, &err);
-
- if (err)
- break;
-
- if (!skb)
- continue;
-
- cb = DN_SKB_CB(skb);
-
- skb_reserve(skb, 64 + DN_MAX_NSP_DATA_HEADER);
-
- if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
- err = -EFAULT;
- goto out;
- }
-
- if (flags & MSG_OOB) {
- cb->nsp_flags = 0x30;
- if (fctype != NSP_FC_NONE)
- scp->flowrem_oth--;
- } else {
- cb->nsp_flags = 0x00;
- if (scp->seg_total == 0)
- cb->nsp_flags |= 0x20;
-
- scp->seg_total += len;
-
- if (((sent + len) == size) && (flags & MSG_EOR)) {
- cb->nsp_flags |= 0x40;
- scp->seg_total = 0;
- if (fctype == NSP_FC_SCMC)
- scp->flowrem_dat--;
- }
- if (fctype == NSP_FC_SRC)
- scp->flowrem_dat--;
- }
-
- sent += len;
- dn_nsp_queue_xmit(sk, skb, sk->sk_allocation, flags & MSG_OOB);
- skb = NULL;
-
- scp->persist = dn_nsp_persist(sk);
-
- }
-out:
-
- kfree_skb(skb);
-
- release_sock(sk);
-
- return sent ? sent : err;
-
-out_err:
- err = sk_stream_error(sk, flags, err);
- release_sock(sk);
- return err;
-}
-
-static int dn_device_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- switch (event) {
- case NETDEV_UP:
- dn_dev_up(dev);
- break;
- case NETDEV_DOWN:
- dn_dev_down(dev);
- break;
- default:
- break;
- }
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block dn_dev_notifier = {
- .notifier_call = dn_device_event,
-};
-
-static struct packet_type dn_dix_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_DNA_RT),
- .func = dn_route_rcv,
-};
-
-#ifdef CONFIG_PROC_FS
-struct dn_iter_state {
- int bucket;
-};
-
-static struct sock *dn_socket_get_first(struct seq_file *seq)
-{
- struct dn_iter_state *state = seq->private;
- struct sock *n = NULL;
-
- for(state->bucket = 0;
- state->bucket < DN_SK_HASH_SIZE;
- ++state->bucket) {
- n = sk_head(&dn_sk_hash[state->bucket]);
- if (n)
- break;
- }
-
- return n;
-}
-
-static struct sock *dn_socket_get_next(struct seq_file *seq,
- struct sock *n)
-{
- struct dn_iter_state *state = seq->private;
-
- n = sk_next(n);
-try_again:
- if (n)
- goto out;
- if (++state->bucket >= DN_SK_HASH_SIZE)
- goto out;
- n = sk_head(&dn_sk_hash[state->bucket]);
- goto try_again;
-out:
- return n;
-}
-
-static struct sock *socket_get_idx(struct seq_file *seq, loff_t *pos)
-{
- struct sock *sk = dn_socket_get_first(seq);
-
- if (sk) {
- while(*pos && (sk = dn_socket_get_next(seq, sk)))
- --*pos;
- }
- return *pos ? NULL : sk;
-}
-
-static void *dn_socket_get_idx(struct seq_file *seq, loff_t pos)
-{
- void *rc;
- read_lock_bh(&dn_hash_lock);
- rc = socket_get_idx(seq, &pos);
- if (!rc) {
- read_unlock_bh(&dn_hash_lock);
- }
- return rc;
-}
-
-static void *dn_socket_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return *pos ? dn_socket_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
-}
-
-static void *dn_socket_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- void *rc;
-
- if (v == SEQ_START_TOKEN) {
- rc = dn_socket_get_idx(seq, 0);
- goto out;
- }
-
- rc = dn_socket_get_next(seq, v);
- if (rc)
- goto out;
- read_unlock_bh(&dn_hash_lock);
-out:
- ++*pos;
- return rc;
-}
-
-static void dn_socket_seq_stop(struct seq_file *seq, void *v)
-{
- if (v && v != SEQ_START_TOKEN)
- read_unlock_bh(&dn_hash_lock);
-}
-
-#define IS_NOT_PRINTABLE(x) ((x) < 32 || (x) > 126)
-
-static void dn_printable_object(struct sockaddr_dn *dn, unsigned char *buf)
-{
- int i;
-
- switch (le16_to_cpu(dn->sdn_objnamel)) {
- case 0:
- sprintf(buf, "%d", dn->sdn_objnum);
- break;
- default:
- for (i = 0; i < le16_to_cpu(dn->sdn_objnamel); i++) {
- buf[i] = dn->sdn_objname[i];
- if (IS_NOT_PRINTABLE(buf[i]))
- buf[i] = '.';
- }
- buf[i] = 0;
- }
-}
-
-static char *dn_state2asc(unsigned char state)
-{
- switch (state) {
- case DN_O:
- return "OPEN";
- case DN_CR:
- return " CR";
- case DN_DR:
- return " DR";
- case DN_DRC:
- return " DRC";
- case DN_CC:
- return " CC";
- case DN_CI:
- return " CI";
- case DN_NR:
- return " NR";
- case DN_NC:
- return " NC";
- case DN_CD:
- return " CD";
- case DN_RJ:
- return " RJ";
- case DN_RUN:
- return " RUN";
- case DN_DI:
- return " DI";
- case DN_DIC:
- return " DIC";
- case DN_DN:
- return " DN";
- case DN_CL:
- return " CL";
- case DN_CN:
- return " CN";
- }
-
- return "????";
-}
-
-static inline void dn_socket_format_entry(struct seq_file *seq, struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- char buf1[DN_ASCBUF_LEN];
- char buf2[DN_ASCBUF_LEN];
- char local_object[DN_MAXOBJL+3];
- char remote_object[DN_MAXOBJL+3];
-
- dn_printable_object(&scp->addr, local_object);
- dn_printable_object(&scp->peer, remote_object);
-
- seq_printf(seq,
- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s "
- "%6s/%04X %04d:%04d %04d:%04d %01d %-16s %4s %s\n",
- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->addr)), buf1),
- scp->addrloc,
- scp->numdat,
- scp->numoth,
- scp->ackxmt_dat,
- scp->ackxmt_oth,
- scp->flowloc_sw,
- local_object,
- dn_addr2asc(le16_to_cpu(dn_saddr2dn(&scp->peer)), buf2),
- scp->addrrem,
- scp->numdat_rcv,
- scp->numoth_rcv,
- scp->ackrcv_dat,
- scp->ackrcv_oth,
- scp->flowrem_sw,
- remote_object,
- dn_state2asc(scp->state),
- ((scp->accept_mode == ACC_IMMED) ? "IMMED" : "DEFER"));
-}
-
-static int dn_socket_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Local Remote\n");
- } else {
- dn_socket_format_entry(seq, v);
- }
- return 0;
-}
-
-static const struct seq_operations dn_socket_seq_ops = {
- .start = dn_socket_seq_start,
- .next = dn_socket_seq_next,
- .stop = dn_socket_seq_stop,
- .show = dn_socket_seq_show,
-};
-#endif
-
-static const struct net_proto_family dn_family_ops = {
- .family = AF_DECnet,
- .create = dn_create,
- .owner = THIS_MODULE,
-};
-
-static const struct proto_ops dn_proto_ops = {
- .family = AF_DECnet,
- .owner = THIS_MODULE,
- .release = dn_release,
- .bind = dn_bind,
- .connect = dn_connect,
- .socketpair = sock_no_socketpair,
- .accept = dn_accept,
- .getname = dn_getname,
- .poll = dn_poll,
- .ioctl = dn_ioctl,
- .listen = dn_listen,
- .shutdown = dn_shutdown,
- .setsockopt = dn_setsockopt,
- .getsockopt = dn_getsockopt,
- .sendmsg = dn_sendmsg,
- .recvmsg = dn_recvmsg,
- .mmap = sock_no_mmap,
- .sendpage = sock_no_sendpage,
-};
-
-MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
-MODULE_AUTHOR("Linux DECnet Project Team");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NETPROTO(PF_DECnet);
-
-static const char banner[] __initconst = KERN_INFO
-"NET4: DECnet for Linux: V.2.5.68s (C) 1995-2003 Linux DECnet Project Team\n";
-
-static int __init decnet_init(void)
-{
- int rc;
-
- printk(banner);
-
- rc = proto_register(&dn_proto, 1);
- if (rc != 0)
- goto out;
-
- dn_neigh_init();
- dn_dev_init();
- dn_route_init();
- dn_fib_init();
-
- sock_register(&dn_family_ops);
- dev_add_pack(&dn_dix_packet_type);
- register_netdevice_notifier(&dn_dev_notifier);
-
- proc_create_seq_private("decnet", 0444, init_net.proc_net,
- &dn_socket_seq_ops, sizeof(struct dn_iter_state),
- NULL);
- dn_register_sysctl();
-out:
- return rc;
-
-}
-module_init(decnet_init);
-
-/*
- * Prevent DECnet module unloading until its fixed properly.
- * Requires an audit of the code to check for memory leaks and
- * initialisation problems etc.
- */
-#if 0
-static void __exit decnet_exit(void)
-{
- sock_unregister(AF_DECnet);
- rtnl_unregister_all(PF_DECnet);
- dev_remove_pack(&dn_dix_packet_type);
-
- dn_unregister_sysctl();
-
- unregister_netdevice_notifier(&dn_dev_notifier);
-
- dn_route_cleanup();
- dn_dev_cleanup();
- dn_neigh_cleanup();
- dn_fib_cleanup();
-
- remove_proc_entry("decnet", init_net.proc_net);
-
- proto_unregister(&dn_proto);
-
- rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */
-}
-module_exit(decnet_exit);
-#endif
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
deleted file mode 100644
index 3235540f6adf..000000000000
--- a/net/decnet/dn_dev.c
+++ /dev/null
@@ -1,1438 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Device Layer
- *
- * Authors: Steve Whitehouse <SteveW@ACM.org>
- * Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- * Steve Whitehouse : Devices now see incoming frames so they
- * can mark on who it came from.
- * Steve Whitehouse : Fixed bug in creating neighbours. Each neighbour
- * can now have a device specific setup func.
- * Steve Whitehouse : Added /proc/sys/net/decnet/conf/<dev>/
- * Steve Whitehouse : Fixed bug which sometimes killed timer
- * Steve Whitehouse : Multiple ifaddr support
- * Steve Whitehouse : SIOCGIFCONF is now a compile time option
- * Steve Whitehouse : /proc/sys/net/decnet/conf/<sys>/forwarding
- * Steve Whitehouse : Removed timer1 - it's a user space issue now
- * Patrick Caulfield : Fixed router hello message format
- * Steve Whitehouse : Got rid of constant sizes for blksize for
- * devices. All mtu based now.
- */
-
-#include <linux/capability.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/init.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/if_addr.h>
-#include <linux/if_arp.h>
-#include <linux/if_ether.h>
-#include <linux/skbuff.h>
-#include <linux/sysctl.h>
-#include <linux/notifier.h>
-#include <linux/slab.h>
-#include <linux/jiffies.h>
-#include <linux/uaccess.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/netlink.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-#include <net/dn_neigh.h>
-#include <net/dn_fib.h>
-
-#define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn))
-
-static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00};
-static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00};
-static char dn_hiord[ETH_ALEN] = {0xAA,0x00,0x04,0x00,0x00,0x00};
-static unsigned char dn_eco_version[3] = {0x02,0x00,0x00};
-
-extern struct neigh_table dn_neigh_table;
-
-/*
- * decnet_address is kept in network order.
- */
-__le16 decnet_address = 0;
-
-static DEFINE_SPINLOCK(dndev_lock);
-static struct net_device *decnet_default_device;
-static BLOCKING_NOTIFIER_HEAD(dnaddr_chain);
-
-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err);
-static void dn_dev_delete(struct net_device *dev);
-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa);
-
-static int dn_eth_up(struct net_device *);
-static void dn_eth_down(struct net_device *);
-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa);
-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa);
-
-static struct dn_dev_parms dn_dev_list[] = {
-{
- .type = ARPHRD_ETHER, /* Ethernet */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ethernet",
- .up = dn_eth_up,
- .down = dn_eth_down,
- .timer3 = dn_send_brd_hello,
-},
-{
- .type = ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ipgre",
- .timer3 = dn_send_brd_hello,
-},
-#if 0
-{
- .type = ARPHRD_X25, /* Bog standard X.25 */
- .mode = DN_DEV_UCAST,
- .state = DN_DEV_S_DS,
- .t2 = 1,
- .t3 = 120,
- .name = "x25",
- .timer3 = dn_send_ptp_hello,
-},
-#endif
-#if 0
-{
- .type = ARPHRD_PPP, /* DECnet over PPP */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "ppp",
- .timer3 = dn_send_brd_hello,
-},
-#endif
-{
- .type = ARPHRD_DDCMP, /* DECnet over DDCMP */
- .mode = DN_DEV_UCAST,
- .state = DN_DEV_S_DS,
- .t2 = 1,
- .t3 = 120,
- .name = "ddcmp",
- .timer3 = dn_send_ptp_hello,
-},
-{
- .type = ARPHRD_LOOPBACK, /* Loopback interface - always last */
- .mode = DN_DEV_BCAST,
- .state = DN_DEV_S_RU,
- .t2 = 1,
- .t3 = 10,
- .name = "loopback",
- .timer3 = dn_send_brd_hello,
-}
-};
-
-#define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list)
-
-#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x)
-
-#ifdef CONFIG_SYSCTL
-
-static int min_t2[] = { 1 };
-static int max_t2[] = { 60 }; /* No max specified, but this seems sensible */
-static int min_t3[] = { 1 };
-static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MULT or T3MULT */
-
-static int min_priority[1];
-static int max_priority[] = { 127 }; /* From DECnet spec */
-
-static int dn_forwarding_proc(struct ctl_table *, int,
- void __user *, size_t *, loff_t *);
-static struct dn_dev_sysctl_table {
- struct ctl_table_header *sysctl_header;
- struct ctl_table dn_dev_vars[5];
-} dn_dev_sysctl = {
- NULL,
- {
- {
- .procname = "forwarding",
- .data = (void *)DN_DEV_PARMS_OFFSET(forwarding),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = dn_forwarding_proc,
- },
- {
- .procname = "priority",
- .data = (void *)DN_DEV_PARMS_OFFSET(priority),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_priority,
- .extra2 = &max_priority
- },
- {
- .procname = "t2",
- .data = (void *)DN_DEV_PARMS_OFFSET(t2),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_t2,
- .extra2 = &max_t2
- },
- {
- .procname = "t3",
- .data = (void *)DN_DEV_PARMS_OFFSET(t3),
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_t3,
- .extra2 = &max_t3
- },
- { }
- },
-};
-
-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
-{
- struct dn_dev_sysctl_table *t;
- int i;
-
- char path[sizeof("net/decnet/conf/") + IFNAMSIZ];
-
- t = kmemdup(&dn_dev_sysctl, sizeof(*t), GFP_KERNEL);
- if (t == NULL)
- return;
-
- for(i = 0; i < ARRAY_SIZE(t->dn_dev_vars) - 1; i++) {
- long offset = (long)t->dn_dev_vars[i].data;
- t->dn_dev_vars[i].data = ((char *)parms) + offset;
- }
-
- snprintf(path, sizeof(path), "net/decnet/conf/%s",
- dev? dev->name : parms->name);
-
- t->dn_dev_vars[0].extra1 = (void *)dev;
-
- t->sysctl_header = register_net_sysctl(&init_net, path, t->dn_dev_vars);
- if (t->sysctl_header == NULL)
- kfree(t);
- else
- parms->sysctl = t;
-}
-
-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
-{
- if (parms->sysctl) {
- struct dn_dev_sysctl_table *t = parms->sysctl;
- parms->sysctl = NULL;
- unregister_net_sysctl_table(t->sysctl_header);
- kfree(t);
- }
-}
-
-static int dn_forwarding_proc(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
-#ifdef CONFIG_DECNET_ROUTER
- struct net_device *dev = table->extra1;
- struct dn_dev *dn_db;
- int err;
- int tmp, old;
-
- if (table->extra1 == NULL)
- return -EINVAL;
-
- dn_db = rcu_dereference_raw(dev->dn_ptr);
- old = dn_db->parms.forwarding;
-
- err = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if ((err >= 0) && write) {
- if (dn_db->parms.forwarding < 0)
- dn_db->parms.forwarding = 0;
- if (dn_db->parms.forwarding > 2)
- dn_db->parms.forwarding = 2;
- /*
- * What an ugly hack this is... its works, just. It
- * would be nice if sysctl/proc were just that little
- * bit more flexible so I don't have to write a special
- * routine, or suffer hacks like this - SJW
- */
- tmp = dn_db->parms.forwarding;
- dn_db->parms.forwarding = old;
- if (dn_db->parms.down)
- dn_db->parms.down(dev);
- dn_db->parms.forwarding = tmp;
- if (dn_db->parms.up)
- dn_db->parms.up(dev);
- }
-
- return err;
-#else
- return -EINVAL;
-#endif
-}
-
-#else /* CONFIG_SYSCTL */
-static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
-{
-}
-static void dn_dev_sysctl_register(struct net_device *dev, struct dn_dev_parms *parms)
-{
-}
-
-#endif /* CONFIG_SYSCTL */
-
-static inline __u16 mtu2blksize(struct net_device *dev)
-{
- u32 blksize = dev->mtu;
- if (blksize > 0xffff)
- blksize = 0xffff;
-
- if (dev->type == ARPHRD_ETHER ||
- dev->type == ARPHRD_PPP ||
- dev->type == ARPHRD_IPGRE ||
- dev->type == ARPHRD_LOOPBACK)
- blksize -= 2;
-
- return (__u16)blksize;
-}
-
-static struct dn_ifaddr *dn_dev_alloc_ifa(void)
-{
- struct dn_ifaddr *ifa;
-
- ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
-
- return ifa;
-}
-
-static void dn_dev_free_ifa(struct dn_ifaddr *ifa)
-{
- kfree_rcu(ifa, rcu);
-}
-
-static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr __rcu **ifap, int destroy)
-{
- struct dn_ifaddr *ifa1 = rtnl_dereference(*ifap);
- unsigned char mac_addr[6];
- struct net_device *dev = dn_db->dev;
-
- ASSERT_RTNL();
-
- *ifap = ifa1->ifa_next;
-
- if (dn_db->dev->type == ARPHRD_ETHER) {
- if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
- dn_dn2eth(mac_addr, ifa1->ifa_local);
- dev_mc_del(dev, mac_addr);
- }
- }
-
- dn_ifaddr_notify(RTM_DELADDR, ifa1);
- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_DOWN, ifa1);
- if (destroy) {
- dn_dev_free_ifa(ifa1);
-
- if (dn_db->ifa_list == NULL)
- dn_dev_delete(dn_db->dev);
- }
-}
-
-static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
-{
- struct net_device *dev = dn_db->dev;
- struct dn_ifaddr *ifa1;
- unsigned char mac_addr[6];
-
- ASSERT_RTNL();
-
- /* Check for duplicates */
- for (ifa1 = rtnl_dereference(dn_db->ifa_list);
- ifa1 != NULL;
- ifa1 = rtnl_dereference(ifa1->ifa_next)) {
- if (ifa1->ifa_local == ifa->ifa_local)
- return -EEXIST;
- }
-
- if (dev->type == ARPHRD_ETHER) {
- if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
- dn_dn2eth(mac_addr, ifa->ifa_local);
- dev_mc_add(dev, mac_addr);
- }
- }
-
- ifa->ifa_next = dn_db->ifa_list;
- rcu_assign_pointer(dn_db->ifa_list, ifa);
-
- dn_ifaddr_notify(RTM_NEWADDR, ifa);
- blocking_notifier_call_chain(&dnaddr_chain, NETDEV_UP, ifa);
-
- return 0;
-}
-
-static int dn_dev_set_ifa(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
- int rv;
-
- if (dn_db == NULL) {
- int err;
- dn_db = dn_dev_create(dev, &err);
- if (dn_db == NULL)
- return err;
- }
-
- ifa->ifa_dev = dn_db;
-
- if (dev->flags & IFF_LOOPBACK)
- ifa->ifa_scope = RT_SCOPE_HOST;
-
- rv = dn_dev_insert_ifa(dn_db, ifa);
- if (rv)
- dn_dev_free_ifa(ifa);
- return rv;
-}
-
-
-int dn_dev_ioctl(unsigned int cmd, void __user *arg)
-{
- char buffer[DN_IFREQ_SIZE];
- struct ifreq *ifr = (struct ifreq *)buffer;
- struct sockaddr_dn *sdn = (struct sockaddr_dn *)&ifr->ifr_addr;
- struct dn_dev *dn_db;
- struct net_device *dev;
- struct dn_ifaddr *ifa = NULL;
- struct dn_ifaddr __rcu **ifap = NULL;
- int ret = 0;
-
- if (copy_from_user(ifr, arg, DN_IFREQ_SIZE))
- return -EFAULT;
- ifr->ifr_name[IFNAMSIZ-1] = 0;
-
- dev_load(&init_net, ifr->ifr_name);
-
- switch (cmd) {
- case SIOCGIFADDR:
- break;
- case SIOCSIFADDR:
- if (!capable(CAP_NET_ADMIN))
- return -EACCES;
- if (sdn->sdn_family != AF_DECnet)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
-
- rtnl_lock();
-
- if ((dev = __dev_get_by_name(&init_net, ifr->ifr_name)) == NULL) {
- ret = -ENODEV;
- goto done;
- }
-
- if ((dn_db = rtnl_dereference(dev->dn_ptr)) != NULL) {
- for (ifap = &dn_db->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
- ifap = &ifa->ifa_next)
- if (strcmp(ifr->ifr_name, ifa->ifa_label) == 0)
- break;
- }
-
- if (ifa == NULL && cmd != SIOCSIFADDR) {
- ret = -EADDRNOTAVAIL;
- goto done;
- }
-
- switch (cmd) {
- case SIOCGIFADDR:
- *((__le16 *)sdn->sdn_nodeaddr) = ifa->ifa_local;
- goto rarok;
-
- case SIOCSIFADDR:
- if (!ifa) {
- if ((ifa = dn_dev_alloc_ifa()) == NULL) {
- ret = -ENOBUFS;
- break;
- }
- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
- } else {
- if (ifa->ifa_local == dn_saddr2dn(sdn))
- break;
- dn_dev_del_ifa(dn_db, ifap, 0);
- }
-
- ifa->ifa_local = ifa->ifa_address = dn_saddr2dn(sdn);
-
- ret = dn_dev_set_ifa(dev, ifa);
- }
-done:
- rtnl_unlock();
-
- return ret;
-rarok:
- if (copy_to_user(arg, ifr, DN_IFREQ_SIZE))
- ret = -EFAULT;
- goto done;
-}
-
-struct net_device *dn_dev_get_default(void)
-{
- struct net_device *dev;
-
- spin_lock(&dndev_lock);
- dev = decnet_default_device;
- if (dev) {
- if (dev->dn_ptr)
- dev_hold(dev);
- else
- dev = NULL;
- }
- spin_unlock(&dndev_lock);
-
- return dev;
-}
-
-int dn_dev_set_default(struct net_device *dev, int force)
-{
- struct net_device *old = NULL;
- int rv = -EBUSY;
- if (!dev->dn_ptr)
- return -ENODEV;
-
- spin_lock(&dndev_lock);
- if (force || decnet_default_device == NULL) {
- old = decnet_default_device;
- decnet_default_device = dev;
- rv = 0;
- }
- spin_unlock(&dndev_lock);
-
- if (old)
- dev_put(old);
- return rv;
-}
-
-static void dn_dev_check_default(struct net_device *dev)
-{
- spin_lock(&dndev_lock);
- if (dev == decnet_default_device) {
- decnet_default_device = NULL;
- } else {
- dev = NULL;
- }
- spin_unlock(&dndev_lock);
-
- if (dev)
- dev_put(dev);
-}
-
-/*
- * Called with RTNL
- */
-static struct dn_dev *dn_dev_by_index(int ifindex)
-{
- struct net_device *dev;
- struct dn_dev *dn_dev = NULL;
-
- dev = __dev_get_by_index(&init_net, ifindex);
- if (dev)
- dn_dev = rtnl_dereference(dev->dn_ptr);
-
- return dn_dev;
-}
-
-static const struct nla_policy dn_ifa_policy[IFA_MAX+1] = {
- [IFA_ADDRESS] = { .type = NLA_U16 },
- [IFA_LOCAL] = { .type = NLA_U16 },
- [IFA_LABEL] = { .type = NLA_STRING,
- .len = IFNAMSIZ - 1 },
- [IFA_FLAGS] = { .type = NLA_U32 },
-};
-
-static int dn_nl_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr *tb[IFA_MAX+1];
- struct dn_dev *dn_db;
- struct ifaddrmsg *ifm;
- struct dn_ifaddr *ifa;
- struct dn_ifaddr __rcu **ifap;
- int err = -EINVAL;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- goto errout;
-
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
- extack);
- if (err < 0)
- goto errout;
-
- err = -ENODEV;
- ifm = nlmsg_data(nlh);
- if ((dn_db = dn_dev_by_index(ifm->ifa_index)) == NULL)
- goto errout;
-
- err = -EADDRNOTAVAIL;
- for (ifap = &dn_db->ifa_list;
- (ifa = rtnl_dereference(*ifap)) != NULL;
- ifap = &ifa->ifa_next) {
- if (tb[IFA_LOCAL] &&
- nla_memcmp(tb[IFA_LOCAL], &ifa->ifa_local, 2))
- continue;
-
- if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
- continue;
-
- dn_dev_del_ifa(dn_db, ifap, 1);
- return 0;
- }
-
-errout:
- return err;
-}
-
-static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct nlattr *tb[IFA_MAX+1];
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct ifaddrmsg *ifm;
- struct dn_ifaddr *ifa;
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, dn_ifa_policy,
- extack);
- if (err < 0)
- return err;
-
- if (tb[IFA_LOCAL] == NULL)
- return -EINVAL;
-
- ifm = nlmsg_data(nlh);
- if ((dev = __dev_get_by_index(&init_net, ifm->ifa_index)) == NULL)
- return -ENODEV;
-
- if ((dn_db = rtnl_dereference(dev->dn_ptr)) == NULL) {
- dn_db = dn_dev_create(dev, &err);
- if (!dn_db)
- return err;
- }
-
- if ((ifa = dn_dev_alloc_ifa()) == NULL)
- return -ENOBUFS;
-
- if (tb[IFA_ADDRESS] == NULL)
- tb[IFA_ADDRESS] = tb[IFA_LOCAL];
-
- ifa->ifa_local = nla_get_le16(tb[IFA_LOCAL]);
- ifa->ifa_address = nla_get_le16(tb[IFA_ADDRESS]);
- ifa->ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) :
- ifm->ifa_flags;
- ifa->ifa_scope = ifm->ifa_scope;
- ifa->ifa_dev = dn_db;
-
- if (tb[IFA_LABEL])
- nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
- else
- memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
-
- err = dn_dev_insert_ifa(dn_db, ifa);
- if (err)
- dn_dev_free_ifa(ifa);
-
- return err;
-}
-
-static inline size_t dn_ifaddr_nlmsg_size(void)
-{
- return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
- + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
- + nla_total_size(2) /* IFA_ADDRESS */
- + nla_total_size(2) /* IFA_LOCAL */
- + nla_total_size(4); /* IFA_FLAGS */
-}
-
-static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa,
- u32 portid, u32 seq, int event, unsigned int flags)
-{
- struct ifaddrmsg *ifm;
- struct nlmsghdr *nlh;
- u32 ifa_flags = ifa->ifa_flags | IFA_F_PERMANENT;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags);
- if (nlh == NULL)
- return -EMSGSIZE;
-
- ifm = nlmsg_data(nlh);
- ifm->ifa_family = AF_DECnet;
- ifm->ifa_prefixlen = 16;
- ifm->ifa_flags = ifa_flags;
- ifm->ifa_scope = ifa->ifa_scope;
- ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
-
- if ((ifa->ifa_address &&
- nla_put_le16(skb, IFA_ADDRESS, ifa->ifa_address)) ||
- (ifa->ifa_local &&
- nla_put_le16(skb, IFA_LOCAL, ifa->ifa_local)) ||
- (ifa->ifa_label[0] &&
- nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
- nla_put_u32(skb, IFA_FLAGS, ifa_flags))
- goto nla_put_failure;
- nlmsg_end(skb, nlh);
- return 0;
-
-nla_put_failure:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
-{
- struct sk_buff *skb;
- int err = -ENOBUFS;
-
- skb = alloc_skb(dn_ifaddr_nlmsg_size(), GFP_KERNEL);
- if (skb == NULL)
- goto errout;
-
- err = dn_nl_fill_ifaddr(skb, ifa, 0, 0, event, 0);
- if (err < 0) {
- /* -EMSGSIZE implies BUG in dn_ifaddr_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(skb);
- goto errout;
- }
- rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
- return;
-errout:
- if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
-}
-
-static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- int idx, dn_idx = 0, skip_ndevs, skip_naddr;
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- skip_ndevs = cb->args[0];
- skip_naddr = cb->args[1];
-
- idx = 0;
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (idx < skip_ndevs)
- goto cont;
- else if (idx > skip_ndevs) {
- /* Only skip over addresses for first dev dumped
- * in this iteration (idx == skip_ndevs) */
- skip_naddr = 0;
- }
-
- if ((dn_db = rcu_dereference(dev->dn_ptr)) == NULL)
- goto cont;
-
- for (ifa = rcu_dereference(dn_db->ifa_list), dn_idx = 0; ifa;
- ifa = rcu_dereference(ifa->ifa_next), dn_idx++) {
- if (dn_idx < skip_naddr)
- continue;
-
- if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWADDR,
- NLM_F_MULTI) < 0)
- goto done;
- }
-cont:
- idx++;
- }
-done:
- rcu_read_unlock();
- cb->args[0] = idx;
- cb->args[1] = dn_idx;
-
- return skb->len;
-}
-
-static int dn_dev_get_first(struct net_device *dev, __le16 *addr)
-{
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int rv = -ENODEV;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL)
- goto out;
-
- ifa = rcu_dereference(dn_db->ifa_list);
- if (ifa != NULL) {
- *addr = ifa->ifa_local;
- rv = 0;
- }
-out:
- rcu_read_unlock();
- return rv;
-}
-
-/*
- * Find a default address to bind to.
- *
- * This is one of those areas where the initial VMS concepts don't really
- * map onto the Linux concepts, and since we introduced multiple addresses
- * per interface we have to cope with slightly odd ways of finding out what
- * "our address" really is. Mostly it's not a problem; for this we just guess
- * a sensible default. Eventually the routing code will take care of all the
- * nasties for us I hope.
- */
-int dn_dev_bind_default(__le16 *addr)
-{
- struct net_device *dev;
- int rv;
- dev = dn_dev_get_default();
-last_chance:
- if (dev) {
- rv = dn_dev_get_first(dev, addr);
- dev_put(dev);
- if (rv == 0 || dev == init_net.loopback_dev)
- return rv;
- }
- dev = init_net.loopback_dev;
- dev_hold(dev);
- goto last_chance;
-}
-
-static void dn_send_endnode_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct endnode_hello_message *msg;
- struct sk_buff *skb = NULL;
- __le16 *pktlen;
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if ((skb = dn_alloc_skb(NULL, sizeof(*msg), GFP_ATOMIC)) == NULL)
- return;
-
- skb->dev = dev;
-
- msg = skb_put(skb, sizeof(*msg));
-
- msg->msgflg = 0x0D;
- memcpy(msg->tiver, dn_eco_version, 3);
- dn_dn2eth(msg->id, ifa->ifa_local);
- msg->iinfo = DN_RT_INFO_ENDN;
- msg->blksize = cpu_to_le16(mtu2blksize(dev));
- msg->area = 0x00;
- memset(msg->seed, 0, 8);
- memcpy(msg->neighbor, dn_hiord, ETH_ALEN);
-
- if (dn_db->router) {
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
- dn_dn2eth(msg->neighbor, dn->addr);
- }
-
- msg->timer = cpu_to_le16((unsigned short)dn_db->parms.t3);
- msg->mpd = 0x00;
- msg->datalen = 0x02;
- memset(msg->data, 0xAA, 2);
-
- pktlen = skb_push(skb, 2);
- *pktlen = cpu_to_le16(skb->len - 2);
-
- skb_reset_network_header(skb);
-
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, msg->id);
-}
-
-
-#define DRDELAY (5 * HZ)
-
-static int dn_am_i_a_router(struct dn_neigh *dn, struct dn_dev *dn_db, struct dn_ifaddr *ifa)
-{
- /* First check time since device went up */
- if (time_before(jiffies, dn_db->uptime + DRDELAY))
- return 0;
-
- /* If there is no router, then yes... */
- if (!dn_db->router)
- return 1;
-
- /* otherwise only if we have a higher priority or.. */
- if (dn->priority < dn_db->parms.priority)
- return 1;
-
- /* if we have equal priority and a higher node number */
- if (dn->priority != dn_db->parms.priority)
- return 0;
-
- if (le16_to_cpu(dn->addr) < le16_to_cpu(ifa->ifa_local))
- return 1;
-
- return 0;
-}
-
-static void dn_send_router_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- int n;
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
- struct dn_neigh *dn = (struct dn_neigh *)dn_db->router;
- struct sk_buff *skb;
- size_t size;
- unsigned char *ptr;
- unsigned char *i1, *i2;
- __le16 *pktlen;
- char *src;
-
- if (mtu2blksize(dev) < (26 + 7))
- return;
-
- n = mtu2blksize(dev) - 26;
- n /= 7;
-
- if (n > 32)
- n = 32;
-
- size = 2 + 26 + 7 * n;
-
- if ((skb = dn_alloc_skb(NULL, size, GFP_ATOMIC)) == NULL)
- return;
-
- skb->dev = dev;
- ptr = skb_put(skb, size);
-
- *ptr++ = DN_RT_PKT_CNTL | DN_RT_PKT_ERTH;
- *ptr++ = 2; /* ECO */
- *ptr++ = 0;
- *ptr++ = 0;
- dn_dn2eth(ptr, ifa->ifa_local);
- src = ptr;
- ptr += ETH_ALEN;
- *ptr++ = dn_db->parms.forwarding == 1 ?
- DN_RT_INFO_L1RT : DN_RT_INFO_L2RT;
- *((__le16 *)ptr) = cpu_to_le16(mtu2blksize(dev));
- ptr += 2;
- *ptr++ = dn_db->parms.priority; /* Priority */
- *ptr++ = 0; /* Area: Reserved */
- *((__le16 *)ptr) = cpu_to_le16((unsigned short)dn_db->parms.t3);
- ptr += 2;
- *ptr++ = 0; /* MPD: Reserved */
- i1 = ptr++;
- memset(ptr, 0, 7); /* Name: Reserved */
- ptr += 7;
- i2 = ptr++;
-
- n = dn_neigh_elist(dev, ptr, n);
-
- *i2 = 7 * n;
- *i1 = 8 + *i2;
-
- skb_trim(skb, (27 + *i2));
-
- pktlen = skb_push(skb, 2);
- *pktlen = cpu_to_le16(skb->len - 2);
-
- skb_reset_network_header(skb);
-
- if (dn_am_i_a_router(dn, dn_db, ifa)) {
- struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC);
- if (skb2) {
- dn_rt_finish_output(skb2, dn_rt_all_end_mcast, src);
- }
- }
-
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
-}
-
-static void dn_send_brd_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dn_send_endnode_hello(dev, ifa);
- else
- dn_send_router_hello(dev, ifa);
-}
-
-static void dn_send_ptp_hello(struct net_device *dev, struct dn_ifaddr *ifa)
-{
- int tdlen = 16;
- int size = dev->hard_header_len + 2 + 4 + tdlen;
- struct sk_buff *skb = dn_alloc_skb(NULL, size, GFP_ATOMIC);
- int i;
- unsigned char *ptr;
- char src[ETH_ALEN];
-
- if (skb == NULL)
- return ;
-
- skb->dev = dev;
- skb_push(skb, dev->hard_header_len);
- ptr = skb_put(skb, 2 + 4 + tdlen);
-
- *ptr++ = DN_RT_PKT_HELO;
- *((__le16 *)ptr) = ifa->ifa_local;
- ptr += 2;
- *ptr++ = tdlen;
-
- for(i = 0; i < tdlen; i++)
- *ptr++ = 0252;
-
- dn_dn2eth(src, ifa->ifa_local);
- dn_rt_finish_output(skb, dn_rt_all_rt_mcast, src);
-}
-
-static int dn_eth_up(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dev_mc_add(dev, dn_rt_all_end_mcast);
- else
- dev_mc_add(dev, dn_rt_all_rt_mcast);
-
- dn_db->use_long = 1;
-
- return 0;
-}
-
-static void dn_eth_down(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.forwarding == 0)
- dev_mc_del(dev, dn_rt_all_end_mcast);
- else
- dev_mc_del(dev, dn_rt_all_rt_mcast);
-}
-
-static void dn_dev_set_timer(struct net_device *dev);
-
-static void dn_dev_timer_func(struct timer_list *t)
-{
- struct dn_dev *dn_db = from_timer(dn_db, t, timer);
- struct net_device *dev;
- struct dn_ifaddr *ifa;
-
- rcu_read_lock();
- dev = dn_db->dev;
- if (dn_db->t3 <= dn_db->parms.t2) {
- if (dn_db->parms.timer3) {
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa;
- ifa = rcu_dereference(ifa->ifa_next)) {
- if (!(ifa->ifa_flags & IFA_F_SECONDARY))
- dn_db->parms.timer3(dev, ifa);
- }
- }
- dn_db->t3 = dn_db->parms.t3;
- } else {
- dn_db->t3 -= dn_db->parms.t2;
- }
- rcu_read_unlock();
- dn_dev_set_timer(dev);
-}
-
-static void dn_dev_set_timer(struct net_device *dev)
-{
- struct dn_dev *dn_db = rcu_dereference_raw(dev->dn_ptr);
-
- if (dn_db->parms.t2 > dn_db->parms.t3)
- dn_db->parms.t2 = dn_db->parms.t3;
-
- dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ);
-
- add_timer(&dn_db->timer);
-}
-
-static struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
-{
- int i;
- struct dn_dev_parms *p = dn_dev_list;
- struct dn_dev *dn_db;
-
- for(i = 0; i < DN_DEV_LIST_SIZE; i++, p++) {
- if (p->type == dev->type)
- break;
- }
-
- *err = -ENODEV;
- if (i == DN_DEV_LIST_SIZE)
- return NULL;
-
- *err = -ENOBUFS;
- if ((dn_db = kzalloc(sizeof(struct dn_dev), GFP_ATOMIC)) == NULL)
- return NULL;
-
- memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
-
- rcu_assign_pointer(dev->dn_ptr, dn_db);
- dn_db->dev = dev;
- timer_setup(&dn_db->timer, dn_dev_timer_func, 0);
-
- dn_db->uptime = jiffies;
-
- dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table);
- if (!dn_db->neigh_parms) {
- RCU_INIT_POINTER(dev->dn_ptr, NULL);
- kfree(dn_db);
- return NULL;
- }
-
- if (dn_db->parms.up) {
- if (dn_db->parms.up(dev) < 0) {
- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
- dev->dn_ptr = NULL;
- kfree(dn_db);
- return NULL;
- }
- }
-
- dn_dev_sysctl_register(dev, &dn_db->parms);
-
- dn_dev_set_timer(dev);
-
- *err = 0;
- return dn_db;
-}
-
-
-/*
- * This processes a device up event. We only start up
- * the loopback device & ethernet devices with correct
- * MAC addresses automatically. Others must be started
- * specifically.
- *
- * FIXME: How should we configure the loopback address ? If we could dispense
- * with using decnet_address here and for autobind, it will be one less thing
- * for users to worry about setting up.
- */
-
-void dn_dev_up(struct net_device *dev)
-{
- struct dn_ifaddr *ifa;
- __le16 addr = decnet_address;
- int maybe_default = 0;
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
-
- if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK))
- return;
-
- /*
- * Need to ensure that loopback device has a dn_db attached to it
- * to allow creation of neighbours against it, even though it might
- * not have a local address of its own. Might as well do the same for
- * all autoconfigured interfaces.
- */
- if (dn_db == NULL) {
- int err;
- dn_db = dn_dev_create(dev, &err);
- if (dn_db == NULL)
- return;
- }
-
- if (dev->type == ARPHRD_ETHER) {
- if (memcmp(dev->dev_addr, dn_hiord, 4) != 0)
- return;
- addr = dn_eth2dn(dev->dev_addr);
- maybe_default = 1;
- }
-
- if (addr == 0)
- return;
-
- if ((ifa = dn_dev_alloc_ifa()) == NULL)
- return;
-
- ifa->ifa_local = ifa->ifa_address = addr;
- ifa->ifa_flags = 0;
- ifa->ifa_scope = RT_SCOPE_UNIVERSE;
- strcpy(ifa->ifa_label, dev->name);
-
- dn_dev_set_ifa(dev, ifa);
-
- /*
- * Automagically set the default device to the first automatically
- * configured ethernet card in the system.
- */
- if (maybe_default) {
- dev_hold(dev);
- if (dn_dev_set_default(dev, 0))
- dev_put(dev);
- }
-}
-
-static void dn_dev_delete(struct net_device *dev)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
-
- if (dn_db == NULL)
- return;
-
- del_timer_sync(&dn_db->timer);
- dn_dev_sysctl_unregister(&dn_db->parms);
- dn_dev_check_default(dev);
- neigh_ifdown(&dn_neigh_table, dev);
-
- if (dn_db->parms.down)
- dn_db->parms.down(dev);
-
- dev->dn_ptr = NULL;
-
- neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
- neigh_ifdown(&dn_neigh_table, dev);
-
- if (dn_db->router)
- neigh_release(dn_db->router);
- if (dn_db->peer)
- neigh_release(dn_db->peer);
-
- kfree(dn_db);
-}
-
-void dn_dev_down(struct net_device *dev)
-{
- struct dn_dev *dn_db = rtnl_dereference(dev->dn_ptr);
- struct dn_ifaddr *ifa;
-
- if (dn_db == NULL)
- return;
-
- while ((ifa = rtnl_dereference(dn_db->ifa_list)) != NULL) {
- dn_dev_del_ifa(dn_db, &dn_db->ifa_list, 0);
- dn_dev_free_ifa(ifa);
- }
-
- dn_dev_delete(dev);
-}
-
-void dn_dev_init_pkt(struct sk_buff *skb)
-{
-}
-
-void dn_dev_veri_pkt(struct sk_buff *skb)
-{
-}
-
-void dn_dev_hello(struct sk_buff *skb)
-{
-}
-
-void dn_dev_devices_off(void)
-{
- struct net_device *dev;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev)
- dn_dev_down(dev);
- rtnl_unlock();
-
-}
-
-void dn_dev_devices_on(void)
-{
- struct net_device *dev;
-
- rtnl_lock();
- for_each_netdev(&init_net, dev) {
- if (dev->flags & IFF_UP)
- dn_dev_up(dev);
- }
- rtnl_unlock();
-}
-
-int register_dnaddr_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_register(&dnaddr_chain, nb);
-}
-
-int unregister_dnaddr_notifier(struct notifier_block *nb)
-{
- return blocking_notifier_chain_unregister(&dnaddr_chain, nb);
-}
-
-#ifdef CONFIG_PROC_FS
-static inline int is_dn_dev(struct net_device *dev)
-{
- return dev->dn_ptr != NULL;
-}
-
-static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
- __acquires(RCU)
-{
- int i;
- struct net_device *dev;
-
- rcu_read_lock();
-
- if (*pos == 0)
- return SEQ_START_TOKEN;
-
- i = 1;
- for_each_netdev_rcu(&init_net, dev) {
- if (!is_dn_dev(dev))
- continue;
-
- if (i++ == *pos)
- return dev;
- }
-
- return NULL;
-}
-
-static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct net_device *dev;
-
- ++*pos;
-
- dev = v;
- if (v == SEQ_START_TOKEN)
- dev = net_device_entry(&init_net.dev_base_head);
-
- for_each_netdev_continue_rcu(&init_net, dev) {
- if (!is_dn_dev(dev))
- continue;
-
- return dev;
- }
-
- return NULL;
-}
-
-static void dn_dev_seq_stop(struct seq_file *seq, void *v)
- __releases(RCU)
-{
- rcu_read_unlock();
-}
-
-static char *dn_type2asc(char type)
-{
- switch (type) {
- case DN_DEV_BCAST:
- return "B";
- case DN_DEV_UCAST:
- return "U";
- case DN_DEV_MPOINT:
- return "M";
- }
-
- return "?";
-}
-
-static int dn_dev_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN)
- seq_puts(seq, "Name Flags T1 Timer1 T3 Timer3 BlkSize Pri State DevType Router Peer\n");
- else {
- struct net_device *dev = v;
- char peer_buf[DN_ASCBUF_LEN];
- char router_buf[DN_ASCBUF_LEN];
- struct dn_dev *dn_db = rcu_dereference(dev->dn_ptr);
-
- seq_printf(seq, "%-8s %1s %04u %04u %04lu %04lu"
- " %04hu %03d %02x %-10s %-7s %-7s\n",
- dev->name ? dev->name : "???",
- dn_type2asc(dn_db->parms.mode),
- 0, 0,
- dn_db->t3, dn_db->parms.t3,
- mtu2blksize(dev),
- dn_db->parms.priority,
- dn_db->parms.state, dn_db->parms.name,
- dn_db->router ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->router->primary_key), router_buf) : "",
- dn_db->peer ? dn_addr2asc(le16_to_cpu(*(__le16 *)dn_db->peer->primary_key), peer_buf) : "");
- }
- return 0;
-}
-
-static const struct seq_operations dn_dev_seq_ops = {
- .start = dn_dev_seq_start,
- .next = dn_dev_seq_next,
- .stop = dn_dev_seq_stop,
- .show = dn_dev_seq_show,
-};
-#endif /* CONFIG_PROC_FS */
-
-static int addr[2];
-module_param_array(addr, int, NULL, 0444);
-MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node");
-
-void __init dn_dev_init(void)
-{
- if (addr[0] > 63 || addr[0] < 0) {
- printk(KERN_ERR "DECnet: Area must be between 0 and 63");
- return;
- }
-
- if (addr[1] > 1023 || addr[1] < 0) {
- printk(KERN_ERR "DECnet: Node must be between 0 and 1023");
- return;
- }
-
- decnet_address = cpu_to_le16((addr[0] << 10) | addr[1]);
-
- dn_dev_devices_on();
-
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWADDR,
- dn_nl_newaddr, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELADDR,
- dn_nl_deladdr, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETADDR,
- NULL, dn_nl_dump_ifaddr, 0);
-
- proc_create_seq("decnet_dev", 0444, init_net.proc_net, &dn_dev_seq_ops);
-
-#ifdef CONFIG_SYSCTL
- {
- int i;
- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
- dn_dev_sysctl_register(NULL, &dn_dev_list[i]);
- }
-#endif /* CONFIG_SYSCTL */
-}
-
-void __exit dn_dev_cleanup(void)
-{
-#ifdef CONFIG_SYSCTL
- {
- int i;
- for(i = 0; i < DN_DEV_LIST_SIZE; i++)
- dn_dev_sysctl_unregister(&dn_dev_list[i]);
- }
-#endif /* CONFIG_SYSCTL */
-
- remove_proc_entry("decnet_dev", init_net.proc_net);
-
- dn_dev_devices_off();
-}
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
deleted file mode 100644
index f78fe58eafc8..000000000000
--- a/net/decnet/dn_fib.c
+++ /dev/null
@@ -1,799 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Glue/Info List)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Alexey Kuznetsov : SMP locking changes
- * Steve Whitehouse : Rewrote it... Well to be more correct, I
- * copied most of it from the ipv4 fib code.
- * Steve Whitehouse : Updated it in style and fixed a few bugs
- * which were fixed in the ipv4 code since
- * this code was copied from it.
- *
- */
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/sockios.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-#include <net/nexthop.h>
-
-#define RT_MIN_TABLE 1
-
-#define for_fib_info() { struct dn_fib_info *fi;\
- for(fi = dn_fib_info_list; fi; fi = fi->fib_next)
-#define endfor_fib_info() }
-
-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define change_nexthops(fi) { int nhsel; struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (struct dn_fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define endfor_nexthops(fi) }
-
-static DEFINE_SPINLOCK(dn_fib_multipath_lock);
-static struct dn_fib_info *dn_fib_info_list;
-static DEFINE_SPINLOCK(dn_fib_info_lock);
-
-static struct
-{
- int error;
- u8 scope;
-} dn_fib_props[RTN_MAX+1] = {
- [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
- [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE },
- [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST },
- [RTN_BROADCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_ANYCAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_MULTICAST] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
- [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE },
- [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE },
- [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE },
- [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE },
- [RTN_NAT] = { .error = 0, .scope = RT_SCOPE_NOWHERE },
- [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE },
-};
-
-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force);
-static int dn_fib_sync_up(struct net_device *dev);
-
-void dn_fib_free_info(struct dn_fib_info *fi)
-{
- if (fi->fib_dead == 0) {
- printk(KERN_DEBUG "DECnet: BUG! Attempt to free alive dn_fib_info\n");
- return;
- }
-
- change_nexthops(fi) {
- if (nh->nh_dev)
- dev_put(nh->nh_dev);
- nh->nh_dev = NULL;
- } endfor_nexthops(fi);
- kfree(fi);
-}
-
-void dn_fib_release_info(struct dn_fib_info *fi)
-{
- spin_lock(&dn_fib_info_lock);
- if (fi && --fi->fib_treeref == 0) {
- if (fi->fib_next)
- fi->fib_next->fib_prev = fi->fib_prev;
- if (fi->fib_prev)
- fi->fib_prev->fib_next = fi->fib_next;
- if (fi == dn_fib_info_list)
- dn_fib_info_list = fi->fib_next;
- fi->fib_dead = 1;
- dn_fib_info_put(fi);
- }
- spin_unlock(&dn_fib_info_lock);
-}
-
-static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi)
-{
- const struct dn_fib_nh *onh = ofi->fib_nh;
-
- for_nexthops(fi) {
- if (nh->nh_oif != onh->nh_oif ||
- nh->nh_gw != onh->nh_gw ||
- nh->nh_scope != onh->nh_scope ||
- nh->nh_weight != onh->nh_weight ||
- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
- return -1;
- onh++;
- } endfor_nexthops(fi);
- return 0;
-}
-
-static inline struct dn_fib_info *dn_fib_find_info(const struct dn_fib_info *nfi)
-{
- for_fib_info() {
- if (fi->fib_nhs != nfi->fib_nhs)
- continue;
- if (nfi->fib_protocol == fi->fib_protocol &&
- nfi->fib_prefsrc == fi->fib_prefsrc &&
- nfi->fib_priority == fi->fib_priority &&
- memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 &&
- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
- (nfi->fib_nhs == 0 || dn_fib_nh_comp(fi, nfi) == 0))
- return fi;
- } endfor_fib_info();
- return NULL;
-}
-
-static int dn_fib_count_nhs(const struct nlattr *attr)
-{
- struct rtnexthop *nhp = nla_data(attr);
- int nhs = 0, nhlen = nla_len(attr);
-
- while (rtnh_ok(nhp, nhlen)) {
- nhs++;
- nhp = rtnh_next(nhp, &nhlen);
- }
-
- /* leftover implies invalid nexthop configuration, discard it */
- return nhlen > 0 ? 0 : nhs;
-}
-
-static int dn_fib_get_nhs(struct dn_fib_info *fi, const struct nlattr *attr,
- const struct rtmsg *r)
-{
- struct rtnexthop *nhp = nla_data(attr);
- int nhlen = nla_len(attr);
-
- change_nexthops(fi) {
- int attrlen;
-
- if (!rtnh_ok(nhp, nhlen))
- return -EINVAL;
-
- nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
- nh->nh_oif = nhp->rtnh_ifindex;
- nh->nh_weight = nhp->rtnh_hops + 1;
-
- attrlen = rtnh_attrlen(nhp);
- if (attrlen > 0) {
- struct nlattr *gw_attr;
-
- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
- nh->nh_gw = gw_attr ? nla_get_le16(gw_attr) : 0;
- }
-
- nhp = rtnh_next(nhp, &nhlen);
- } endfor_nexthops(fi);
-
- return 0;
-}
-
-
-static int dn_fib_check_nh(const struct rtmsg *r, struct dn_fib_info *fi, struct dn_fib_nh *nh)
-{
- int err;
-
- if (nh->nh_gw) {
- struct flowidn fld;
- struct dn_fib_res res;
-
- if (nh->nh_flags&RTNH_F_ONLINK) {
- struct net_device *dev;
-
- if (r->rtm_scope >= RT_SCOPE_LINK)
- return -EINVAL;
- if (dnet_addr_type(nh->nh_gw) != RTN_UNICAST)
- return -EINVAL;
- if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
- return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
- nh->nh_dev = dev;
- dev_hold(dev);
- nh->nh_scope = RT_SCOPE_LINK;
- return 0;
- }
-
- memset(&fld, 0, sizeof(fld));
- fld.daddr = nh->nh_gw;
- fld.flowidn_oif = nh->nh_oif;
- fld.flowidn_scope = r->rtm_scope + 1;
-
- if (fld.flowidn_scope < RT_SCOPE_LINK)
- fld.flowidn_scope = RT_SCOPE_LINK;
-
- if ((err = dn_fib_lookup(&fld, &res)) != 0)
- return err;
-
- err = -EINVAL;
- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
- goto out;
- nh->nh_scope = res.scope;
- nh->nh_oif = DN_FIB_RES_OIF(res);
- nh->nh_dev = DN_FIB_RES_DEV(res);
- if (nh->nh_dev == NULL)
- goto out;
- dev_hold(nh->nh_dev);
- err = -ENETDOWN;
- if (!(nh->nh_dev->flags & IFF_UP))
- goto out;
- err = 0;
-out:
- dn_fib_res_put(&res);
- return err;
- } else {
- struct net_device *dev;
-
- if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
- return -EINVAL;
-
- dev = __dev_get_by_index(&init_net, nh->nh_oif);
- if (dev == NULL || dev->dn_ptr == NULL)
- return -ENODEV;
- if (!(dev->flags&IFF_UP))
- return -ENETDOWN;
- nh->nh_dev = dev;
- dev_hold(nh->nh_dev);
- nh->nh_scope = RT_SCOPE_HOST;
- }
-
- return 0;
-}
-
-
-struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *attrs[],
- const struct nlmsghdr *nlh, int *errp)
-{
- int err;
- struct dn_fib_info *fi = NULL;
- struct dn_fib_info *ofi;
- int nhs = 1;
-
- if (r->rtm_type > RTN_MAX)
- goto err_inval;
-
- if (dn_fib_props[r->rtm_type].scope > r->rtm_scope)
- goto err_inval;
-
- if (attrs[RTA_MULTIPATH] &&
- (nhs = dn_fib_count_nhs(attrs[RTA_MULTIPATH])) == 0)
- goto err_inval;
-
- fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct dn_fib_nh), GFP_KERNEL);
- err = -ENOBUFS;
- if (fi == NULL)
- goto failure;
-
- fi->fib_protocol = r->rtm_protocol;
- fi->fib_nhs = nhs;
- fi->fib_flags = r->rtm_flags;
-
- if (attrs[RTA_PRIORITY])
- fi->fib_priority = nla_get_u32(attrs[RTA_PRIORITY]);
-
- if (attrs[RTA_METRICS]) {
- struct nlattr *attr;
- int rem;
-
- nla_for_each_nested(attr, attrs[RTA_METRICS], rem) {
- int type = nla_type(attr);
-
- if (type) {
- if (type > RTAX_MAX || type == RTAX_CC_ALGO ||
- nla_len(attr) < 4)
- goto err_inval;
-
- fi->fib_metrics[type-1] = nla_get_u32(attr);
- }
- }
- }
-
- if (attrs[RTA_PREFSRC])
- fi->fib_prefsrc = nla_get_le16(attrs[RTA_PREFSRC]);
-
- if (attrs[RTA_MULTIPATH]) {
- if ((err = dn_fib_get_nhs(fi, attrs[RTA_MULTIPATH], r)) != 0)
- goto failure;
-
- if (attrs[RTA_OIF] &&
- fi->fib_nh->nh_oif != nla_get_u32(attrs[RTA_OIF]))
- goto err_inval;
-
- if (attrs[RTA_GATEWAY] &&
- fi->fib_nh->nh_gw != nla_get_le16(attrs[RTA_GATEWAY]))
- goto err_inval;
- } else {
- struct dn_fib_nh *nh = fi->fib_nh;
-
- if (attrs[RTA_OIF])
- nh->nh_oif = nla_get_u32(attrs[RTA_OIF]);
-
- if (attrs[RTA_GATEWAY])
- nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
-
- nh->nh_flags = r->rtm_flags;
- nh->nh_weight = 1;
- }
-
- if (r->rtm_type == RTN_NAT) {
- if (!attrs[RTA_GATEWAY] || nhs != 1 || attrs[RTA_OIF])
- goto err_inval;
-
- fi->fib_nh->nh_gw = nla_get_le16(attrs[RTA_GATEWAY]);
- goto link_it;
- }
-
- if (dn_fib_props[r->rtm_type].error) {
- if (attrs[RTA_GATEWAY] || attrs[RTA_OIF] || attrs[RTA_MULTIPATH])
- goto err_inval;
-
- goto link_it;
- }
-
- if (r->rtm_scope > RT_SCOPE_HOST)
- goto err_inval;
-
- if (r->rtm_scope == RT_SCOPE_HOST) {
- struct dn_fib_nh *nh = fi->fib_nh;
-
- /* Local address is added */
- if (nhs != 1 || nh->nh_gw)
- goto err_inval;
- nh->nh_scope = RT_SCOPE_NOWHERE;
- nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
- err = -ENODEV;
- if (nh->nh_dev == NULL)
- goto failure;
- } else {
- change_nexthops(fi) {
- if ((err = dn_fib_check_nh(r, fi, nh)) != 0)
- goto failure;
- } endfor_nexthops(fi)
- }
-
- if (fi->fib_prefsrc) {
- if (r->rtm_type != RTN_LOCAL || !attrs[RTA_DST] ||
- fi->fib_prefsrc != nla_get_le16(attrs[RTA_DST]))
- if (dnet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
- goto err_inval;
- }
-
-link_it:
- if ((ofi = dn_fib_find_info(fi)) != NULL) {
- fi->fib_dead = 1;
- dn_fib_free_info(fi);
- ofi->fib_treeref++;
- return ofi;
- }
-
- fi->fib_treeref++;
- refcount_set(&fi->fib_clntref, 1);
- spin_lock(&dn_fib_info_lock);
- fi->fib_next = dn_fib_info_list;
- fi->fib_prev = NULL;
- if (dn_fib_info_list)
- dn_fib_info_list->fib_prev = fi;
- dn_fib_info_list = fi;
- spin_unlock(&dn_fib_info_lock);
- return fi;
-
-err_inval:
- err = -EINVAL;
-
-failure:
- *errp = err;
- if (fi) {
- fi->fib_dead = 1;
- dn_fib_free_info(fi);
- }
-
- return NULL;
-}
-
-int dn_fib_semantic_match(int type, struct dn_fib_info *fi, const struct flowidn *fld, struct dn_fib_res *res)
-{
- int err = dn_fib_props[type].error;
-
- if (err == 0) {
- if (fi->fib_flags & RTNH_F_DEAD)
- return 1;
-
- res->fi = fi;
-
- switch (type) {
- case RTN_NAT:
- DN_FIB_RES_RESET(*res);
- refcount_inc(&fi->fib_clntref);
- return 0;
- case RTN_UNICAST:
- case RTN_LOCAL:
- for_nexthops(fi) {
- if (nh->nh_flags & RTNH_F_DEAD)
- continue;
- if (!fld->flowidn_oif ||
- fld->flowidn_oif == nh->nh_oif)
- break;
- }
- if (nhsel < fi->fib_nhs) {
- res->nh_sel = nhsel;
- refcount_inc(&fi->fib_clntref);
- return 0;
- }
- endfor_nexthops(fi);
- res->fi = NULL;
- return 1;
- default:
- net_err_ratelimited("DECnet: impossible routing event : dn_fib_semantic_match type=%d\n",
- type);
- res->fi = NULL;
- return -EINVAL;
- }
- }
- return err;
-}
-
-void dn_fib_select_multipath(const struct flowidn *fld, struct dn_fib_res *res)
-{
- struct dn_fib_info *fi = res->fi;
- int w;
-
- spin_lock_bh(&dn_fib_multipath_lock);
- if (fi->fib_power <= 0) {
- int power = 0;
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
- power += nh->nh_weight;
- nh->nh_power = nh->nh_weight;
- }
- } endfor_nexthops(fi);
- fi->fib_power = power;
- if (power < 0) {
- spin_unlock_bh(&dn_fib_multipath_lock);
- res->nh_sel = 0;
- return;
- }
- }
-
- w = jiffies % fi->fib_power;
-
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
- if ((w -= nh->nh_power) <= 0) {
- nh->nh_power--;
- fi->fib_power--;
- res->nh_sel = nhsel;
- spin_unlock_bh(&dn_fib_multipath_lock);
- return;
- }
- }
- } endfor_nexthops(fi);
- res->nh_sel = 0;
- spin_unlock_bh(&dn_fib_multipath_lock);
-}
-
-static inline u32 rtm_get_table(struct nlattr *attrs[], u8 table)
-{
- if (attrs[RTA_TABLE])
- table = nla_get_u32(attrs[RTA_TABLE]);
-
- return table;
-}
-
-static int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_fib_table *tb;
- struct rtmsg *r = nlmsg_data(nlh);
- struct nlattr *attrs[RTA_MAX+1];
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
- extack);
- if (err < 0)
- return err;
-
- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 0);
- if (!tb)
- return -ESRCH;
-
- return tb->delete(tb, r, attrs, nlh, &NETLINK_CB(skb));
-}
-
-static int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_fib_table *tb;
- struct rtmsg *r = nlmsg_data(nlh);
- struct nlattr *attrs[RTA_MAX+1];
- int err;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- return -EPERM;
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse(nlh, sizeof(*r), attrs, RTA_MAX, rtm_dn_policy,
- extack);
- if (err < 0)
- return err;
-
- tb = dn_fib_get_table(rtm_get_table(attrs, r->rtm_table), 1);
- if (!tb)
- return -ENOBUFS;
-
- return tb->insert(tb, r, attrs, nlh, &NETLINK_CB(skb));
-}
-
-static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa)
-{
- struct dn_fib_table *tb;
- struct {
- struct nlmsghdr nlh;
- struct rtmsg rtm;
- } req;
- struct {
- struct nlattr hdr;
- __le16 dst;
- } dst_attr = {
- .dst = dst,
- };
- struct {
- struct nlattr hdr;
- __le16 prefsrc;
- } prefsrc_attr = {
- .prefsrc = ifa->ifa_local,
- };
- struct {
- struct nlattr hdr;
- u32 oif;
- } oif_attr = {
- .oif = ifa->ifa_dev->dev->ifindex,
- };
- struct nlattr *attrs[RTA_MAX+1] = {
- [RTA_DST] = (struct nlattr *) &dst_attr,
- [RTA_PREFSRC] = (struct nlattr * ) &prefsrc_attr,
- [RTA_OIF] = (struct nlattr *) &oif_attr,
- };
-
- memset(&req.rtm, 0, sizeof(req.rtm));
-
- if (type == RTN_UNICAST)
- tb = dn_fib_get_table(RT_MIN_TABLE, 1);
- else
- tb = dn_fib_get_table(RT_TABLE_LOCAL, 1);
-
- if (tb == NULL)
- return;
-
- req.nlh.nlmsg_len = sizeof(req);
- req.nlh.nlmsg_type = cmd;
- req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND;
- req.nlh.nlmsg_pid = 0;
- req.nlh.nlmsg_seq = 0;
-
- req.rtm.rtm_dst_len = dst_len;
- req.rtm.rtm_table = tb->n;
- req.rtm.rtm_protocol = RTPROT_KERNEL;
- req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST);
- req.rtm.rtm_type = type;
-
- if (cmd == RTM_NEWROUTE)
- tb->insert(tb, &req.rtm, attrs, &req.nlh, NULL);
- else
- tb->delete(tb, &req.rtm, attrs, &req.nlh, NULL);
-}
-
-static void dn_fib_add_ifaddr(struct dn_ifaddr *ifa)
-{
-
- fib_magic(RTM_NEWROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
-
-#if 0
- if (!(dev->flags&IFF_UP))
- return;
- /* In the future, we will want to add default routes here */
-
-#endif
-}
-
-static void dn_fib_del_ifaddr(struct dn_ifaddr *ifa)
-{
- int found_it = 0;
- struct net_device *dev;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa2;
-
- ASSERT_RTNL();
-
- /* Scan device list */
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL)
- continue;
- for (ifa2 = rcu_dereference(dn_db->ifa_list);
- ifa2 != NULL;
- ifa2 = rcu_dereference(ifa2->ifa_next)) {
- if (ifa2->ifa_local == ifa->ifa_local) {
- found_it = 1;
- break;
- }
- }
- }
- rcu_read_unlock();
-
- if (found_it == 0) {
- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 16, ifa);
-
- if (dnet_addr_type(ifa->ifa_local) != RTN_LOCAL) {
- if (dn_fib_sync_down(ifa->ifa_local, NULL, 0))
- dn_fib_flush();
- }
- }
-}
-
-static void dn_fib_disable_addr(struct net_device *dev, int force)
-{
- if (dn_fib_sync_down(0, dev, force))
- dn_fib_flush();
- dn_rt_cache_flush(0);
- neigh_ifdown(&dn_neigh_table, dev);
-}
-
-static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct dn_ifaddr *ifa = (struct dn_ifaddr *)ptr;
-
- switch (event) {
- case NETDEV_UP:
- dn_fib_add_ifaddr(ifa);
- dn_fib_sync_up(ifa->ifa_dev->dev);
- dn_rt_cache_flush(-1);
- break;
- case NETDEV_DOWN:
- dn_fib_del_ifaddr(ifa);
- if (ifa->ifa_dev && ifa->ifa_dev->ifa_list == NULL) {
- dn_fib_disable_addr(ifa->ifa_dev->dev, 1);
- } else {
- dn_rt_cache_flush(-1);
- }
- break;
- }
- return NOTIFY_DONE;
-}
-
-static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force)
-{
- int ret = 0;
- int scope = RT_SCOPE_NOWHERE;
-
- if (force)
- scope = -1;
-
- for_fib_info() {
- /*
- * This makes no sense for DECnet.... we will almost
- * certainly have more than one local address the same
- * over all our interfaces. It needs thinking about
- * some more.
- */
- if (local && fi->fib_prefsrc == local) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
- } else if (dev && fi->fib_nhs) {
- int dead = 0;
-
- change_nexthops(fi) {
- if (nh->nh_flags&RTNH_F_DEAD)
- dead++;
- else if (nh->nh_dev == dev &&
- nh->nh_scope != scope) {
- spin_lock_bh(&dn_fib_multipath_lock);
- nh->nh_flags |= RTNH_F_DEAD;
- fi->fib_power -= nh->nh_power;
- nh->nh_power = 0;
- spin_unlock_bh(&dn_fib_multipath_lock);
- dead++;
- }
- } endfor_nexthops(fi)
- if (dead == fi->fib_nhs) {
- fi->fib_flags |= RTNH_F_DEAD;
- ret++;
- }
- }
- } endfor_fib_info();
- return ret;
-}
-
-
-static int dn_fib_sync_up(struct net_device *dev)
-{
- int ret = 0;
-
- if (!(dev->flags&IFF_UP))
- return 0;
-
- for_fib_info() {
- int alive = 0;
-
- change_nexthops(fi) {
- if (!(nh->nh_flags&RTNH_F_DEAD)) {
- alive++;
- continue;
- }
- if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
- continue;
- if (nh->nh_dev != dev || dev->dn_ptr == NULL)
- continue;
- alive++;
- spin_lock_bh(&dn_fib_multipath_lock);
- nh->nh_power = 0;
- nh->nh_flags &= ~RTNH_F_DEAD;
- spin_unlock_bh(&dn_fib_multipath_lock);
- } endfor_nexthops(fi);
-
- if (alive > 0) {
- fi->fib_flags &= ~RTNH_F_DEAD;
- ret++;
- }
- } endfor_fib_info();
- return ret;
-}
-
-static struct notifier_block dn_fib_dnaddr_notifier = {
- .notifier_call = dn_fib_dnaddr_event,
-};
-
-void __exit dn_fib_cleanup(void)
-{
- dn_fib_table_cleanup();
- dn_fib_rules_cleanup();
-
- unregister_dnaddr_notifier(&dn_fib_dnaddr_notifier);
-}
-
-
-void __init dn_fib_init(void)
-{
- dn_fib_table_init();
- dn_fib_rules_init();
-
- register_dnaddr_notifier(&dn_fib_dnaddr_notifier);
-
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_NEWROUTE,
- dn_fib_rtm_newroute, NULL, 0);
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_DELROUTE,
- dn_fib_rtm_delroute, NULL, 0);
-}
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
deleted file mode 100644
index 94b306f6d551..000000000000
--- a/net/decnet/dn_neigh.c
+++ /dev/null
@@ -1,605 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Neighbour Functions (Adjacency Database and
- * On-Ethernet Cache)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse : Fixed router listing routine
- * Steve Whitehouse : Added error_report functions
- * Steve Whitehouse : Added default router detection
- * Steve Whitehouse : Hop counts in outgoing messages
- * Steve Whitehouse : Fixed src/dst in outgoing messages so
- * forwarding now stands a good chance of
- * working.
- * Steve Whitehouse : Fixed neighbour states (for now anyway).
- * Steve Whitehouse : Made error_report functions dummies. This
- * is not the right place to return skbs.
- * Steve Whitehouse : Convert to seq_file
- *
- */
-
-#include <linux/net.h>
-#include <linux/module.h>
-#include <linux/socket.h>
-#include <linux/if_arp.h>
-#include <linux/slab.h>
-#include <linux/if_ether.h>
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/string.h>
-#include <linux/netfilter_decnet.h>
-#include <linux/spinlock.h>
-#include <linux/seq_file.h>
-#include <linux/rcupdate.h>
-#include <linux/jhash.h>
-#include <linux/atomic.h>
-#include <net/net_namespace.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_neigh.h>
-#include <net/dn_route.h>
-
-static int dn_neigh_construct(struct neighbour *);
-static void dn_neigh_error_report(struct neighbour *, struct sk_buff *);
-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb);
-
-/*
- * Operations for adding the link layer header.
- */
-static const struct neigh_ops dn_neigh_ops = {
- .family = AF_DECnet,
- .error_report = dn_neigh_error_report,
- .output = dn_neigh_output,
- .connected_output = dn_neigh_output,
-};
-
-static u32 dn_neigh_hash(const void *pkey,
- const struct net_device *dev,
- __u32 *hash_rnd)
-{
- return jhash_2words(*(__u16 *)pkey, 0, hash_rnd[0]);
-}
-
-static bool dn_key_eq(const struct neighbour *neigh, const void *pkey)
-{
- return neigh_key_eq16(neigh, pkey);
-}
-
-struct neigh_table dn_neigh_table = {
- .family = PF_DECnet,
- .entry_size = NEIGH_ENTRY_SIZE(sizeof(struct dn_neigh)),
- .key_len = sizeof(__le16),
- .protocol = cpu_to_be16(ETH_P_DNA_RT),
- .hash = dn_neigh_hash,
- .key_eq = dn_key_eq,
- .constructor = dn_neigh_construct,
- .id = "dn_neigh_cache",
- .parms ={
- .tbl = &dn_neigh_table,
- .reachable_time = 30 * HZ,
- .data = {
- [NEIGH_VAR_MCAST_PROBES] = 0,
- [NEIGH_VAR_UCAST_PROBES] = 0,
- [NEIGH_VAR_APP_PROBES] = 0,
- [NEIGH_VAR_RETRANS_TIME] = 1 * HZ,
- [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ,
- [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ,
- [NEIGH_VAR_GC_STALETIME] = 60 * HZ,
- [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX,
- [NEIGH_VAR_PROXY_QLEN] = 0,
- [NEIGH_VAR_ANYCAST_DELAY] = 0,
- [NEIGH_VAR_PROXY_DELAY] = 0,
- [NEIGH_VAR_LOCKTIME] = 1 * HZ,
- },
- },
- .gc_interval = 30 * HZ,
- .gc_thresh1 = 128,
- .gc_thresh2 = 512,
- .gc_thresh3 = 1024,
-};
-
-static int dn_neigh_construct(struct neighbour *neigh)
-{
- struct net_device *dev = neigh->dev;
- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
- struct dn_dev *dn_db;
- struct neigh_parms *parms;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- if (dn_db == NULL) {
- rcu_read_unlock();
- return -EINVAL;
- }
-
- parms = dn_db->neigh_parms;
- if (!parms) {
- rcu_read_unlock();
- return -EINVAL;
- }
-
- __neigh_parms_put(neigh->parms);
- neigh->parms = neigh_parms_clone(parms);
- rcu_read_unlock();
-
- neigh->ops = &dn_neigh_ops;
- neigh->nud_state = NUD_NOARP;
- neigh->output = neigh->ops->connected_output;
-
- if ((dev->type == ARPHRD_IPGRE) || (dev->flags & IFF_POINTOPOINT))
- memcpy(neigh->ha, dev->broadcast, dev->addr_len);
- else if ((dev->type == ARPHRD_ETHER) || (dev->type == ARPHRD_LOOPBACK))
- dn_dn2eth(neigh->ha, dn->addr);
- else {
- net_dbg_ratelimited("Trying to create neigh for hw %d\n",
- dev->type);
- return -EINVAL;
- }
-
- /*
- * Make an estimate of the remote block size by assuming that its
- * two less then the device mtu, which it true for ethernet (and
- * other things which support long format headers) since there is
- * an extra length field (of 16 bits) which isn't part of the
- * ethernet headers and which the DECnet specs won't admit is part
- * of the DECnet routing headers either.
- *
- * If we over estimate here its no big deal, the NSP negotiations
- * will prevent us from sending packets which are too large for the
- * remote node to handle. In any case this figure is normally updated
- * by a hello message in most cases.
- */
- dn->blksize = dev->mtu - 2;
-
- return 0;
-}
-
-static void dn_neigh_error_report(struct neighbour *neigh, struct sk_buff *skb)
-{
- printk(KERN_DEBUG "dn_neigh_error_report: called\n");
- kfree_skb(skb);
-}
-
-static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct net_device *dev = neigh->dev;
- char mac_addr[ETH_ALEN];
- unsigned int seq;
- int err;
-
- dn_dn2eth(mac_addr, rt->rt_local_src);
- do {
- seq = read_seqbegin(&neigh->ha_lock);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol),
- neigh->ha, mac_addr, skb->len);
- } while (read_seqretry(&neigh->ha_lock, seq));
-
- if (err >= 0)
- err = dev_queue_xmit(skb);
- else {
- kfree_skb(skb);
- err = -EINVAL;
- }
- return err;
-}
-
-static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct neighbour *neigh = rt->n;
-
- return neigh->output(neigh, skb);
-}
-
-/*
- * For talking to broadcast devices: Ethernet & PPP
- */
-static int dn_long_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3;
- unsigned char *data;
- struct dn_long_packet *lp;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_long_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_long_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_long_packet) + 3);
- lp = (struct dn_long_packet *)(data+3);
-
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- *(data + 2) = 1 | DN_RT_F_PF; /* Padding */
-
- lp->msgflg = DN_RT_PKT_LONG|(cb->rt_flags&(DN_RT_F_IE|DN_RT_F_RQR|DN_RT_F_RTS));
- lp->d_area = lp->d_subarea = 0;
- dn_dn2eth(lp->d_id, cb->dst);
- lp->s_area = lp->s_subarea = 0;
- dn_dn2eth(lp->s_id, cb->src);
- lp->nl2 = 0;
- lp->visit_ct = cb->hops & 0x3f;
- lp->s_class = 0;
- lp->pt = 0;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-/*
- * For talking to pointopoint and multidrop devices: DDCMP and X.25
- */
-static int dn_short_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
- struct dn_short_packet *sp;
- unsigned char *data;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_short_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_short_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- sp = (struct dn_short_packet *)(data+2);
-
- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
- sp->dstnode = cb->dst;
- sp->srcnode = cb->src;
- sp->forward = cb->hops & 0x3f;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-/*
- * For talking to DECnet phase III nodes
- * Phase 3 output is the same as short output, execpt that
- * it clears the area bits before transmission.
- */
-static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
- struct sk_buff *skb)
-{
- struct net_device *dev = neigh->dev;
- int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2;
- struct dn_short_packet *sp;
- unsigned char *data;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- if (skb_headroom(skb) < headroom) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, headroom);
- if (skb2 == NULL) {
- net_crit_ratelimited("dn_phase3_output: no memory\n");
- kfree_skb(skb);
- return -ENOBUFS;
- }
- consume_skb(skb);
- skb = skb2;
- net_info_ratelimited("dn_phase3_output: Increasing headroom\n");
- }
-
- data = skb_push(skb, sizeof(struct dn_short_packet) + 2);
- *((__le16 *)data) = cpu_to_le16(skb->len - 2);
- sp = (struct dn_short_packet *)(data + 2);
-
- sp->msgflg = DN_RT_PKT_SHORT|(cb->rt_flags&(DN_RT_F_RQR|DN_RT_F_RTS));
- sp->dstnode = cb->dst & cpu_to_le16(0x03ff);
- sp->srcnode = cb->src & cpu_to_le16(0x03ff);
- sp->forward = cb->hops & 0x3f;
-
- skb_reset_network_header(skb);
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
- &init_net, sk, skb, NULL, neigh->dev,
- dn_neigh_output_packet);
-}
-
-int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *neigh = rt->n;
- struct dn_neigh *dn = container_of(neigh, struct dn_neigh, n);
- struct dn_dev *dn_db;
- bool use_long;
-
- rcu_read_lock();
- dn_db = rcu_dereference(neigh->dev->dn_ptr);
- if (dn_db == NULL) {
- rcu_read_unlock();
- return -EINVAL;
- }
- use_long = dn_db->use_long;
- rcu_read_unlock();
-
- if (dn->flags & DN_NDFLAG_P3)
- return dn_phase3_output(neigh, sk, skb);
- if (use_long)
- return dn_long_output(neigh, sk, skb);
- else
- return dn_short_output(neigh, sk, skb);
-}
-
-/*
- * Unfortunately, the neighbour code uses the device in its hash
- * function, so we don't get any advantage from it. This function
- * basically does a neigh_lookup(), but without comparing the device
- * field. This is required for the On-Ethernet cache
- */
-
-/*
- * Pointopoint link receives a hello message
- */
-void dn_neigh_pointopoint_hello(struct sk_buff *skb)
-{
- kfree_skb(skb);
-}
-
-/*
- * Ethernet router hello message received
- */
-int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
-
- struct neighbour *neigh;
- struct dn_neigh *dn;
- struct dn_dev *dn_db;
- __le16 src;
-
- src = dn_eth2dn(msg->id);
-
- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
-
- dn = container_of(neigh, struct dn_neigh, n);
-
- if (neigh) {
- write_lock(&neigh->lock);
-
- neigh->used = jiffies;
- dn_db = rcu_dereference(neigh->dev->dn_ptr);
-
- if (!(neigh->nud_state & NUD_PERMANENT)) {
- neigh->updated = jiffies;
-
- if (neigh->dev->type == ARPHRD_ETHER)
- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
-
- dn->blksize = le16_to_cpu(msg->blksize);
- dn->priority = msg->priority;
-
- dn->flags &= ~DN_NDFLAG_P3;
-
- switch (msg->iinfo & DN_RT_INFO_TYPE) {
- case DN_RT_INFO_L1RT:
- dn->flags &=~DN_NDFLAG_R2;
- dn->flags |= DN_NDFLAG_R1;
- break;
- case DN_RT_INFO_L2RT:
- dn->flags |= DN_NDFLAG_R2;
- }
- }
-
- /* Only use routers in our area */
- if ((le16_to_cpu(src)>>10) == (le16_to_cpu((decnet_address))>>10)) {
- if (!dn_db->router) {
- dn_db->router = neigh_clone(neigh);
- } else {
- if (msg->priority > ((struct dn_neigh *)dn_db->router)->priority)
- neigh_release(xchg(&dn_db->router, neigh_clone(neigh)));
- }
- }
- write_unlock(&neigh->lock);
- neigh_release(neigh);
- }
-
- kfree_skb(skb);
- return 0;
-}
-
-/*
- * Endnode hello message received
- */
-int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
- struct neighbour *neigh;
- struct dn_neigh *dn;
- __le16 src;
-
- src = dn_eth2dn(msg->id);
-
- neigh = __neigh_lookup(&dn_neigh_table, &src, skb->dev, 1);
-
- dn = container_of(neigh, struct dn_neigh, n);
-
- if (neigh) {
- write_lock(&neigh->lock);
-
- neigh->used = jiffies;
-
- if (!(neigh->nud_state & NUD_PERMANENT)) {
- neigh->updated = jiffies;
-
- if (neigh->dev->type == ARPHRD_ETHER)
- memcpy(neigh->ha, &eth_hdr(skb)->h_source, ETH_ALEN);
- dn->flags &= ~(DN_NDFLAG_R1 | DN_NDFLAG_R2);
- dn->blksize = le16_to_cpu(msg->blksize);
- dn->priority = 0;
- }
-
- write_unlock(&neigh->lock);
- neigh_release(neigh);
- }
-
- kfree_skb(skb);
- return 0;
-}
-
-static char *dn_find_slot(char *base, int max, int priority)
-{
- int i;
- unsigned char *min = NULL;
-
- base += 6; /* skip first id */
-
- for(i = 0; i < max; i++) {
- if (!min || (*base < *min))
- min = base;
- base += 7; /* find next priority */
- }
-
- if (!min)
- return NULL;
-
- return (*min < priority) ? (min - 6) : NULL;
-}
-
-struct elist_cb_state {
- struct net_device *dev;
- unsigned char *ptr;
- unsigned char *rs;
- int t, n;
-};
-
-static void neigh_elist_cb(struct neighbour *neigh, void *_info)
-{
- struct elist_cb_state *s = _info;
- struct dn_neigh *dn;
-
- if (neigh->dev != s->dev)
- return;
-
- dn = container_of(neigh, struct dn_neigh, n);
- if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2)))
- return;
-
- if (s->t == s->n)
- s->rs = dn_find_slot(s->ptr, s->n, dn->priority);
- else
- s->t++;
- if (s->rs == NULL)
- return;
-
- dn_dn2eth(s->rs, dn->addr);
- s->rs += 6;
- *(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0;
- *(s->rs) |= dn->priority;
- s->rs++;
-}
-
-int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n)
-{
- struct elist_cb_state state;
-
- state.dev = dev;
- state.t = 0;
- state.n = n;
- state.ptr = ptr;
- state.rs = ptr;
-
- neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state);
-
- return state.t;
-}
-
-
-#ifdef CONFIG_PROC_FS
-
-static inline void dn_neigh_format_entry(struct seq_file *seq,
- struct neighbour *n)
-{
- struct dn_neigh *dn = container_of(n, struct dn_neigh, n);
- char buf[DN_ASCBUF_LEN];
-
- read_lock(&n->lock);
- seq_printf(seq, "%-7s %s%s%s %02x %02d %07ld %-8s\n",
- dn_addr2asc(le16_to_cpu(dn->addr), buf),
- (dn->flags&DN_NDFLAG_R1) ? "1" : "-",
- (dn->flags&DN_NDFLAG_R2) ? "2" : "-",
- (dn->flags&DN_NDFLAG_P3) ? "3" : "-",
- dn->n.nud_state,
- refcount_read(&dn->n.refcnt),
- dn->blksize,
- (dn->n.dev) ? dn->n.dev->name : "?");
- read_unlock(&n->lock);
-}
-
-static int dn_neigh_seq_show(struct seq_file *seq, void *v)
-{
- if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "Addr Flags State Use Blksize Dev\n");
- } else {
- dn_neigh_format_entry(seq, v);
- }
-
- return 0;
-}
-
-static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos)
-{
- return neigh_seq_start(seq, pos, &dn_neigh_table,
- NEIGH_SEQ_NEIGH_ONLY);
-}
-
-static const struct seq_operations dn_neigh_seq_ops = {
- .start = dn_neigh_seq_start,
- .next = neigh_seq_next,
- .stop = neigh_seq_stop,
- .show = dn_neigh_seq_show,
-};
-#endif
-
-void __init dn_neigh_init(void)
-{
- neigh_table_init(NEIGH_DN_TABLE, &dn_neigh_table);
- proc_create_net("decnet_neigh", 0444, init_net.proc_net,
- &dn_neigh_seq_ops, sizeof(struct neigh_seq_state));
-}
-
-void __exit dn_neigh_cleanup(void)
-{
- remove_proc_entry("decnet_neigh", init_net.proc_net);
- neigh_table_clear(NEIGH_DN_TABLE, &dn_neigh_table);
-}
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
deleted file mode 100644
index 2fb5e055ba25..000000000000
--- a/net/decnet/dn_nsp_in.c
+++ /dev/null
@@ -1,914 +0,0 @@
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Network Services Protocol (Input)
- *
- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- *
- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
- * original dn_nsp.c.
- * Steve Whitehouse: Updated to work with my new routing architecture.
- * Steve Whitehouse: Add changes from Eduardo Serrat's patches.
- * Steve Whitehouse: Put all ack handling code in a common routine.
- * Steve Whitehouse: Put other common bits into dn_nsp_rx()
- * Steve Whitehouse: More checks on skb->len to catch bogus packets
- * Fixed various race conditions and possible nasties.
- * Steve Whitehouse: Now handles returned conninit frames.
- * David S. Miller: New socket locking
- * Steve Whitehouse: Fixed lockup when socket filtering was enabled.
- * Paul Koning: Fix to push CC sockets into RUN when acks are
- * received.
- * Steve Whitehouse:
- * Patrick Caulfield: Checking conninits for correctness & sending of error
- * responses.
- * Steve Whitehouse: Added backlog congestion level return codes.
- * Patrick Caulfield:
- * Steve Whitehouse: Added flow control support (outbound)
- * Steve Whitehouse: Prepare for nonlinear skbs
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <net/tcp_states.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/termios.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/netfilter_decnet.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-extern int decnet_log_martians;
-
-static void dn_log_martian(struct sk_buff *skb, const char *msg)
-{
- if (decnet_log_martians) {
- char *devname = skb->dev ? skb->dev->name : "???";
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- net_info_ratelimited("DECnet: Martian packet (%s) dev=%s src=0x%04hx dst=0x%04hx srcport=0x%04hx dstport=0x%04hx\n",
- msg, devname,
- le16_to_cpu(cb->src),
- le16_to_cpu(cb->dst),
- le16_to_cpu(cb->src_port),
- le16_to_cpu(cb->dst_port));
- }
-}
-
-/*
- * For this function we've flipped the cross-subchannel bit
- * if the message is an otherdata or linkservice message. Thus
- * we can use it to work out what to update.
- */
-static void dn_ack(struct sock *sk, struct sk_buff *skb, unsigned short ack)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short type = ((ack >> 12) & 0x0003);
- int wakeup = 0;
-
- switch (type) {
- case 0: /* ACK - Data */
- if (dn_after(ack, scp->ackrcv_dat)) {
- scp->ackrcv_dat = ack & 0x0fff;
- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
- &scp->data_xmit_queue,
- ack);
- }
- break;
- case 1: /* NAK - Data */
- break;
- case 2: /* ACK - OtherData */
- if (dn_after(ack, scp->ackrcv_oth)) {
- scp->ackrcv_oth = ack & 0x0fff;
- wakeup |= dn_nsp_check_xmit_queue(sk, skb,
- &scp->other_xmit_queue,
- ack);
- }
- break;
- case 3: /* NAK - OtherData */
- break;
- }
-
- if (wakeup && !sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
-}
-
-/*
- * This function is a universal ack processor.
- */
-static int dn_process_ack(struct sock *sk, struct sk_buff *skb, int oth)
-{
- __le16 *ptr = (__le16 *)skb->data;
- int len = 0;
- unsigned short ack;
-
- if (skb->len < 2)
- return len;
-
- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
- skb_pull(skb, 2);
- ptr++;
- len += 2;
- if ((ack & 0x4000) == 0) {
- if (oth)
- ack ^= 0x2000;
- dn_ack(sk, skb, ack);
- }
- }
-
- if (skb->len < 2)
- return len;
-
- if ((ack = le16_to_cpu(*ptr)) & 0x8000) {
- skb_pull(skb, 2);
- len += 2;
- if ((ack & 0x4000) == 0) {
- if (oth)
- ack ^= 0x2000;
- dn_ack(sk, skb, ack);
- }
- }
-
- return len;
-}
-
-
-/**
- * dn_check_idf - Check an image data field format is correct.
- * @pptr: Pointer to pointer to image data
- * @len: Pointer to length of image data
- * @max: The maximum allowed length of the data in the image data field
- * @follow_on: Check that this many bytes exist beyond the end of the image data
- *
- * Returns: 0 if ok, -1 on error
- */
-static inline int dn_check_idf(unsigned char **pptr, int *len, unsigned char max, unsigned char follow_on)
-{
- unsigned char *ptr = *pptr;
- unsigned char flen = *ptr++;
-
- (*len)--;
- if (flen > max)
- return -1;
- if ((flen + follow_on) > *len)
- return -1;
-
- *len -= flen;
- *pptr = ptr + flen;
- return 0;
-}
-
-/*
- * Table of reason codes to pass back to node which sent us a badly
- * formed message, plus text messages for the log. A zero entry in
- * the reason field means "don't reply" otherwise a disc init is sent with
- * the specified reason code.
- */
-static struct {
- unsigned short reason;
- const char *text;
-} ci_err_table[] = {
- { 0, "CI: Truncated message" },
- { NSP_REASON_ID, "CI: Destination username error" },
- { NSP_REASON_ID, "CI: Destination username type" },
- { NSP_REASON_US, "CI: Source username error" },
- { 0, "CI: Truncated at menuver" },
- { 0, "CI: Truncated before access or user data" },
- { NSP_REASON_IO, "CI: Access data format error" },
- { NSP_REASON_IO, "CI: User data format error" }
-};
-
-/*
- * This function uses a slightly different lookup method
- * to find its sockets, since it searches on object name/number
- * rather than port numbers. Various tests are done to ensure that
- * the incoming data is in the correct format before it is queued to
- * a socket.
- */
-static struct sock *dn_find_listener(struct sk_buff *skb, unsigned short *reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct nsp_conn_init_msg *msg = (struct nsp_conn_init_msg *)skb->data;
- struct sockaddr_dn dstaddr;
- struct sockaddr_dn srcaddr;
- unsigned char type = 0;
- int dstlen;
- int srclen;
- unsigned char *ptr;
- int len;
- int err = 0;
- unsigned char menuver;
-
- memset(&dstaddr, 0, sizeof(struct sockaddr_dn));
- memset(&srcaddr, 0, sizeof(struct sockaddr_dn));
-
- /*
- * 1. Decode & remove message header
- */
- cb->src_port = msg->srcaddr;
- cb->dst_port = msg->dstaddr;
- cb->services = msg->services;
- cb->info = msg->info;
- cb->segsize = le16_to_cpu(msg->segsize);
-
- if (!pskb_may_pull(skb, sizeof(*msg)))
- goto err_out;
-
- skb_pull(skb, sizeof(*msg));
-
- len = skb->len;
- ptr = skb->data;
-
- /*
- * 2. Check destination end username format
- */
- dstlen = dn_username2sockaddr(ptr, len, &dstaddr, &type);
- err++;
- if (dstlen < 0)
- goto err_out;
-
- err++;
- if (type > 1)
- goto err_out;
-
- len -= dstlen;
- ptr += dstlen;
-
- /*
- * 3. Check source end username format
- */
- srclen = dn_username2sockaddr(ptr, len, &srcaddr, &type);
- err++;
- if (srclen < 0)
- goto err_out;
-
- len -= srclen;
- ptr += srclen;
- err++;
- if (len < 1)
- goto err_out;
-
- menuver = *ptr;
- ptr++;
- len--;
-
- /*
- * 4. Check that optional data actually exists if menuver says it does
- */
- err++;
- if ((menuver & (DN_MENUVER_ACC | DN_MENUVER_USR)) && (len < 1))
- goto err_out;
-
- /*
- * 5. Check optional access data format
- */
- err++;
- if (menuver & DN_MENUVER_ACC) {
- if (dn_check_idf(&ptr, &len, 39, 1))
- goto err_out;
- if (dn_check_idf(&ptr, &len, 39, 1))
- goto err_out;
- if (dn_check_idf(&ptr, &len, 39, (menuver & DN_MENUVER_USR) ? 1 : 0))
- goto err_out;
- }
-
- /*
- * 6. Check optional user data format
- */
- err++;
- if (menuver & DN_MENUVER_USR) {
- if (dn_check_idf(&ptr, &len, 16, 0))
- goto err_out;
- }
-
- /*
- * 7. Look up socket based on destination end username
- */
- return dn_sklist_find_listener(&dstaddr);
-err_out:
- dn_log_martian(skb, ci_err_table[err].text);
- *reason = ci_err_table[err].reason;
- return NULL;
-}
-
-
-static void dn_nsp_conn_init(struct sock *sk, struct sk_buff *skb)
-{
- if (sk_acceptq_is_full(sk)) {
- kfree_skb(skb);
- return;
- }
-
- sk->sk_ack_backlog++;
- skb_queue_tail(&sk->sk_receive_queue, skb);
- sk->sk_state_change(sk);
-}
-
-static void dn_nsp_conn_conf(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
- unsigned char *ptr;
-
- if (skb->len < 4)
- goto out;
-
- ptr = skb->data;
- cb->services = *ptr++;
- cb->info = *ptr++;
- cb->segsize = le16_to_cpu(*(__le16 *)ptr);
-
- if ((scp->state == DN_CI) || (scp->state == DN_CD)) {
- scp->persist = 0;
- scp->addrrem = cb->src_port;
- sk->sk_state = TCP_ESTABLISHED;
- scp->state = DN_RUN;
- scp->services_rem = cb->services;
- scp->info_rem = cb->info;
- scp->segsize_rem = cb->segsize;
-
- if ((scp->services_rem & NSP_FC_MASK) == NSP_FC_NONE)
- scp->max_window = decnet_no_fc_max_cwnd;
-
- if (skb->len > 0) {
- u16 dlen = *skb->data;
- if ((dlen <= 16) && (dlen <= skb->len)) {
- scp->conndata_in.opt_optl = cpu_to_le16(dlen);
- skb_copy_from_linear_data_offset(skb, 1,
- scp->conndata_in.opt_data, dlen);
- }
- }
- dn_nsp_send_link(sk, DN_NOCHANGE, 0);
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
-out:
- kfree_skb(skb);
-}
-
-static void dn_nsp_conn_ack(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI) {
- scp->state = DN_CD;
- scp->persist = 0;
- }
-
- kfree_skb(skb);
-}
-
-static void dn_nsp_disc_init(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned short reason;
-
- if (skb->len < 2)
- goto out;
-
- reason = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- scp->discdata_in.opt_status = cpu_to_le16(reason);
- scp->discdata_in.opt_optl = 0;
- memset(scp->discdata_in.opt_data, 0, 16);
-
- if (skb->len > 0) {
- u16 dlen = *skb->data;
- if ((dlen <= 16) && (dlen <= skb->len)) {
- scp->discdata_in.opt_optl = cpu_to_le16(dlen);
- skb_copy_from_linear_data_offset(skb, 1, scp->discdata_in.opt_data, dlen);
- }
- }
-
- scp->addrrem = cb->src_port;
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_CI:
- case DN_CD:
- scp->state = DN_RJ;
- sk->sk_err = ECONNREFUSED;
- break;
- case DN_RUN:
- sk->sk_shutdown |= SHUTDOWN_MASK;
- scp->state = DN_DN;
- break;
- case DN_DI:
- scp->state = DN_DIC;
- break;
- }
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- sk->sk_state_change(sk);
- }
-
- /*
- * It appears that its possible for remote machines to send disc
- * init messages with no port identifier if we are in the CI and
- * possibly also the CD state. Obviously we shouldn't reply with
- * a message if we don't know what the end point is.
- */
- if (scp->addrrem) {
- dn_nsp_send_disc(sk, NSP_DISCCONF, NSP_REASON_DC, GFP_ATOMIC);
- }
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
-
-out:
- kfree_skb(skb);
-}
-
-/*
- * disc_conf messages are also called no_resources or no_link
- * messages depending upon the "reason" field.
- */
-static void dn_nsp_disc_conf(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short reason;
-
- if (skb->len != 2)
- goto out;
-
- reason = le16_to_cpu(*(__le16 *)skb->data);
-
- sk->sk_state = TCP_CLOSE;
-
- switch (scp->state) {
- case DN_CI:
- scp->state = DN_NR;
- break;
- case DN_DR:
- if (reason == NSP_REASON_DC)
- scp->state = DN_DRC;
- if (reason == NSP_REASON_NL)
- scp->state = DN_CN;
- break;
- case DN_DI:
- scp->state = DN_DIC;
- break;
- case DN_RUN:
- sk->sk_shutdown |= SHUTDOWN_MASK;
- /* fall through */
- case DN_CC:
- scp->state = DN_CN;
- }
-
- if (!sock_flag(sk, SOCK_DEAD)) {
- if (sk->sk_socket->state != SS_UNCONNECTED)
- sk->sk_socket->state = SS_DISCONNECTING;
- sk->sk_state_change(sk);
- }
-
- scp->persist_fxn = dn_destroy_timer;
- scp->persist = dn_nsp_persist(sk);
-
-out:
- kfree_skb(skb);
-}
-
-static void dn_nsp_linkservice(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short segnum;
- unsigned char lsflags;
- signed char fcval;
- int wake_up = 0;
- char *ptr = skb->data;
- unsigned char fctype = scp->services_rem & NSP_FC_MASK;
-
- if (skb->len != 4)
- goto out;
-
- segnum = le16_to_cpu(*(__le16 *)ptr);
- ptr += 2;
- lsflags = *(unsigned char *)ptr++;
- fcval = *ptr;
-
- /*
- * Here we ignore erronous packets which should really
- * should cause a connection abort. It is not critical
- * for now though.
- */
- if (lsflags & 0xf8)
- goto out;
-
- if (seq_next(scp->numoth_rcv, segnum)) {
- seq_add(&scp->numoth_rcv, 1);
- switch(lsflags & 0x04) { /* FCVAL INT */
- case 0x00: /* Normal Request */
- switch(lsflags & 0x03) { /* FCVAL MOD */
- case 0x00: /* Request count */
- if (fcval < 0) {
- unsigned char p_fcval = -fcval;
- if ((scp->flowrem_dat > p_fcval) &&
- (fctype == NSP_FC_SCMC)) {
- scp->flowrem_dat -= p_fcval;
- }
- } else if (fcval > 0) {
- scp->flowrem_dat += fcval;
- wake_up = 1;
- }
- break;
- case 0x01: /* Stop outgoing data */
- scp->flowrem_sw = DN_DONTSEND;
- break;
- case 0x02: /* Ok to start again */
- scp->flowrem_sw = DN_SEND;
- dn_nsp_output(sk);
- wake_up = 1;
- }
- break;
- case 0x04: /* Interrupt Request */
- if (fcval > 0) {
- scp->flowrem_oth += fcval;
- wake_up = 1;
- }
- break;
- }
- if (wake_up && !sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
- dn_nsp_send_oth_ack(sk);
-
-out:
- kfree_skb(skb);
-}
-
-/*
- * Copy of sock_queue_rcv_skb (from sock.h) without
- * bh_lock_sock() (its already held when this is called) which
- * also allows data and other data to be queued to a socket.
- */
-static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig, struct sk_buff_head *queue)
-{
- int err;
-
- /* Cast skb->rcvbuf to unsigned... It's pointless, but reduces
- number of warnings when compiling with -W --ANK
- */
- if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
- (unsigned int)sk->sk_rcvbuf) {
- err = -ENOMEM;
- goto out;
- }
-
- err = sk_filter(sk, skb);
- if (err)
- goto out;
-
- skb_set_owner_r(skb, sk);
- skb_queue_tail(queue, skb);
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
-out:
- return err;
-}
-
-static void dn_nsp_otherdata(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short segnum;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int queued = 0;
-
- if (skb->len < 2)
- goto out;
-
- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- if (seq_next(scp->numoth_rcv, segnum)) {
-
- if (dn_queue_skb(sk, skb, SIGURG, &scp->other_receive_queue) == 0) {
- seq_add(&scp->numoth_rcv, 1);
- scp->other_report = 0;
- queued = 1;
- }
- }
-
- dn_nsp_send_oth_ack(sk);
-out:
- if (!queued)
- kfree_skb(skb);
-}
-
-static void dn_nsp_data(struct sock *sk, struct sk_buff *skb)
-{
- int queued = 0;
- unsigned short segnum;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
-
- if (skb->len < 2)
- goto out;
-
- cb->segnum = segnum = le16_to_cpu(*(__le16 *)skb->data);
- skb_pull(skb, 2);
-
- if (seq_next(scp->numdat_rcv, segnum)) {
- if (dn_queue_skb(sk, skb, SIGIO, &sk->sk_receive_queue) == 0) {
- seq_add(&scp->numdat_rcv, 1);
- queued = 1;
- }
-
- if ((scp->flowloc_sw == DN_SEND) && dn_congested(sk)) {
- scp->flowloc_sw = DN_DONTSEND;
- dn_nsp_send_link(sk, DN_DONTSEND, 0);
- }
- }
-
- dn_nsp_send_data_ack(sk);
-out:
- if (!queued)
- kfree_skb(skb);
-}
-
-/*
- * If one of our conninit messages is returned, this function
- * deals with it. It puts the socket into the NO_COMMUNICATION
- * state.
- */
-static void dn_returned_conn_init(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI) {
- scp->state = DN_NC;
- sk->sk_state = TCP_CLOSE;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
- }
-
- kfree_skb(skb);
-}
-
-static int dn_nsp_no_socket(struct sk_buff *skb, unsigned short reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int ret = NET_RX_DROP;
-
- /* Must not reply to returned packets */
- if (cb->rt_flags & DN_RT_F_RTS)
- goto out;
-
- if ((reason != NSP_REASON_OK) && ((cb->nsp_flags & 0x0c) == 0x08)) {
- switch (cb->nsp_flags & 0x70) {
- case 0x10:
- case 0x60: /* (Retransmitted) Connect Init */
- dn_nsp_return_disc(skb, NSP_DISCINIT, reason);
- ret = NET_RX_SUCCESS;
- break;
- case 0x20: /* Connect Confirm */
- dn_nsp_return_disc(skb, NSP_DISCCONF, reason);
- ret = NET_RX_SUCCESS;
- break;
- }
- }
-
-out:
- kfree_skb(skb);
- return ret;
-}
-
-static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
- struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sock *sk = NULL;
- unsigned char *ptr = (unsigned char *)skb->data;
- unsigned short reason = NSP_REASON_NL;
-
- if (!pskb_may_pull(skb, 2))
- goto free_out;
-
- skb_reset_transport_header(skb);
- cb->nsp_flags = *ptr++;
-
- if (decnet_debug_level & 2)
- printk(KERN_DEBUG "dn_nsp_rx: Message type 0x%02x\n", (int)cb->nsp_flags);
-
- if (cb->nsp_flags & 0x83)
- goto free_out;
-
- /*
- * Filter out conninits and useless packet types
- */
- if ((cb->nsp_flags & 0x0c) == 0x08) {
- switch (cb->nsp_flags & 0x70) {
- case 0x00: /* NOP */
- case 0x70: /* Reserved */
- case 0x50: /* Reserved, Phase II node init */
- goto free_out;
- case 0x10:
- case 0x60:
- if (unlikely(cb->rt_flags & DN_RT_F_RTS))
- goto free_out;
- sk = dn_find_listener(skb, &reason);
- goto got_it;
- }
- }
-
- if (!pskb_may_pull(skb, 3))
- goto free_out;
-
- /*
- * Grab the destination address.
- */
- cb->dst_port = *(__le16 *)ptr;
- cb->src_port = 0;
- ptr += 2;
-
- /*
- * If not a connack, grab the source address too.
- */
- if (pskb_may_pull(skb, 5)) {
- cb->src_port = *(__le16 *)ptr;
- ptr += 2;
- skb_pull(skb, 5);
- }
-
- /*
- * Returned packets...
- * Swap src & dst and look up in the normal way.
- */
- if (unlikely(cb->rt_flags & DN_RT_F_RTS)) {
- swap(cb->dst_port, cb->src_port);
- swap(cb->dst, cb->src);
- }
-
- /*
- * Find the socket to which this skb is destined.
- */
- sk = dn_find_by_skb(skb);
-got_it:
- if (sk != NULL) {
- struct dn_scp *scp = DN_SK(sk);
-
- /* Reset backoff */
- scp->nsp_rxtshift = 0;
-
- /*
- * We linearize everything except data segments here.
- */
- if (cb->nsp_flags & ~0x60) {
- if (unlikely(skb_linearize(skb)))
- goto free_out;
- }
-
- return sk_receive_skb(sk, skb, 0);
- }
-
- return dn_nsp_no_socket(skb, reason);
-
-free_out:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-int dn_nsp_rx(struct sk_buff *skb)
-{
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_nsp_rx_packet);
-}
-
-/*
- * This is the main receive routine for sockets. It is called
- * from the above when the socket is not busy, and also from
- * sock_release() when there is a backlog queued up.
- */
-int dn_nsp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- if (cb->rt_flags & DN_RT_F_RTS) {
- if (cb->nsp_flags == 0x18 || cb->nsp_flags == 0x68)
- dn_returned_conn_init(sk, skb);
- else
- kfree_skb(skb);
- return NET_RX_SUCCESS;
- }
-
- /*
- * Control packet.
- */
- if ((cb->nsp_flags & 0x0c) == 0x08) {
- switch (cb->nsp_flags & 0x70) {
- case 0x10:
- case 0x60:
- dn_nsp_conn_init(sk, skb);
- break;
- case 0x20:
- dn_nsp_conn_conf(sk, skb);
- break;
- case 0x30:
- dn_nsp_disc_init(sk, skb);
- break;
- case 0x40:
- dn_nsp_disc_conf(sk, skb);
- break;
- }
-
- } else if (cb->nsp_flags == 0x24) {
- /*
- * Special for connacks, 'cos they don't have
- * ack data or ack otherdata info.
- */
- dn_nsp_conn_ack(sk, skb);
- } else {
- int other = 1;
-
- /* both data and ack frames can kick a CC socket into RUN */
- if ((scp->state == DN_CC) && !sock_flag(sk, SOCK_DEAD)) {
- scp->state = DN_RUN;
- sk->sk_state = TCP_ESTABLISHED;
- sk->sk_state_change(sk);
- }
-
- if ((cb->nsp_flags & 0x1c) == 0)
- other = 0;
- if (cb->nsp_flags == 0x04)
- other = 0;
-
- /*
- * Read out ack data here, this applies equally
- * to data, other data, link serivce and both
- * ack data and ack otherdata.
- */
- dn_process_ack(sk, skb, other);
-
- /*
- * If we've some sort of data here then call a
- * suitable routine for dealing with it, otherwise
- * the packet is an ack and can be discarded.
- */
- if ((cb->nsp_flags & 0x0c) == 0) {
-
- if (scp->state != DN_RUN)
- goto free_out;
-
- switch (cb->nsp_flags) {
- case 0x10: /* LS */
- dn_nsp_linkservice(sk, skb);
- break;
- case 0x30: /* OD */
- dn_nsp_otherdata(sk, skb);
- break;
- default:
- dn_nsp_data(sk, skb);
- }
-
- } else { /* Ack, chuck it out here */
-free_out:
- kfree_skb(skb);
- }
- }
-
- return NET_RX_SUCCESS;
-}
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
deleted file mode 100644
index a1779de6bd9c..000000000000
--- a/net/decnet/dn_nsp_out.c
+++ /dev/null
@@ -1,703 +0,0 @@
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Network Services Protocol (Output)
- *
- * Author: Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- *
- * Steve Whitehouse: Split into dn_nsp_in.c and dn_nsp_out.c from
- * original dn_nsp.c.
- * Steve Whitehouse: Updated to work with my new routing architecture.
- * Steve Whitehouse: Added changes from Eduardo Serrat's patches.
- * Steve Whitehouse: Now conninits have the "return" bit set.
- * Steve Whitehouse: Fixes to check alloc'd skbs are non NULL!
- * Moved output state machine into one function
- * Steve Whitehouse: New output state machine
- * Paul Koning: Connect Confirm message fix.
- * Eduardo Serrat: Fix to stop dn_nsp_do_disc() sending malformed packets.
- * Steve Whitehouse: dn_nsp_output() and friends needed a spring clean
- * Steve Whitehouse: Moved dn_nsp_send() in here from route.h
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/timer.h>
-#include <linux/string.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <linux/fcntl.h>
-#include <linux/mm.h>
-#include <linux/termios.h>
-#include <linux/interrupt.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
-#include <linux/init.h>
-#include <linux/poll.h>
-#include <linux/if_packet.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_nsp.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-
-static int nsp_backoff[NSP_MAXRXTSHIFT + 1] = { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
-
-static void dn_nsp_send(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
- struct dn_scp *scp = DN_SK(sk);
- struct dst_entry *dst;
- struct flowidn fld;
-
- skb_reset_transport_header(skb);
- scp->stamp = jiffies;
-
- dst = sk_dst_check(sk, 0);
- if (dst) {
-try_again:
- skb_dst_set(skb, dst);
- dst_output(&init_net, skb->sk, skb);
- return;
- }
-
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_oif = sk->sk_bound_dev_if;
- fld.saddr = dn_saddr2dn(&scp->addr);
- fld.daddr = dn_saddr2dn(&scp->peer);
- dn_sk_ports_copy(&fld, scp);
- fld.flowidn_proto = DNPROTO_NSP;
- if (dn_route_output_sock(&sk->sk_dst_cache, &fld, sk, 0) == 0) {
- dst = sk_dst_get(sk);
- sk->sk_route_caps = dst->dev->features;
- goto try_again;
- }
-
- sk->sk_err = EHOSTUNREACH;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_state_change(sk);
-}
-
-
-/*
- * If sk == NULL, then we assume that we are supposed to be making
- * a routing layer skb. If sk != NULL, then we are supposed to be
- * creating an skb for the NSP layer.
- *
- * The eventual aim is for each socket to have a cached header size
- * for its outgoing packets, and to set hdr from this when sk != NULL.
- */
-struct sk_buff *dn_alloc_skb(struct sock *sk, int size, gfp_t pri)
-{
- struct sk_buff *skb;
- int hdr = 64;
-
- if ((skb = alloc_skb(size + hdr, pri)) == NULL)
- return NULL;
-
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->pkt_type = PACKET_OUTGOING;
-
- if (sk)
- skb_set_owner_w(skb, sk);
-
- skb_reserve(skb, hdr);
-
- return skb;
-}
-
-/*
- * Calculate persist timer based upon the smoothed round
- * trip time and the variance. Backoff according to the
- * nsp_backoff[] array.
- */
-unsigned long dn_nsp_persist(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
-
- t *= nsp_backoff[scp->nsp_rxtshift];
-
- if (t < HZ) t = HZ;
- if (t > (600*HZ)) t = (600*HZ);
-
- if (scp->nsp_rxtshift < NSP_MAXRXTSHIFT)
- scp->nsp_rxtshift++;
-
- /* printk(KERN_DEBUG "rxtshift %lu, t=%lu\n", scp->nsp_rxtshift, t); */
-
- return t;
-}
-
-/*
- * This is called each time we get an estimate for the rtt
- * on the link.
- */
-static void dn_nsp_rtt(struct sock *sk, long rtt)
-{
- struct dn_scp *scp = DN_SK(sk);
- long srtt = (long)scp->nsp_srtt;
- long rttvar = (long)scp->nsp_rttvar;
- long delta;
-
- /*
- * If the jiffies clock flips over in the middle of timestamp
- * gathering this value might turn out negative, so we make sure
- * that is it always positive here.
- */
- if (rtt < 0)
- rtt = -rtt;
- /*
- * Add new rtt to smoothed average
- */
- delta = ((rtt << 3) - srtt);
- srtt += (delta >> 3);
- if (srtt >= 1)
- scp->nsp_srtt = (unsigned long)srtt;
- else
- scp->nsp_srtt = 1;
-
- /*
- * Add new rtt varience to smoothed varience
- */
- delta >>= 1;
- rttvar += ((((delta>0)?(delta):(-delta)) - rttvar) >> 2);
- if (rttvar >= 1)
- scp->nsp_rttvar = (unsigned long)rttvar;
- else
- scp->nsp_rttvar = 1;
-
- /* printk(KERN_DEBUG "srtt=%lu rttvar=%lu\n", scp->nsp_srtt, scp->nsp_rttvar); */
-}
-
-/**
- * dn_nsp_clone_and_send - Send a data packet by cloning it
- * @skb: The packet to clone and transmit
- * @gfp: memory allocation flag
- *
- * Clone a queued data or other data packet and transmit it.
- *
- * Returns: The number of times the packet has been sent previously
- */
-static inline unsigned int dn_nsp_clone_and_send(struct sk_buff *skb,
- gfp_t gfp)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct sk_buff *skb2;
- int ret = 0;
-
- if ((skb2 = skb_clone(skb, gfp)) != NULL) {
- ret = cb->xmit_count;
- cb->xmit_count++;
- cb->stamp = jiffies;
- skb2->sk = skb->sk;
- dn_nsp_send(skb2);
- }
-
- return ret;
-}
-
-/**
- * dn_nsp_output - Try and send something from socket queues
- * @sk: The socket whose queues are to be investigated
- *
- * Try and send the packet on the end of the data and other data queues.
- * Other data gets priority over data, and if we retransmit a packet we
- * reduce the window by dividing it in two.
- *
- */
-void dn_nsp_output(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb;
- unsigned int reduce_win = 0;
-
- /*
- * First we check for otherdata/linkservice messages
- */
- if ((skb = skb_peek(&scp->other_xmit_queue)) != NULL)
- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
-
- /*
- * If we may not send any data, we don't.
- * If we are still trying to get some other data down the
- * channel, we don't try and send any data.
- */
- if (reduce_win || (scp->flowrem_sw != DN_SEND))
- goto recalc_window;
-
- if ((skb = skb_peek(&scp->data_xmit_queue)) != NULL)
- reduce_win = dn_nsp_clone_and_send(skb, GFP_ATOMIC);
-
- /*
- * If we've sent any frame more than once, we cut the
- * send window size in half. There is always a minimum
- * window size of one available.
- */
-recalc_window:
- if (reduce_win) {
- scp->snd_window >>= 1;
- if (scp->snd_window < NSP_MIN_WINDOW)
- scp->snd_window = NSP_MIN_WINDOW;
- }
-}
-
-int dn_nsp_xmit_timeout(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- dn_nsp_output(sk);
-
- if (!skb_queue_empty(&scp->data_xmit_queue) ||
- !skb_queue_empty(&scp->other_xmit_queue))
- scp->persist = dn_nsp_persist(sk);
-
- return 0;
-}
-
-static inline __le16 *dn_mk_common_header(struct dn_scp *scp, struct sk_buff *skb, unsigned char msgflag, int len)
-{
- unsigned char *ptr = skb_push(skb, len);
-
- BUG_ON(len < 5);
-
- *ptr++ = msgflag;
- *((__le16 *)ptr) = scp->addrrem;
- ptr += 2;
- *((__le16 *)ptr) = scp->addrloc;
- ptr += 2;
- return (__le16 __force *)ptr;
-}
-
-static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned char msgflag, int hlen, int other)
-{
- struct dn_scp *scp = DN_SK(sk);
- unsigned short acknum = scp->numdat_rcv & 0x0FFF;
- unsigned short ackcrs = scp->numoth_rcv & 0x0FFF;
- __le16 *ptr;
-
- BUG_ON(hlen < 9);
-
- scp->ackxmt_dat = acknum;
- scp->ackxmt_oth = ackcrs;
- acknum |= 0x8000;
- ackcrs |= 0x8000;
-
- /* If this is an "other data/ack" message, swap acknum and ackcrs */
- if (other)
- swap(acknum, ackcrs);
-
- /* Set "cross subchannel" bit in ackcrs */
- ackcrs |= 0x2000;
-
- ptr = dn_mk_common_header(scp, skb, msgflag, hlen);
-
- *ptr++ = cpu_to_le16(acknum);
- *ptr++ = cpu_to_le16(ackcrs);
-
- return ptr;
-}
-
-static __le16 *dn_nsp_mk_data_header(struct sock *sk, struct sk_buff *skb, int oth)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- __le16 *ptr = dn_mk_ack_header(sk, skb, cb->nsp_flags, 11, oth);
-
- if (unlikely(oth)) {
- cb->segnum = scp->numoth;
- seq_add(&scp->numoth, 1);
- } else {
- cb->segnum = scp->numdat;
- seq_add(&scp->numdat, 1);
- }
- *(ptr++) = cpu_to_le16(cb->segnum);
-
- return ptr;
-}
-
-void dn_nsp_queue_xmit(struct sock *sk, struct sk_buff *skb,
- gfp_t gfp, int oth)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned long t = ((scp->nsp_srtt >> 2) + scp->nsp_rttvar) >> 1;
-
- cb->xmit_count = 0;
- dn_nsp_mk_data_header(sk, skb, oth);
-
- /*
- * Slow start: If we have been idle for more than
- * one RTT, then reset window to min size.
- */
- if ((jiffies - scp->stamp) > t)
- scp->snd_window = NSP_MIN_WINDOW;
-
- if (oth)
- skb_queue_tail(&scp->other_xmit_queue, skb);
- else
- skb_queue_tail(&scp->data_xmit_queue, skb);
-
- if (scp->flowrem_sw != DN_SEND)
- return;
-
- dn_nsp_clone_and_send(skb, gfp);
-}
-
-
-int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff_head *q, unsigned short acknum)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb2, *n, *ack = NULL;
- int wakeup = 0;
- int try_retrans = 0;
- unsigned long reftime = cb->stamp;
- unsigned long pkttime;
- unsigned short xmit_count;
- unsigned short segnum;
-
- skb_queue_walk_safe(q, skb2, n) {
- struct dn_skb_cb *cb2 = DN_SKB_CB(skb2);
-
- if (dn_before_or_equal(cb2->segnum, acknum))
- ack = skb2;
-
- /* printk(KERN_DEBUG "ack: %s %04x %04x\n", ack ? "ACK" : "SKIP", (int)cb2->segnum, (int)acknum); */
-
- if (ack == NULL)
- continue;
-
- /* printk(KERN_DEBUG "check_xmit_queue: %04x, %d\n", acknum, cb2->xmit_count); */
-
- /* Does _last_ packet acked have xmit_count > 1 */
- try_retrans = 0;
- /* Remember to wake up the sending process */
- wakeup = 1;
- /* Keep various statistics */
- pkttime = cb2->stamp;
- xmit_count = cb2->xmit_count;
- segnum = cb2->segnum;
- /* Remove and drop ack'ed packet */
- skb_unlink(ack, q);
- kfree_skb(ack);
- ack = NULL;
-
- /*
- * We don't expect to see acknowledgements for packets we
- * haven't sent yet.
- */
- WARN_ON(xmit_count == 0);
-
- /*
- * If the packet has only been sent once, we can use it
- * to calculate the RTT and also open the window a little
- * further.
- */
- if (xmit_count == 1) {
- if (dn_equal(segnum, acknum))
- dn_nsp_rtt(sk, (long)(pkttime - reftime));
-
- if (scp->snd_window < scp->max_window)
- scp->snd_window++;
- }
-
- /*
- * Packet has been sent more than once. If this is the last
- * packet to be acknowledged then we want to send the next
- * packet in the send queue again (assumes the remote host does
- * go-back-N error control).
- */
- if (xmit_count > 1)
- try_retrans = 1;
- }
-
- if (try_retrans)
- dn_nsp_output(sk);
-
- return wakeup;
-}
-
-void dn_nsp_send_data_ack(struct sock *sk)
-{
- struct sk_buff *skb = NULL;
-
- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
- return;
-
- skb_reserve(skb, 9);
- dn_mk_ack_header(sk, skb, 0x04, 9, 0);
- dn_nsp_send(skb);
-}
-
-void dn_nsp_send_oth_ack(struct sock *sk)
-{
- struct sk_buff *skb = NULL;
-
- if ((skb = dn_alloc_skb(sk, 9, GFP_ATOMIC)) == NULL)
- return;
-
- skb_reserve(skb, 9);
- dn_mk_ack_header(sk, skb, 0x14, 9, 1);
- dn_nsp_send(skb);
-}
-
-
-void dn_send_conn_ack (struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb = NULL;
- struct nsp_conn_ack_msg *msg;
-
- if ((skb = dn_alloc_skb(sk, 3, sk->sk_allocation)) == NULL)
- return;
-
- msg = skb_put(skb, 3);
- msg->msgflg = 0x24;
- msg->dstaddr = scp->addrrem;
-
- dn_nsp_send(skb);
-}
-
-static int dn_nsp_retrans_conn_conf(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CC)
- dn_send_conn_conf(sk, GFP_ATOMIC);
-
- return 0;
-}
-
-void dn_send_conn_conf(struct sock *sk, gfp_t gfp)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb = NULL;
- struct nsp_conn_init_msg *msg;
- __u8 len = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
-
- if ((skb = dn_alloc_skb(sk, 50 + len, gfp)) == NULL)
- return;
-
- msg = skb_put(skb, sizeof(*msg));
- msg->msgflg = 0x28;
- msg->dstaddr = scp->addrrem;
- msg->srcaddr = scp->addrloc;
- msg->services = scp->services_loc;
- msg->info = scp->info_loc;
- msg->segsize = cpu_to_le16(scp->segsize_loc);
-
- skb_put_u8(skb, len);
-
- if (len > 0)
- skb_put_data(skb, scp->conndata_out.opt_data, len);
-
-
- dn_nsp_send(skb);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_retrans_conn_conf;
-}
-
-
-static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
- unsigned short reason, gfp_t gfp,
- struct dst_entry *dst,
- int ddl, unsigned char *dd, __le16 rem, __le16 loc)
-{
- struct sk_buff *skb = NULL;
- int size = 7 + ddl + ((msgflg == NSP_DISCINIT) ? 1 : 0);
- unsigned char *msg;
-
- if ((dst == NULL) || (rem == 0)) {
- net_dbg_ratelimited("DECnet: dn_nsp_do_disc: BUG! Please report this to SteveW@ACM.org rem=%u dst=%p\n",
- le16_to_cpu(rem), dst);
- return;
- }
-
- if ((skb = dn_alloc_skb(sk, size, gfp)) == NULL)
- return;
-
- msg = skb_put(skb, size);
- *msg++ = msgflg;
- *(__le16 *)msg = rem;
- msg += 2;
- *(__le16 *)msg = loc;
- msg += 2;
- *(__le16 *)msg = cpu_to_le16(reason);
- msg += 2;
- if (msgflg == NSP_DISCINIT)
- *msg++ = ddl;
-
- if (ddl) {
- memcpy(msg, dd, ddl);
- }
-
- /*
- * This doesn't go via the dn_nsp_send() function since we need
- * to be able to send disc packets out which have no socket
- * associations.
- */
- skb_dst_set(skb, dst_clone(dst));
- dst_output(&init_net, skb->sk, skb);
-}
-
-
-void dn_nsp_send_disc(struct sock *sk, unsigned char msgflg,
- unsigned short reason, gfp_t gfp)
-{
- struct dn_scp *scp = DN_SK(sk);
- int ddl = 0;
-
- if (msgflg == NSP_DISCINIT)
- ddl = le16_to_cpu(scp->discdata_out.opt_optl);
-
- if (reason == 0)
- reason = le16_to_cpu(scp->discdata_out.opt_status);
-
- dn_nsp_do_disc(sk, msgflg, reason, gfp, __sk_dst_get(sk), ddl,
- scp->discdata_out.opt_data, scp->addrrem, scp->addrloc);
-}
-
-
-void dn_nsp_return_disc(struct sk_buff *skb, unsigned char msgflg,
- unsigned short reason)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- int ddl = 0;
- gfp_t gfp = GFP_ATOMIC;
-
- dn_nsp_do_disc(NULL, msgflg, reason, gfp, skb_dst(skb), ddl,
- NULL, cb->src_port, cb->dst_port);
-}
-
-
-void dn_nsp_send_link(struct sock *sk, unsigned char lsflags, char fcval)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct sk_buff *skb;
- unsigned char *ptr;
- gfp_t gfp = GFP_ATOMIC;
-
- if ((skb = dn_alloc_skb(sk, DN_MAX_NSP_DATA_HEADER + 2, gfp)) == NULL)
- return;
-
- skb_reserve(skb, DN_MAX_NSP_DATA_HEADER);
- ptr = skb_put(skb, 2);
- DN_SKB_CB(skb)->nsp_flags = 0x10;
- *ptr++ = lsflags;
- *ptr = fcval;
-
- dn_nsp_queue_xmit(sk, skb, gfp, 1);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_xmit_timeout;
-}
-
-static int dn_nsp_retrans_conninit(struct sock *sk)
-{
- struct dn_scp *scp = DN_SK(sk);
-
- if (scp->state == DN_CI)
- dn_nsp_send_conninit(sk, NSP_RCI);
-
- return 0;
-}
-
-void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
-{
- struct dn_scp *scp = DN_SK(sk);
- struct nsp_conn_init_msg *msg;
- unsigned char aux;
- unsigned char menuver;
- struct dn_skb_cb *cb;
- unsigned char type = 1;
- gfp_t allocation = (msgflg == NSP_CI) ? sk->sk_allocation : GFP_ATOMIC;
- struct sk_buff *skb = dn_alloc_skb(sk, 200, allocation);
-
- if (!skb)
- return;
-
- cb = DN_SKB_CB(skb);
- msg = skb_put(skb, sizeof(*msg));
-
- msg->msgflg = msgflg;
- msg->dstaddr = 0x0000; /* Remote Node will assign it*/
-
- msg->srcaddr = scp->addrloc;
- msg->services = scp->services_loc; /* Requested flow control */
- msg->info = scp->info_loc; /* Version Number */
- msg->segsize = cpu_to_le16(scp->segsize_loc); /* Max segment size */
-
- if (scp->peer.sdn_objnum)
- type = 0;
-
- skb_put(skb, dn_sockaddr2username(&scp->peer,
- skb_tail_pointer(skb), type));
- skb_put(skb, dn_sockaddr2username(&scp->addr,
- skb_tail_pointer(skb), 2));
-
- menuver = DN_MENUVER_ACC | DN_MENUVER_USR;
- if (scp->peer.sdn_flags & SDF_PROXY)
- menuver |= DN_MENUVER_PRX;
- if (scp->peer.sdn_flags & SDF_UICPROXY)
- menuver |= DN_MENUVER_UIC;
-
- skb_put_u8(skb, menuver); /* Menu Version */
-
- aux = scp->accessdata.acc_userl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_user, aux);
-
- aux = scp->accessdata.acc_passl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_pass, aux);
-
- aux = scp->accessdata.acc_accl;
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->accessdata.acc_acc, aux);
-
- aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
- skb_put_u8(skb, aux);
- if (aux > 0)
- skb_put_data(skb, scp->conndata_out.opt_data, aux);
-
- scp->persist = dn_nsp_persist(sk);
- scp->persist_fxn = dn_nsp_retrans_conninit;
-
- cb->rt_flags = DN_RT_F_RQR;
-
- dn_nsp_send(skb);
-}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
deleted file mode 100644
index 658191fba94e..000000000000
--- a/net/decnet/dn_route.c
+++ /dev/null
@@ -1,1929 +0,0 @@
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Functions (Endnode and Router)
- *
- * Authors: Steve Whitehouse <SteveW@ACM.org>
- * Eduardo Marcelo Serrat <emserrat@geocities.com>
- *
- * Changes:
- * Steve Whitehouse : Fixes to allow "intra-ethernet" and
- * "return-to-sender" bits on outgoing
- * packets.
- * Steve Whitehouse : Timeouts for cached routes.
- * Steve Whitehouse : Use dst cache for input routes too.
- * Steve Whitehouse : Fixed error values in dn_send_skb.
- * Steve Whitehouse : Rework routing functions to better fit
- * DECnet routing design
- * Alexey Kuznetsov : New SMP locking
- * Steve Whitehouse : More SMP locking changes & dn_cache_dump()
- * Steve Whitehouse : Prerouting NF hook, now really is prerouting.
- * Fixed possible skb leak in rtnetlink funcs.
- * Steve Whitehouse : Dave Miller's dynamic hash table sizing and
- * Alexey Kuznetsov's finer grained locking
- * from ipv4/route.c.
- * Steve Whitehouse : Routing is now starting to look like a
- * sensible set of code now, mainly due to
- * my copying the IPv4 routing code. The
- * hooks here are modified and will continue
- * to evolve for a while.
- * Steve Whitehouse : Real SMP at last :-) Also new netfilter
- * stuff. Look out raw sockets your days
- * are numbered!
- * Steve Whitehouse : Added return-to-sender functions. Added
- * backlog congestion level return codes.
- * Steve Whitehouse : Fixed bug where routes were set up with
- * no ref count on net devices.
- * Steve Whitehouse : RCU for the route cache
- * Steve Whitehouse : Preparations for the flow cache
- * Steve Whitehouse : Prepare for nonlinear skbs
- */
-
-/******************************************************************************
- (c) 1995-1998 E.M. Serrat emserrat@geocities.com
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-*******************************************************************************/
-
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/sockios.h>
-#include <linux/net.h>
-#include <linux/netdevice.h>
-#include <linux/inet.h>
-#include <linux/route.h>
-#include <linux/in_route.h>
-#include <linux/slab.h>
-#include <net/sock.h>
-#include <linux/mm.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/rtnetlink.h>
-#include <linux/string.h>
-#include <linux/netfilter_decnet.h>
-#include <linux/rcupdate.h>
-#include <linux/times.h>
-#include <linux/export.h>
-#include <asm/errno.h>
-#include <net/net_namespace.h>
-#include <net/netlink.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_nsp.h>
-#include <net/dn_route.h>
-#include <net/dn_neigh.h>
-#include <net/dn_fib.h>
-
-struct dn_rt_hash_bucket
-{
- struct dn_route __rcu *chain;
- spinlock_t lock;
-};
-
-extern struct neigh_table dn_neigh_table;
-
-
-static unsigned char dn_hiord_addr[6] = {0xAA,0x00,0x04,0x00,0x00,0x00};
-
-static const int dn_rt_min_delay = 2 * HZ;
-static const int dn_rt_max_delay = 10 * HZ;
-static const int dn_rt_mtu_expires = 10 * 60 * HZ;
-
-static unsigned long dn_rt_deadline;
-
-static int dn_dst_gc(struct dst_ops *ops);
-static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst);
-static unsigned int dn_dst_mtu(const struct dst_entry *dst);
-static void dn_dst_destroy(struct dst_entry *);
-static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
-static void dn_dst_link_failure(struct sk_buff *);
-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb , u32 mtu,
- bool confirm_neigh);
-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb);
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr);
-static int dn_route_input(struct sk_buff *);
-static void dn_run_flush(struct timer_list *unused);
-
-static struct dn_rt_hash_bucket *dn_rt_hash_table;
-static unsigned int dn_rt_hash_mask;
-
-static struct timer_list dn_route_timer;
-static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush);
-int decnet_dst_gc_interval = 2;
-
-static struct dst_ops dn_dst_ops = {
- .family = PF_DECnet,
- .gc_thresh = 128,
- .gc = dn_dst_gc,
- .check = dn_dst_check,
- .default_advmss = dn_dst_default_advmss,
- .mtu = dn_dst_mtu,
- .cow_metrics = dst_cow_metrics_generic,
- .destroy = dn_dst_destroy,
- .ifdown = dn_dst_ifdown,
- .negative_advice = dn_dst_negative_advice,
- .link_failure = dn_dst_link_failure,
- .update_pmtu = dn_dst_update_pmtu,
- .redirect = dn_dst_redirect,
- .neigh_lookup = dn_dst_neigh_lookup,
-};
-
-static void dn_dst_destroy(struct dst_entry *dst)
-{
- struct dn_route *rt = (struct dn_route *) dst;
-
- if (rt->n)
- neigh_release(rt->n);
- dst_destroy_metrics_generic(dst);
-}
-
-static void dn_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how)
-{
- if (how) {
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *n = rt->n;
-
- if (n && n->dev == dev) {
- n->dev = dev_net(dev)->loopback_dev;
- dev_hold(n->dev);
- dev_put(dev);
- }
- }
-}
-
-static __inline__ unsigned int dn_hash(__le16 src, __le16 dst)
-{
- __u16 tmp = (__u16 __force)(src ^ dst);
- tmp ^= (tmp >> 3);
- tmp ^= (tmp >> 5);
- tmp ^= (tmp >> 10);
- return dn_rt_hash_mask & (unsigned int)tmp;
-}
-
-static void dn_dst_check_expire(struct timer_list *unused)
-{
- int i;
- struct dn_route *rt;
- struct dn_route __rcu **rtp;
- unsigned long now = jiffies;
- unsigned long expire = 120 * HZ;
-
- for (i = 0; i <= dn_rt_hash_mask; i++) {
- rtp = &dn_rt_hash_table[i].chain;
-
- spin_lock(&dn_rt_hash_table[i].lock);
- while ((rt = rcu_dereference_protected(*rtp,
- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
- if (atomic_read(&rt->dst.__refcnt) > 1 ||
- (now - rt->dst.lastuse) < expire) {
- rtp = &rt->dn_next;
- continue;
- }
- *rtp = rt->dn_next;
- rt->dn_next = NULL;
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
- spin_unlock(&dn_rt_hash_table[i].lock);
-
- if ((jiffies - now) > 0)
- break;
- }
-
- mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ);
-}
-
-static int dn_dst_gc(struct dst_ops *ops)
-{
- struct dn_route *rt;
- struct dn_route __rcu **rtp;
- int i;
- unsigned long now = jiffies;
- unsigned long expire = 10 * HZ;
-
- for (i = 0; i <= dn_rt_hash_mask; i++) {
-
- spin_lock_bh(&dn_rt_hash_table[i].lock);
- rtp = &dn_rt_hash_table[i].chain;
-
- while ((rt = rcu_dereference_protected(*rtp,
- lockdep_is_held(&dn_rt_hash_table[i].lock))) != NULL) {
- if (atomic_read(&rt->dst.__refcnt) > 1 ||
- (now - rt->dst.lastuse) < expire) {
- rtp = &rt->dn_next;
- continue;
- }
- *rtp = rt->dn_next;
- rt->dn_next = NULL;
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- break;
- }
- spin_unlock_bh(&dn_rt_hash_table[i].lock);
- }
-
- return 0;
-}
-
-/*
- * The decnet standards don't impose a particular minimum mtu, what they
- * do insist on is that the routing layer accepts a datagram of at least
- * 230 bytes long. Here we have to subtract the routing header length from
- * 230 to get the minimum acceptable mtu. If there is no neighbour, then we
- * assume the worst and use a long header size.
- *
- * We update both the mtu and the advertised mss (i.e. the segment size we
- * advertise to the other end).
- */
-static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu,
- bool confirm_neigh)
-{
- struct dn_route *rt = (struct dn_route *) dst;
- struct neighbour *n = rt->n;
- u32 min_mtu = 230;
- struct dn_dev *dn;
-
- dn = n ? rcu_dereference_raw(n->dev->dn_ptr) : NULL;
-
- if (dn && dn->use_long == 0)
- min_mtu -= 6;
- else
- min_mtu -= 21;
-
- if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
- if (!(dst_metric_locked(dst, RTAX_MTU))) {
- dst_metric_set(dst, RTAX_MTU, mtu);
- dst_set_expires(dst, dn_rt_mtu_expires);
- }
- if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
- u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
- u32 existing_mss = dst_metric_raw(dst, RTAX_ADVMSS);
- if (!existing_mss || existing_mss > mss)
- dst_metric_set(dst, RTAX_ADVMSS, mss);
- }
- }
-}
-
-static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb)
-{
-}
-
-/*
- * When a route has been marked obsolete. (e.g. routing cache flush)
- */
-static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie)
-{
- return NULL;
-}
-
-static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
-{
- dst_release(dst);
- return NULL;
-}
-
-static void dn_dst_link_failure(struct sk_buff *skb)
-{
-}
-
-static inline int compare_keys(struct flowidn *fl1, struct flowidn *fl2)
-{
- return ((fl1->daddr ^ fl2->daddr) |
- (fl1->saddr ^ fl2->saddr) |
- (fl1->flowidn_mark ^ fl2->flowidn_mark) |
- (fl1->flowidn_scope ^ fl2->flowidn_scope) |
- (fl1->flowidn_oif ^ fl2->flowidn_oif) |
- (fl1->flowidn_iif ^ fl2->flowidn_iif)) == 0;
-}
-
-static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_route **rp)
-{
- struct dn_route *rth;
- struct dn_route __rcu **rthp;
- unsigned long now = jiffies;
-
- rthp = &dn_rt_hash_table[hash].chain;
-
- spin_lock_bh(&dn_rt_hash_table[hash].lock);
- while ((rth = rcu_dereference_protected(*rthp,
- lockdep_is_held(&dn_rt_hash_table[hash].lock))) != NULL) {
- if (compare_keys(&rth->fld, &rt->fld)) {
- /* Put it first */
- *rthp = rth->dn_next;
- rcu_assign_pointer(rth->dn_next,
- dn_rt_hash_table[hash].chain);
- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth);
-
- dst_hold_and_use(&rth->dst, now);
- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
-
- dst_release_immediate(&rt->dst);
- *rp = rth;
- return 0;
- }
- rthp = &rth->dn_next;
- }
-
- rcu_assign_pointer(rt->dn_next, dn_rt_hash_table[hash].chain);
- rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt);
-
- dst_hold_and_use(&rt->dst, now);
- spin_unlock_bh(&dn_rt_hash_table[hash].lock);
- *rp = rt;
- return 0;
-}
-
-static void dn_run_flush(struct timer_list *unused)
-{
- int i;
- struct dn_route *rt, *next;
-
- for (i = 0; i < dn_rt_hash_mask; i++) {
- spin_lock_bh(&dn_rt_hash_table[i].lock);
-
- if ((rt = xchg((struct dn_route **)&dn_rt_hash_table[i].chain, NULL)) == NULL)
- goto nothing_to_declare;
-
- for(; rt; rt = next) {
- next = rcu_dereference_raw(rt->dn_next);
- RCU_INIT_POINTER(rt->dn_next, NULL);
- dst_dev_put(&rt->dst);
- dst_release(&rt->dst);
- }
-
-nothing_to_declare:
- spin_unlock_bh(&dn_rt_hash_table[i].lock);
- }
-}
-
-static DEFINE_SPINLOCK(dn_rt_flush_lock);
-
-void dn_rt_cache_flush(int delay)
-{
- unsigned long now = jiffies;
- int user_mode = !in_interrupt();
-
- if (delay < 0)
- delay = dn_rt_min_delay;
-
- spin_lock_bh(&dn_rt_flush_lock);
-
- if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) {
- long tmo = (long)(dn_rt_deadline - now);
-
- if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay)
- tmo = 0;
-
- if (delay > tmo)
- delay = tmo;
- }
-
- if (delay <= 0) {
- spin_unlock_bh(&dn_rt_flush_lock);
- dn_run_flush(NULL);
- return;
- }
-
- if (dn_rt_deadline == 0)
- dn_rt_deadline = now + dn_rt_max_delay;
-
- dn_rt_flush_timer.expires = now + delay;
- add_timer(&dn_rt_flush_timer);
- spin_unlock_bh(&dn_rt_flush_lock);
-}
-
-/**
- * dn_return_short - Return a short packet to its sender
- * @skb: The packet to return
- *
- */
-static int dn_return_short(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- unsigned char *ptr;
- __le16 *src;
- __le16 *dst;
-
- /* Add back headers */
- skb_push(skb, skb->data - skb_network_header(skb));
-
- if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
- return NET_RX_DROP;
-
- cb = DN_SKB_CB(skb);
- /* Skip packet length and point to flags */
- ptr = skb->data + 2;
- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
-
- dst = (__le16 *)ptr;
- ptr += 2;
- src = (__le16 *)ptr;
- ptr += 2;
- *ptr = 0; /* Zero hop count */
-
- swap(*src, *dst);
-
- skb->pkt_type = PACKET_OUTGOING;
- dn_rt_finish_output(skb, NULL, NULL);
- return NET_RX_SUCCESS;
-}
-
-/**
- * dn_return_long - Return a long packet to its sender
- * @skb: The long format packet to return
- *
- */
-static int dn_return_long(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- unsigned char *ptr;
- unsigned char *src_addr, *dst_addr;
- unsigned char tmp[ETH_ALEN];
-
- /* Add back all headers */
- skb_push(skb, skb->data - skb_network_header(skb));
-
- if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL)
- return NET_RX_DROP;
-
- cb = DN_SKB_CB(skb);
- /* Ignore packet length and point to flags */
- ptr = skb->data + 2;
-
- /* Skip padding */
- if (*ptr & DN_RT_F_PF) {
- char padlen = (*ptr & ~DN_RT_F_PF);
- ptr += padlen;
- }
-
- *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS;
- ptr += 2;
- dst_addr = ptr;
- ptr += 8;
- src_addr = ptr;
- ptr += 6;
- *ptr = 0; /* Zero hop count */
-
- /* Swap source and destination */
- memcpy(tmp, src_addr, ETH_ALEN);
- memcpy(src_addr, dst_addr, ETH_ALEN);
- memcpy(dst_addr, tmp, ETH_ALEN);
-
- skb->pkt_type = PACKET_OUTGOING;
- dn_rt_finish_output(skb, dst_addr, src_addr);
- return NET_RX_SUCCESS;
-}
-
-/**
- * dn_route_rx_packet - Try and find a route for an incoming packet
- * @skb: The packet to find a route for
- *
- * Returns: result of input function if route is found, error code otherwise
- */
-static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb;
- int err;
-
- if ((err = dn_route_input(skb)) == 0)
- return dst_input(skb);
-
- cb = DN_SKB_CB(skb);
- if (decnet_debug_level & 4) {
- char *devname = skb->dev ? skb->dev->name : "???";
-
- printk(KERN_DEBUG
- "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n",
- (int)cb->rt_flags, devname, skb->len,
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst),
- err, skb->pkt_type);
- }
-
- if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) {
- switch (cb->rt_flags & DN_RT_PKT_MSK) {
- case DN_RT_PKT_SHORT:
- return dn_return_short(skb);
- case DN_RT_PKT_LONG:
- return dn_return_long(skb);
- }
- }
-
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static int dn_route_rx_long(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned char *ptr = skb->data;
-
- if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */
- goto drop_it;
-
- skb_pull(skb, 20);
- skb_reset_transport_header(skb);
-
- /* Destination info */
- ptr += 2;
- cb->dst = dn_eth2dn(ptr);
- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
- goto drop_it;
- ptr += 6;
-
-
- /* Source info */
- ptr += 2;
- cb->src = dn_eth2dn(ptr);
- if (memcmp(ptr, dn_hiord_addr, 4) != 0)
- goto drop_it;
- ptr += 6;
- /* Other junk */
- ptr++;
- cb->hops = *ptr++; /* Visit Count */
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_rx_packet);
-
-drop_it:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-
-
-static int dn_route_rx_short(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned char *ptr = skb->data;
-
- if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */
- goto drop_it;
-
- skb_pull(skb, 5);
- skb_reset_transport_header(skb);
-
- cb->dst = *(__le16 *)ptr;
- ptr += 2;
- cb->src = *(__le16 *)ptr;
- ptr += 2;
- cb->hops = *ptr & 0x3f;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_rx_packet);
-
-drop_it:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- /*
- * I know we drop the packet here, but thats considered success in
- * this case
- */
- kfree_skb(skb);
- return NET_RX_SUCCESS;
-}
-
-static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- dn_dev_hello(skb);
- dn_neigh_pointopoint_hello(skb);
- return NET_RX_SUCCESS;
-}
-
-int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
-{
- struct dn_skb_cb *cb;
- unsigned char flags = 0;
- __u16 len = le16_to_cpu(*(__le16 *)skb->data);
- struct dn_dev *dn = rcu_dereference(dev->dn_ptr);
- unsigned char padlen = 0;
-
- if (!net_eq(dev_net(dev), &init_net))
- goto dump_it;
-
- if (dn == NULL)
- goto dump_it;
-
- if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
- goto out;
-
- if (!pskb_may_pull(skb, 3))
- goto dump_it;
-
- skb_pull(skb, 2);
-
- if (len > skb->len)
- goto dump_it;
-
- skb_trim(skb, len);
-
- flags = *skb->data;
-
- cb = DN_SKB_CB(skb);
- cb->stamp = jiffies;
- cb->iif = dev->ifindex;
-
- /*
- * If we have padding, remove it.
- */
- if (flags & DN_RT_F_PF) {
- padlen = flags & ~DN_RT_F_PF;
- if (!pskb_may_pull(skb, padlen + 1))
- goto dump_it;
- skb_pull(skb, padlen);
- flags = *skb->data;
- }
-
- skb_reset_network_header(skb);
-
- /*
- * Weed out future version DECnet
- */
- if (flags & DN_RT_F_VER)
- goto dump_it;
-
- cb->rt_flags = flags;
-
- if (decnet_debug_level & 1)
- printk(KERN_DEBUG
- "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n",
- (int)flags, (dev) ? dev->name : "???", len, skb->len,
- padlen);
-
- if (flags & DN_RT_PKT_CNTL) {
- if (unlikely(skb_linearize(skb)))
- goto dump_it;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_INIT:
- dn_dev_init_pkt(skb);
- break;
- case DN_RT_PKT_VERI:
- dn_dev_veri_pkt(skb);
- break;
- }
-
- if (dn->parms.state != DN_DEV_S_RU)
- goto dump_it;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_HELO:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_ptp_hello);
-
- case DN_RT_PKT_L1RT:
- case DN_RT_PKT_L2RT:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_route_discard);
- case DN_RT_PKT_ERTH:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_neigh_router_hello);
-
- case DN_RT_PKT_EEDH:
- return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- &init_net, NULL, skb, skb->dev, NULL,
- dn_neigh_endnode_hello);
- }
- } else {
- if (dn->parms.state != DN_DEV_S_RU)
- goto dump_it;
-
- skb_pull(skb, 1); /* Pull flags */
-
- switch (flags & DN_RT_PKT_MSK) {
- case DN_RT_PKT_LONG:
- return dn_route_rx_long(skb);
- case DN_RT_PKT_SHORT:
- return dn_route_rx_short(skb);
- }
- }
-
-dump_it:
- kfree_skb(skb);
-out:
- return NET_RX_DROP;
-}
-
-static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dst_entry *dst = skb_dst(skb);
- struct dn_route *rt = (struct dn_route *)dst;
- struct net_device *dev = dst->dev;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- int err = -EINVAL;
-
- if (rt->n == NULL)
- goto error;
-
- skb->dev = dev;
-
- cb->src = rt->rt_saddr;
- cb->dst = rt->rt_daddr;
-
- /*
- * Always set the Intra-Ethernet bit on all outgoing packets
- * originated on this node. Only valid flag from upper layers
- * is return-to-sender-requested. Set hop count to 0 too.
- */
- cb->rt_flags &= ~DN_RT_F_RQR;
- cb->rt_flags |= DN_RT_F_IE;
- cb->hops = 0;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
- &init_net, sk, skb, NULL, dev,
- dn_to_neigh_output);
-
-error:
- net_dbg_ratelimited("dn_output: This should not happen\n");
-
- kfree_skb(skb);
-
- return err;
-}
-
-static int dn_forward(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct dst_entry *dst = skb_dst(skb);
- struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
- struct dn_route *rt;
- int header_len;
- struct net_device *dev = skb->dev;
-
- if (skb->pkt_type != PACKET_HOST)
- goto drop;
-
- /* Ensure that we have enough space for headers */
- rt = (struct dn_route *)skb_dst(skb);
- header_len = dn_db->use_long ? 21 : 6;
- if (skb_cow(skb, LL_RESERVED_SPACE(rt->dst.dev)+header_len))
- goto drop;
-
- /*
- * Hop count exceeded.
- */
- if (++cb->hops > 30)
- goto drop;
-
- skb->dev = rt->dst.dev;
-
- /*
- * If packet goes out same interface it came in on, then set
- * the Intra-Ethernet bit. This has no effect for short
- * packets, so we don't need to test for them here.
- */
- cb->rt_flags &= ~DN_RT_F_IE;
- if (rt->rt_flags & RTCF_DOREDIRECT)
- cb->rt_flags |= DN_RT_F_IE;
-
- return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD,
- &init_net, NULL, skb, dev, skb->dev,
- dn_to_neigh_output);
-
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-/*
- * Used to catch bugs. This should never normally get
- * called.
- */
-static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
-
- kfree_skb(skb);
-
- return NET_RX_DROP;
-}
-
-static int dn_rt_bug(struct sk_buff *skb)
-{
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
-
- net_dbg_ratelimited("dn_rt_bug: skb from:%04x to:%04x\n",
- le16_to_cpu(cb->src), le16_to_cpu(cb->dst));
-
- kfree_skb(skb);
-
- return NET_RX_DROP;
-}
-
-static unsigned int dn_dst_default_advmss(const struct dst_entry *dst)
-{
- return dn_mss_from_pmtu(dst->dev, dst_mtu(dst));
-}
-
-static unsigned int dn_dst_mtu(const struct dst_entry *dst)
-{
- unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
-
- return mtu ? : dst->dev->mtu;
-}
-
-static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
- struct sk_buff *skb,
- const void *daddr)
-{
- return __neigh_lookup_errno(&dn_neigh_table, daddr, dst->dev);
-}
-
-static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
-{
- struct dn_fib_info *fi = res->fi;
- struct net_device *dev = rt->dst.dev;
- unsigned int mss_metric;
- struct neighbour *n;
-
- if (fi) {
- if (DN_FIB_RES_GW(*res) &&
- DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
- rt->rt_gateway = DN_FIB_RES_GW(*res);
- dst_init_metrics(&rt->dst, fi->fib_metrics, true);
- }
- rt->rt_type = res->type;
-
- if (dev != NULL && rt->n == NULL) {
- n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev);
- if (IS_ERR(n))
- return PTR_ERR(n);
- rt->n = n;
- }
-
- if (dst_metric(&rt->dst, RTAX_MTU) > rt->dst.dev->mtu)
- dst_metric_set(&rt->dst, RTAX_MTU, rt->dst.dev->mtu);
- mss_metric = dst_metric_raw(&rt->dst, RTAX_ADVMSS);
- if (mss_metric) {
- unsigned int mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->dst));
- if (mss_metric > mss)
- dst_metric_set(&rt->dst, RTAX_ADVMSS, mss);
- }
- return 0;
-}
-
-static inline int dn_match_addr(__le16 addr1, __le16 addr2)
-{
- __u16 tmp = le16_to_cpu(addr1) ^ le16_to_cpu(addr2);
- int match = 16;
- while(tmp) {
- tmp >>= 1;
- match--;
- }
- return match;
-}
-
-static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope)
-{
- __le16 saddr = 0;
- struct dn_dev *dn_db;
- struct dn_ifaddr *ifa;
- int best_match = 0;
- int ret;
-
- rcu_read_lock();
- dn_db = rcu_dereference(dev->dn_ptr);
- for (ifa = rcu_dereference(dn_db->ifa_list);
- ifa != NULL;
- ifa = rcu_dereference(ifa->ifa_next)) {
- if (ifa->ifa_scope > scope)
- continue;
- if (!daddr) {
- saddr = ifa->ifa_local;
- break;
- }
- ret = dn_match_addr(daddr, ifa->ifa_local);
- if (ret > best_match)
- saddr = ifa->ifa_local;
- if (best_match == 0)
- saddr = ifa->ifa_local;
- }
- rcu_read_unlock();
-
- return saddr;
-}
-
-static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res)
-{
- return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope);
-}
-
-static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res)
-{
- __le16 mask = dnet_make_mask(res->prefixlen);
- return (daddr&~mask)|res->fi->fib_nh->nh_gw;
-}
-
-static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *oldflp, int try_hard)
-{
- struct flowidn fld = {
- .daddr = oldflp->daddr,
- .saddr = oldflp->saddr,
- .flowidn_scope = RT_SCOPE_UNIVERSE,
- .flowidn_mark = oldflp->flowidn_mark,
- .flowidn_iif = LOOPBACK_IFINDEX,
- .flowidn_oif = oldflp->flowidn_oif,
- };
- struct dn_route *rt = NULL;
- struct net_device *dev_out = NULL, *dev;
- struct neighbour *neigh = NULL;
- unsigned int hash;
- unsigned int flags = 0;
- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST };
- int err;
- int free_res = 0;
- __le16 gateway = 0;
-
- if (decnet_debug_level & 16)
- printk(KERN_DEBUG
- "dn_route_output_slow: dst=%04x src=%04x mark=%d"
- " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr),
- le16_to_cpu(oldflp->saddr),
- oldflp->flowidn_mark, LOOPBACK_IFINDEX,
- oldflp->flowidn_oif);
-
- /* If we have an output interface, verify its a DECnet device */
- if (oldflp->flowidn_oif) {
- dev_out = dev_get_by_index(&init_net, oldflp->flowidn_oif);
- err = -ENODEV;
- if (dev_out && dev_out->dn_ptr == NULL) {
- dev_put(dev_out);
- dev_out = NULL;
- }
- if (dev_out == NULL)
- goto out;
- }
-
- /* If we have a source address, verify that its a local address */
- if (oldflp->saddr) {
- err = -EADDRNOTAVAIL;
-
- if (dev_out) {
- if (dn_dev_islocal(dev_out, oldflp->saddr))
- goto source_ok;
- dev_put(dev_out);
- goto out;
- }
- rcu_read_lock();
- for_each_netdev_rcu(&init_net, dev) {
- if (!dev->dn_ptr)
- continue;
- if (!dn_dev_islocal(dev, oldflp->saddr))
- continue;
- if ((dev->flags & IFF_LOOPBACK) &&
- oldflp->daddr &&
- !dn_dev_islocal(dev, oldflp->daddr))
- continue;
-
- dev_out = dev;
- break;
- }
- rcu_read_unlock();
- if (dev_out == NULL)
- goto out;
- dev_hold(dev_out);
-source_ok:
- ;
- }
-
- /* No destination? Assume its local */
- if (!fld.daddr) {
- fld.daddr = fld.saddr;
-
- if (dev_out)
- dev_put(dev_out);
- err = -EINVAL;
- dev_out = init_net.loopback_dev;
- if (!dev_out->dn_ptr)
- goto out;
- err = -EADDRNOTAVAIL;
- dev_hold(dev_out);
- if (!fld.daddr) {
- fld.daddr =
- fld.saddr = dnet_select_source(dev_out, 0,
- RT_SCOPE_HOST);
- if (!fld.daddr)
- goto out;
- }
- fld.flowidn_oif = LOOPBACK_IFINDEX;
- res.type = RTN_LOCAL;
- goto make_route;
- }
-
- if (decnet_debug_level & 16)
- printk(KERN_DEBUG
- "dn_route_output_slow: initial checks complete."
- " dst=%04x src=%04x oif=%d try_hard=%d\n",
- le16_to_cpu(fld.daddr), le16_to_cpu(fld.saddr),
- fld.flowidn_oif, try_hard);
-
- /*
- * N.B. If the kernel is compiled without router support then
- * dn_fib_lookup() will evaluate to non-zero so this if () block
- * will always be executed.
- */
- err = -ESRCH;
- if (try_hard || (err = dn_fib_lookup(&fld, &res)) != 0) {
- struct dn_dev *dn_db;
- if (err != -ESRCH)
- goto out;
- /*
- * Here the fallback is basically the standard algorithm for
- * routing in endnodes which is described in the DECnet routing
- * docs
- *
- * If we are not trying hard, look in neighbour cache.
- * The result is tested to ensure that if a specific output
- * device/source address was requested, then we honour that
- * here
- */
- if (!try_hard) {
- neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fld.daddr);
- if (neigh) {
- if ((oldflp->flowidn_oif &&
- (neigh->dev->ifindex != oldflp->flowidn_oif)) ||
- (oldflp->saddr &&
- (!dn_dev_islocal(neigh->dev,
- oldflp->saddr)))) {
- neigh_release(neigh);
- neigh = NULL;
- } else {
- if (dev_out)
- dev_put(dev_out);
- if (dn_dev_islocal(neigh->dev, fld.daddr)) {
- dev_out = init_net.loopback_dev;
- res.type = RTN_LOCAL;
- } else {
- dev_out = neigh->dev;
- }
- dev_hold(dev_out);
- goto select_source;
- }
- }
- }
-
- /* Not there? Perhaps its a local address */
- if (dev_out == NULL)
- dev_out = dn_dev_get_default();
- err = -ENODEV;
- if (dev_out == NULL)
- goto out;
- dn_db = rcu_dereference_raw(dev_out->dn_ptr);
- if (!dn_db)
- goto e_inval;
- /* Possible improvement - check all devices for local addr */
- if (dn_dev_islocal(dev_out, fld.daddr)) {
- dev_put(dev_out);
- dev_out = init_net.loopback_dev;
- dev_hold(dev_out);
- res.type = RTN_LOCAL;
- goto select_source;
- }
- /* Not local either.... try sending it to the default router */
- neigh = neigh_clone(dn_db->router);
- BUG_ON(neigh && neigh->dev != dev_out);
-
- /* Ok then, we assume its directly connected and move on */
-select_source:
- if (neigh)
- gateway = ((struct dn_neigh *)neigh)->addr;
- if (gateway == 0)
- gateway = fld.daddr;
- if (fld.saddr == 0) {
- fld.saddr = dnet_select_source(dev_out, gateway,
- res.type == RTN_LOCAL ?
- RT_SCOPE_HOST :
- RT_SCOPE_LINK);
- if (fld.saddr == 0 && res.type != RTN_LOCAL)
- goto e_addr;
- }
- fld.flowidn_oif = dev_out->ifindex;
- goto make_route;
- }
- free_res = 1;
-
- if (res.type == RTN_NAT)
- goto e_inval;
-
- if (res.type == RTN_LOCAL) {
- if (!fld.saddr)
- fld.saddr = fld.daddr;
- if (dev_out)
- dev_put(dev_out);
- dev_out = init_net.loopback_dev;
- dev_hold(dev_out);
- if (!dev_out->dn_ptr)
- goto e_inval;
- fld.flowidn_oif = dev_out->ifindex;
- if (res.fi)
- dn_fib_info_put(res.fi);
- res.fi = NULL;
- goto make_route;
- }
-
- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
- dn_fib_select_multipath(&fld, &res);
-
- /*
- * We could add some logic to deal with default routes here and
- * get rid of some of the special casing above.
- */
-
- if (!fld.saddr)
- fld.saddr = DN_FIB_RES_PREFSRC(res);
-
- if (dev_out)
- dev_put(dev_out);
- dev_out = DN_FIB_RES_DEV(res);
- dev_hold(dev_out);
- fld.flowidn_oif = dev_out->ifindex;
- gateway = DN_FIB_RES_GW(res);
-
-make_route:
- if (dev_out->flags & IFF_LOOPBACK)
- flags |= RTCF_LOCAL;
-
- rt = dst_alloc(&dn_dst_ops, dev_out, 0, DST_OBSOLETE_NONE, DST_HOST);
- if (rt == NULL)
- goto e_nobufs;
-
- rt->dn_next = NULL;
- memset(&rt->fld, 0, sizeof(rt->fld));
- rt->fld.saddr = oldflp->saddr;
- rt->fld.daddr = oldflp->daddr;
- rt->fld.flowidn_oif = oldflp->flowidn_oif;
- rt->fld.flowidn_iif = 0;
- rt->fld.flowidn_mark = oldflp->flowidn_mark;
-
- rt->rt_saddr = fld.saddr;
- rt->rt_daddr = fld.daddr;
- rt->rt_gateway = gateway ? gateway : fld.daddr;
- rt->rt_local_src = fld.saddr;
-
- rt->rt_dst_map = fld.daddr;
- rt->rt_src_map = fld.saddr;
-
- rt->n = neigh;
- neigh = NULL;
-
- rt->dst.lastuse = jiffies;
- rt->dst.output = dn_output;
- rt->dst.input = dn_rt_bug;
- rt->rt_flags = flags;
- if (flags & RTCF_LOCAL)
- rt->dst.input = dn_nsp_rx;
-
- err = dn_rt_set_next_hop(rt, &res);
- if (err)
- goto e_neighbour;
-
- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
- /* dn_insert_route() increments dst->__refcnt */
- dn_insert_route(rt, hash, (struct dn_route **)pprt);
-
-done:
- if (neigh)
- neigh_release(neigh);
- if (free_res)
- dn_fib_res_put(&res);
- if (dev_out)
- dev_put(dev_out);
-out:
- return err;
-
-e_addr:
- err = -EADDRNOTAVAIL;
- goto done;
-e_inval:
- err = -EINVAL;
- goto done;
-e_nobufs:
- err = -ENOBUFS;
- goto done;
-e_neighbour:
- dst_release_immediate(&rt->dst);
- goto e_nobufs;
-}
-
-
-/*
- * N.B. The flags may be moved into the flowi at some future stage.
- */
-static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn *flp, int flags)
-{
- unsigned int hash = dn_hash(flp->saddr, flp->daddr);
- struct dn_route *rt = NULL;
-
- if (!(flags & MSG_TRYHARD)) {
- rcu_read_lock_bh();
- for (rt = rcu_dereference_bh(dn_rt_hash_table[hash].chain); rt;
- rt = rcu_dereference_bh(rt->dn_next)) {
- if ((flp->daddr == rt->fld.daddr) &&
- (flp->saddr == rt->fld.saddr) &&
- (flp->flowidn_mark == rt->fld.flowidn_mark) &&
- dn_is_output_route(rt) &&
- (rt->fld.flowidn_oif == flp->flowidn_oif)) {
- dst_hold_and_use(&rt->dst, jiffies);
- rcu_read_unlock_bh();
- *pprt = &rt->dst;
- return 0;
- }
- }
- rcu_read_unlock_bh();
- }
-
- return dn_route_output_slow(pprt, flp, flags);
-}
-
-static int dn_route_output_key(struct dst_entry **pprt, struct flowidn *flp, int flags)
-{
- int err;
-
- err = __dn_route_output_key(pprt, flp, flags);
- if (err == 0 && flp->flowidn_proto) {
- *pprt = xfrm_lookup(&init_net, *pprt,
- flowidn_to_flowi(flp), NULL, 0);
- if (IS_ERR(*pprt)) {
- err = PTR_ERR(*pprt);
- *pprt = NULL;
- }
- }
- return err;
-}
-
-int dn_route_output_sock(struct dst_entry __rcu **pprt, struct flowidn *fl, struct sock *sk, int flags)
-{
- int err;
-
- err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD);
- if (err == 0 && fl->flowidn_proto) {
- *pprt = xfrm_lookup(&init_net, *pprt,
- flowidn_to_flowi(fl), sk, 0);
- if (IS_ERR(*pprt)) {
- err = PTR_ERR(*pprt);
- *pprt = NULL;
- }
- }
- return err;
-}
-
-static int dn_route_input_slow(struct sk_buff *skb)
-{
- struct dn_route *rt = NULL;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- struct net_device *in_dev = skb->dev;
- struct net_device *out_dev = NULL;
- struct dn_dev *dn_db;
- struct neighbour *neigh = NULL;
- unsigned int hash;
- int flags = 0;
- __le16 gateway = 0;
- __le16 local_src = 0;
- struct flowidn fld = {
- .daddr = cb->dst,
- .saddr = cb->src,
- .flowidn_scope = RT_SCOPE_UNIVERSE,
- .flowidn_mark = skb->mark,
- .flowidn_iif = skb->dev->ifindex,
- };
- struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE };
- int err = -EINVAL;
- int free_res = 0;
-
- dev_hold(in_dev);
-
- if ((dn_db = rcu_dereference(in_dev->dn_ptr)) == NULL)
- goto out;
-
- /* Zero source addresses are not allowed */
- if (fld.saddr == 0)
- goto out;
-
- /*
- * In this case we've just received a packet from a source
- * outside ourselves pretending to come from us. We don't
- * allow it any further to prevent routing loops, spoofing and
- * other nasties. Loopback packets already have the dst attached
- * so this only affects packets which have originated elsewhere.
- */
- err = -ENOTUNIQ;
- if (dn_dev_islocal(in_dev, cb->src))
- goto out;
-
- err = dn_fib_lookup(&fld, &res);
- if (err) {
- if (err != -ESRCH)
- goto out;
- /*
- * Is the destination us ?
- */
- if (!dn_dev_islocal(in_dev, cb->dst))
- goto e_inval;
-
- res.type = RTN_LOCAL;
- } else {
- __le16 src_map = fld.saddr;
- free_res = 1;
-
- out_dev = DN_FIB_RES_DEV(res);
- if (out_dev == NULL) {
- net_crit_ratelimited("Bug in dn_route_input_slow() No output device\n");
- goto e_inval;
- }
- dev_hold(out_dev);
-
- if (res.r)
- src_map = fld.saddr; /* no NAT support for now */
-
- gateway = DN_FIB_RES_GW(res);
- if (res.type == RTN_NAT) {
- fld.daddr = dn_fib_rules_map_destination(fld.daddr, &res);
- dn_fib_res_put(&res);
- free_res = 0;
- if (dn_fib_lookup(&fld, &res))
- goto e_inval;
- free_res = 1;
- if (res.type != RTN_UNICAST)
- goto e_inval;
- flags |= RTCF_DNAT;
- gateway = fld.daddr;
- }
- fld.saddr = src_map;
- }
-
- switch(res.type) {
- case RTN_UNICAST:
- /*
- * Forwarding check here, we only check for forwarding
- * being turned off, if you want to only forward intra
- * area, its up to you to set the routing tables up
- * correctly.
- */
- if (dn_db->parms.forwarding == 0)
- goto e_inval;
-
- if (res.fi->fib_nhs > 1 && fld.flowidn_oif == 0)
- dn_fib_select_multipath(&fld, &res);
-
- /*
- * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT
- * flag as a hint to set the intra-ethernet bit when
- * forwarding. If we've got NAT in operation, we don't do
- * this optimisation.
- */
- if (out_dev == in_dev && !(flags & RTCF_NAT))
- flags |= RTCF_DOREDIRECT;
-
- local_src = DN_FIB_RES_PREFSRC(res);
-
- case RTN_BLACKHOLE:
- case RTN_UNREACHABLE:
- break;
- case RTN_LOCAL:
- flags |= RTCF_LOCAL;
- fld.saddr = cb->dst;
- fld.daddr = cb->src;
-
- /* Routing tables gave us a gateway */
- if (gateway)
- goto make_route;
-
- /* Packet was intra-ethernet, so we know its on-link */
- if (cb->rt_flags & DN_RT_F_IE) {
- gateway = cb->src;
- goto make_route;
- }
-
- /* Use the default router if there is one */
- neigh = neigh_clone(dn_db->router);
- if (neigh) {
- gateway = ((struct dn_neigh *)neigh)->addr;
- goto make_route;
- }
-
- /* Close eyes and pray */
- gateway = cb->src;
- goto make_route;
- default:
- goto e_inval;
- }
-
-make_route:
- rt = dst_alloc(&dn_dst_ops, out_dev, 1, DST_OBSOLETE_NONE, DST_HOST);
- if (rt == NULL)
- goto e_nobufs;
-
- rt->dn_next = NULL;
- memset(&rt->fld, 0, sizeof(rt->fld));
- rt->rt_saddr = fld.saddr;
- rt->rt_daddr = fld.daddr;
- rt->rt_gateway = fld.daddr;
- if (gateway)
- rt->rt_gateway = gateway;
- rt->rt_local_src = local_src ? local_src : rt->rt_saddr;
-
- rt->rt_dst_map = fld.daddr;
- rt->rt_src_map = fld.saddr;
-
- rt->fld.saddr = cb->src;
- rt->fld.daddr = cb->dst;
- rt->fld.flowidn_oif = 0;
- rt->fld.flowidn_iif = in_dev->ifindex;
- rt->fld.flowidn_mark = fld.flowidn_mark;
-
- rt->n = neigh;
- rt->dst.lastuse = jiffies;
- rt->dst.output = dn_rt_bug_out;
- switch (res.type) {
- case RTN_UNICAST:
- rt->dst.input = dn_forward;
- break;
- case RTN_LOCAL:
- rt->dst.output = dn_output;
- rt->dst.input = dn_nsp_rx;
- rt->dst.dev = in_dev;
- flags |= RTCF_LOCAL;
- break;
- default:
- case RTN_UNREACHABLE:
- case RTN_BLACKHOLE:
- rt->dst.input = dst_discard;
- }
- rt->rt_flags = flags;
-
- err = dn_rt_set_next_hop(rt, &res);
- if (err)
- goto e_neighbour;
-
- hash = dn_hash(rt->fld.saddr, rt->fld.daddr);
- /* dn_insert_route() increments dst->__refcnt */
- dn_insert_route(rt, hash, &rt);
- skb_dst_set(skb, &rt->dst);
-
-done:
- if (neigh)
- neigh_release(neigh);
- if (free_res)
- dn_fib_res_put(&res);
- dev_put(in_dev);
- if (out_dev)
- dev_put(out_dev);
-out:
- return err;
-
-e_inval:
- err = -EINVAL;
- goto done;
-
-e_nobufs:
- err = -ENOBUFS;
- goto done;
-
-e_neighbour:
- dst_release_immediate(&rt->dst);
- goto done;
-}
-
-static int dn_route_input(struct sk_buff *skb)
-{
- struct dn_route *rt;
- struct dn_skb_cb *cb = DN_SKB_CB(skb);
- unsigned int hash = dn_hash(cb->src, cb->dst);
-
- if (skb_dst(skb))
- return 0;
-
- rcu_read_lock();
- for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL;
- rt = rcu_dereference(rt->dn_next)) {
- if ((rt->fld.saddr == cb->src) &&
- (rt->fld.daddr == cb->dst) &&
- (rt->fld.flowidn_oif == 0) &&
- (rt->fld.flowidn_mark == skb->mark) &&
- (rt->fld.flowidn_iif == cb->iif)) {
- dst_hold_and_use(&rt->dst, jiffies);
- rcu_read_unlock();
- skb_dst_set(skb, (struct dst_entry *)rt);
- return 0;
- }
- }
- rcu_read_unlock();
-
- return dn_route_input_slow(skb);
-}
-
-static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
- int event, int nowait, unsigned int flags)
-{
- struct dn_route *rt = (struct dn_route *)skb_dst(skb);
- struct rtmsg *r;
- struct nlmsghdr *nlh;
- long expires;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- r->rtm_family = AF_DECnet;
- r->rtm_dst_len = 16;
- r->rtm_src_len = 0;
- r->rtm_tos = 0;
- r->rtm_table = RT_TABLE_MAIN;
- r->rtm_type = rt->rt_type;
- r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
- r->rtm_scope = RT_SCOPE_UNIVERSE;
- r->rtm_protocol = RTPROT_UNSPEC;
-
- if (rt->rt_flags & RTCF_NOTIFY)
- r->rtm_flags |= RTM_F_NOTIFY;
-
- if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN) < 0 ||
- nla_put_le16(skb, RTA_DST, rt->rt_daddr) < 0)
- goto errout;
-
- if (rt->fld.saddr) {
- r->rtm_src_len = 16;
- if (nla_put_le16(skb, RTA_SRC, rt->fld.saddr) < 0)
- goto errout;
- }
- if (rt->dst.dev &&
- nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex) < 0)
- goto errout;
-
- /*
- * Note to self - change this if input routes reverse direction when
- * they deal only with inputs and not with replies like they do
- * currently.
- */
- if (nla_put_le16(skb, RTA_PREFSRC, rt->rt_local_src) < 0)
- goto errout;
-
- if (rt->rt_daddr != rt->rt_gateway &&
- nla_put_le16(skb, RTA_GATEWAY, rt->rt_gateway) < 0)
- goto errout;
-
- if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0)
- goto errout;
-
- expires = rt->dst.expires ? rt->dst.expires - jiffies : 0;
- if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires,
- rt->dst.error) < 0)
- goto errout;
-
- if (dn_is_input_route(rt) &&
- nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0)
- goto errout;
-
- nlmsg_end(skb, nlh);
- return 0;
-
-errout:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-const struct nla_policy rtm_dn_policy[RTA_MAX + 1] = {
- [RTA_DST] = { .type = NLA_U16 },
- [RTA_SRC] = { .type = NLA_U16 },
- [RTA_IIF] = { .type = NLA_U32 },
- [RTA_OIF] = { .type = NLA_U32 },
- [RTA_GATEWAY] = { .type = NLA_U16 },
- [RTA_PRIORITY] = { .type = NLA_U32 },
- [RTA_PREFSRC] = { .type = NLA_U16 },
- [RTA_METRICS] = { .type = NLA_NESTED },
- [RTA_MULTIPATH] = { .type = NLA_NESTED },
- [RTA_TABLE] = { .type = NLA_U32 },
- [RTA_MARK] = { .type = NLA_U32 },
-};
-
-/*
- * This is called by both endnodes and routers now.
- */
-static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
- struct netlink_ext_ack *extack)
-{
- struct net *net = sock_net(in_skb->sk);
- struct rtmsg *rtm = nlmsg_data(nlh);
- struct dn_route *rt = NULL;
- struct dn_skb_cb *cb;
- int err;
- struct sk_buff *skb;
- struct flowidn fld;
- struct nlattr *tb[RTA_MAX+1];
-
- if (!net_eq(net, &init_net))
- return -EINVAL;
-
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_dn_policy,
- extack);
- if (err < 0)
- return err;
-
- memset(&fld, 0, sizeof(fld));
- fld.flowidn_proto = DNPROTO_NSP;
-
- skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb == NULL)
- return -ENOBUFS;
- skb_reset_mac_header(skb);
- cb = DN_SKB_CB(skb);
-
- if (tb[RTA_SRC])
- fld.saddr = nla_get_le16(tb[RTA_SRC]);
-
- if (tb[RTA_DST])
- fld.daddr = nla_get_le16(tb[RTA_DST]);
-
- if (tb[RTA_IIF])
- fld.flowidn_iif = nla_get_u32(tb[RTA_IIF]);
-
- if (fld.flowidn_iif) {
- struct net_device *dev;
- dev = __dev_get_by_index(&init_net, fld.flowidn_iif);
- if (!dev || !dev->dn_ptr) {
- kfree_skb(skb);
- return -ENODEV;
- }
- skb->protocol = htons(ETH_P_DNA_RT);
- skb->dev = dev;
- cb->src = fld.saddr;
- cb->dst = fld.daddr;
- local_bh_disable();
- err = dn_route_input(skb);
- local_bh_enable();
- memset(cb, 0, sizeof(struct dn_skb_cb));
- rt = (struct dn_route *)skb_dst(skb);
- if (!err && -rt->dst.error)
- err = rt->dst.error;
- } else {
- if (tb[RTA_OIF])
- fld.flowidn_oif = nla_get_u32(tb[RTA_OIF]);
-
- err = dn_route_output_key((struct dst_entry **)&rt, &fld, 0);
- }
-
- skb->dev = NULL;
- if (err)
- goto out_free;
- skb_dst_set(skb, &rt->dst);
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- rt->rt_flags |= RTCF_NOTIFY;
-
- err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0);
- if (err < 0) {
- err = -EMSGSIZE;
- goto out_free;
- }
-
- return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid);
-
-out_free:
- kfree_skb(skb);
- return err;
-}
-
-/*
- * For routers, this is called from dn_fib_dump, but for endnodes its
- * called directly from the rtnetlink dispatch table.
- */
-int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- struct dn_route *rt;
- int h, s_h;
- int idx, s_idx;
- struct rtmsg *rtm;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- if (nlmsg_len(cb->nlh) < sizeof(struct rtmsg))
- return -EINVAL;
-
- rtm = nlmsg_data(cb->nlh);
- if (!(rtm->rtm_flags & RTM_F_CLONED))
- return 0;
-
- s_h = cb->args[0];
- s_idx = idx = cb->args[1];
- for(h = 0; h <= dn_rt_hash_mask; h++) {
- if (h < s_h)
- continue;
- if (h > s_h)
- s_idx = 0;
- rcu_read_lock_bh();
- for(rt = rcu_dereference_bh(dn_rt_hash_table[h].chain), idx = 0;
- rt;
- rt = rcu_dereference_bh(rt->dn_next), idx++) {
- if (idx < s_idx)
- continue;
- skb_dst_set(skb, dst_clone(&rt->dst));
- if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWROUTE,
- 1, NLM_F_MULTI) < 0) {
- skb_dst_drop(skb);
- rcu_read_unlock_bh();
- goto done;
- }
- skb_dst_drop(skb);
- }
- rcu_read_unlock_bh();
- }
-
-done:
- cb->args[0] = h;
- cb->args[1] = idx;
- return skb->len;
-}
-
-#ifdef CONFIG_PROC_FS
-struct dn_rt_cache_iter_state {
- int bucket;
-};
-
-static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq)
-{
- struct dn_route *rt = NULL;
- struct dn_rt_cache_iter_state *s = seq->private;
-
- for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) {
- rcu_read_lock_bh();
- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
- if (rt)
- break;
- rcu_read_unlock_bh();
- }
- return rt;
-}
-
-static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt)
-{
- struct dn_rt_cache_iter_state *s = seq->private;
-
- rt = rcu_dereference_bh(rt->dn_next);
- while (!rt) {
- rcu_read_unlock_bh();
- if (--s->bucket < 0)
- break;
- rcu_read_lock_bh();
- rt = rcu_dereference_bh(dn_rt_hash_table[s->bucket].chain);
- }
- return rt;
-}
-
-static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
-{
- struct dn_route *rt = dn_rt_cache_get_first(seq);
-
- if (rt) {
- while(*pos && (rt = dn_rt_cache_get_next(seq, rt)))
- --*pos;
- }
- return *pos ? NULL : rt;
-}
-
-static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct dn_route *rt = dn_rt_cache_get_next(seq, v);
- ++*pos;
- return rt;
-}
-
-static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v)
-{
- if (v)
- rcu_read_unlock_bh();
-}
-
-static int dn_rt_cache_seq_show(struct seq_file *seq, void *v)
-{
- struct dn_route *rt = v;
- char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN];
-
- seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n",
- rt->dst.dev ? rt->dst.dev->name : "*",
- dn_addr2asc(le16_to_cpu(rt->rt_daddr), buf1),
- dn_addr2asc(le16_to_cpu(rt->rt_saddr), buf2),
- atomic_read(&rt->dst.__refcnt),
- rt->dst.__use, 0);
- return 0;
-}
-
-static const struct seq_operations dn_rt_cache_seq_ops = {
- .start = dn_rt_cache_seq_start,
- .next = dn_rt_cache_seq_next,
- .stop = dn_rt_cache_seq_stop,
- .show = dn_rt_cache_seq_show,
-};
-#endif /* CONFIG_PROC_FS */
-
-void __init dn_route_init(void)
-{
- int i, goal, order;
-
- dn_dst_ops.kmem_cachep =
- kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
- dst_entries_init(&dn_dst_ops);
- timer_setup(&dn_route_timer, dn_dst_check_expire, 0);
- dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
- add_timer(&dn_route_timer);
-
- goal = totalram_pages >> (26 - PAGE_SHIFT);
-
- for(order = 0; (1UL << order) < goal; order++)
- /* NOTHING */;
-
- /*
- * Only want 1024 entries max, since the table is very, very unlikely
- * to be larger than that.
- */
- while(order && ((((1UL << order) * PAGE_SIZE) /
- sizeof(struct dn_rt_hash_bucket)) >= 2048))
- order--;
-
- do {
- dn_rt_hash_mask = (1UL << order) * PAGE_SIZE /
- sizeof(struct dn_rt_hash_bucket);
- while(dn_rt_hash_mask & (dn_rt_hash_mask - 1))
- dn_rt_hash_mask--;
- dn_rt_hash_table = (struct dn_rt_hash_bucket *)
- __get_free_pages(GFP_ATOMIC, order);
- } while (dn_rt_hash_table == NULL && --order > 0);
-
- if (!dn_rt_hash_table)
- panic("Failed to allocate DECnet route cache hash table\n");
-
- printk(KERN_INFO
- "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n",
- dn_rt_hash_mask,
- (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024);
-
- dn_rt_hash_mask--;
- for(i = 0; i <= dn_rt_hash_mask; i++) {
- spin_lock_init(&dn_rt_hash_table[i].lock);
- dn_rt_hash_table[i].chain = NULL;
- }
-
- dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1);
-
- proc_create_seq_private("decnet_cache", 0444, init_net.proc_net,
- &dn_rt_cache_seq_ops,
- sizeof(struct dn_rt_cache_iter_state), NULL);
-
-#ifdef CONFIG_DECNET_ROUTER
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
- dn_cache_getroute, dn_fib_dump, 0);
-#else
- rtnl_register_module(THIS_MODULE, PF_DECnet, RTM_GETROUTE,
- dn_cache_getroute, dn_cache_dump, 0);
-#endif
-}
-
-void __exit dn_route_cleanup(void)
-{
- del_timer(&dn_route_timer);
- dn_run_flush(NULL);
-
- remove_proc_entry("decnet_cache", init_net.proc_net);
- dst_entries_destroy(&dn_dst_ops);
-}
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
deleted file mode 100644
index 4a4e3c17740c..000000000000
--- a/net/decnet/dn_rules.c
+++ /dev/null
@@ -1,258 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Rules)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- * Mostly copied from Alexey Kuznetsov's ipv4/fib_rules.c
- *
- *
- * Changes:
- * Steve Whitehouse <steve@chygwyn.com>
- * Updated for Thomas Graf's generic rules
- *
- */
-#include <linux/net.h>
-#include <linux/init.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <linux/netdevice.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-#include <linux/rcupdate.h>
-#include <linux/export.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-static struct fib_rules_ops *dn_fib_rules_ops;
-
-struct dn_fib_rule
-{
- struct fib_rule common;
- unsigned char dst_len;
- unsigned char src_len;
- __le16 src;
- __le16 srcmask;
- __le16 dst;
- __le16 dstmask;
- __le16 srcmap;
- u8 flags;
-};
-
-
-int dn_fib_lookup(struct flowidn *flp, struct dn_fib_res *res)
-{
- struct fib_lookup_arg arg = {
- .result = res,
- };
- int err;
-
- err = fib_rules_lookup(dn_fib_rules_ops,
- flowidn_to_flowi(flp), 0, &arg);
- res->r = arg.rule;
-
- return err;
-}
-
-static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp,
- int flags, struct fib_lookup_arg *arg)
-{
- struct flowidn *fld = &flp->u.dn;
- int err = -EAGAIN;
- struct dn_fib_table *tbl;
-
- switch(rule->action) {
- case FR_ACT_TO_TBL:
- break;
-
- case FR_ACT_UNREACHABLE:
- err = -ENETUNREACH;
- goto errout;
-
- case FR_ACT_PROHIBIT:
- err = -EACCES;
- goto errout;
-
- case FR_ACT_BLACKHOLE:
- default:
- err = -EINVAL;
- goto errout;
- }
-
- tbl = dn_fib_get_table(rule->table, 0);
- if (tbl == NULL)
- goto errout;
-
- err = tbl->lookup(tbl, fld, (struct dn_fib_res *)arg->result);
- if (err > 0)
- err = -EAGAIN;
-errout:
- return err;
-}
-
-static const struct nla_policy dn_fib_rule_policy[FRA_MAX+1] = {
- FRA_GENERIC_POLICY,
-};
-
-static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
- struct flowidn *fld = &fl->u.dn;
- __le16 daddr = fld->daddr;
- __le16 saddr = fld->saddr;
-
- if (((saddr ^ r->src) & r->srcmask) ||
- ((daddr ^ r->dst) & r->dstmask))
- return 0;
-
- return 1;
-}
-
-static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh,
- struct nlattr **tb,
- struct netlink_ext_ack *extack)
-{
- int err = -EINVAL;
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- if (frh->tos) {
- NL_SET_ERR_MSG(extack, "Invalid tos value");
- goto errout;
- }
-
- if (rule->table == RT_TABLE_UNSPEC) {
- if (rule->action == FR_ACT_TO_TBL) {
- struct dn_fib_table *table;
-
- table = dn_fib_empty_table();
- if (table == NULL) {
- err = -ENOBUFS;
- goto errout;
- }
-
- rule->table = table->n;
- }
- }
-
- if (frh->src_len)
- r->src = nla_get_le16(tb[FRA_SRC]);
-
- if (frh->dst_len)
- r->dst = nla_get_le16(tb[FRA_DST]);
-
- r->src_len = frh->src_len;
- r->srcmask = dnet_make_mask(r->src_len);
- r->dst_len = frh->dst_len;
- r->dstmask = dnet_make_mask(r->dst_len);
- err = 0;
-errout:
- return err;
-}
-
-static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
- struct nlattr **tb)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- if (frh->src_len && (r->src_len != frh->src_len))
- return 0;
-
- if (frh->dst_len && (r->dst_len != frh->dst_len))
- return 0;
-
- if (frh->src_len && (r->src != nla_get_le16(tb[FRA_SRC])))
- return 0;
-
- if (frh->dst_len && (r->dst != nla_get_le16(tb[FRA_DST])))
- return 0;
-
- return 1;
-}
-
-unsigned int dnet_addr_type(__le16 addr)
-{
- struct flowidn fld = { .daddr = addr };
- struct dn_fib_res res;
- unsigned int ret = RTN_UNICAST;
- struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0);
-
- res.r = NULL;
-
- if (tb) {
- if (!tb->lookup(tb, &fld, &res)) {
- ret = res.type;
- dn_fib_res_put(&res);
- }
- }
- return ret;
-}
-
-static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
- struct fib_rule_hdr *frh)
-{
- struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
-
- frh->dst_len = r->dst_len;
- frh->src_len = r->src_len;
- frh->tos = 0;
-
- if ((r->dst_len &&
- nla_put_le16(skb, FRA_DST, r->dst)) ||
- (r->src_len &&
- nla_put_le16(skb, FRA_SRC, r->src)))
- goto nla_put_failure;
- return 0;
-
-nla_put_failure:
- return -ENOBUFS;
-}
-
-static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
-{
- dn_rt_cache_flush(-1);
-}
-
-static const struct fib_rules_ops __net_initconst dn_fib_rules_ops_template = {
- .family = AF_DECnet,
- .rule_size = sizeof(struct dn_fib_rule),
- .addr_size = sizeof(u16),
- .action = dn_fib_rule_action,
- .match = dn_fib_rule_match,
- .configure = dn_fib_rule_configure,
- .compare = dn_fib_rule_compare,
- .fill = dn_fib_rule_fill,
- .flush_cache = dn_fib_rule_flush_cache,
- .nlgroup = RTNLGRP_DECnet_RULE,
- .policy = dn_fib_rule_policy,
- .owner = THIS_MODULE,
- .fro_net = &init_net,
-};
-
-void __init dn_fib_rules_init(void)
-{
- dn_fib_rules_ops =
- fib_rules_register(&dn_fib_rules_ops_template, &init_net);
- BUG_ON(IS_ERR(dn_fib_rules_ops));
- BUG_ON(fib_default_rule_add(dn_fib_rules_ops, 0x7fff,
- RT_TABLE_MAIN, 0));
-}
-
-void __exit dn_fib_rules_cleanup(void)
-{
- rtnl_lock();
- fib_rules_unregister(dn_fib_rules_ops);
- rtnl_unlock();
- rcu_barrier();
-}
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
deleted file mode 100644
index f0710b5d037d..000000000000
--- a/net/decnet/dn_table.c
+++ /dev/null
@@ -1,928 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Forwarding Information Base (Routing Tables)
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- * Mostly copied from the IPv4 routing code
- *
- *
- * Changes:
- *
- */
-#include <linux/string.h>
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/slab.h>
-#include <linux/sockios.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/proc_fs.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/uaccess.h>
-#include <linux/route.h> /* RTF_xxx */
-#include <net/neighbour.h>
-#include <net/netlink.h>
-#include <net/tcp.h>
-#include <net/dst.h>
-#include <net/flow.h>
-#include <net/fib_rules.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-#include <net/dn_fib.h>
-#include <net/dn_neigh.h>
-#include <net/dn_dev.h>
-
-struct dn_zone
-{
- struct dn_zone *dz_next;
- struct dn_fib_node **dz_hash;
- int dz_nent;
- int dz_divisor;
- u32 dz_hashmask;
-#define DZ_HASHMASK(dz) ((dz)->dz_hashmask)
- int dz_order;
- __le16 dz_mask;
-#define DZ_MASK(dz) ((dz)->dz_mask)
-};
-
-struct dn_hash
-{
- struct dn_zone *dh_zones[17];
- struct dn_zone *dh_zone_list;
-};
-
-#define dz_key_0(key) ((key).datum = 0)
-
-#define for_nexthops(fi) { int nhsel; const struct dn_fib_nh *nh;\
- for(nhsel = 0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
-
-#define endfor_nexthops(fi) }
-
-#define DN_MAX_DIVISOR 1024
-#define DN_S_ZOMBIE 1
-#define DN_S_ACCESSED 2
-
-#define DN_FIB_SCAN(f, fp) \
-for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next)
-
-#define DN_FIB_SCAN_KEY(f, fp, key) \
-for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next)
-
-#define RT_TABLE_MIN 1
-#define DN_FIB_TABLE_HASHSZ 256
-static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ];
-static DEFINE_RWLOCK(dn_fib_tables_lock);
-
-static struct kmem_cache *dn_hash_kmem __read_mostly;
-static int dn_fib_hash_zombies;
-
-static inline dn_fib_idx_t dn_hash(dn_fib_key_t key, struct dn_zone *dz)
-{
- u16 h = le16_to_cpu(key.datum)>>(16 - dz->dz_order);
- h ^= (h >> 10);
- h ^= (h >> 6);
- h &= DZ_HASHMASK(dz);
- return *(dn_fib_idx_t *)&h;
-}
-
-static inline dn_fib_key_t dz_key(__le16 dst, struct dn_zone *dz)
-{
- dn_fib_key_t k;
- k.datum = dst & DZ_MASK(dz);
- return k;
-}
-
-static inline struct dn_fib_node **dn_chain_p(dn_fib_key_t key, struct dn_zone *dz)
-{
- return &dz->dz_hash[dn_hash(key, dz).datum];
-}
-
-static inline struct dn_fib_node *dz_chain(dn_fib_key_t key, struct dn_zone *dz)
-{
- return dz->dz_hash[dn_hash(key, dz).datum];
-}
-
-static inline int dn_key_eq(dn_fib_key_t a, dn_fib_key_t b)
-{
- return a.datum == b.datum;
-}
-
-static inline int dn_key_leq(dn_fib_key_t a, dn_fib_key_t b)
-{
- return a.datum <= b.datum;
-}
-
-static inline void dn_rebuild_zone(struct dn_zone *dz,
- struct dn_fib_node **old_ht,
- int old_divisor)
-{
- struct dn_fib_node *f, **fp, *next;
- int i;
-
- for(i = 0; i < old_divisor; i++) {
- for(f = old_ht[i]; f; f = next) {
- next = f->fn_next;
- for(fp = dn_chain_p(f->fn_key, dz);
- *fp && dn_key_leq((*fp)->fn_key, f->fn_key);
- fp = &(*fp)->fn_next)
- /* NOTHING */;
- f->fn_next = *fp;
- *fp = f;
- }
- }
-}
-
-static void dn_rehash_zone(struct dn_zone *dz)
-{
- struct dn_fib_node **ht, **old_ht;
- int old_divisor, new_divisor;
- u32 new_hashmask;
-
- old_divisor = dz->dz_divisor;
-
- switch (old_divisor) {
- case 16:
- new_divisor = 256;
- new_hashmask = 0xFF;
- break;
- default:
- printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n",
- old_divisor);
- /* fall through */
- case 256:
- new_divisor = 1024;
- new_hashmask = 0x3FF;
- break;
- }
-
- ht = kcalloc(new_divisor, sizeof(struct dn_fib_node*), GFP_KERNEL);
- if (ht == NULL)
- return;
-
- write_lock_bh(&dn_fib_tables_lock);
- old_ht = dz->dz_hash;
- dz->dz_hash = ht;
- dz->dz_hashmask = new_hashmask;
- dz->dz_divisor = new_divisor;
- dn_rebuild_zone(dz, old_ht, old_divisor);
- write_unlock_bh(&dn_fib_tables_lock);
- kfree(old_ht);
-}
-
-static void dn_free_node(struct dn_fib_node *f)
-{
- dn_fib_release_info(DN_FIB_INFO(f));
- kmem_cache_free(dn_hash_kmem, f);
-}
-
-
-static struct dn_zone *dn_new_zone(struct dn_hash *table, int z)
-{
- int i;
- struct dn_zone *dz = kzalloc(sizeof(struct dn_zone), GFP_KERNEL);
- if (!dz)
- return NULL;
-
- if (z) {
- dz->dz_divisor = 16;
- dz->dz_hashmask = 0x0F;
- } else {
- dz->dz_divisor = 1;
- dz->dz_hashmask = 0;
- }
-
- dz->dz_hash = kcalloc(dz->dz_divisor, sizeof(struct dn_fib_node *), GFP_KERNEL);
- if (!dz->dz_hash) {
- kfree(dz);
- return NULL;
- }
-
- dz->dz_order = z;
- dz->dz_mask = dnet_make_mask(z);
-
- for(i = z + 1; i <= 16; i++)
- if (table->dh_zones[i])
- break;
-
- write_lock_bh(&dn_fib_tables_lock);
- if (i>16) {
- dz->dz_next = table->dh_zone_list;
- table->dh_zone_list = dz;
- } else {
- dz->dz_next = table->dh_zones[i]->dz_next;
- table->dh_zones[i]->dz_next = dz;
- }
- table->dh_zones[z] = dz;
- write_unlock_bh(&dn_fib_tables_lock);
- return dz;
-}
-
-
-static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct nlattr *attrs[], struct dn_fib_info *fi)
-{
- struct rtnexthop *nhp;
- int nhlen;
-
- if (attrs[RTA_PRIORITY] &&
- nla_get_u32(attrs[RTA_PRIORITY]) != fi->fib_priority)
- return 1;
-
- if (attrs[RTA_OIF] || attrs[RTA_GATEWAY]) {
- if ((!attrs[RTA_OIF] || nla_get_u32(attrs[RTA_OIF]) == fi->fib_nh->nh_oif) &&
- (!attrs[RTA_GATEWAY] || nla_get_le16(attrs[RTA_GATEWAY]) != fi->fib_nh->nh_gw))
- return 0;
- return 1;
- }
-
- if (!attrs[RTA_MULTIPATH])
- return 0;
-
- nhp = nla_data(attrs[RTA_MULTIPATH]);
- nhlen = nla_len(attrs[RTA_MULTIPATH]);
-
- for_nexthops(fi) {
- int attrlen = nhlen - sizeof(struct rtnexthop);
- __le16 gw;
-
- if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
- return -EINVAL;
- if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
- return 1;
- if (attrlen) {
- struct nlattr *gw_attr;
-
- gw_attr = nla_find((struct nlattr *) (nhp + 1), attrlen, RTA_GATEWAY);
- gw = gw_attr ? nla_get_le16(gw_attr) : 0;
-
- if (gw && gw != nh->nh_gw)
- return 1;
- }
- nhp = RTNH_NEXT(nhp);
- } endfor_nexthops(fi);
-
- return 0;
-}
-
-static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi)
-{
- size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
- + nla_total_size(4) /* RTA_TABLE */
- + nla_total_size(2) /* RTA_DST */
- + nla_total_size(4) /* RTA_PRIORITY */
- + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
-
- /* space for nested metrics */
- payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
-
- if (fi->fib_nhs) {
- /* Also handles the special case fib_nhs == 1 */
-
- /* each nexthop is packed in an attribute */
- size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
-
- /* may contain a gateway attribute */
- nhsize += nla_total_size(4);
-
- /* all nexthops are packed in a nested attribute */
- payload += nla_total_size(fi->fib_nhs * nhsize);
- }
-
- return payload;
-}
-
-static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
- u32 tb_id, u8 type, u8 scope, void *dst, int dst_len,
- struct dn_fib_info *fi, unsigned int flags)
-{
- struct rtmsg *rtm;
- struct nlmsghdr *nlh;
-
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
- if (!nlh)
- return -EMSGSIZE;
-
- rtm = nlmsg_data(nlh);
- rtm->rtm_family = AF_DECnet;
- rtm->rtm_dst_len = dst_len;
- rtm->rtm_src_len = 0;
- rtm->rtm_tos = 0;
- rtm->rtm_table = tb_id;
- rtm->rtm_flags = fi->fib_flags;
- rtm->rtm_scope = scope;
- rtm->rtm_type = type;
- rtm->rtm_protocol = fi->fib_protocol;
-
- if (nla_put_u32(skb, RTA_TABLE, tb_id) < 0)
- goto errout;
-
- if (rtm->rtm_dst_len &&
- nla_put(skb, RTA_DST, 2, dst) < 0)
- goto errout;
-
- if (fi->fib_priority &&
- nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority) < 0)
- goto errout;
-
- if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
- goto errout;
-
- if (fi->fib_nhs == 1) {
- if (fi->fib_nh->nh_gw &&
- nla_put_le16(skb, RTA_GATEWAY, fi->fib_nh->nh_gw) < 0)
- goto errout;
-
- if (fi->fib_nh->nh_oif &&
- nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif) < 0)
- goto errout;
- }
-
- if (fi->fib_nhs > 1) {
- struct rtnexthop *nhp;
- struct nlattr *mp_head;
-
- if (!(mp_head = nla_nest_start(skb, RTA_MULTIPATH)))
- goto errout;
-
- for_nexthops(fi) {
- if (!(nhp = nla_reserve_nohdr(skb, sizeof(*nhp))))
- goto errout;
-
- nhp->rtnh_flags = nh->nh_flags & 0xFF;
- nhp->rtnh_hops = nh->nh_weight - 1;
- nhp->rtnh_ifindex = nh->nh_oif;
-
- if (nh->nh_gw &&
- nla_put_le16(skb, RTA_GATEWAY, nh->nh_gw) < 0)
- goto errout;
-
- nhp->rtnh_len = skb_tail_pointer(skb) - (unsigned char *)nhp;
- } endfor_nexthops(fi);
-
- nla_nest_end(skb, mp_head);
- }
-
- nlmsg_end(skb, nlh);
- return 0;
-
-errout:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
-}
-
-
-static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
- struct nlmsghdr *nlh, struct netlink_skb_parms *req)
-{
- struct sk_buff *skb;
- u32 portid = req ? req->portid : 0;
- int err = -ENOBUFS;
-
- skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL);
- if (skb == NULL)
- goto errout;
-
- err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id,
- f->fn_type, f->fn_scope, &f->fn_key, z,
- DN_FIB_INFO(f), 0);
- if (err < 0) {
- /* -EMSGSIZE implies BUG in dn_fib_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
- kfree_skb(skb);
- goto errout;
- }
- rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
- return;
-errout:
- if (err < 0)
- rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
-}
-
-static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct dn_fib_table *tb,
- struct dn_zone *dz,
- struct dn_fib_node *f)
-{
- int i, s_i;
-
- s_i = cb->args[4];
- for(i = 0; f; i++, f = f->fn_next) {
- if (i < s_i)
- continue;
- if (f->fn_state & DN_S_ZOMBIE)
- continue;
- if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWROUTE,
- tb->n,
- (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type,
- f->fn_scope, &f->fn_key, dz->dz_order,
- f->fn_info, NLM_F_MULTI) < 0) {
- cb->args[4] = i;
- return -1;
- }
- }
- cb->args[4] = i;
- return skb->len;
-}
-
-static __inline__ int dn_hash_dump_zone(struct sk_buff *skb,
- struct netlink_callback *cb,
- struct dn_fib_table *tb,
- struct dn_zone *dz)
-{
- int h, s_h;
-
- s_h = cb->args[3];
- for(h = 0; h < dz->dz_divisor; h++) {
- if (h < s_h)
- continue;
- if (h > s_h)
- memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0]));
- if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL)
- continue;
- if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) {
- cb->args[3] = h;
- return -1;
- }
- }
- cb->args[3] = h;
- return skb->len;
-}
-
-static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- int m, s_m;
- struct dn_zone *dz;
- struct dn_hash *table = (struct dn_hash *)tb->data;
-
- s_m = cb->args[2];
- read_lock(&dn_fib_tables_lock);
- for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) {
- if (m < s_m)
- continue;
- if (m > s_m)
- memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0]));
-
- if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) {
- cb->args[2] = m;
- read_unlock(&dn_fib_tables_lock);
- return -1;
- }
- }
- read_unlock(&dn_fib_tables_lock);
- cb->args[2] = m;
-
- return skb->len;
-}
-
-int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct net *net = sock_net(skb->sk);
- unsigned int h, s_h;
- unsigned int e = 0, s_e;
- struct dn_fib_table *tb;
- int dumped = 0;
-
- if (!net_eq(net, &init_net))
- return 0;
-
- if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) &&
- ((struct rtmsg *)nlmsg_data(cb->nlh))->rtm_flags&RTM_F_CLONED)
- return dn_cache_dump(skb, cb);
-
- s_h = cb->args[0];
- s_e = cb->args[1];
-
- for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) {
- e = 0;
- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist) {
- if (e < s_e)
- goto next;
- if (dumped)
- memset(&cb->args[2], 0, sizeof(cb->args) -
- 2 * sizeof(cb->args[0]));
- if (tb->dump(tb, skb, cb) < 0)
- goto out;
- dumped = 1;
-next:
- e++;
- }
- }
-out:
- cb->args[1] = e;
- cb->args[0] = h;
-
- return skb->len;
-}
-
-static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
- struct nlmsghdr *n, struct netlink_skb_parms *req)
-{
- struct dn_hash *table = (struct dn_hash *)tb->data;
- struct dn_fib_node *new_f, *f, **fp, **del_fp;
- struct dn_zone *dz;
- struct dn_fib_info *fi;
- int z = r->rtm_dst_len;
- int type = r->rtm_type;
- dn_fib_key_t key;
- int err;
-
- if (z > 16)
- return -EINVAL;
-
- dz = table->dh_zones[z];
- if (!dz && !(dz = dn_new_zone(table, z)))
- return -ENOBUFS;
-
- dz_key_0(key);
- if (attrs[RTA_DST]) {
- __le16 dst = nla_get_le16(attrs[RTA_DST]);
- if (dst & ~DZ_MASK(dz))
- return -EINVAL;
- key = dz_key(dst, dz);
- }
-
- if ((fi = dn_fib_create_info(r, attrs, n, &err)) == NULL)
- return err;
-
- if (dz->dz_nent > (dz->dz_divisor << 2) &&
- dz->dz_divisor > DN_MAX_DIVISOR &&
- (z==16 || (1<<z) > dz->dz_divisor))
- dn_rehash_zone(dz);
-
- fp = dn_chain_p(key, dz);
-
- DN_FIB_SCAN(f, fp) {
- if (dn_key_leq(key, f->fn_key))
- break;
- }
-
- del_fp = NULL;
-
- if (f && (f->fn_state & DN_S_ZOMBIE) &&
- dn_key_eq(f->fn_key, key)) {
- del_fp = fp;
- fp = &f->fn_next;
- f = *fp;
- goto create;
- }
-
- DN_FIB_SCAN_KEY(f, fp, key) {
- if (fi->fib_priority <= DN_FIB_INFO(f)->fib_priority)
- break;
- }
-
- if (f && dn_key_eq(f->fn_key, key) &&
- fi->fib_priority == DN_FIB_INFO(f)->fib_priority) {
- struct dn_fib_node **ins_fp;
-
- err = -EEXIST;
- if (n->nlmsg_flags & NLM_F_EXCL)
- goto out;
-
- if (n->nlmsg_flags & NLM_F_REPLACE) {
- del_fp = fp;
- fp = &f->fn_next;
- f = *fp;
- goto replace;
- }
-
- ins_fp = fp;
- err = -EEXIST;
-
- DN_FIB_SCAN_KEY(f, fp, key) {
- if (fi->fib_priority != DN_FIB_INFO(f)->fib_priority)
- break;
- if (f->fn_type == type &&
- f->fn_scope == r->rtm_scope &&
- DN_FIB_INFO(f) == fi)
- goto out;
- }
-
- if (!(n->nlmsg_flags & NLM_F_APPEND)) {
- fp = ins_fp;
- f = *fp;
- }
- }
-
-create:
- err = -ENOENT;
- if (!(n->nlmsg_flags & NLM_F_CREATE))
- goto out;
-
-replace:
- err = -ENOBUFS;
- new_f = kmem_cache_zalloc(dn_hash_kmem, GFP_KERNEL);
- if (new_f == NULL)
- goto out;
-
- new_f->fn_key = key;
- new_f->fn_type = type;
- new_f->fn_scope = r->rtm_scope;
- DN_FIB_INFO(new_f) = fi;
-
- new_f->fn_next = f;
- write_lock_bh(&dn_fib_tables_lock);
- *fp = new_f;
- write_unlock_bh(&dn_fib_tables_lock);
- dz->dz_nent++;
-
- if (del_fp) {
- f = *del_fp;
- write_lock_bh(&dn_fib_tables_lock);
- *del_fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- if (!(f->fn_state & DN_S_ZOMBIE))
- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
- if (f->fn_state & DN_S_ACCESSED)
- dn_rt_cache_flush(-1);
- dn_free_node(f);
- dz->dz_nent--;
- } else {
- dn_rt_cache_flush(-1);
- }
-
- dn_rtmsg_fib(RTM_NEWROUTE, new_f, z, tb->n, n, req);
-
- return 0;
-out:
- dn_fib_release_info(fi);
- return err;
-}
-
-
-static int dn_fib_table_delete(struct dn_fib_table *tb, struct rtmsg *r, struct nlattr *attrs[],
- struct nlmsghdr *n, struct netlink_skb_parms *req)
-{
- struct dn_hash *table = (struct dn_hash*)tb->data;
- struct dn_fib_node **fp, **del_fp, *f;
- int z = r->rtm_dst_len;
- struct dn_zone *dz;
- dn_fib_key_t key;
- int matched;
-
-
- if (z > 16)
- return -EINVAL;
-
- if ((dz = table->dh_zones[z]) == NULL)
- return -ESRCH;
-
- dz_key_0(key);
- if (attrs[RTA_DST]) {
- __le16 dst = nla_get_le16(attrs[RTA_DST]);
- if (dst & ~DZ_MASK(dz))
- return -EINVAL;
- key = dz_key(dst, dz);
- }
-
- fp = dn_chain_p(key, dz);
-
- DN_FIB_SCAN(f, fp) {
- if (dn_key_eq(f->fn_key, key))
- break;
- if (dn_key_leq(key, f->fn_key))
- return -ESRCH;
- }
-
- matched = 0;
- del_fp = NULL;
- DN_FIB_SCAN_KEY(f, fp, key) {
- struct dn_fib_info *fi = DN_FIB_INFO(f);
-
- if (f->fn_state & DN_S_ZOMBIE)
- return -ESRCH;
-
- matched++;
-
- if (del_fp == NULL &&
- (!r->rtm_type || f->fn_type == r->rtm_type) &&
- (r->rtm_scope == RT_SCOPE_NOWHERE || f->fn_scope == r->rtm_scope) &&
- (!r->rtm_protocol ||
- fi->fib_protocol == r->rtm_protocol) &&
- dn_fib_nh_match(r, n, attrs, fi) == 0)
- del_fp = fp;
- }
-
- if (del_fp) {
- f = *del_fp;
- dn_rtmsg_fib(RTM_DELROUTE, f, z, tb->n, n, req);
-
- if (matched != 1) {
- write_lock_bh(&dn_fib_tables_lock);
- *del_fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- if (f->fn_state & DN_S_ACCESSED)
- dn_rt_cache_flush(-1);
- dn_free_node(f);
- dz->dz_nent--;
- } else {
- f->fn_state |= DN_S_ZOMBIE;
- if (f->fn_state & DN_S_ACCESSED) {
- f->fn_state &= ~DN_S_ACCESSED;
- dn_rt_cache_flush(-1);
- }
- if (++dn_fib_hash_zombies > 128)
- dn_fib_flush();
- }
-
- return 0;
- }
-
- return -ESRCH;
-}
-
-static inline int dn_flush_list(struct dn_fib_node **fp, int z, struct dn_hash *table)
-{
- int found = 0;
- struct dn_fib_node *f;
-
- while((f = *fp) != NULL) {
- struct dn_fib_info *fi = DN_FIB_INFO(f);
-
- if (fi && ((f->fn_state & DN_S_ZOMBIE) || (fi->fib_flags & RTNH_F_DEAD))) {
- write_lock_bh(&dn_fib_tables_lock);
- *fp = f->fn_next;
- write_unlock_bh(&dn_fib_tables_lock);
-
- dn_free_node(f);
- found++;
- continue;
- }
- fp = &f->fn_next;
- }
-
- return found;
-}
-
-static int dn_fib_table_flush(struct dn_fib_table *tb)
-{
- struct dn_hash *table = (struct dn_hash *)tb->data;
- struct dn_zone *dz;
- int found = 0;
-
- dn_fib_hash_zombies = 0;
- for(dz = table->dh_zone_list; dz; dz = dz->dz_next) {
- int i;
- int tmp = 0;
- for(i = dz->dz_divisor-1; i >= 0; i--)
- tmp += dn_flush_list(&dz->dz_hash[i], dz->dz_order, table);
- dz->dz_nent -= tmp;
- found += tmp;
- }
-
- return found;
-}
-
-static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowidn *flp, struct dn_fib_res *res)
-{
- int err;
- struct dn_zone *dz;
- struct dn_hash *t = (struct dn_hash *)tb->data;
-
- read_lock(&dn_fib_tables_lock);
- for(dz = t->dh_zone_list; dz; dz = dz->dz_next) {
- struct dn_fib_node *f;
- dn_fib_key_t k = dz_key(flp->daddr, dz);
-
- for(f = dz_chain(k, dz); f; f = f->fn_next) {
- if (!dn_key_eq(k, f->fn_key)) {
- if (dn_key_leq(k, f->fn_key))
- break;
- else
- continue;
- }
-
- f->fn_state |= DN_S_ACCESSED;
-
- if (f->fn_state&DN_S_ZOMBIE)
- continue;
-
- if (f->fn_scope < flp->flowidn_scope)
- continue;
-
- err = dn_fib_semantic_match(f->fn_type, DN_FIB_INFO(f), flp, res);
-
- if (err == 0) {
- res->type = f->fn_type;
- res->scope = f->fn_scope;
- res->prefixlen = dz->dz_order;
- goto out;
- }
- if (err < 0)
- goto out;
- }
- }
- err = 1;
-out:
- read_unlock(&dn_fib_tables_lock);
- return err;
-}
-
-
-struct dn_fib_table *dn_fib_get_table(u32 n, int create)
-{
- struct dn_fib_table *t;
- unsigned int h;
-
- if (n < RT_TABLE_MIN)
- return NULL;
-
- if (n > RT_TABLE_MAX)
- return NULL;
-
- h = n & (DN_FIB_TABLE_HASHSZ - 1);
- rcu_read_lock();
- hlist_for_each_entry_rcu(t, &dn_fib_table_hash[h], hlist) {
- if (t->n == n) {
- rcu_read_unlock();
- return t;
- }
- }
- rcu_read_unlock();
-
- if (!create)
- return NULL;
-
- if (in_interrupt()) {
- net_dbg_ratelimited("DECnet: BUG! Attempt to create routing table from interrupt\n");
- return NULL;
- }
-
- t = kzalloc(sizeof(struct dn_fib_table) + sizeof(struct dn_hash),
- GFP_KERNEL);
- if (t == NULL)
- return NULL;
-
- t->n = n;
- t->insert = dn_fib_table_insert;
- t->delete = dn_fib_table_delete;
- t->lookup = dn_fib_table_lookup;
- t->flush = dn_fib_table_flush;
- t->dump = dn_fib_table_dump;
- hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]);
-
- return t;
-}
-
-struct dn_fib_table *dn_fib_empty_table(void)
-{
- u32 id;
-
- for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++)
- if (dn_fib_get_table(id, 0) == NULL)
- return dn_fib_get_table(id, 1);
- return NULL;
-}
-
-void dn_fib_flush(void)
-{
- int flushed = 0;
- struct dn_fib_table *tb;
- unsigned int h;
-
- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
- hlist_for_each_entry(tb, &dn_fib_table_hash[h], hlist)
- flushed += tb->flush(tb);
- }
-
- if (flushed)
- dn_rt_cache_flush(-1);
-}
-
-void __init dn_fib_table_init(void)
-{
- dn_hash_kmem = kmem_cache_create("dn_fib_info_cache",
- sizeof(struct dn_fib_info),
- 0, SLAB_HWCACHE_ALIGN,
- NULL);
-}
-
-void __exit dn_fib_table_cleanup(void)
-{
- struct dn_fib_table *t;
- struct hlist_node *next;
- unsigned int h;
-
- write_lock(&dn_fib_tables_lock);
- for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) {
- hlist_for_each_entry_safe(t, next, &dn_fib_table_hash[h],
- hlist) {
- hlist_del(&t->hlist);
- kfree(t);
- }
- }
- write_unlock(&dn_fib_tables_lock);
-}
diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c
deleted file mode 100644
index aa4155875ca8..000000000000
--- a/net/decnet/dn_timer.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Socket Timer Functions
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse : Made keepalive timer part of the same
- * timer idea.
- * Steve Whitehouse : Added checks for sk->sock_readers
- * David S. Miller : New socket locking
- * Steve Whitehouse : Timer grabs socket ref.
- */
-#include <linux/net.h>
-#include <linux/socket.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
-#include <net/sock.h>
-#include <linux/atomic.h>
-#include <linux/jiffies.h>
-#include <net/flow.h>
-#include <net/dn.h>
-
-/*
- * Slow timer is for everything else (n * 500mS)
- */
-
-#define SLOW_INTERVAL (HZ/2)
-
-static void dn_slow_timer(struct timer_list *t);
-
-void dn_start_slow_timer(struct sock *sk)
-{
- timer_setup(&sk->sk_timer, dn_slow_timer, 0);
- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
-}
-
-void dn_stop_slow_timer(struct sock *sk)
-{
- sk_stop_timer(sk, &sk->sk_timer);
-}
-
-static void dn_slow_timer(struct timer_list *t)
-{
- struct sock *sk = from_timer(sk, t, sk_timer);
- struct dn_scp *scp = DN_SK(sk);
-
- bh_lock_sock(sk);
-
- if (sock_owned_by_user(sk)) {
- sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 10);
- goto out;
- }
-
- /*
- * The persist timer is the standard slow timer used for retransmits
- * in both connection establishment and disconnection as well as
- * in the RUN state. The different states are catered for by changing
- * the function pointer in the socket. Setting the timer to a value
- * of zero turns it off. We allow the persist_fxn to turn the
- * timer off in a permant way by returning non-zero, so that
- * timer based routines may remove sockets. This is why we have a
- * sock_hold()/sock_put() around the timer to prevent the socket
- * going away in the middle.
- */
- if (scp->persist && scp->persist_fxn) {
- if (scp->persist <= SLOW_INTERVAL) {
- scp->persist = 0;
-
- if (scp->persist_fxn(sk))
- goto out;
- } else {
- scp->persist -= SLOW_INTERVAL;
- }
- }
-
- /*
- * Check for keepalive timeout. After the other timer 'cos if
- * the previous timer caused a retransmit, we don't need to
- * do this. scp->stamp is the last time that we sent a packet.
- * The keepalive function sends a link service packet to the
- * other end. If it remains unacknowledged, the standard
- * socket timers will eventually shut the socket down. Each
- * time we do this, scp->stamp will be updated, thus
- * we won't try and send another until scp->keepalive has passed
- * since the last successful transmission.
- */
- if (scp->keepalive && scp->keepalive_fxn && (scp->state == DN_RUN)) {
- if (time_after_eq(jiffies, scp->stamp + scp->keepalive))
- scp->keepalive_fxn(sk);
- }
-
- sk_reset_timer(sk, &sk->sk_timer, jiffies + SLOW_INTERVAL);
-out:
- bh_unlock_sock(sk);
- sock_put(sk);
-}
diff --git a/net/decnet/netfilter/Kconfig b/net/decnet/netfilter/Kconfig
deleted file mode 100644
index 8d7c109d5109..000000000000
--- a/net/decnet/netfilter/Kconfig
+++ /dev/null
@@ -1,16 +0,0 @@
-#
-# DECnet netfilter configuration
-#
-
-menu "DECnet: Netfilter Configuration"
- depends on DECNET && NETFILTER
- depends on NETFILTER_ADVANCED
-
-config DECNET_NF_GRABULATOR
- tristate "Routing message grabulator (for userland routing daemon)"
- help
- Enable this module if you want to use the userland DECnet routing
- daemon. You will also need to enable routing support for DECnet
- unless you just want to monitor routing messages from other nodes.
-
-endmenu
diff --git a/net/decnet/netfilter/Makefile b/net/decnet/netfilter/Makefile
deleted file mode 100644
index b579e52130aa..000000000000
--- a/net/decnet/netfilter/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-#
-# Makefile for DECnet netfilter modules
-#
-
-obj-$(CONFIG_DECNET_NF_GRABULATOR) += dn_rtmsg.o
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
deleted file mode 100644
index a4faacadd8a8..000000000000
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ /dev/null
@@ -1,160 +0,0 @@
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet Routing Message Grabulator
- *
- * (C) 2000 ChyGwyn Limited - http://www.chygwyn.com/
- * This code may be copied under the GPL v.2 or at your option
- * any later version.
- *
- * Author: Steven Whitehouse <steve@chygwyn.com>
- *
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/spinlock.h>
-#include <net/netlink.h>
-#include <linux/netfilter_decnet.h>
-
-#include <net/sock.h>
-#include <net/flow.h>
-#include <net/dn.h>
-#include <net/dn_route.h>
-
-static struct sock *dnrmg = NULL;
-
-
-static struct sk_buff *dnrmg_build_message(struct sk_buff *rt_skb, int *errp)
-{
- struct sk_buff *skb = NULL;
- size_t size;
- sk_buff_data_t old_tail;
- struct nlmsghdr *nlh;
- unsigned char *ptr;
- struct nf_dn_rtmsg *rtm;
-
- size = NLMSG_ALIGN(rt_skb->len) +
- NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg));
- skb = nlmsg_new(size, GFP_ATOMIC);
- if (!skb) {
- *errp = -ENOMEM;
- return NULL;
- }
- old_tail = skb->tail;
- nlh = nlmsg_put(skb, 0, 0, 0, size, 0);
- if (!nlh) {
- kfree_skb(skb);
- *errp = -ENOMEM;
- return NULL;
- }
- rtm = (struct nf_dn_rtmsg *)nlmsg_data(nlh);
- rtm->nfdn_ifindex = rt_skb->dev->ifindex;
- ptr = NFDN_RTMSG(rtm);
- skb_copy_from_linear_data(rt_skb, ptr, rt_skb->len);
- nlh->nlmsg_len = skb->tail - old_tail;
- return skb;
-}
-
-static void dnrmg_send_peer(struct sk_buff *skb)
-{
- struct sk_buff *skb2;
- int status = 0;
- int group = 0;
- unsigned char flags = *skb->data;
-
- switch (flags & DN_RT_CNTL_MSK) {
- case DN_RT_PKT_L1RT:
- group = DNRNG_NLGRP_L1;
- break;
- case DN_RT_PKT_L2RT:
- group = DNRNG_NLGRP_L2;
- break;
- default:
- return;
- }
-
- skb2 = dnrmg_build_message(skb, &status);
- if (skb2 == NULL)
- return;
- NETLINK_CB(skb2).dst_group = group;
- netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC);
-}
-
-
-static unsigned int dnrmg_hook(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- dnrmg_send_peer(skb);
- return NF_ACCEPT;
-}
-
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err), NULL); return; } while (0)
-
-static inline void dnrmg_receive_user_skb(struct sk_buff *skb)
-{
- struct nlmsghdr *nlh = nlmsg_hdr(skb);
-
- if (skb->len < sizeof(*nlh) ||
- nlh->nlmsg_len < sizeof(*nlh) ||
- skb->len < nlh->nlmsg_len)
- return;
-
- if (!netlink_capable(skb, CAP_NET_ADMIN))
- RCV_SKB_FAIL(-EPERM);
-
- /* Eventually we might send routing messages too */
-
- RCV_SKB_FAIL(-EINVAL);
-}
-
-static const struct nf_hook_ops dnrmg_ops = {
- .hook = dnrmg_hook,
- .pf = NFPROTO_DECNET,
- .hooknum = NF_DN_ROUTE,
- .priority = NF_DN_PRI_DNRTMSG,
-};
-
-static int __init dn_rtmsg_init(void)
-{
- int rv = 0;
- struct netlink_kernel_cfg cfg = {
- .groups = DNRNG_NLGRP_MAX,
- .input = dnrmg_receive_user_skb,
- };
-
- dnrmg = netlink_kernel_create(&init_net, NETLINK_DNRTMSG, &cfg);
- if (dnrmg == NULL) {
- printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket");
- return -ENOMEM;
- }
-
- rv = nf_register_net_hook(&init_net, &dnrmg_ops);
- if (rv) {
- netlink_kernel_release(dnrmg);
- }
-
- return rv;
-}
-
-static void __exit dn_rtmsg_fini(void)
-{
- nf_unregister_net_hook(&init_net, &dnrmg_ops);
- netlink_kernel_release(dnrmg);
-}
-
-
-MODULE_DESCRIPTION("DECnet Routing Message Grabulator");
-MODULE_AUTHOR("Steven Whitehouse <steve@chygwyn.com>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG);
-
-module_init(dn_rtmsg_init);
-module_exit(dn_rtmsg_fini);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
deleted file mode 100644
index 55bf64a22b59..000000000000
--- a/net/decnet/sysctl_net_decnet.c
+++ /dev/null
@@ -1,373 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DECnet An implementation of the DECnet protocol suite for the LINUX
- * operating system. DECnet is implemented using the BSD Socket
- * interface as the means of communication with the user level.
- *
- * DECnet sysctl support functions
- *
- * Author: Steve Whitehouse <SteveW@ACM.org>
- *
- *
- * Changes:
- * Steve Whitehouse - C99 changes and default device handling
- * Steve Whitehouse - Memory buffer settings, like the tcp ones
- *
- */
-#include <linux/mm.h>
-#include <linux/sysctl.h>
-#include <linux/fs.h>
-#include <linux/netdevice.h>
-#include <linux/string.h>
-#include <net/neighbour.h>
-#include <net/dst.h>
-#include <net/flow.h>
-
-#include <linux/uaccess.h>
-
-#include <net/dn.h>
-#include <net/dn_dev.h>
-#include <net/dn_route.h>
-
-
-int decnet_debug_level;
-int decnet_time_wait = 30;
-int decnet_dn_count = 1;
-int decnet_di_count = 3;
-int decnet_dr_count = 3;
-int decnet_log_martians = 1;
-int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
-
-/* Reasonable defaults, I hope, based on tcp's defaults */
-long sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
-int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
-int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
-
-#ifdef CONFIG_SYSCTL
-extern int decnet_dst_gc_interval;
-static int min_decnet_time_wait[] = { 5 };
-static int max_decnet_time_wait[] = { 600 };
-static int min_state_count[] = { 1 };
-static int max_state_count[] = { NSP_MAXRXTSHIFT };
-static int min_decnet_dst_gc_interval[] = { 1 };
-static int max_decnet_dst_gc_interval[] = { 60 };
-static int min_decnet_no_fc_max_cwnd[] = { NSP_MIN_WINDOW };
-static int max_decnet_no_fc_max_cwnd[] = { NSP_MAX_WINDOW };
-static char node_name[7] = "???";
-
-static struct ctl_table_header *dn_table_header = NULL;
-
-/*
- * ctype.h :-)
- */
-#define ISNUM(x) (((x) >= '0') && ((x) <= '9'))
-#define ISLOWER(x) (((x) >= 'a') && ((x) <= 'z'))
-#define ISUPPER(x) (((x) >= 'A') && ((x) <= 'Z'))
-#define ISALPHA(x) (ISLOWER(x) || ISUPPER(x))
-#define INVALID_END_CHAR(x) (ISNUM(x) || ISALPHA(x))
-
-static void strip_it(char *str)
-{
- for(;;) {
- switch (*str) {
- case ' ':
- case '\n':
- case '\r':
- case ':':
- *str = 0;
- /* Fallthrough */
- case 0:
- return;
- }
- str++;
- }
-}
-
-/*
- * Simple routine to parse an ascii DECnet address
- * into a network order address.
- */
-static int parse_addr(__le16 *addr, char *str)
-{
- __u16 area, node;
-
- while(*str && !ISNUM(*str)) str++;
-
- if (*str == 0)
- return -1;
-
- area = (*str++ - '0');
- if (ISNUM(*str)) {
- area *= 10;
- area += (*str++ - '0');
- }
-
- if (*str++ != '.')
- return -1;
-
- if (!ISNUM(*str))
- return -1;
-
- node = *str++ - '0';
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
- if (ISNUM(*str)) {
- node *= 10;
- node += (*str++ - '0');
- }
-
- if ((node > 1023) || (area > 63))
- return -1;
-
- if (INVALID_END_CHAR(*str))
- return -1;
-
- *addr = cpu_to_le16((area << 10) | node);
-
- return 0;
-}
-
-static int dn_node_address_handler(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- char addr[DN_ASCBUF_LEN];
- size_t len;
- __le16 dnaddr;
-
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
-
- if (copy_from_user(addr, buffer, len))
- return -EFAULT;
-
- addr[len] = 0;
- strip_it(addr);
-
- if (parse_addr(&dnaddr, addr))
- return -EINVAL;
-
- dn_dev_devices_off();
-
- decnet_address = dnaddr;
-
- dn_dev_devices_on();
-
- *ppos += len;
-
- return 0;
- }
-
- dn_addr2asc(le16_to_cpu(decnet_address), addr);
- len = strlen(addr);
- addr[len++] = '\n';
-
- if (len > *lenp) len = *lenp;
-
- if (copy_to_user(buffer, addr, len))
- return -EFAULT;
-
- *lenp = len;
- *ppos += len;
-
- return 0;
-}
-
-static int dn_def_dev_handler(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- size_t len;
- struct net_device *dev;
- char devname[17];
-
- if (!*lenp || (*ppos && !write)) {
- *lenp = 0;
- return 0;
- }
-
- if (write) {
- if (*lenp > 16)
- return -E2BIG;
-
- if (copy_from_user(devname, buffer, *lenp))
- return -EFAULT;
-
- devname[*lenp] = 0;
- strip_it(devname);
-
- dev = dev_get_by_name(&init_net, devname);
- if (dev == NULL)
- return -ENODEV;
-
- if (dev->dn_ptr == NULL) {
- dev_put(dev);
- return -ENODEV;
- }
-
- if (dn_dev_set_default(dev, 1)) {
- dev_put(dev);
- return -ENODEV;
- }
- *ppos += *lenp;
-
- return 0;
- }
-
- dev = dn_dev_get_default();
- if (dev == NULL) {
- *lenp = 0;
- return 0;
- }
-
- strcpy(devname, dev->name);
- dev_put(dev);
- len = strlen(devname);
- devname[len++] = '\n';
-
- if (len > *lenp) len = *lenp;
-
- if (copy_to_user(buffer, devname, len))
- return -EFAULT;
-
- *lenp = len;
- *ppos += len;
-
- return 0;
-}
-
-static struct ctl_table dn_table[] = {
- {
- .procname = "node_address",
- .maxlen = 7,
- .mode = 0644,
- .proc_handler = dn_node_address_handler,
- },
- {
- .procname = "node_name",
- .data = node_name,
- .maxlen = 7,
- .mode = 0644,
- .proc_handler = proc_dostring,
- },
- {
- .procname = "default_device",
- .maxlen = 16,
- .mode = 0644,
- .proc_handler = dn_def_dev_handler,
- },
- {
- .procname = "time_wait",
- .data = &decnet_time_wait,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_time_wait,
- .extra2 = &max_decnet_time_wait
- },
- {
- .procname = "dn_count",
- .data = &decnet_dn_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "di_count",
- .data = &decnet_di_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "dr_count",
- .data = &decnet_dr_count,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_state_count,
- .extra2 = &max_state_count
- },
- {
- .procname = "dst_gc_interval",
- .data = &decnet_dst_gc_interval,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_dst_gc_interval,
- .extra2 = &max_decnet_dst_gc_interval
- },
- {
- .procname = "no_fc_max_cwnd",
- .data = &decnet_no_fc_max_cwnd,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &min_decnet_no_fc_max_cwnd,
- .extra2 = &max_decnet_no_fc_max_cwnd
- },
- {
- .procname = "decnet_mem",
- .data = &sysctl_decnet_mem,
- .maxlen = sizeof(sysctl_decnet_mem),
- .mode = 0644,
- .proc_handler = proc_doulongvec_minmax
- },
- {
- .procname = "decnet_rmem",
- .data = &sysctl_decnet_rmem,
- .maxlen = sizeof(sysctl_decnet_rmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "decnet_wmem",
- .data = &sysctl_decnet_wmem,
- .maxlen = sizeof(sysctl_decnet_wmem),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "debug",
- .data = &decnet_debug_level,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-
-void dn_register_sysctl(void)
-{
- dn_table_header = register_net_sysctl(&init_net, "net/decnet", dn_table);
-}
-
-void dn_unregister_sysctl(void)
-{
- unregister_net_sysctl_table(dn_table_header);
-}
-
-#else /* CONFIG_SYSCTL */
-void dn_unregister_sysctl(void)
-{
-}
-void dn_register_sysctl(void)
-{
-}
-
-#endif
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 4183e4ba27a5..81f620a3c32b 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -8,6 +8,7 @@ config NET_DSA
tristate "Distributed Switch Architecture"
depends on HAVE_NET_DSA && MAY_USE_DEVLINK
depends on BRIDGE || BRIDGE=n
+ select GRO_CELLS
select NET_SWITCHDEV
select PHYLINK
---help---
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 601534a5bfe8..598200e9d522 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -191,7 +191,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
if (dsa_skb_defer_rx_timestamp(p, skb))
return 0;
- netif_receive_skb(skb);
+ gro_cells_receive(&p->gcells, skb);
return 0;
}
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index b036c55f5cb1..05aadb25e294 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -261,6 +261,7 @@ static int dsa_port_setup(struct dsa_port *dp)
int err = 0;
memset(&dp->devlink_port, 0, sizeof(dp->devlink_port));
+ dp->mac = of_get_mac_address(dp->dn);
if (dp->type != DSA_PORT_TYPE_UNUSED)
err = devlink_port_register(ds->devlink, &dp->devlink_port,
@@ -595,6 +596,7 @@ static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn)
struct net_device *master;
master = of_find_net_device_by_node(ethernet);
+ of_node_put(ethernet);
if (!master)
return -EPROBE_DEFER;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 3964c6f7a7c0..79d17507609a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -15,6 +15,7 @@
#include <linux/netdevice.h>
#include <linux/netpoll.h>
#include <net/dsa.h>
+#include <net/gro_cells.h>
enum {
DSA_NOTIFIER_AGEING_TIME,
@@ -72,6 +73,8 @@ struct dsa_slave_priv {
struct pcpu_sw_netstats *stats64;
+ struct gro_cells gcells;
+
/* DSA port data, such as switch, port index, etc. */
struct dsa_port *dp;
diff --git a/net/dsa/master.c b/net/dsa/master.c
index aae478d61101..22e9ce6e51a4 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -87,8 +87,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
struct dsa_switch *ds = cpu_dp->ds;
int port = cpu_dp->index;
int len = ETH_GSTRING_LEN;
- int mcount = 0, count;
- unsigned int i;
+ int mcount = 0, count, i;
uint8_t pfx[4];
uint8_t *ndata;
@@ -118,6 +117,8 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset,
*/
ds->ops->get_strings(ds, port, stringset, ndata);
count = ds->ops->get_sset_count(ds, port, stringset);
+ if (count < 0)
+ return;
for (i = 0; i < count; i++) {
memmove(ndata + (i * len + sizeof(pfx)),
ndata + i * len, len - sizeof(pfx));
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 8ee28b6016d8..f7c122357a96 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -598,13 +598,15 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset)
struct dsa_switch *ds = dp->ds;
if (sset == ETH_SS_STATS) {
- int count;
+ int count = 0;
- count = 4;
- if (ds->ops->get_sset_count)
- count += ds->ops->get_sset_count(ds, dp->index, sset);
+ if (ds->ops->get_sset_count) {
+ count = ds->ops->get_sset_count(ds, dp->index, sset);
+ if (count < 0)
+ return count;
+ }
- return count;
+ return count + 4;
}
return -EOPNOTSUPP;
@@ -1224,13 +1226,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
* use the switch internal MDIO bus instead
*/
ret = dsa_slave_phy_connect(slave_dev, dp->index);
- if (ret) {
- netdev_err(slave_dev,
- "failed to connect to port %d: %d\n",
- dp->index, ret);
- phylink_destroy(dp->pl);
- return ret;
- }
+ }
+ if (ret) {
+ netdev_err(slave_dev, "failed to connect to PHY: %pe\n",
+ ERR_PTR(ret));
+ phylink_destroy(dp->pl);
}
return ret;
@@ -1313,7 +1313,10 @@ int dsa_slave_create(struct dsa_port *port)
slave_dev->features = master->vlan_features | NETIF_F_HW_TC;
slave_dev->hw_features |= NETIF_F_HW_TC;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
- eth_hw_addr_inherit(slave_dev, master);
+ if (port->mac && is_valid_ether_addr(port->mac))
+ ether_addr_copy(slave_dev->dev_addr, port->mac);
+ else
+ eth_hw_addr_inherit(slave_dev, master);
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
@@ -1334,6 +1337,11 @@ int dsa_slave_create(struct dsa_port *port)
free_netdev(slave_dev);
return -ENOMEM;
}
+
+ ret = gro_cells_init(&p->gcells, slave_dev);
+ if (ret)
+ goto out_free;
+
p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
p->xmit = cpu_dp->tag_ops->xmit;
@@ -1344,7 +1352,7 @@ int dsa_slave_create(struct dsa_port *port)
ret = dsa_slave_phy_setup(slave_dev);
if (ret) {
netdev_err(master, "error %d setting up slave phy\n", ret);
- goto out_free;
+ goto out_gcells;
}
dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
@@ -1363,6 +1371,8 @@ out_phy:
phylink_disconnect_phy(p->dp->pl);
rtnl_unlock();
phylink_destroy(p->dp->pl);
+out_gcells:
+ gro_cells_destroy(&p->gcells);
out_free:
free_percpu(p->stats64);
free_netdev(slave_dev);
@@ -1383,6 +1393,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
unregister_netdev(slave_dev);
phylink_destroy(dp->pl);
+ gro_cells_destroy(&p->gcells);
free_percpu(p->stats64);
free_netdev(slave_dev);
}
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 6574425ad94c..1f57dce969f7 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -20,6 +20,7 @@
#define MTK_HDR_LEN 4
#define MTK_HDR_XMIT_UNTAGGED 0
#define MTK_HDR_XMIT_TAGGED_TPID_8100 1
+#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2
#define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0)
#define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0)
#define MTK_HDR_XMIT_SA_DIS BIT(6)
@@ -28,8 +29,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
+ u8 xmit_tpid;
u8 *mtk_tag;
- bool is_vlan_skb = true;
unsigned char *dest = eth_hdr(skb)->h_dest;
bool is_multicast_skb = is_multicast_ether_addr(dest) &&
!is_broadcast_ether_addr(dest);
@@ -40,13 +41,20 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
* the both special and VLAN tag at the same time and then look up VLAN
* table with VID.
*/
- if (!skb_vlan_tagged(skb)) {
+ switch (skb->protocol) {
+ case htons(ETH_P_8021Q):
+ xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100;
+ break;
+ case htons(ETH_P_8021AD):
+ xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8;
+ break;
+ default:
if (skb_cow_head(skb, MTK_HDR_LEN) < 0)
return NULL;
+ xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
skb_push(skb, MTK_HDR_LEN);
memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
- is_vlan_skb = false;
}
mtk_tag = skb->data + 2 * ETH_ALEN;
@@ -54,8 +62,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
/* Mark tag attribute on special tag insertion to notify hardware
* whether that's a combined special tag with 802.1Q header.
*/
- mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
- MTK_HDR_XMIT_UNTAGGED;
+ mtk_tag[0] = xmit_tpid;
mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
/* Disable SA learning for multicast frames */
@@ -63,7 +70,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
/* Tag control information is kept for 802.1Q */
- if (!is_vlan_skb) {
+ if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
mtk_tag[2] = 0;
mtk_tag[3] = 0;
}
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 04b5450c5a55..adfb49760678 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -207,17 +207,18 @@ static void hsr_deliver_master(struct sk_buff *skb, struct net_device *dev,
struct hsr_node *node_src)
{
bool was_multicast_frame;
- int res;
+ int res, recv_len;
was_multicast_frame = (skb->pkt_type == PACKET_MULTICAST);
hsr_addr_subst_source(node_src, skb);
skb_pull(skb, ETH_HLEN);
+ recv_len = skb->len;
res = netif_rx(skb);
if (res == NET_RX_DROP) {
dev->stats.rx_dropped++;
} else {
dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ dev->stats.rx_bytes += recv_len;
if (was_multicast_frame)
dev->stats.multicast++;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index f5a3601948ca..37795502bb51 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -310,7 +310,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
node_dst = find_node_by_AddrA(&port->hsr->node_db, eth_hdr(skb)->h_dest);
if (!node_dst) {
- WARN_ONCE(1, "%s: Unknown node\n", __func__);
+ if (net_ratelimit())
+ netdev_err(skb->dev, "%s: Unknown node\n", __func__);
return;
}
if (port->type != node_dst->AddrB_port)
diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c
index d3cbb3258718..7531cb1665d2 100644
--- a/net/ieee802154/nl-mac.c
+++ b/net/ieee802154/nl-mac.c
@@ -559,9 +559,7 @@ ieee802154_llsec_parse_key_id(struct genl_info *info,
desc->mode = nla_get_u8(info->attrs[IEEE802154_ATTR_LLSEC_KEY_MODE]);
if (desc->mode == IEEE802154_SCF_KEY_IMPLICIT) {
- if (!info->attrs[IEEE802154_ATTR_PAN_ID] &&
- !(info->attrs[IEEE802154_ATTR_SHORT_ADDR] ||
- info->attrs[IEEE802154_ATTR_HW_ADDR]))
+ if (!info->attrs[IEEE802154_ATTR_PAN_ID])
return -EINVAL;
desc->device_addr.pan_id = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_PAN_ID]);
@@ -570,6 +568,9 @@ ieee802154_llsec_parse_key_id(struct genl_info *info,
desc->device_addr.mode = IEEE802154_ADDR_SHORT;
desc->device_addr.short_addr = nla_get_shortaddr(info->attrs[IEEE802154_ATTR_SHORT_ADDR]);
} else {
+ if (!info->attrs[IEEE802154_ATTR_HW_ADDR])
+ return -EINVAL;
+
desc->device_addr.mode = IEEE802154_ADDR_LONG;
desc->device_addr.extended_addr = nla_get_hwaddr(info->attrs[IEEE802154_ATTR_HW_ADDR]);
}
@@ -687,8 +688,10 @@ int ieee802154_llsec_getparams(struct sk_buff *skb, struct genl_info *info)
nla_put_u8(msg, IEEE802154_ATTR_LLSEC_SECLEVEL, params.out_level) ||
nla_put_u32(msg, IEEE802154_ATTR_LLSEC_FRAME_COUNTER,
be32_to_cpu(params.frame_counter)) ||
- ieee802154_llsec_fill_key_id(msg, &params.out_key))
+ ieee802154_llsec_fill_key_id(msg, &params.out_key)) {
+ rc = -ENOBUFS;
goto out_free;
+ }
dev_put(dev);
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index b231e40f006a..ca1dd9ff07ab 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -249,8 +249,10 @@ int ieee802154_add_iface(struct sk_buff *skb, struct genl_info *info)
}
if (nla_put_string(msg, IEEE802154_ATTR_PHY_NAME, wpan_phy_name(phy)) ||
- nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name))
+ nla_put_string(msg, IEEE802154_ATTR_DEV_NAME, dev->name)) {
+ rc = -EMSGSIZE;
goto nla_put_failure;
+ }
dev_put(dev);
wpan_phy_put(phy);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 99f6c254ea77..3407ee1159f7 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -836,8 +836,13 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
goto nla_put_failure;
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ goto out;
+
if (nl802154_get_llsec_params(msg, rdev, wpan_dev) < 0)
goto nla_put_failure;
+
+out:
#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
genlmsg_end(msg, hdr);
@@ -1310,19 +1315,20 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
nl802154_dev_addr_policy, NULL))
return -EINVAL;
- if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
- !attrs[NL802154_DEV_ADDR_ATTR_MODE] ||
- !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] ||
- attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]))
+ if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || !attrs[NL802154_DEV_ADDR_ATTR_MODE])
return -EINVAL;
addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]);
addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]);
switch (addr->mode) {
case NL802154_DEV_ADDR_SHORT:
+ if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT])
+ return -EINVAL;
addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]);
break;
case NL802154_DEV_ADDR_EXTENDED:
+ if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])
+ return -EINVAL;
addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]);
break;
default:
@@ -1402,6 +1408,9 @@ static int nl802154_set_llsec_params(struct sk_buff *skb,
u32 changed = 0;
int ret;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (info->attrs[NL802154_ATTR_SEC_ENABLED]) {
u8 enabled;
@@ -1450,7 +1459,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1508,6 +1517,11 @@ nl802154_dump_llsec_key(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -1562,7 +1576,8 @@ static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info)
struct ieee802154_llsec_key_id id = { };
u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { };
- if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_KEY] ||
+ nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
nl802154_key_policy, info->extack))
return -EINVAL;
@@ -1612,7 +1627,8 @@ static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info)
struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1];
struct ieee802154_llsec_key_id id;
- if (nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_KEY] ||
+ nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_KEY],
nl802154_key_policy, info->extack))
return -EINVAL;
@@ -1634,7 +1650,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1678,6 +1694,11 @@ nl802154_dump_llsec_dev(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -1765,6 +1786,9 @@ static int nl802154_add_llsec_dev(struct sk_buff *skb, struct genl_info *info)
struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
struct ieee802154_llsec_device dev_desc;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (ieee802154_llsec_parse_device(info->attrs[NL802154_ATTR_SEC_DEVICE],
&dev_desc) < 0)
return -EINVAL;
@@ -1780,7 +1804,8 @@ static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info)
struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
__le64 extended_addr;
- if (nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_DEVICE] ||
+ nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVICE],
nl802154_dev_policy, info->extack))
return -EINVAL;
@@ -1803,7 +1828,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -1850,6 +1875,11 @@ nl802154_dump_llsec_devkey(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -1907,6 +1937,9 @@ static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info
struct ieee802154_llsec_device_key key;
__le64 extended_addr;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
@@ -1940,7 +1973,8 @@ static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info
struct ieee802154_llsec_device_key key;
__le64 extended_addr;
- if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
+ if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
+ nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
info->attrs[NL802154_ATTR_SEC_DEVKEY],
nl802154_devkey_policy, info->extack))
return -EINVAL;
@@ -1971,7 +2005,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid,
hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
if (!hdr)
- return -1;
+ return -ENOBUFS;
if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
goto nla_put_failure;
@@ -2015,6 +2049,11 @@ nl802154_dump_llsec_seclevel(struct sk_buff *skb, struct netlink_callback *cb)
if (err)
return err;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ err = skb->len;
+ goto out_err;
+ }
+
if (!wpan_dev->netdev) {
err = -EINVAL;
goto out_err;
@@ -2100,6 +2139,9 @@ static int nl802154_add_llsec_seclevel(struct sk_buff *skb,
struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
struct ieee802154_llsec_seclevel sl;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL],
&sl) < 0)
return -EINVAL;
@@ -2115,6 +2157,9 @@ static int nl802154_del_llsec_seclevel(struct sk_buff *skb,
struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
struct ieee802154_llsec_seclevel sl;
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR)
+ return -EOPNOTSUPP;
+
if (!info->attrs[NL802154_ATTR_SEC_LEVEL] ||
llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL],
&sl) < 0)
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 89819745e482..ee1536de5fca 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -213,8 +213,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len)
int err = 0;
struct net_device *dev = NULL;
- if (len < sizeof(*uaddr))
- return -EINVAL;
+ err = ieee802154_sockaddr_check_size(uaddr, len);
+ if (err < 0)
+ return err;
uaddr = (struct sockaddr_ieee802154 *)_uaddr;
if (uaddr->family != AF_IEEE802154)
@@ -284,6 +285,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
err = -EMSGSIZE;
goto out_dev;
}
+ if (!size) {
+ err = 0;
+ goto out_dev;
+ }
hlen = LL_RESERVED_SPACE(dev);
tlen = dev->needed_tailroom;
@@ -509,11 +514,14 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len)
ro->bound = 0;
- if (len < sizeof(*addr))
+ err = ieee802154_sockaddr_check_size(addr, len);
+ if (err < 0)
goto out;
- if (addr->family != AF_IEEE802154)
+ if (addr->family != AF_IEEE802154) {
+ err = -EINVAL;
goto out;
+ }
ieee802154_addr_from_sa(&haddr, &addr->addr);
dev = ieee802154_get_dev(sock_net(sk), &haddr);
@@ -580,8 +588,9 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr,
struct dgram_sock *ro = dgram_sk(sk);
int err = 0;
- if (len < sizeof(*addr))
- return -EINVAL;
+ err = ieee802154_sockaddr_check_size(addr, len);
+ if (err < 0)
+ return err;
if (addr->family != AF_IEEE802154)
return -EINVAL;
@@ -620,6 +629,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
struct ieee802154_mac_cb *cb;
struct dgram_sock *ro = dgram_sk(sk);
struct ieee802154_addr dst_addr;
+ DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name);
int hlen, tlen;
int err;
@@ -628,10 +638,20 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
return -EOPNOTSUPP;
}
- if (!ro->connected && !msg->msg_name)
- return -EDESTADDRREQ;
- else if (ro->connected && msg->msg_name)
- return -EISCONN;
+ if (msg->msg_name) {
+ if (ro->connected)
+ return -EISCONN;
+ if (msg->msg_namelen < IEEE802154_MIN_NAMELEN)
+ return -EINVAL;
+ err = ieee802154_sockaddr_check_size(daddr, msg->msg_namelen);
+ if (err < 0)
+ return err;
+ ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
+ } else {
+ if (!ro->connected)
+ return -EDESTADDRREQ;
+ dst_addr = ro->dst_addr;
+ }
if (!ro->bound)
dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154);
@@ -667,16 +687,6 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
cb = mac_cb_init(skb);
cb->type = IEEE802154_FC_TYPE_DATA;
cb->ackreq = ro->want_ack;
-
- if (msg->msg_name) {
- DECLARE_SOCKADDR(struct sockaddr_ieee802154*,
- daddr, msg->msg_name);
-
- ieee802154_addr_from_sa(&dst_addr, &daddr->addr);
- } else {
- dst_addr = ro->dst_addr;
- }
-
cb->secen = ro->secen;
cb->secen_override = ro->secen_override;
cb->seclevel = ro->seclevel;
@@ -1002,6 +1012,11 @@ static const struct proto_ops ieee802154_dgram_ops = {
#endif
};
+static void ieee802154_sock_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
+}
+
/* Create a socket. Initialise the socket, blank the addresses
* set the state.
*/
@@ -1042,7 +1057,7 @@ static int ieee802154_create(struct net *net, struct socket *sock,
sock->ops = ops;
sock_init_data(sock, sk);
- /* FIXME: sk->sk_destruct */
+ sk->sk_destruct = ieee802154_sock_destruct;
sk->sk_family = PF_IEEE802154;
/* Checksums on by default */
diff --git a/net/ife/ife.c b/net/ife/ife.c
index 13bbf8cb6a39..be05b690b9ef 100644
--- a/net/ife/ife.c
+++ b/net/ife/ife.c
@@ -82,6 +82,7 @@ void *ife_decode(struct sk_buff *skb, u16 *metalen)
if (unlikely(!pskb_may_pull(skb, total_pull)))
return NULL;
+ ifehdr = (struct ifeheadr *)(skb->data + skb->dev->hard_header_len);
skb_set_mac_header(skb, total_pull);
__skb_pull(skb, total_pull);
*metalen = ifehdrln - IFE_METAHDRLEN;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 2e12f848203a..8acfa1487478 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -388,6 +388,16 @@ config INET_IPCOMP
If unsure, say Y.
+config INET_TABLE_PERTURB_ORDER
+ int "INET: Source port perturbation table size (as power of 2)" if EXPERT
+ default 16
+ help
+ Source port perturbation table size (as power of 2) for
+ RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm.
+
+ The default is almost always what you want.
+ Only change this if you know what you are doing.
+
config INET_XFRM_TUNNEL
tristate
select INET_TUNNEL
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bbf3b3daa999..7c902a1efbbf 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -157,7 +157,7 @@ void inet_sock_destruct(struct sock *sk)
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
- dst_release(sk->sk_rx_dst);
+ dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
sk_refcnt_debug_dec(sk);
}
EXPORT_SYMBOL(inet_sock_destruct);
@@ -218,7 +218,7 @@ int inet_listen(struct socket *sock, int backlog)
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
@@ -578,6 +578,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending += writebias;
+ sk->sk_wait_pending++;
/* Basic assumption: if someone sets sk->sk_err, he _must_
* change state of the socket from TCP_SYN_*.
@@ -593,6 +594,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias)
}
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending -= writebias;
+ sk->sk_wait_pending--;
return timeo;
}
@@ -865,7 +867,7 @@ int inet_shutdown(struct socket *sock, int how)
EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
/* fall through */
default:
- sk->sk_shutdown |= how;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how);
if (sk->sk_prot->shutdown)
sk->sk_prot->shutdown(sk, how);
break;
@@ -1209,7 +1211,7 @@ static int inet_sk_reselect_saddr(struct sock *sk)
if (new_saddr == old_saddr)
return 0;
- if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
@@ -1264,7 +1266,7 @@ int inet_sk_rebuild_header(struct sock *sk)
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
- if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
@@ -1338,8 +1340,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb,
}
ops = rcu_dereference(inet_offloads[proto]);
- if (likely(ops && ops->callbacks.gso_segment))
+ if (likely(ops && ops->callbacks.gso_segment)) {
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
+ }
if (IS_ERR_OR_NULL(segs))
goto out;
@@ -1675,12 +1680,7 @@ static const struct net_protocol igmp_protocol = {
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
@@ -1688,12 +1688,7 @@ static struct net_protocol tcp_protocol = {
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
@@ -1955,6 +1950,10 @@ static int __init inet_init(void)
ip_init();
+ /* Initialise per-cpu ipv4 mibs */
+ if (init_ipv4_mibs())
+ panic("%s: Cannot init ipv4 mibs\n", __func__);
+
/* Setup TCP slab cache for open requests. */
tcp_init();
@@ -1983,12 +1982,6 @@ static int __init inet_init(void)
if (init_inet_pernet_ops())
pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__);
- /*
- * Initialise per-cpu ipv4 mibs
- */
-
- if (init_ipv4_mibs())
- pr_crit("%s: Cannot init ipv4 mibs\n", __func__);
ipv4_proc_init();
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9e17cd05daff..0f2dac5c25bb 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1121,13 +1121,18 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
return err;
}
-static int arp_invalidate(struct net_device *dev, __be32 ip)
+int arp_invalidate(struct net_device *dev, __be32 ip, bool force)
{
struct neighbour *neigh = neigh_lookup(&arp_tbl, &ip, dev);
int err = -ENXIO;
struct neigh_table *tbl = &arp_tbl;
if (neigh) {
+ if ((neigh->nud_state & NUD_VALID) && !force) {
+ neigh_release(neigh);
+ return 0;
+ }
+
if (neigh->nud_state & ~NUD_NOARP)
err = neigh_update(neigh, NULL, NUD_FAILED,
NEIGH_UPDATE_F_OVERRIDE|
@@ -1174,7 +1179,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
if (!dev)
return -EINVAL;
}
- return arp_invalidate(dev, ip);
+ return arp_invalidate(dev, ip, true);
}
/*
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 5535b722f66d..8dcf9aec7b77 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -254,7 +254,7 @@ static int cipso_v4_cache_check(const unsigned char *key,
struct cipso_v4_map_cache_entry *prev_entry = NULL;
u32 hash;
- if (!cipso_v4_cache_enabled)
+ if (!READ_ONCE(cipso_v4_cache_enabled))
return -ENOENT;
hash = cipso_v4_map_cache_hash(key, key_len);
@@ -311,13 +311,14 @@ static int cipso_v4_cache_check(const unsigned char *key,
int cipso_v4_cache_add(const unsigned char *cipso_ptr,
const struct netlbl_lsm_secattr *secattr)
{
+ int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
int ret_val = -EPERM;
u32 bkt;
struct cipso_v4_map_cache_entry *entry = NULL;
struct cipso_v4_map_cache_entry *old_entry = NULL;
u32 cipso_ptr_len;
- if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+ if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
return 0;
cipso_ptr_len = cipso_ptr[1];
@@ -337,7 +338,7 @@ int cipso_v4_cache_add(const unsigned char *cipso_ptr,
bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
spin_lock_bh(&cipso_v4_cache[bkt].lock);
- if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+ if (cipso_v4_cache[bkt].size < bkt_size) {
list_add(&entry->list, &cipso_v4_cache[bkt].list);
cipso_v4_cache[bkt].size += 1;
} else {
@@ -486,6 +487,7 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
kfree(doi_def->map.std->lvl.local);
kfree(doi_def->map.std->cat.cipso);
kfree(doi_def->map.std->cat.local);
+ kfree(doi_def->map.std);
break;
}
kfree(doi_def);
@@ -533,16 +535,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
ret_val = -ENOENT;
goto doi_remove_return;
}
- if (!refcount_dec_and_test(&doi_def->refcount)) {
- spin_unlock(&cipso_v4_doi_list_lock);
- ret_val = -EBUSY;
- goto doi_remove_return;
- }
list_del_rcu(&doi_def->list);
spin_unlock(&cipso_v4_doi_list_lock);
- cipso_v4_cache_invalidate();
- call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+ cipso_v4_doi_putdef(doi_def);
ret_val = 0;
doi_remove_return:
@@ -599,9 +595,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
if (!refcount_dec_and_test(&doi_def->refcount))
return;
- spin_lock(&cipso_v4_doi_list_lock);
- list_del_rcu(&doi_def->list);
- spin_unlock(&cipso_v4_doi_list_lock);
cipso_v4_cache_invalidate();
call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
@@ -1222,7 +1215,8 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def,
/* This will send packets using the "optimized" format when
* possible as specified in section 3.4.2.6 of the
* CIPSO draft. */
- if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+ if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
+ ret_val <= 10)
tag_len = 14;
else
tag_len = 4 + ret_val;
@@ -1625,7 +1619,7 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
* all the CIPSO validations here but it doesn't
* really specify _exactly_ what we need to validate
* ... so, just make it a sysctl tunable. */
- if (cipso_v4_rbm_strictvalid) {
+ if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
if (cipso_v4_map_lvl_valid(doi_def,
tag[3]) < 0) {
err_offset = opt_iter + 3;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 12a2cea9d606..e2ab8cdb7134 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2354,7 +2354,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name,
free:
kfree(t);
out:
- return -ENOBUFS;
+ return -ENOMEM;
}
static void __devinet_sysctl_unregister(struct net *net,
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 114f9def1ec5..24cd5c9c7839 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -270,7 +270,6 @@ static int esp_output_udp_encap(struct xfrm_state *x, struct sk_buff *skb, struc
int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
u8 *tail;
- u8 *vaddr;
int nfrags;
int esph_offset;
struct page *page;
@@ -285,6 +284,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
return err;
}
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -312,14 +315,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
page = pfrag->page;
get_page(page);
- vaddr = kmap_atomic(page);
-
- tail = vaddr + pfrag->offset;
+ tail = page_address(page) + pfrag->offset;
esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
- kunmap_atomic(vaddr);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -566,7 +565,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
skb->csum = csum_block_sub(skb->csum, csumdiff,
skb->len - trimlen);
}
- pskb_trim(skb, skb->len - trimlen);
+ ret = pskb_trim(skb, skb->len - trimlen);
+ if (unlikely(ret))
+ return ret;
ret = nexthdr[1];
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 58834a10c0be..93045373e44b 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -237,6 +237,9 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7f4ec36e5f70..9aa48b4c4096 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -302,7 +302,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
.flowi4_iif = LOOPBACK_IFINDEX,
.flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
- .flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
+ .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK,
.flowi4_scope = scope,
.flowi4_mark = vmark ? skb->mark : 0,
};
@@ -353,6 +353,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
fl4.flowi4_proto = 0;
fl4.fl4_sport = 0;
fl4.fl4_dport = 0;
+ } else {
+ swap(fl4.fl4_sport, fl4.fl4_dport);
}
if (fib_lookup(net, &fl4, &res, 0))
@@ -555,6 +557,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
cfg->fc_scope = RT_SCOPE_UNIVERSE;
}
+ if (!cfg->fc_table)
+ cfg->fc_table = RT_TABLE_MAIN;
+
if (cmd == SIOCDELRT)
return 0;
@@ -915,9 +920,11 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
return;
/* Add broadcast address, if it is explicitly assigned. */
- if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
+ if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) {
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
prim, 0);
+ arp_invalidate(dev, ifa->ifa_broadcast, false);
+ }
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
@@ -933,6 +940,7 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
32, prim, 0);
+ arp_invalidate(dev, prefix | ~mask, false);
}
}
}
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index ad9ea82daeb3..9376b30cf626 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -133,7 +133,7 @@ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
* to 0 and sets the configured key in the
* inner erspan header field
*/
- if (greh->protocol == htons(ETH_P_ERSPAN) ||
+ if ((greh->protocol == htons(ETH_P_ERSPAN) && hdr_len != 4) ||
greh->protocol == htons(ETH_P_ERSPAN2)) {
struct erspan_base_hdr *ershdr;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4efa5e33513e..af0ddaa55e43 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -244,7 +244,7 @@ static struct {
/**
* icmp_global_allow - Are we allowed to send one more ICMP message ?
*
- * Uses a token bucket to limit our ICMP messages to sysctl_icmp_msgs_per_sec.
+ * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec.
* Returns false if we reached the limit and can not send another packet.
* Note: called with BH disabled
*/
@@ -266,13 +266,17 @@ bool icmp_global_allow(void)
spin_lock(&icmp_global.lock);
delta = min_t(u32, now - icmp_global.stamp, HZ);
if (delta >= HZ / 50) {
- incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+ incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
if (incr)
WRITE_ONCE(icmp_global.stamp, now);
}
- credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+ credit = min_t(u32, icmp_global.credit + incr,
+ READ_ONCE(sysctl_icmp_msgs_burst));
if (credit) {
- credit--;
+ /* We want to use a credit of one in average, but need to randomize
+ * it for security reasons.
+ */
+ credit = max_t(int, credit - prandom_u32_max(3), 0);
rc = true;
}
WRITE_ONCE(icmp_global.credit, credit);
@@ -291,7 +295,7 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code)
return true;
/* Limit if icmp type is enabled in ratemask. */
- if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
+ if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
return true;
return false;
@@ -329,7 +333,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
vif = l3mdev_master_ifindex(dst->dev);
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
- rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
+ rc = inet_peer_xrlim_allow(peer,
+ READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
if (peer)
inet_putpeer(peer);
out:
@@ -462,6 +467,23 @@ out_bh_enable:
local_bh_enable();
}
+/*
+ * The device used for looking up which routing table to use for sending an ICMP
+ * error is preferably the source whenever it is set, which should ensure the
+ * icmp error can be sent to the source host, else lookup using the routing
+ * table of the destination device, else use the main routing table (index 0).
+ */
+static struct net_device *icmp_get_route_lookup_dev(struct sk_buff *skb)
+{
+ struct net_device *route_lookup_dev = NULL;
+
+ if (skb->dev)
+ route_lookup_dev = skb->dev;
+ else if (skb_dst(skb))
+ route_lookup_dev = skb_dst(skb)->dev;
+ return route_lookup_dev;
+}
+
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -470,6 +492,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
int type, int code,
struct icmp_bxm *param)
{
+ struct net_device *route_lookup_dev;
struct rtable *rt, *rt2;
struct flowi4 fl4_dec;
int err;
@@ -484,7 +507,8 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = l3mdev_master_ifindex(skb_dst(skb_in)->dev);
+ route_lookup_dev = icmp_get_route_lookup_dev(skb_in);
+ fl4->flowi4_oif = l3mdev_master_ifindex(route_lookup_dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = ip_route_output_key_hash(net, fl4, skb_in);
@@ -508,7 +532,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- if (inet_addr_type_dev_table(net, skb_dst(skb_in)->dev,
+ if (inet_addr_type_dev_table(net, route_lookup_dev,
fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))
@@ -735,12 +759,24 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
room = 576;
room -= sizeof(struct iphdr) + icmp_param.replyopts.opt.opt.optlen;
room -= sizeof(struct icmphdr);
+ /* Guard against tiny mtu. We need to include at least one
+ * IP network header for this message to make any sense.
+ */
+ if (room <= (int)sizeof(struct iphdr))
+ goto ende;
icmp_param.data_len = skb_in->len - icmp_param.offset;
if (icmp_param.data_len > room)
icmp_param.data_len = room;
icmp_param.head_len = sizeof(struct icmphdr);
+ /* if we don't have a source address at this point, fall back to the
+ * dummy address instead of sending out a packet with a source address
+ * of 0.0.0.0
+ */
+ if (!fl4.saddr)
+ fl4.saddr = htonl(INADDR_DUMMY);
+
icmp_push_reply(&icmp_param, &fl4, &ipc, &rt);
ende:
ip_rt_put(rt);
@@ -752,6 +788,40 @@ out:;
}
EXPORT_SYMBOL(__icmp_send);
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+ struct sk_buff *cloned_skb = NULL;
+ struct ip_options opts = { 0 };
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ __be32 orig_ip;
+
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ __icmp_send(skb_in, type, code, info, &opts);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct iphdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct iphdr))))
+ goto out;
+
+ orig_ip = ip_hdr(skb_in)->saddr;
+ ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
+ __icmp_send(skb_in, type, code, info, &opts);
+ ip_hdr(skb_in)->saddr = orig_ip;
+out:
+ consume_skb(cloned_skb);
+}
+EXPORT_SYMBOL(icmp_ndo_send);
+#endif
static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
{
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 523d26f5e22e..5edf426fa414 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -221,8 +221,10 @@ static void igmp_start_timer(struct ip_mc_list *im, int max_delay)
int tv = prandom_u32() % max_delay;
im->tm_running = 1;
- if (!mod_timer(&im->timer, jiffies+tv+2))
- refcount_inc(&im->refcnt);
+ if (refcount_inc_not_zero(&im->refcnt)) {
+ if (mod_timer(&im->timer, jiffies + tv + 2))
+ ip_ma_put(im);
+ }
}
static void igmp_gq_start_timer(struct in_device *in_dev)
@@ -357,8 +359,9 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu;
+ unsigned int size;
+ size = min(mtu, IP_MAX_MTU);
while (1) {
skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
@@ -471,7 +474,8 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return skb;
mtu = READ_ONCE(dev->mtu);
@@ -597,7 +601,7 @@ static int igmpv3_send_report(struct in_device *in_dev, struct ip_mc_list *pmc)
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
@@ -740,7 +744,8 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
@@ -807,10 +812,17 @@ static void igmp_gq_timer_expire(struct timer_list *t)
static void igmp_ifc_timer_expire(struct timer_list *t)
{
struct in_device *in_dev = from_timer(in_dev, t, mr_ifc_timer);
+ u32 mr_ifc_count;
igmpv3_send_cr(in_dev);
- if (in_dev->mr_ifc_count) {
- in_dev->mr_ifc_count--;
+restart:
+ mr_ifc_count = READ_ONCE(in_dev->mr_ifc_count);
+
+ if (mr_ifc_count) {
+ if (cmpxchg(&in_dev->mr_ifc_count,
+ mr_ifc_count,
+ mr_ifc_count - 1) != mr_ifc_count)
+ goto restart;
igmp_ifc_start_timer(in_dev,
unsolicited_report_interval(in_dev));
}
@@ -822,7 +834,7 @@ static void igmp_ifc_event(struct in_device *in_dev)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- in_dev->mr_ifc_count = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
igmp_ifc_start_timer(in_dev, 1);
}
@@ -917,7 +929,8 @@ static bool igmp_heard_report(struct in_device *in_dev, __be32 group)
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return false;
rcu_read_lock();
@@ -961,7 +974,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
in_dev->mr_qri;
}
/* cancel the interface change timer */
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
/* clear deleted report items */
@@ -1003,7 +1016,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
/* RFC3376, 8.3. Query Response Interval:
@@ -1042,7 +1055,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
@@ -1182,7 +1195,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im)
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
@@ -1233,9 +1246,11 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
swap(im->tomb, pmc->tomb);
swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
} else {
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
in_dev_put(pmc->interface);
kfree_pmc(pmc);
@@ -1292,7 +1307,8 @@ static void igmp_group_dropped(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
reporter = im->reporter;
@@ -1329,13 +1345,14 @@ static void igmp_group_added(struct ip_mc_list *im)
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
if (in_dev->dead)
return;
- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1349,7 +1366,7 @@ static void igmp_group_added(struct ip_mc_list *im)
* IN() to IN(A).
*/
if (im->sfmode == MCAST_EXCLUDE)
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
igmp_ifc_event(in_dev);
#endif
@@ -1650,7 +1667,7 @@ static void ip_mc_rejoin_groups(struct in_device *in_dev)
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
/* a failover is happening and switches
@@ -1739,7 +1756,7 @@ void ip_mc_down(struct in_device *in_dev)
igmp_group_dropped(pmc);
#ifdef CONFIG_IP_MULTICAST
- in_dev->mr_ifc_count = 0;
+ WRITE_ONCE(in_dev->mr_ifc_count, 0);
if (del_timer(&in_dev->mr_ifc_timer))
__in_dev_put(in_dev);
in_dev->mr_gq_running = 0;
@@ -1757,7 +1774,7 @@ static void ip_mc_reset(struct in_device *in_dev)
in_dev->mr_qi = IGMP_QUERY_INTERVAL;
in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
#else
static void ip_mc_reset(struct in_device *in_dev)
@@ -1816,6 +1833,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
while ((i = rtnl_dereference(in_dev->mc_list)) != NULL) {
in_dev->mc_list = i->next_rcu;
in_dev->mc_count--;
+ ip_mc_clear_src(i);
ip_ma_put(i);
}
}
@@ -1890,7 +1908,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
@@ -1954,8 +1972,8 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(pmc->interface);
@@ -2133,8 +2151,8 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode,
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- in_dev->mr_ifc_count = pmc->crcount;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
igmp_ifc_event(in_dev);
@@ -2199,7 +2217,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
count++;
}
err = -ENOBUFS;
- if (count >= net->ipv4.sysctl_igmp_max_memberships)
+ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
@@ -2408,9 +2426,10 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
newpsl->sl_addr[i] = psl->sl_addr[i];
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
}
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
psl = newpsl;
}
rv = 1; /* > 0 for insert logic below if sl_count is 0 */
@@ -2508,11 +2527,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
psl->sl_count, psl->sl_addr, 0);
/* decrease mem now to avoid the memleak warning */
atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
- kfree_rcu(psl, rcu);
- } else
+ } else {
(void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode,
0, NULL, 0);
+ }
rcu_assign_pointer(pmc->sflist, newpsl);
+ if (psl)
+ kfree_rcu(psl, rcu);
pmc->sfmode = msf->imsf_fmode;
err = 0;
done:
@@ -2735,6 +2756,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
rv = 1;
} else if (im) {
if (src_addr) {
+ spin_lock_bh(&im->lock);
for (psf = im->sources; psf; psf = psf->sf_next) {
if (psf->sf_inaddr == src_addr)
break;
@@ -2745,6 +2767,7 @@ int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u
im->sfcount[MCAST_EXCLUDE];
else
rv = im->sfcount[MCAST_EXCLUDE] != 0;
+ spin_unlock_bh(&im->lock);
} else
rv = 1; /* unspecified source; tentatively allow */
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 534e2598981d..7392a744c677 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -698,12 +698,15 @@ static bool reqsk_queue_unlink(struct request_sock_queue *queue,
return found;
}
-void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
{
- if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) {
+ bool unlinked = reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req);
+
+ if (unlinked) {
reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
reqsk_put(req);
}
+ return unlinked;
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
@@ -790,7 +793,7 @@ static void reqsk_queue_hash_req(struct request_sock *req,
timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
mod_timer(&req->rsk_timer, jiffies + timeout);
- inet_ehash_insert(req_to_sk(req), NULL);
+ inet_ehash_insert(req_to_sk(req), NULL, NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
@@ -823,6 +826,7 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
if (newsk) {
struct inet_connection_sock *newicsk = inet_csk(newsk);
+ newsk->sk_wait_pending = 0;
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
@@ -900,11 +904,25 @@ void inet_csk_prepare_forced_close(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);
+static int inet_ulp_can_listen(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ulp_ops)
+ return -EINVAL;
+
+ return 0;
+}
+
int inet_csk_listen_start(struct sock *sk, int backlog)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
- int err = -EADDRINUSE;
+ int err;
+
+ err = inet_ulp_can_listen(sk);
+ if (unlikely(err))
+ return err;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@ -917,6 +935,7 @@ int inet_csk_listen_start(struct sock *sk, int backlog)
* It is OK, because this socket enters to hash table only
* after validation is complete.
*/
+ err = -EADDRINUSE;
inet_sk_state_store(sk, TCP_LISTEN);
if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
inet->inet_sport = htons(inet->inet_num);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f0957ebf82cf..d07917059d70 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -392,8 +392,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
r->idiag_inode = 0;
if (net_admin && nla_put_u32(skb, INET_DIAG_MARK,
- inet_rsk(reqsk)->ir_mark))
+ inet_rsk(reqsk)->ir_mark)) {
+ nlmsg_cancel(skb, nlh);
return -EMSGSIZE;
+ }
nlmsg_end(skb, nlh);
return 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3a5f12f011cb..c6d670cd872f 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,6 +24,9 @@
#include <net/addrconf.h>
#include <net/inet_connection_sock.h>
#include <net/inet_hashtables.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/inet6_hashtables.h>
+#endif
#include <net/secure_seq.h>
#include <net/ip.h>
#include <net/tcp.h>
@@ -504,7 +507,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet_sk_port_offset(const struct sock *sk)
+static u64 inet_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -513,10 +516,52 @@ static u32 inet_sk_port_offset(const struct sock *sk)
inet->inet_dport);
}
-/* insert a socket into ehash, and eventually remove another one
- * (The another one can be a SYN_RECV or TIMEWAIT
+/* Searches for an exsiting socket in the ehash bucket list.
+ * Returns true if found, false otherwise.
+ */
+static bool inet_ehash_lookup_by_sk(struct sock *sk,
+ struct hlist_nulls_head *list)
+{
+ const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
+ const int sdif = sk->sk_bound_dev_if;
+ const int dif = sk->sk_bound_dev_if;
+ const struct hlist_nulls_node *node;
+ struct net *net = sock_net(sk);
+ struct sock *esk;
+
+ INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
+
+ sk_nulls_for_each_rcu(esk, node, list) {
+ if (esk->sk_hash != sk->sk_hash)
+ continue;
+ if (sk->sk_family == AF_INET) {
+ if (unlikely(INET_MATCH(esk, net, acookie,
+ sk->sk_daddr,
+ sk->sk_rcv_saddr,
+ ports, dif, sdif))) {
+ return true;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (sk->sk_family == AF_INET6) {
+ if (unlikely(INET6_MATCH(esk, net,
+ &sk->sk_v6_daddr,
+ &sk->sk_v6_rcv_saddr,
+ ports, dif, sdif))) {
+ return true;
+ }
+ }
+#endif
+ }
+ return false;
+}
+
+/* Insert a socket into ehash, and eventually remove another one
+ * (The another one can be a SYN_RECV or TIMEWAIT)
+ * If an existing socket already exists, socket sk is not inserted,
+ * and sets found_dup_sk parameter to true.
*/
-bool inet_ehash_insert(struct sock *sk, struct sock *osk)
+bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
@@ -535,16 +580,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
ret = sk_nulls_del_node_init_rcu(osk);
+ } else if (found_dup_sk) {
+ *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
+ if (*found_dup_sk)
+ ret = false;
}
+
if (ret)
__sk_nulls_add_node_rcu(sk, list);
+
spin_unlock(lock);
+
return ret;
}
-bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
+bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
{
- bool ok = inet_ehash_insert(sk, osk);
+ bool ok = inet_ehash_insert(sk, osk, found_dup_sk);
if (ok) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -588,7 +640,7 @@ int __inet_hash(struct sock *sk, struct sock *osk)
int err = 0;
if (sk->sk_state != TCP_LISTEN) {
- inet_ehash_nolisten(sk, osk);
+ inet_ehash_nolisten(sk, osk, NULL);
return 0;
}
WARN_ON(!sk_unhashed(sk));
@@ -662,8 +714,21 @@ unlock:
}
EXPORT_SYMBOL_GPL(inet_unhash);
+/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
+ * Note that we use 32bit integers (vs RFC 'short integers')
+ * because 2^16 is not a multiple of num_ephemeral and this
+ * property might be used by clever attacker.
+ *
+ * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though
+ * attacks were since demonstrated, thus we use 65536 by default instead
+ * to really give more isolation and privacy, at the expense of 256kB
+ * of kernel memory.
+ */
+#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER)
+static u32 *table_perturb;
+
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
- struct sock *sk, u32 port_offset,
+ struct sock *sk, u64 port_offset,
int (*check_established)(struct inet_timewait_death_row *,
struct sock *, __u16, struct inet_timewait_sock **))
{
@@ -675,20 +740,10 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
u32 remaining, offset;
int ret, i, low, high;
- static u32 hint;
+ u32 index;
if (port) {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- inet_ehash_nolisten(sk, NULL);
- spin_unlock_bh(&head->lock);
- return 0;
- }
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
+ local_bh_disable();
ret = check_established(death_row, sk, port, NULL);
local_bh_enable();
return ret;
@@ -700,7 +755,13 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
if (likely(remaining > 1))
remaining &= ~1U;
- offset = (hint + port_offset) % remaining;
+ get_random_slow_once(table_perturb,
+ INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
+ index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);
+
+ offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
+ offset %= remaining;
+
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
@@ -753,13 +814,19 @@ next_port:
return -EADDRNOTAVAIL;
ok:
- hint += i + 2;
+ /* Here we want to add a little bit of randomness to the next source
+ * port that will be chosen. We use a max() with a random here so that
+ * on low contention the randomness is maximal and on high contention
+ * it may be inexistent.
+ */
+ i = max_t(int, i, (prandom_u32() & 7) * 2);
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- inet_ehash_nolisten(sk, (struct sock *)tw);
+ inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
@@ -776,7 +843,7 @@ ok:
int inet_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet_sk_port_offset(sk);
@@ -822,6 +889,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name,
INIT_HLIST_HEAD(&h->lhash2[i].head);
h->lhash2[i].count = 0;
}
+
+ /* this one is used for source ports of outgoing connections */
+ table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
+ sizeof(*table_perturb), GFP_KERNEL);
+ if (!table_perturb)
+ panic("TCP: failed to alloc table_perturb");
}
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index ff327a62c9ce..a18668552d33 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -148,16 +148,20 @@ static void inet_peer_gc(struct inet_peer_base *base,
struct inet_peer *gc_stack[],
unsigned int gc_cnt)
{
+ int peer_threshold, peer_maxttl, peer_minttl;
struct inet_peer *p;
__u32 delta, ttl;
int i;
- if (base->total >= inet_peer_threshold)
+ peer_threshold = READ_ONCE(inet_peer_threshold);
+ peer_maxttl = READ_ONCE(inet_peer_maxttl);
+ peer_minttl = READ_ONCE(inet_peer_minttl);
+
+ if (base->total >= peer_threshold)
ttl = 0; /* be aggressive */
else
- ttl = inet_peer_maxttl
- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
- base->total / inet_peer_threshold * HZ;
+ ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
+ base->total / peer_threshold * HZ;
for (i = 0; i < gc_cnt; i++) {
p = gc_stack[i];
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ffcb5983107d..38c8db78cda1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -435,14 +435,12 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
-
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ __be16 flags = tunnel->parms.o_flags;
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
- tunnel->parms.o_flags, proto, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ flags, proto, tunnel->parms.o_key,
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -548,7 +546,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -576,7 +574,6 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
int tunnel_hlen;
int version;
int nhoff;
- int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -606,15 +603,21 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
truncate = true;
}
- nhoff = skb_network_header(skb) - skb_mac_header(skb);
+ nhoff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP) &&
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_offset(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
@@ -632,7 +635,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
@@ -680,18 +683,23 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
- /* Need space for new headers */
- if (skb_cow_head(skb, dev->needed_headroom -
- (tunnel->hlen + sizeof(struct iphdr))))
+ int pull_len = tunnel->hlen + sizeof(struct iphdr);
+
+ if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
- * to gre header.
- */
- skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ if (!pskb_network_may_pull(skb, pull_len))
+ goto free_skb;
+
+ /* ip_tunnel_xmit() needs skb->data pointing to gre header. */
+ skb_pull(skb, pull_len);
skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start(skb) < skb->data)
+ goto free_skb;
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
@@ -800,7 +808,11 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
len = tunnel->tun_hlen - len;
tunnel->hlen = tunnel->hlen + len;
- dev->needed_headroom = dev->needed_headroom + len;
+ if (dev->header_ops)
+ dev->hard_header_len += len;
+ else
+ dev->needed_headroom += len;
+
if (set_mtu)
dev->mtu = max_t(int, dev->mtu - len, 68);
@@ -1003,6 +1015,7 @@ static void __gre_tunnel_init(struct net_device *dev)
tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+ dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
@@ -1046,10 +1059,14 @@ static int ipgre_tunnel_init(struct net_device *dev)
return -EINVAL;
dev->flags = IFF_BROADCAST;
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
}
#endif
} else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
}
return ip_tunnel_init(dev);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c3a0683e83df..7ead5192b2a9 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -306,28 +306,38 @@ drop:
return true;
}
+int udp_v4_early_demux(struct sk_buff *);
+int tcp_v4_early_demux(struct sk_buff *);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev)
{
const struct iphdr *iph = ip_hdr(skb);
- int (*edemux)(struct sk_buff *skb);
struct rtable *rt;
int err;
- if (net->ipv4.sysctl_ip_early_demux &&
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
- const struct net_protocol *ipprot;
- int protocol = iph->protocol;
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+ tcp_v4_early_demux(skb);
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = edemux(skb);
- if (unlikely(err))
- goto drop_error;
- /* must reload iph, skb->head might have changed */
- iph = ip_hdr(skb);
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+ err = udp_v4_early_demux(skb);
+ if (unlikely(err))
+ goto drop_error;
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
}
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f0faf1193dd8..6936f703758b 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -160,12 +160,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- if (ip_dont_fragment(sk, &rt->dst)) {
+ /* Do not bother generating IPID for small packets (eg SYNACK) */
+ if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
- __ip_select_ident(net, iph, 1);
+ /* TCP packets here are SYNACK with fat IPv4/TCP options.
+ * Avoid using the hashed IP ident generator.
+ */
+ if (sk->sk_protocol == IPPROTO_TCP)
+ iph->id = (__force __be16)prandom_u32();
+ else
+ __ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
@@ -214,7 +221,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
@@ -312,7 +319,7 @@ static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *sk
if (skb_is_gso(skb))
return ip_finish_output_gso(net, sk, skb, mtu);
- if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
+ if (skb->len > mtu || IPCB(skb)->frag_max_size)
return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
return ip_finish_output2(net, sk, skb);
@@ -419,8 +426,9 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
{
BUILD_BUG_ON(offsetof(typeof(*fl4), daddr) !=
offsetof(typeof(*fl4), saddr) + sizeof(fl4->saddr));
- memcpy(&iph->saddr, &fl4->saddr,
- sizeof(fl4->saddr) + sizeof(fl4->daddr));
+
+ iph->saddr = fl4->saddr;
+ iph->daddr = fl4->daddr;
}
/* Note: skb->sk can be different from sk, in case of tunnels */
@@ -940,7 +948,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
@@ -960,35 +968,39 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len + 15;
+ alloc_extra += exthdrlen;
+
+ /* The last fragment gets additional space at tail.
+ * Note, with MSG_MORE we overallocate on fragments,
+ * because we have no idea what fragment will be
+ * the last.
+ */
+ if (datalen == length + fraggap)
+ alloc_extra += rt->dst.trailer_len;
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
- alloclen += exthdrlen;
-
- /* The last fragment gets additional space at tail.
- * Note, with MSG_MORE we overallocate on fragments,
- * because we have no idea what fragment will be
- * the last.
- */
- if (datalen == length + fraggap)
- alloclen += rt->dst.trailer_len;
+ alloclen += alloc_extra;
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len + 15,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len + 15,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
@@ -1431,9 +1443,19 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
cork->dst = NULL;
skb_dst_set(skb, &rt->dst);
- if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(net, ((struct icmphdr *)
- skb_transport_header(skb))->type);
+ if (iph->protocol == IPPROTO_ICMP) {
+ u8 icmp_type;
+
+ /* For such sockets, transhdrlen is zero when do ip_append_data(),
+ * so icmphdr does not in skb linear region and can not get icmp_type
+ * by icmp_hdr(skb)->type.
+ */
+ if (sk->sk_type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ icmp_type = fl4->fl4_icmp_type;
+ else
+ icmp_type = icmp_hdr(skb)->type;
+ icmp_out_count(net, icmp_type);
+ }
ip_cork_release(cork);
out:
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 82f341e84fae..fbf39077fc54 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -316,7 +316,14 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
ipc->tos = val;
ipc->priority = rt_tos2priority(ipc->tos);
break;
-
+ case IP_PROTOCOL:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
+ return -EINVAL;
+ val = *(int *)CMSG_DATA(cmsg);
+ if (val < 1 || val > 255)
+ return -EINVAL;
+ ipc->protocol = val;
+ break;
default:
return -EINVAL;
}
@@ -1522,6 +1529,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MINTTL:
val = inet->min_ttl;
break;
+ case IP_PROTOCOL:
+ val = inet_sk(sk)->inet_num;
+ break;
default:
release_sock(sk);
return -ENOPROTOOPT;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 375d0e516d85..9c2381cf675d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -330,7 +330,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
}
dev->needed_headroom = t_hlen + hlen;
- mtu -= (dev->hard_header_len + t_hlen);
+ mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
if (mtu < IPV4_MIN_MTU)
mtu = IPV4_MIN_MTU;
@@ -360,7 +360,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
nt = netdev_priv(dev);
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
+ dev->max_mtu = IP_MAX_MTU - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->max_mtu -= dev->hard_header_len;
+
ip_tunnel_add(itn, nt);
return nt;
@@ -502,14 +505,18 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
const struct iphdr *inner_iph)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+ int pkt_size;
int mtu;
- if (df)
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- - sizeof(struct iphdr) - tunnel->hlen;
- else
+ pkt_size = skb->len - tunnel->hlen;
+ pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
+
+ if (df) {
+ mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel->hlen);
+ mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
+ } else {
mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+ }
skb_dst_update_pmtu_no_confirm(skb, mtu);
@@ -602,10 +609,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IP))
df = inner_iph->frag_off & htons(IP_DF);
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > dev->needed_headroom)
- dev->needed_headroom = headroom;
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
- if (skb_cow_head(skb, dev->needed_headroom)) {
+ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
ip_rt_put(rt);
goto tx_dropped;
}
@@ -736,7 +743,11 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
+ df = tnl_params->frag_off;
+ if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
+ df |= (inner_iph->frag_off & htons(IP_DF));
+
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph)) {
ip_rt_put(rt);
goto tx_error;
}
@@ -764,16 +775,12 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ttl = ip4_dst_hoplimit(&rt->dst);
}
- df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
- df |= (inner_iph->frag_off&htons(IP_DF));
-
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > dev->needed_headroom)
- dev->needed_headroom = max_headroom;
+ if (max_headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, max_headroom);
- if (skb_cow_head(skb, dev->needed_headroom)) {
+ if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
kfree_skb(skb);
@@ -935,7 +942,10 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
+ int max_mtu = IP_MAX_MTU - t_hlen;
+
+ if (dev->type == ARPHRD_ETHER)
+ max_mtu -= dev->hard_header_len;
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -1112,10 +1122,12 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
mtu = ip_tunnel_bind_dev(dev);
if (tb[IFLA_MTU]) {
- unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
+ unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
+
+ if (dev->type == ARPHRD_ETHER)
+ max -= dev->hard_header_len;
- mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
- (unsigned int)(max - sizeof(struct iphdr)));
+ mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
}
err = dev_set_mtu(dev, mtu);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 15c71b08c2df..a3536dfe9b16 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -319,12 +319,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
break;
case htons(ETH_P_IPV6):
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
break;
default:
goto tx_err;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 88212615bf4c..58719b9635d9 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -866,7 +866,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d
/*
- * Copy BOOTP-supplied string if not already set.
+ * Copy BOOTP-supplied string
*/
static int __init ic_bootp_string(char *dest, char *src, int len, int max)
{
@@ -915,12 +915,15 @@ static void __init ic_do_bootp_ext(u8 *ext)
}
break;
case 12: /* Host name */
- ic_bootp_string(utsname()->nodename, ext+1, *ext,
- __NEW_UTS_LEN);
- ic_host_name_set = 1;
+ if (!ic_host_name_set) {
+ ic_bootp_string(utsname()->nodename, ext+1, *ext,
+ __NEW_UTS_LEN);
+ ic_host_name_set = 1;
+ }
break;
case 15: /* Domain name (DNS) */
- ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
+ if (!ic_domain[0])
+ ic_bootp_string(ic_domain, ext+1, *ext, sizeof(ic_domain));
break;
case 17: /* Root path */
if (!root_server_path[0])
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d235478d9ca3..2085af224a41 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -265,7 +265,9 @@ static int __net_init ipmr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ipmr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 04311f7067e2..9a6b01d85cd0 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -1,4 +1,5 @@
#include <linux/netlink.h>
+#include <linux/nospec.h>
#include <linux/rtnetlink.h>
#include <linux/types.h>
#include <net/ip.h>
@@ -24,6 +25,7 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
if (type > RTAX_MAX)
return -EINVAL;
+ type = array_index_nospec(type, RTAX_MAX + 1);
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 8d2e5dc9a827..3d670d5aea34 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,17 +17,19 @@
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
+int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int addr_type)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
- const struct sock *sk = skb_to_full_sk(skb);
- __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
+ __u8 flags;
struct net_device *dev = skb_dst(skb)->dev;
unsigned int hh_len;
+ sk = sk_to_full_sk(sk);
+ flags = sk ? inet_sk_flowi_flags(sk) : 0;
+
if (addr_type == RTN_UNSPEC)
addr_type = inet_addr_type_dev_table(net, dev, saddr);
if (addr_type == RTN_LOCAL || addr_type == RTN_UNICAST)
@@ -91,8 +93,8 @@ int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
skb->mark == rt_info->mark &&
iph->daddr == rt_info->daddr &&
iph->saddr == rt_info->saddr))
- return ip_route_me_harder(entry->state.net, skb,
- RTN_UNSPEC);
+ return ip_route_me_harder(entry->state.net, entry->state.sk,
+ skb, RTN_UNSPEC);
}
return 0;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 10d8f95eb771..fba56cd95896 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1195,6 +1195,8 @@ static int translate_compat_table(struct net *net,
if (!newinfo)
goto out_unlock;
+ memset(newinfo->entries, 0, size);
+
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
newinfo->hook_entry[i] = compatr->hook_entry[i];
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e77872c93c20..730a40dc829a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1433,6 +1433,8 @@ translate_compat_table(struct net *net,
if (!newinfo)
goto out_unlock;
+ memset(newinfo->entries, 0, size);
+
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
newinfo->hook_entry[i] = compatr->hook_entry[i];
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2fa196325988..954c96f4ddd0 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -509,8 +509,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
if (IS_ERR(config))
return PTR_ERR(config);
}
- } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
+ } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) {
+ clusterip_config_entry_put(config);
+ clusterip_config_put(config);
return -EINVAL;
+ }
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 690b17ef6a44..d64b1ef43c10 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -54,7 +54,7 @@ synproxy_send_tcp(struct net *net,
skb_dst_set_noref(nskb, skb_dst(skb));
nskb->protocol = htons(ETH_P_IP);
- if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
+ if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
goto free_nskb;
if (nfct) {
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 74b19a5c572e..088320ce77a1 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -94,7 +94,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- flow.flowi4_tos = RT_TOS(iph->tos);
+ flow.flowi4_tos = iph->tos & IPTOS_RT_MASK;
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index dea138ca8925..0829f46ddfdd 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -65,7 +65,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
iph->daddr != daddr ||
skb->mark != mark ||
iph->tos != tos) {
- err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+ err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index df5c2a2061a4..19fff2c589fa 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -46,16 +46,31 @@ static void dump_arp_packet(struct nf_log_buf *m,
const struct nf_loginfo *info,
const struct sk_buff *skb, unsigned int nhoff)
{
- const struct arphdr *ah;
- struct arphdr _arph;
const struct arppayload *ap;
struct arppayload _arpp;
+ const struct arphdr *ah;
+ unsigned int logflags;
+ struct arphdr _arph;
ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
if (ah == NULL) {
nf_log_buf_add(m, "TRUNCATED");
return;
}
+
+ if (info->type == NF_LOG_TYPE_LOG)
+ logflags = info->u.log.logflags;
+ else
+ logflags = NF_LOG_DEFAULT_MASK;
+
+ if (logflags & NF_LOG_MACDECODE) {
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
+ }
+
nf_log_buf_add(m, "ARP HTYPE=%d PTYPE=0x%04x OPCODE=%d",
ntohs(ah->ar_hrd), ntohs(ah->ar_pro), ntohs(ah->ar_op));
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 1e6f28c97d3a..cde1918607e9 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -287,8 +287,10 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
ntohs(eth_hdr(skb)->h_proto));
return;
default:
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 6115bf1ff6f0..6a27766b7d0f 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -329,7 +329,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
- err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+ err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 5cd06ba3535d..4996db1f64a1 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -129,7 +129,7 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
ip4_dst_hoplimit(skb_dst(nskb)));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
- if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
+ if (ip_route_me_harder(net, nskb->sk, nskb, RTN_UNSPEC))
goto free_nskb;
niph = ip_hdr(nskb);
diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c
index 4824b1e183a1..bff2b85c5fd6 100644
--- a/net/ipv4/netfilter/nf_socket_ipv4.c
+++ b/net/ipv4/netfilter/nf_socket_ipv4.c
@@ -96,11 +96,11 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff,
struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb,
const struct net_device *indev)
{
- __be32 uninitialized_var(daddr), uninitialized_var(saddr);
- __be16 uninitialized_var(dport), uninitialized_var(sport);
+ __be32 daddr, saddr;
+ __be16 dport, sport;
const struct iphdr *iph = ip_hdr(skb);
struct sk_buff *data_skb = NULL;
- u8 uninitialized_var(protocol);
+ u8 protocol;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
enum ip_conntrack_info ctinfo;
struct nf_conn const *ct;
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index 7d82934c46f4..61003768e52b 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -50,7 +50,7 @@ static unsigned int nf_route_table_hook(void *priv,
iph->daddr != daddr ||
skb->mark != mark ||
iph->tos != tos) {
- err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+ err = ip_route_me_harder(state->net, state->sk, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index 0af3d8df70dd..157bca240edc 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -16,8 +16,8 @@
#include <net/netfilter/ipv4/nf_dup_ipv4.h>
struct nft_dup_ipv4 {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_dev:8;
+ u8 sreg_addr;
+ u8 sreg_dev;
};
static void nft_dup_ipv4_eval(const struct nft_expr *expr,
@@ -43,16 +43,16 @@ static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in_addr));
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ sizeof(struct in_addr));
if (err < 0)
return err;
- if (tb[NFTA_DUP_SREG_DEV] != NULL) {
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
- }
- return 0;
+ if (tb[NFTA_DUP_SREG_DEV])
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ &priv->sreg_dev, sizeof(int));
+
+ return err;
}
static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index e50976e3c213..3b2e8ac45d4e 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -95,6 +95,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
+ if (priv->flags & NFTA_FIB_F_IIF)
+ fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif);
+
if (nft_hook(pkt) == NF_INET_PRE_ROUTING &&
nft_fib_is_loopback(pkt->skb, nft_in(pkt))) {
nft_fib_store_result(dest, priv, pkt,
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index c59144502ee8..2e7abad48d83 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -177,16 +177,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
struct sock *sk = NULL;
struct inet_sock *isk;
struct hlist_nulls_node *hnode;
- int dif = skb->dev->ifindex;
+ int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
+ dif = inet_iif(skb);
+ sdif = inet_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n",
(int)ident, &ip_hdr(skb)->daddr, dif);
#if IS_ENABLED(CONFIG_IPV6)
} else if (skb->protocol == htons(ETH_P_IPV6)) {
+ dif = inet6_iif(skb);
+ sdif = inet6_sdif(skb);
pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n",
(int)ident, &ipv6_hdr(skb)->daddr, dif);
#endif
+ } else {
+ return NULL;
}
read_lock_bh(&ping_table.lock);
@@ -225,7 +231,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
continue;
}
- if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)
+ if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif &&
+ sk->sk_bound_dev_if != sdif)
continue;
sock_hold(sk);
@@ -302,6 +309,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
struct net *net = sock_net(sk);
if (sk->sk_family == AF_INET) {
struct sockaddr_in *addr = (struct sockaddr_in *) uaddr;
+ u32 tb_id = RT_TABLE_LOCAL;
int chk_addr_ret;
if (addr_len < sizeof(*addr))
@@ -315,7 +323,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
+ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
chk_addr_ret = RTN_LOCAL;
@@ -356,6 +365,14 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
return -ENODEV;
}
}
+
+ if (!dev && sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ }
has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev,
scoped);
rcu_read_unlock();
@@ -968,6 +985,7 @@ bool ping_rcv(struct sk_buff *skb)
struct sock *sk;
struct net *net = dev_net(skb->dev);
struct icmphdr *icmph = icmp_hdr(skb);
+ bool rc = false;
/* We assume the packet has already been checked by icmp_rcv */
@@ -982,14 +1000,15 @@ bool ping_rcv(struct sk_buff *skb)
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
pr_debug("rcv on socket %p\n", sk);
- if (skb2)
- ping_queue_rcv_skb(sk, skb2);
+ if (skb2 && !ping_queue_rcv_skb(sk, skb2))
+ rc = true;
sock_put(sk);
- return true;
}
- pr_debug("no socket, dropping\n");
- return false;
+ if (!rc)
+ pr_debug("no socket, dropping\n");
+
+ return rc;
}
EXPORT_SYMBOL_GPL(ping_rcv);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 21800979ed62..8ad120c07096 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -391,7 +391,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
skb->ip_summed = CHECKSUM_NONE;
- sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc->tsflags);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
@@ -563,6 +563,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
+ /* Keep backward compat */
+ if (hdrincl)
+ ipc.protocol = IPPROTO_RAW;
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
@@ -630,7 +633,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
- hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+ hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0, sk->sk_uid);
@@ -725,6 +728,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
int ret = -EINVAL;
int chk_addr_ret;
+ lock_sock(sk);
if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in))
goto out;
@@ -744,7 +748,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_saddr = 0; /* Use device */
sk_dst_reset(sk);
ret = 0;
-out: return ret;
+out:
+ release_sock(sk);
+ return ret;
}
/*
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 3db428242b22..f4d41ceef946 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -70,6 +70,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/bootmem.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
@@ -470,8 +471,10 @@ static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr)
__ipv4_confirm_neigh(dev, *(__force u32 *)pkey);
}
-#define IP_IDENTS_SZ 2048u
-
+/* Hash tables of size 2048..262144 depending on RAM size.
+ * Each bucket uses 8 bytes.
+ */
+static u32 ip_idents_mask __read_mostly;
static atomic_t *ip_idents __read_mostly;
static u32 *ip_tstamps __read_mostly;
@@ -481,12 +484,16 @@ static u32 *ip_tstamps __read_mostly;
*/
u32 ip_idents_reserve(u32 hash, int segs)
{
- u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
- atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
- u32 old = READ_ONCE(*p_tstamp);
- u32 now = (u32)jiffies;
+ u32 bucket, old, now = (u32)jiffies;
+ atomic_t *p_id;
+ u32 *p_tstamp;
u32 delta = 0;
+ bucket = hash & ip_idents_mask;
+ p_tstamp = ip_tstamps + bucket;
+ p_id = ip_idents + bucket;
+ old = READ_ONCE(*p_tstamp);
+
if (old != now && cmpxchg(p_tstamp, old, now) == old)
delta = prandom_u32_max(now - old);
@@ -597,28 +604,35 @@ static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
}
}
-static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
+static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash)
{
- struct fib_nh_exception *fnhe, *oldest;
+ struct fib_nh_exception __rcu **fnhe_p, **oldest_p;
+ struct fib_nh_exception *fnhe, *oldest = NULL;
- oldest = rcu_dereference(hash->chain);
- for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
- fnhe = rcu_dereference(fnhe->fnhe_next)) {
- if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
+ for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) {
+ fnhe = rcu_dereference_protected(*fnhe_p,
+ lockdep_is_held(&fnhe_lock));
+ if (!fnhe)
+ break;
+ if (!oldest ||
+ time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) {
oldest = fnhe;
+ oldest_p = fnhe_p;
+ }
}
fnhe_flush_routes(oldest);
- return oldest;
+ *oldest_p = oldest->fnhe_next;
+ kfree_rcu(oldest, rcu);
}
-static inline u32 fnhe_hashfun(__be32 daddr)
+static u32 fnhe_hashfun(__be32 daddr)
{
- static u32 fnhe_hashrnd __read_mostly;
- u32 hval;
+ static siphash_key_t fnhe_hash_key __read_mostly;
+ u64 hval;
- net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
- hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
- return hash_32(hval, FNHE_HASH_SHIFT);
+ net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key));
+ hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key);
+ return hash_64(hval, FNHE_HASH_SHIFT);
}
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
@@ -685,16 +699,21 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
if (rt)
fill_route_from_fnhe(rt, fnhe);
} else {
- if (depth > FNHE_RECLAIM_DEPTH)
- fnhe = fnhe_oldest(hash);
- else {
- fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
- if (!fnhe)
- goto out_unlock;
-
- fnhe->fnhe_next = hash->chain;
- rcu_assign_pointer(hash->chain, fnhe);
+ /* Randomize max depth to avoid some side channels attacks. */
+ int max_depth = FNHE_RECLAIM_DEPTH +
+ prandom_u32_max(FNHE_RECLAIM_DEPTH);
+
+ while (depth > max_depth) {
+ fnhe_remove_oldest(hash);
+ depth--;
}
+
+ fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
+ if (!fnhe)
+ goto out_unlock;
+
+ fnhe->fnhe_next = hash->chain;
+
fnhe->fnhe_genid = genid;
fnhe->fnhe_daddr = daddr;
fnhe->fnhe_gw = gw;
@@ -702,6 +721,8 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
fnhe->fnhe_mtu_locked = lock;
fnhe->fnhe_expires = max(1UL, expires);
+ rcu_assign_pointer(hash->chain, fnhe);
+
/* Exception created; mark the cached routes for the nexthop
* stale, so anyone caching it rechecks if this exception
* applies to them.
@@ -770,7 +791,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
goto reject_redirect;
}
- n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
+ n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw);
if (!n)
n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
if (!IS_ERR(n)) {
@@ -1194,6 +1215,7 @@ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
+ struct net_device *dev;
struct ip_options opt;
int res;
@@ -1211,7 +1233,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
- res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
+ res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
rcu_read_unlock();
if (res)
@@ -1311,7 +1334,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = dst_metric_raw(dst, RTAX_MTU);
if (mtu)
- return mtu;
+ goto out;
mtu = READ_ONCE(dst->dev->mtu);
@@ -1320,6 +1343,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = 576;
}
+out:
mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
@@ -1397,7 +1421,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
struct net_device *dev = nh->nh_dev;
u32 mtu = 0;
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
@@ -1706,6 +1730,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
#endif
RT_CACHE_STAT_INC(in_slow_mc);
+ skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
return 0;
}
@@ -2634,10 +2659,12 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
if (IS_ERR(rt))
return rt;
- if (flp4->flowi4_proto)
+ if (flp4->flowi4_proto) {
+ flp4->flowi4_oif = rt->dst.dev->ifindex;
rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
flowi4_to_flowi(flp4),
sk, 0);
+ }
return rt;
}
@@ -2791,7 +2818,7 @@ static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst,
udph = skb_put_zero(skb, sizeof(struct udphdr));
udph->source = sport;
udph->dest = dport;
- udph->len = sizeof(struct udphdr);
+ udph->len = htons(sizeof(struct udphdr));
udph->check = 0;
break;
}
@@ -2874,7 +2901,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dst;
fl4.saddr = src;
- fl4.flowi4_tos = rtm->rtm_tos;
+ fl4.flowi4_tos = rtm->rtm_tos & IPTOS_RT_MASK;
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
fl4.flowi4_uid = uid;
@@ -2898,8 +2925,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fl4.flowi4_iif = iif; /* for rt_fill_info */
skb->dev = dev;
skb->mark = mark;
- err = ip_route_input_rcu(skb, dst, src, rtm->rtm_tos,
- dev, &res);
+ err = ip_route_input_rcu(skb, dst, src,
+ rtm->rtm_tos & IPTOS_RT_MASK, dev,
+ &res);
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
@@ -3194,18 +3222,25 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
int __init ip_rt_init(void)
{
+ void *idents_hash;
int cpu;
- ip_idents = kmalloc_array(IP_IDENTS_SZ, sizeof(*ip_idents),
- GFP_KERNEL);
- if (!ip_idents)
- panic("IP: failed to allocate ip_idents\n");
+ /* For modern hosts, this will use 2 MB of memory */
+ idents_hash = alloc_large_system_hash("IP idents",
+ sizeof(*ip_idents) + sizeof(*ip_tstamps),
+ 0,
+ 16, /* one bucket per 64 KB */
+ HASH_ZERO,
+ NULL,
+ &ip_idents_mask,
+ 2048,
+ 256*1024);
+
+ ip_idents = idents_hash;
- prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+ prandom_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents));
- ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
- if (!ip_tstamps)
- panic("IP: failed to allocate ip_tstamps\n");
+ ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents);
for_each_possible_cpu(cpu) {
struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f66b2e6d97a7..929f989de1f6 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -296,7 +296,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct rtable *rt;
__u8 rcv_wscale;
struct flowi4 fl4;
@@ -337,6 +337,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
treq = tcp_rsk(req);
+ treq->af_specific = &tcp_request_sock_ipv4_ops;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
treq->ts_off = 0;
@@ -391,8 +392,13 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
- tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
+ tcp_select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index ad132b6e8cfa..c97ba2a44b8b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -332,61 +332,6 @@ bad_key:
return ret;
}
-static void proc_configure_early_demux(int enabled, int protocol)
-{
- struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_protocol *ip6prot;
-#endif
-
- rcu_read_lock();
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot)
- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
- NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
- ip6prot = rcu_dereference(inet6_protos[protocol]);
- if (ip6prot)
- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
- NULL;
-#endif
- rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_TCP);
- }
-
- return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dointvec(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_UDP);
- }
-
- return ret;
-}
-
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write,
void __user *buffer,
@@ -638,14 +583,14 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_udp_early_demux
+ .proc_handler = proc_douintvec_minmax,
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_tcp_early_demux
+ .proc_handler = proc_douintvec_minmax,
},
{
.procname = "ip_default_ttl",
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4ce3397e6fcf..00648a478c6a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -436,7 +436,7 @@ void tcp_init_sock(struct sock *sk)
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
@@ -495,6 +495,8 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
return true;
if (tcp_rmem_pressure(sk))
return true;
+ if (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss)
+ return true;
}
if (sk->sk_prot->stream_memory_read)
return sk->sk_prot->stream_memory_read(sk);
@@ -513,6 +515,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
__poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
+ u8 shutdown;
int state;
sock_poll_wait(file, sock, wait);
@@ -555,9 +558,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
*/
- if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected or passive Fast Open socket? */
@@ -565,7 +569,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
(state != TCP_SYN_RECV || tp->fastopen_rsk)) {
int target = sock_rcvlowat(sk, 0, INT_MAX);
- if (tp->urg_seq == tp->copied_seq &&
+ if (tp->urg_seq == READ_ONCE(tp->copied_seq) &&
!sock_flag(sk, SOCK_URGINLINE) &&
tp->urg_data)
target++;
@@ -573,8 +577,8 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (tcp_stream_is_readable(tp, target, sk))
mask |= EPOLLIN | EPOLLRDNORM;
- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_is_writeable(sk)) {
+ if (!(shutdown & SEND_SHUTDOWN)) {
+ if (__sk_stream_is_writeable(sk, 1)) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -586,7 +590,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* pairs with the input side.
*/
smp_mb__after_atomic();
- if (sk_stream_is_writeable(sk))
+ if (__sk_stream_is_writeable(sk, 1))
mask |= EPOLLOUT | EPOLLWRNORM;
}
} else
@@ -626,7 +630,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
unlock_sock_fast(sk, slow);
break;
case SIOCATMARK:
- answ = tp->urg_data && tp->urg_seq == tp->copied_seq;
+ answ = tp->urg_data && tp->urg_seq == READ_ONCE(tp->copied_seq);
break;
case SIOCOUTQ:
if (sk->sk_state == TCP_LISTEN)
@@ -635,7 +639,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_una;
+ answ = READ_ONCE(tp->write_seq) - tp->snd_una;
break;
case SIOCOUTQNSD:
if (sk->sk_state == TCP_LISTEN)
@@ -644,7 +648,7 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
answ = 0;
else
- answ = tp->write_seq - tp->snd_nxt;
+ answ = READ_ONCE(tp->write_seq) - tp->snd_nxt;
break;
default:
return -ENOIOCTLCMD;
@@ -704,7 +708,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
int size_goal)
{
return skb->len < size_goal &&
- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
!tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
@@ -950,7 +954,7 @@ static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
*/
static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb)
{
- if (skb && !skb->len) {
+ if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
tcp_unlink_write_queue(skb, sk);
if (tcp_write_queue_empty(sk))
tcp_chrono_stop(sk, TCP_CHRONO_BUSY);
@@ -1035,7 +1039,7 @@ new_segment:
sk->sk_wmem_queued += copy;
sk_mem_charge(sk, copy);
skb->ip_summed = CHECKSUM_PARTIAL;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@ -1158,7 +1162,8 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
@@ -1389,7 +1394,7 @@ new_segment:
if (!copied)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
- tp->write_seq += copy;
+ WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
TCP_SKB_CB(skb)->end_seq += copy;
tcp_skb_pcount_set(skb, 0);
@@ -1667,11 +1672,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
if (!copied)
copied = used;
break;
- } else if (used <= len) {
- seq += used;
- copied += used;
- offset += used;
}
+ if (WARN_ON_ONCE(used > len))
+ used = len;
+ seq += used;
+ copied += used;
+ offset += used;
+
/* If recv_actor drops the lock (e.g. TCP splice
* receive) the skb pointer might be invalid when
* getting here: tcp_collapse might have deleted it
@@ -1694,9 +1701,9 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
sk_eat_skb(sk, skb);
if (!desc->count)
break;
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
}
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
@@ -1833,7 +1840,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
out:
up_read(&current->mm->mmap_sem);
if (length) {
- tp->copied_seq = seq;
+ WRITE_ONCE(tp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
/* Clean up data we have read: This will do ACK frames. */
@@ -2110,7 +2117,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (urg_offset < used) {
if (!urg_offset) {
if (!sock_flag(sk, SOCK_URGINLINE)) {
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
urg_hole++;
offset++;
used--;
@@ -2132,7 +2139,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
}
}
- *seq += used;
+ WRITE_ONCE(*seq, *seq + used);
copied += used;
len -= used;
@@ -2161,7 +2168,7 @@ skip_copy:
found_fin_ok:
/* Process the FIN. */
- ++*seq;
+ WRITE_ONCE(*seq, *seq + 1);
if (!(flags & MSG_PEEK))
sk_eat_skb(sk, skb);
break;
@@ -2333,14 +2340,13 @@ bool tcp_check_oom(struct sock *sk, int shift)
return too_many_orphans || out_of_socket_memory;
}
-void tcp_close(struct sock *sk, long timeout)
+void __tcp_close(struct sock *sk, long timeout)
{
struct sk_buff *skb;
int data_was_unread = 0;
int state;
- lock_sock(sk);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
@@ -2500,6 +2506,12 @@ adjudge_to_death:
out:
bh_unlock_sock(sk);
local_bh_enable();
+}
+
+void tcp_close(struct sock *sk, long timeout)
+{
+ lock_sock(sk);
+ __tcp_close(sk, timeout);
release_sock(sk);
sock_put(sk);
}
@@ -2554,6 +2566,13 @@ int tcp_disconnect(struct sock *sk, int flags)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
int old_state = sk->sk_state;
+ u32 seq;
+
+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
if (old_state != TCP_CLOSE)
tcp_set_state(sk, TCP_CLOSE);
@@ -2576,7 +2595,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->urg_data = 0;
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
@@ -2587,17 +2606,22 @@ int tcp_disconnect(struct sock *sk, int flags)
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
- sk->sk_shutdown = 0;
+ WRITE_ONCE(sk->sk_shutdown, 0);
sock_reset_flag(sk, SOCK_DONE);
tp->srtt_us = 0;
tp->rcv_rtt_last_tsecr = 0;
- tp->write_seq += tp->max_window + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
+
+ seq = tp->write_seq + tp->max_window + 2;
+ if (!seq)
+ seq = 1;
+ WRITE_ONCE(tp->write_seq, seq);
+
tp->snd_cwnd = 2;
icsk->icsk_probes_out = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tp->snd_cwnd_cnt = 0;
+ tp->is_cwnd_limited = 0;
+ tp->max_packets_out = 0;
tp->window_clamp = 0;
tp->delivered = 0;
tp->delivered_ce = 0;
@@ -2615,8 +2639,7 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
- dst_release(sk->sk_rx_dst);
- sk->sk_rx_dst = NULL;
+ dst_release(xchg((__force struct dst_entry **)&sk->sk_rx_dst, NULL));
tcp_saved_syn_free(tp);
tp->compressed_ack = 0;
tp->segs_in = 0;
@@ -2880,16 +2903,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_QUEUE_SEQ:
- if (sk->sk_state != TCP_CLOSE)
+ if (sk->sk_state != TCP_CLOSE) {
err = -EPERM;
- else if (tp->repair_queue == TCP_SEND_QUEUE)
- tp->write_seq = val;
- else if (tp->repair_queue == TCP_RECV_QUEUE) {
- WRITE_ONCE(tp->rcv_nxt, val);
- WRITE_ONCE(tp->copied_seq, val);
- }
- else
+ } else if (tp->repair_queue == TCP_SEND_QUEUE) {
+ if (!tcp_rtx_queue_empty(sk))
+ err = -EPERM;
+ else
+ WRITE_ONCE(tp->write_seq, val);
+ } else if (tp->repair_queue == TCP_RECV_QUEUE) {
+ if (tp->rcv_nxt != tp->copied_seq) {
+ err = -EPERM;
+ } else {
+ WRITE_ONCE(tp->rcv_nxt, val);
+ WRITE_ONCE(tp->copied_seq, val);
+ }
+ } else {
err = -EINVAL;
+ }
break;
case TCP_REPAIR_OPTIONS:
@@ -2970,18 +3000,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_LINGER2:
if (val < 0)
- tp->linger2 = -1;
- else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
- tp->linger2 = 0;
+ WRITE_ONCE(tp->linger2, -1);
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
else
- tp->linger2 = val * HZ;
+ WRITE_ONCE(tp->linger2, val * HZ);
break;
case TCP_DEFER_ACCEPT:
/* Translate value in seconds to number of retransmits */
- icsk->icsk_accept_queue.rskq_defer_accept =
- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
- TCP_RTO_MAX / HZ);
+ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ));
break;
case TCP_WINDOW_CLAMP:
@@ -3041,7 +3071,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
@@ -3068,7 +3099,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = tcp_repair_set_window(tp, optval, optlen);
break;
case TCP_NOTSENT_LOWAT:
- tp->notsent_lowat = val;
+ WRITE_ONCE(tp->notsent_lowat, val);
sk->sk_write_space(sk);
break;
case TCP_INQ:
@@ -3345,7 +3376,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
- if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
+ if (tp->rx_opt.user_mss &&
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
val = tp->rx_opt.user_mss;
if (tp->repair)
val = tp->rx_opt.mss_clamp;
@@ -3369,13 +3401,14 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
break;
case TCP_LINGER2:
- val = tp->linger2;
+ val = READ_ONCE(tp->linger2);
if (val >= 0)
- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
+ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
+ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
val = tp->window_clamp;
@@ -3521,7 +3554,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_FASTOPEN:
- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
+ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
break;
case TCP_FASTOPEN_CONNECT:
@@ -3536,7 +3569,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = tcp_time_stamp_raw() + tp->tsoffset;
break;
case TCP_NOTSENT_LOWAT:
- val = tp->notsent_lowat;
+ val = READ_ONCE(tp->notsent_lowat);
break;
case TCP_INQ:
val = tp->recvmsg_inq;
@@ -3673,12 +3706,16 @@ static void __tcp_alloc_md5sig_pool(void)
* to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool_populated = true;
+ /* Paired with READ_ONCE() from tcp_alloc_md5sig_pool()
+ * and tcp_get_md5sig_pool().
+ */
+ WRITE_ONCE(tcp_md5sig_pool_populated, true);
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool_populated)) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (unlikely(!READ_ONCE(tcp_md5sig_pool_populated))) {
mutex_lock(&tcp_md5sig_mutex);
if (!tcp_md5sig_pool_populated)
@@ -3686,7 +3723,8 @@ bool tcp_alloc_md5sig_pool(void)
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool_populated;
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ return READ_ONCE(tcp_md5sig_pool_populated);
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -3702,7 +3740,8 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
local_bh_disable();
- if (tcp_md5sig_pool_populated) {
+ /* Paired with WRITE_ONCE() from __tcp_alloc_md5sig_pool() */
+ if (READ_ONCE(tcp_md5sig_pool_populated)) {
/* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
smp_rmb();
return this_cpu_ptr(&tcp_md5sig_pool);
@@ -3778,7 +3817,7 @@ void tcp_done(struct sock *sk)
if (req)
reqsk_fastopen_remove(sk, req, false);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 93f176336297..1740de053072 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -917,7 +917,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs)
filter_expired = after(tcp_jiffies32,
bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ);
if (rs->rtt_us >= 0 &&
- (rs->rtt_us <= bbr->min_rtt_us ||
+ (rs->rtt_us < bbr->min_rtt_us ||
(filter_expired && !rs->is_ack_delayed))) {
bbr->min_rtt_us = rs->rtt_us;
bbr->min_rtt_stamp = tcp_jiffies32;
@@ -985,7 +985,7 @@ static void bbr_init(struct sock *sk)
bbr->prior_cwnd = 0;
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
bbr->rtt_cnt = 0;
- bbr->next_rtt_delivered = 0;
+ bbr->next_rtt_delivered = tp->delivered;
bbr->prev_ca_state = TCP_CA_Open;
bbr->packet_conservation = 0;
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 06fbe102a425..10daea1fcefc 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -374,6 +374,7 @@ static void tcp_cdg_init(struct sock *sk)
struct cdg *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ ca->gradients = NULL;
/* We silently fall back to window = 1 if allocation fails. */
if (window > 1)
ca->gradients = kcalloc(window, sizeof(ca->gradients[0]),
@@ -387,6 +388,7 @@ static void tcp_cdg_release(struct sock *sk)
struct cdg *ca = inet_csk_ca(sk);
kfree(ca->gradients);
+ ca->gradients = NULL;
}
static struct tcp_congestion_ops tcp_cdg __read_mostly = {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 32c3162baf3e..533f8d84d2f7 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -196,6 +196,11 @@ static void tcp_reinit_congestion_control(struct sock *sk,
icsk->icsk_ca_setsockopt = 1;
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ if (ca->flags & TCP_CONG_NEEDS_ECN)
+ INET_ECN_xmit(sk);
+ else
+ INET_ECN_dontxmit(sk);
+
if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
tcp_init_congestion_control(sk);
}
@@ -223,6 +228,10 @@ int tcp_set_default_congestion_control(struct net *net, const char *name)
ret = -ENOENT;
} else if (!try_module_get(ca->owner)) {
ret = -EBUSY;
+ } else if (!net_eq(net, &init_net) &&
+ !(ca->flags & TCP_CONG_NON_RESTRICTED)) {
+ /* Only init netns can set default to a restricted algorithm */
+ ret = -EPERM;
} else {
prev = xchg(&net->ipv4.tcp_congestion_control, ca);
if (prev)
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 8b5ba0a5cd38..93530bd33247 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -340,8 +340,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
if (tcp_in_slow_start(tp)) {
- if (hystart && after(ack, ca->end_seq))
- bictcp_hystart_reset(sk);
acked = tcp_slow_start(tp, acked);
if (!acked)
return;
@@ -383,6 +381,9 @@ static void hystart_update(struct sock *sk, u32 delay)
if (ca->found & hystart_detect)
return;
+ if (after(tp->snd_una, ca->end_seq))
+ bictcp_hystart_reset(sk);
+
if (hystart_detect & HYSTART_ACK_TRAIN) {
u32 now = bictcp_clock();
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index c9e97f304f98..2a46f9f81ba0 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -30,8 +30,9 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
} else if (sk->sk_type == SOCK_STREAM) {
const struct tcp_sock *tp = tcp_sk(sk);
- r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) - tp->copied_seq, 0);
- r->idiag_wqueue = tp->write_seq - tp->snd_una;
+ r->idiag_rqueue = max_t(int, READ_ONCE(tp->rcv_nxt) -
+ READ_ONCE(tp->copied_seq), 0);
+ r->idiag_wqueue = READ_ONCE(tp->write_seq) - tp->snd_una;
}
if (info)
tcp_get_info(sk, info);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 018a48477355..f7bb78b443fa 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -276,6 +276,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
static bool tcp_fastopen_queue_check(struct sock *sk)
{
struct fastopen_queue *fastopenq;
+ int max_qlen;
/* Make sure the listener has enabled fastopen, and we don't
* exceed the max # of pending TFO requests allowed before trying
@@ -288,10 +289,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* temporarily vs a server not supporting Fast Open at all.
*/
fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- if (fastopenq->max_qlen == 0)
+ max_qlen = READ_ONCE(fastopenq->max_qlen);
+ if (max_qlen == 0)
return false;
- if (fastopenq->qlen >= fastopenq->max_qlen) {
+ if (fastopenq->qlen >= max_qlen) {
struct request_sock *req1;
spin_lock(&fastopenq->lock);
req1 = fastopenq->rskq_rst_head;
@@ -313,7 +315,7 @@ static bool tcp_fastopen_no_cookie(const struct sock *sk,
const struct dst_entry *dst,
int flag)
{
- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
tcp_sk(sk)->fastopen_no_cookie ||
(dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}
@@ -328,7 +330,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
@@ -342,8 +344,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
return NULL;
}
- if (syn_data &&
- tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
+ if (tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD))
goto fastopen;
if (foc->len >= 0 && /* Client presents or requests a cookie */
@@ -454,8 +455,15 @@ void tcp_fastopen_active_disable(struct sock *sk)
{
struct net *net = sock_net(sk);
+ /* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
+ WRITE_ONCE(net->ipv4.tfo_active_disable_stamp, jiffies);
+
+ /* Paired with smp_rmb() in tcp_fastopen_active_should_disable().
+ * We want net->ipv4.tfo_active_disable_stamp to be updated first.
+ */
+ smp_mb__before_atomic();
atomic_inc(&net->ipv4.tfo_active_disable_times);
- net->ipv4.tfo_active_disable_stamp = jiffies;
+
NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE);
}
@@ -473,10 +481,16 @@ bool tcp_fastopen_active_should_disable(struct sock *sk)
if (!tfo_da_times)
return false;
+ /* Paired with smp_mb__before_atomic() in tcp_fastopen_active_disable() */
+ smp_rmb();
+
/* Limit timout to max: 2^6 * initial timeout */
multiplier = 1 << min(tfo_da_times - 1, 6);
- timeout = multiplier * tfo_bh_timeout * HZ;
- if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout))
+
+ /* Paired with the WRITE_ONCE() in tcp_fastopen_active_disable(). */
+ timeout = READ_ONCE(sock_net(sk)->ipv4.tfo_active_disable_stamp) +
+ multiplier * tfo_bh_timeout * HZ;
+ if (time_before(jiffies, timeout))
return true;
/* Mark check bit so we can check for successful active TFO
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9813d62de631..407ad07dc598 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -172,6 +172,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
if (unlikely(len > icsk->icsk_ack.rcv_mss +
MAX_TCP_OPTION_SPACE))
tcp_gro_dev_warn(sk, skb, len);
+ /* If the skb has a len of exactly 1*MSS and has the PSH bit
+ * set then it is likely the end of an application write. So
+ * more data may not be arriving soon, and yet the data sender
+ * may be waiting for an ACK if cwnd-bound or using TX zero
+ * copy. So we set ICSK_ACK_PUSHED here so that
+ * tcp_cleanup_rbuf() will send an ACK immediately if the app
+ * reads all of the data and is not ping-pong. If len > MSS
+ * then this logic does not matter (and does not hurt) because
+ * tcp_cleanup_rbuf() will always ACK immediately if the app
+ * reads data and there is more than an MSS of unACKed data.
+ */
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
+ icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
@@ -216,7 +229,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -224,7 +237,6 @@ void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
-EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -432,14 +444,13 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
*/
void tcp_init_buffer_space(struct sock *sk)
{
- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK))
tcp_sndbuf_expand(sk);
- tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss);
tcp_mstamp_refresh(tp);
tp->rcvq_space.time = tp->tcp_mstamp;
tp->rcvq_space.seq = tp->copied_seq;
@@ -463,6 +474,8 @@ void tcp_init_buffer_space(struct sock *sk)
tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
tp->snd_cwnd_stamp = tcp_jiffies32;
+ tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd,
+ (u32)TCP_INIT_CWND * tp->advmss);
}
/* 4. Recalculate window clamp after socket hit its memory bounds. */
@@ -580,10 +593,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
}
@@ -890,7 +905,7 @@ static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
}
/* This exciting event is worth to be remembered. 8) */
@@ -1194,7 +1209,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
- tp->undo_retrans--;
+ tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
if ((sacked & TCPCB_SACKED_ACKED) &&
before(start_seq, state->reord))
state->reord = start_seq;
@@ -1875,7 +1890,7 @@ static void tcp_check_reno_reordering(struct sock *sk, const int addend)
return;
tp->reordering = min_t(u32, tp->packets_out + addend,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
@@ -1935,7 +1950,8 @@ static inline void tcp_init_undo(struct tcp_sock *tp)
static bool tcp_is_rack(const struct sock *sk)
{
- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_LOSS_DETECTION;
}
/* If we detect SACK reneging, forget all SACK information
@@ -1979,6 +1995,7 @@ void tcp_enter_loss(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+ u8 reordering;
tcp_timeout_mark_lost(sk);
@@ -1999,10 +2016,12 @@ void tcp_enter_loss(struct sock *sk)
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
*/
+ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+ tp->sacked_out >= reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- net->ipv4.sysctl_tcp_reordering);
+ reordering);
+
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
@@ -2011,7 +2030,7 @@ void tcp_enter_loss(struct sock *sk)
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
- tp->frto = net->ipv4.sysctl_tcp_frto &&
+ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
@@ -2028,7 +2047,8 @@ void tcp_enter_loss(struct sock *sk)
*/
static bool tcp_check_sack_reneging(struct sock *sk, int flag)
{
- if (flag & FLAG_SACK_RENEGING) {
+ if (flag & FLAG_SACK_RENEGING &&
+ flag & FLAG_SND_UNA_ADVANCED) {
struct tcp_sock *tp = tcp_sk(sk);
unsigned long delay = max(usecs_to_jiffies(tp->srtt_us >> 4),
msecs_to_jiffies(10));
@@ -2365,6 +2385,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp)
return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}
+static bool tcp_is_non_sack_preventing_reopen(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
+ /* Hold old state until something *above* high_seq
+ * is ACKed. For Reno it is MUST to prevent false
+ * fast retransmits (RFC2582). SACK TCP is safe. */
+ if (!tcp_any_retrans_done(sk))
+ tp->retrans_stamp = 0;
+ return true;
+ }
+ return false;
+}
+
/* People celebrate: "We love our President!" */
static bool tcp_try_undo_recovery(struct sock *sk)
{
@@ -2387,14 +2422,8 @@ static bool tcp_try_undo_recovery(struct sock *sk)
} else if (tp->rack.reo_wnd_persist) {
tp->rack.reo_wnd_persist--;
}
- if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) {
- /* Hold old state until something *above* high_seq
- * is ACKed. For Reno it is MUST to prevent false
- * fast retransmits (RFC2582). SACK TCP is safe. */
- if (!tcp_any_retrans_done(sk))
- tp->retrans_stamp = 0;
+ if (tcp_is_non_sack_preventing_reopen(sk))
return true;
- }
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
return false;
@@ -2430,6 +2459,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPSPURIOUSRTOS);
inet_csk(sk)->icsk_retransmits = 0;
+ if (tcp_is_non_sack_preventing_reopen(sk))
+ return true;
if (frto_undo || tcp_is_sack(tp)) {
tcp_set_ca_state(sk, TCP_CA_Open);
tp->is_sack_reneg = 0;
@@ -2563,12 +2594,15 @@ static void tcp_mtup_probe_success(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ u64 val;
- /* FIXME: breaks with very large cwnd */
tp->prior_ssthresh = tcp_current_ssthresh(sk);
- tp->snd_cwnd = tp->snd_cwnd *
- tcp_mss_to_mtu(sk, tp->mss_cache) /
- icsk->icsk_mtup.probe_size;
+
+ val = (u64)tp->snd_cwnd * tcp_mss_to_mtu(sk, tp->mss_cache);
+ do_div(val, icsk->icsk_mtup.probe_size);
+ WARN_ON_ONCE((u32)val != val);
+ tp->snd_cwnd = max_t(u32, 1U, val);
+
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_jiffies32;
tp->snd_ssthresh = tcp_current_ssthresh(sk);
@@ -2749,7 +2783,8 @@ static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
} else if (tcp_is_rack(sk)) {
u32 prior_retrans = tp->retrans_out;
- tcp_rack_mark_lost(sk);
+ if (tcp_rack_mark_lost(sk))
+ *ack_flag &= ~FLAG_SET_XMIT_TIMER;
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
}
@@ -2892,7 +2927,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
struct tcp_sock *tp = tcp_sk(sk);
if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
@@ -2929,9 +2964,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us;
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ ca_rtt_us = seq_rtt_us;
+ }
}
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -3294,7 +3331,8 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering >
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
@@ -3403,16 +3441,23 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
static bool __tcp_oow_rate_limited(struct net *net, int mib_idx,
u32 *last_oow_ack_time)
{
- if (*last_oow_ack_time) {
- s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
+ /* Paired with the WRITE_ONCE() in this function. */
+ u32 val = READ_ONCE(*last_oow_ack_time);
- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+ if (val) {
+ s32 elapsed = (s32)(tcp_jiffies32 - val);
+
+ if (0 <= elapsed &&
+ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
}
- *last_oow_ack_time = tcp_jiffies32;
+ /* Paired with the prior READ_ONCE() and with itself,
+ * as we might be lockless.
+ */
+ WRITE_ONCE(*last_oow_ack_time, tcp_jiffies32);
return false; /* not rate-limited: go ahead, send dupack now! */
}
@@ -3453,11 +3498,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb)
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
- if (now != challenge_timestamp) {
- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ if (now != READ_ONCE(challenge_timestamp)) {
+ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
u32 half = (ack_limit + 1) >> 1;
- challenge_timestamp = now;
+ WRITE_ONCE(challenge_timestamp, now);
WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit));
}
count = READ_ONCE(challenge_count);
@@ -3595,8 +3640,12 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
* then we can probably ignore it.
*/
if (before(ack, prior_snd_una)) {
+ u32 max_window;
+
+ /* do not accept ACK for bytes we never sent. */
+ max_window = min_t(u64, tp->max_window, tp->bytes_acked);
/* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
- if (before(ack, prior_snd_una - tp->max_window)) {
+ if (before(ack, prior_snd_una - max_window)) {
if (!(flag & FLAG_NO_CHALLENGE_ACK))
tcp_send_challenge_ack(sk, skb);
return -1;
@@ -3692,9 +3741,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
- /* If needed, reset TLP/RTO timer; RACK may later override this. */
- if (flag & FLAG_SET_XMIT_TIMER)
- tcp_set_xmit_timer(sk);
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@ -3702,6 +3748,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
&rexmit);
}
+ /* If needed, reset TLP/RTO timer when RACK doesn't set. */
+ if (flag & FLAG_SET_XMIT_TIMER)
+ tcp_set_xmit_timer(sk);
+
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
sk_dst_confirm(sk);
@@ -4102,7 +4152,7 @@ void tcp_fin(struct sock *sk)
inet_csk_schedule_ack(sk);
- sk->sk_shutdown |= RCV_SHUTDOWN;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
@@ -4182,7 +4232,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
@@ -4217,7 +4267,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4704,7 +4754,8 @@ void tcp_data_ready(struct sock *sk)
int avail = tp->rcv_nxt - tp->copied_seq;
if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) &&
- !sock_flag(sk, SOCK_DONE))
+ !sock_flag(sk, SOCK_DONE) &&
+ tcp_receive_window(tp) > inet_csk(sk)->icsk_ack.rcv_mss)
return;
sk->sk_data_ready(sk);
@@ -5159,7 +5210,17 @@ static void tcp_new_space(struct sock *sk)
sk->sk_write_space(sk);
}
-static void tcp_check_space(struct sock *sk)
+/* Caller made space either from:
+ * 1) Freeing skbs in rtx queues (after tp->snd_una has advanced)
+ * 2) Sent skbs from output queue (and thus advancing tp->snd_nxt)
+ *
+ * We might be able to generate EPOLLOUT to the application if:
+ * 1) Space consumed in output/rtx queues is below sk->sk_sndbuf/2
+ * 2) notsent amount (tp->write_seq - tp->snd_nxt) became
+ * small enough that tcp_stream_memory_free() decides it
+ * is time to generate EPOLLOUT.
+ */
+void tcp_check_space(struct sock *sk)
{
if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -5212,7 +5273,7 @@ send_now:
}
if (!tcp_is_sack(tp) ||
- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
goto send_now;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
@@ -5235,7 +5296,8 @@ send_now:
if (tp->srtt_us && tp->srtt_us < rtt)
rtt = tp->srtt_us;
- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+ delay = min_t(unsigned long,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
@@ -5266,7 +5328,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th)
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
ptr--;
ptr += ntohl(th->seq);
@@ -5513,7 +5575,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
trace_tcp_probe(sk, skb);
tcp_mstamp_refresh(tp);
- if (unlikely(!sk->sk_rx_dst))
+ if (unlikely(!rcu_access_pointer(sk->sk_rx_dst)))
inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
/*
* Header prediction.
@@ -5631,6 +5693,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
tcp_data_snd_check(sk);
if (!inet_csk_ack_scheduled(sk))
goto no_ack;
+ } else {
+ tcp_update_wl(tp, TCP_SKB_CB(skb)->seq);
}
__tcp_ack_snd_check(sk, 0);
@@ -5883,7 +5947,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
/* Remember, tcp_poll() does not lock socket!
* Change state from SYN-SENT only after copied_seq
* is initialized. */
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
smc_check_reset_syn(tp);
@@ -5958,7 +6022,7 @@ discard:
}
WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1);
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
@@ -6120,7 +6184,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_rearm_rto(sk);
} else {
tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB);
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
}
smp_mb();
tcp_set_state(sk, TCP_ESTABLISHED);
@@ -6167,7 +6231,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
tcp_set_state(sk, TCP_FIN_WAIT2);
- sk->sk_shutdown |= SEND_SHUTDOWN;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN);
sk_dst_confirm(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7536f4c0bbf4..bd374eac9a75 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -110,10 +110,10 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
@@ -169,9 +169,11 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
* without appearing to create any others.
*/
if (likely(!tp->repair)) {
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
+ u32 seq = tcptw->tw_snd_nxt + 65535 + 2;
+
+ if (!seq)
+ seq = 1;
+ WRITE_ONCE(tp->write_seq, seq);
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
}
@@ -258,7 +260,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
if (likely(!tp->repair))
- tp->write_seq = 0;
+ WRITE_ONCE(tp->write_seq, 0);
}
inet->inet_dport = usin->sin_port;
@@ -296,10 +298,11 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (likely(!tp->repair)) {
if (!tp->write_seq)
- tp->write_seq = secure_tcp_seq(inet->inet_saddr,
- inet->inet_daddr,
- inet->inet_sport,
- usin->sin_port);
+ WRITE_ONCE(tp->write_seq,
+ secure_tcp_seq(inet->inet_saddr,
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port));
tp->tsoffset = secure_tcp_ts_off(sock_net(sk),
inet->inet_saddr,
inet->inet_daddr);
@@ -325,6 +328,8 @@ failure:
* if necessary.
*/
tcp_set_state(sk, TCP_CLOSE);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
@@ -345,7 +350,7 @@ void tcp_v4_mtu_reduced(struct sock *sk)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- mtu = tcp_sk(sk)->mtu_info;
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
dst = inet_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -513,7 +518,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
if (sk->sk_state == TCP_LISTEN)
goto out;
- tp->mtu_info = info;
+ WRITE_ONCE(tp->mtu_info, info);
if (!sock_owned_by_user(sk)) {
tcp_v4_mtu_reduced(sk);
} else {
@@ -1369,7 +1374,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
.req_md5_lookup = tcp_v4_md5_lookup,
@@ -1412,6 +1417,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
bool *own_req)
{
struct inet_request_sock *ireq;
+ bool found_dup_sk = false;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
@@ -1482,12 +1488,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
+ &found_dup_sk);
if (likely(*own_req)) {
tcp_move_syn(newtp, req);
ireq->ireq_opt = NULL;
} else {
newinet->inet_opt = NULL;
+
+ if (!req_unhash && found_dup_sk) {
+ /* This code path should only be executed in the
+ * syncookie case only
+ */
+ bh_unlock_sock(newsk);
+ sock_put(newsk);
+ newsk = NULL;
+ }
}
return newsk;
@@ -1530,15 +1546,18 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
struct sock *rsk;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- struct dst_entry *dst = sk->sk_rx_dst;
+ struct dst_entry *dst;
+
+ dst = rcu_dereference_protected(sk->sk_rx_dst,
+ lockdep_sock_is_held(sk));
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
!dst->ops->check(dst, 0)) {
+ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
dst_release(dst);
- sk->sk_rx_dst = NULL;
}
}
tcp_rcv_established(sk, skb);
@@ -1613,7 +1632,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk_fullsock(sk)) {
- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
+ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
@@ -1918,7 +1937,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
struct dst_entry *dst = skb_dst(skb);
if (dst && dst_hold_safe(dst)) {
- sk->sk_rx_dst = dst;
+ rcu_assign_pointer(sk->sk_rx_dst, dst);
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
}
@@ -2174,6 +2193,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
static void *tcp_seek_last_pos(struct seq_file *seq)
{
struct tcp_iter_state *st = seq->private;
+ int bucket = st->bucket;
int offset = st->offset;
int orig_num = st->num;
void *rc = NULL;
@@ -2184,7 +2204,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
break;
st->state = TCP_SEQ_STATE_LISTENING;
rc = listening_get_next(seq, NULL);
- while (offset-- && rc)
+ while (offset-- && rc && bucket == st->bucket)
rc = listening_get_next(seq, rc);
if (rc)
break;
@@ -2195,7 +2215,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
if (st->bucket > tcp_hashinfo.ehash_mask)
break;
rc = established_get_first(seq);
- while (offset-- && rc)
+ while (offset-- && rc && bucket == st->bucket)
rc = established_get_next(seq, rc);
}
@@ -2340,12 +2360,12 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
* we might find a transient negative value.
*/
rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
- tp->copied_seq, 0);
+ READ_ONCE(tp->copied_seq), 0);
seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
"%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
i, src, srcp, dest, destp, state,
- tp->write_seq - tp->snd_una,
+ READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 03b51cdcc731..60619b1f4acd 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
- possible_net_t tcpm_net;
+ struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
struct rcu_head rcu_head;
};
-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
- return read_pnet(&tm->tcpm_net);
+ /* Paired with the WRITE_ONCE() in tcpm_new() */
+ return READ_ONCE(tm->tcpm_net);
}
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_lock & (1 << idx);
+ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}
-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_vals[idx];
+ /* Paired with WRITE_ONCE() in tcp_metric_set() */
+ return READ_ONCE(tm->tcpm_vals[idx]);
}
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
{
- tm->tcpm_vals[idx] = val;
+ /* Paired with READ_ONCE() in tcp_metric_get() */
+ WRITE_ONCE(tm->tcpm_vals[idx], val);
}
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
- return inetpeer_addr_cmp(a, b) == 0;
+ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}
struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly;
static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
u32 msval;
u32 val;
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
val = 0;
if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING;
- tm->tcpm_lock = val;
+ /* Paired with READ_ONCE() in tcp_metric_locked() */
+ WRITE_ONCE(tm->tcpm_lock, val);
msval = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
msval = dst_metric_raw(dst, RTAX_RTTVAR);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ dst_metric_raw(dst, RTAX_SSTHRESH));
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ dst_metric_raw(dst, RTAX_CWND));
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) {
+ write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
+ write_sequnlock(&fastopen_seqlock);
}
}
#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst)
{
- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+ unsigned long limit;
+
+ if (!tm)
+ return;
+ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+ if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false);
}
@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
oldest = deref_locked(tcp_metrics_hash[hash].chain);
for (tm = deref_locked(oldest->tcpm_next); tm;
tm = deref_locked(tm->tcpm_next)) {
- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+ if (time_before(READ_ONCE(tm->tcpm_stamp),
+ READ_ONCE(oldest->tcpm_stamp)))
oldest = tm;
}
tm = oldest;
} else {
- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
goto out_unlock;
}
- write_pnet(&tm->tcpm_net, net);
+ /* Paired with the READ_ONCE() in tm_net() */
+ WRITE_ONCE(tm->tcpm_net, net);
+
tm->tcpm_saddr = *saddr;
tm->tcpm_daddr = *daddr;
- tcpm_suck_dst(tm, dst, true);
+ tcpm_suck_dst(tm, dst, reclaim);
if (likely(!reclaim)) {
tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -329,7 +349,7 @@ void tcp_update_metrics(struct sock *sk)
int m;
sk_dst_confirm(sk);
- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
return;
rcu_read_lock();
@@ -425,12 +445,13 @@ void tcp_update_metrics(struct sock *sk)
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != net->ipv4.sysctl_tcp_reordering)
+ tp->reordering !=
+ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
}
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock:
rcu_read_unlock();
}
@@ -445,11 +466,15 @@ void tcp_init_metrics(struct sock *sk)
u32 val, crtt = 0; /* cached RTT scaled by 8 */
sk_dst_confirm(sk);
+ /* ssthresh may have been reduced unnecessarily during.
+ * 3WHS. Restore it back to its initial default.
+ */
+ tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
if (!dst)
goto reset;
rcu_read_lock();
- tm = tcp_get_metrics(sk, dst, true);
+ tm = tcp_get_metrics(sk, dst, false);
if (!tm) {
rcu_read_unlock();
goto reset;
@@ -463,11 +488,6 @@ void tcp_init_metrics(struct sock *sk)
tp->snd_ssthresh = val;
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
tp->snd_ssthresh = tp->snd_cwnd_clamp;
- } else {
- /* ssthresh may have been reduced unnecessarily during.
- * 3WHS. Restore it back to its initial default.
- */
- tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
}
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val && tp->reordering != val)
@@ -543,8 +563,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
return ret;
}
-static DEFINE_SEQLOCK(fastopen_seqlock);
-
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
@@ -651,7 +669,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
}
if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
- jiffies - tm->tcpm_stamp,
+ jiffies - READ_ONCE(tm->tcpm_stamp),
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
@@ -662,7 +680,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
if (!nest)
goto nla_put_failure;
for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
- u32 val = tm->tcpm_vals[i];
+ u32 val = tcp_metric_get(tm, i);
if (!val)
continue;
@@ -894,7 +912,7 @@ static void tcp_metrics_flush_all(struct net *net)
match = net ? net_eq(tm_net(tm), net) :
!refcount_read(&tm_net(tm)->count);
if (match) {
- *pp = tm->tcpm_next;
+ rcu_assign_pointer(*pp, tm->tcpm_next);
kfree_rcu(tm, rcu_head);
} else {
pp = &tm->tcpm_next;
@@ -935,7 +953,7 @@ static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
if (addr_same(&tm->tcpm_daddr, &daddr) &&
(!src || addr_same(&tm->tcpm_saddr, &saddr)) &&
net_eq(tm_net(tm), net)) {
- *pp = tm->tcpm_next;
+ rcu_assign_pointer(*pp, tm->tcpm_next);
kfree_rcu(tm, rcu_head);
found = true;
} else {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7ba8a90772b0..bae0199a943b 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -179,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
@@ -470,7 +470,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
seq = treq->rcv_isn + 1;
newtp->rcv_wup = seq;
- newtp->copied_seq = seq;
+ WRITE_ONCE(newtp->copied_seq, seq);
WRITE_ONCE(newtp->rcv_nxt, seq);
newtp->segs_in = 1;
@@ -510,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->app_limited = ~0U;
tcp_init_xmit_timers(newsk);
- newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1;
+ WRITE_ONCE(newtp->write_seq, newtp->pushed_seq = treq->snt_isn + 1);
newtp->rx_opt.saw_tstamp = 0;
@@ -550,7 +550,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->tsoffset = treq->ts_off;
#ifdef CONFIG_TCP_MD5SIG
newtp->md5sig_info = NULL; /*XXX*/
- if (newtp->af_specific->md5_lookup(sk, newsk))
+ if (treq->af_specific->req_md5_lookup(sk, req_to_sk(req)))
newtp->tcp_header_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
if (skb->len >= TCP_MSS_DEFAULT + newtp->tcp_header_len)
@@ -582,6 +582,9 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
* validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
*
* We don't need to initialize tmp_opt.sack_ok as we don't use the results
+ *
+ * Note: If @fastopen is true, this can be called from process context.
+ * Otherwise, this is from BH context.
*/
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
@@ -734,7 +737,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
&tcp_rsk(req)->last_oow_ack_time))
req->rsk_ops->send_ack(sk, skb, req);
if (paws_reject)
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
return NULL;
}
@@ -753,7 +756,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* "fourth, check the SYN bit"
*/
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
goto embryonic_reset;
}
@@ -815,8 +818,11 @@ embryonic_reset:
tcp_reset(sk);
}
if (!fastopen) {
- inet_csk_reqsk_queue_drop(sk, req);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+ bool unlinked = inet_csk_reqsk_queue_drop(sk, req);
+
+ if (unlinked)
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+ *req_stolen = !unlinked;
}
return NULL;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 74fb211e0ea6..670804d4c169 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -69,6 +69,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
+ tcp_check_space(sk);
}
/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one
@@ -163,8 +164,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
- u32 rcv_nxt)
+static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -178,7 +178,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
- tcp_dec_quickack_mode(sk, pkts);
+ tcp_dec_quickack_mode(sk);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -968,6 +968,8 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer)
static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+ ktime_t expire, now;
u64 len_ns;
u32 rate;
@@ -979,12 +981,28 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb)
len_ns = (u64)skb->len * NSEC_PER_SEC;
do_div(len_ns, rate);
- hrtimer_start(&tcp_sk(sk)->pacing_timer,
- ktime_add_ns(ktime_get(), len_ns),
+ now = ktime_get();
+ /* If hrtimer is already armed, then our caller has not
+ * used tcp_pacing_check().
+ */
+ if (unlikely(hrtimer_is_queued(&tp->pacing_timer))) {
+ expire = hrtimer_get_softexpires(&tp->pacing_timer);
+ if (ktime_after(expire, now))
+ now = expire;
+ if (hrtimer_try_to_cancel(&tp->pacing_timer) == 1)
+ __sock_put(sk);
+ }
+ hrtimer_start(&tp->pacing_timer, ktime_add_ns(now, len_ns),
HRTIMER_MODE_ABS_PINNED_SOFT);
sock_hold(sk);
}
+static bool tcp_pacing_check(const struct sock *sk)
+{
+ return tcp_needs_internal_pacing(sk) &&
+ hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+}
+
static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
{
skb->skb_mstamp = tp->tcp_mstamp;
@@ -1072,7 +1090,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
skb_set_hash_from_sk(skb, sk);
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
- skb_set_dst_pending_confirm(skb, sk->sk_dst_pending_confirm);
+ skb_set_dst_pending_confirm(skb, READ_ONCE(sk->sk_dst_pending_confirm));
/* Build TCP header and checksum it. */
th = (struct tcphdr *)skb->data;
@@ -1120,7 +1138,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+ tcp_event_ack_sent(sk, rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@@ -1175,7 +1193,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
/* Advance write_seq and place onto the write_queue. */
- tp->write_seq = TCP_SKB_CB(skb)->end_seq;
+ WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
sk->sk_wmem_queued += skb->truesize;
@@ -1472,6 +1490,7 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
return __tcp_mtu_to_mss(sk, pmtu) -
(tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr));
}
+EXPORT_SYMBOL(tcp_mtu_to_mss);
/* Inverse of above */
int tcp_mss_to_mtu(struct sock *sk, int mss)
@@ -1503,7 +1522,7 @@ void tcp_mtup_init(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
@@ -1618,14 +1637,20 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
struct tcp_sock *tp = tcp_sk(sk);
- /* Track the maximum number of outstanding packets in each
- * window, and remember whether we were cwnd-limited then.
+ /* Track the strongest available signal of the degree to which the cwnd
+ * is fully utilized. If cwnd-limited then remember that fact for the
+ * current window. If not cwnd-limited then track the maximum number of
+ * outstanding packets in the current window. (If cwnd-limited then we
+ * chose to not update tp->max_packets_out to avoid an extra else
+ * clause with no functional impact.)
*/
- if (!before(tp->snd_una, tp->max_packets_seq) ||
- tp->packets_out > tp->max_packets_out) {
- tp->max_packets_out = tp->packets_out;
- tp->max_packets_seq = tp->snd_nxt;
+ if (!before(tp->snd_una, tp->cwnd_usage_seq) ||
+ is_cwnd_limited ||
+ (!tp->is_cwnd_limited &&
+ tp->packets_out > tp->max_packets_out)) {
tp->is_cwnd_limited = is_cwnd_limited;
+ tp->max_packets_out = tp->packets_out;
+ tp->cwnd_usage_seq = tp->snd_nxt;
}
if (tcp_is_cwnd_limited(sk)) {
@@ -1637,7 +1662,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited)
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
@@ -1724,7 +1749,7 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
@@ -2009,7 +2034,7 @@ static inline void tcp_mtu_check_reprobe(struct sock *sk)
u32 interval;
s32 delta;
- interval = net->ipv4.sysctl_tcp_probe_interval;
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
@@ -2091,7 +2116,7 @@ static int tcp_mtu_probe(struct sock *sk)
* probing process by not resetting search range to its orignal.
*/
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
/* Check whether enough time has elaplased for
* another round of probing.
*/
@@ -2119,6 +2144,9 @@ static int tcp_mtu_probe(struct sock *sk)
if (!tcp_can_coalesce_send_queue_head(sk, probe_size))
return -1;
+ if (tcp_pacing_check(sk))
+ return -1;
+
/* We're allowed to probe. Build it now. */
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
@@ -2192,10 +2220,16 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
}
-static bool tcp_pacing_check(const struct sock *sk)
+static bool tcp_rtx_queue_empty_or_single_skb(const struct sock *sk)
{
- return tcp_needs_internal_pacing(sk) &&
- hrtimer_is_queued(&tcp_sk(sk)->pacing_timer);
+ const struct rb_node *node = sk->tcp_rtx_queue.rb_node;
+
+ /* No skb in the rtx queue. */
+ if (!node)
+ return true;
+
+ /* Only one skb in rtx queue. */
+ return !node->rb_left && !node->rb_right;
}
/* TCP Small Queues :
@@ -2220,12 +2254,12 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit <<= factor;
if (refcount_read(&sk->sk_wmem_alloc) > limit) {
- /* Always send skb if rtx queue is empty.
+ /* Always send skb if rtx queue is empty or has one skb.
* No need to wait for TX completion to call us back,
* after softirq/tasklet schedule.
* This helps when TX completions are delayed too much.
*/
- if (tcp_rtx_queue_empty(sk))
+ if (tcp_rtx_queue_empty_or_single_skb(sk))
return false;
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
@@ -2407,6 +2441,10 @@ repair:
else
tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
+ if (likely(sent_pkts || is_cwnd_limited))
+ tcp_cwnd_validate(sk, is_cwnd_limited);
+
if (likely(sent_pkts)) {
if (tcp_in_cwnd_reduction(sk))
tp->prr_out += sent_pkts;
@@ -2414,8 +2452,6 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk, false);
- is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
- tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
return !tp->packets_out && !tcp_write_queue_empty(sk);
@@ -2425,7 +2461,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 timeout, rto_delta_us;
+ u32 timeout, timeout_us, rto_delta_us;
int early_retrans;
/* Don't do any loss probe on a Fast Open connection before 3WHS
@@ -2434,7 +2470,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
if (tp->fastopen_rsk)
return false;
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* not in loss recovery, that are either limited by cwnd or application.
*/
@@ -2449,11 +2485,12 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
* sample is available then probe after TCP_TIMEOUT_INIT.
*/
if (tp->srtt_us) {
- timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+ timeout_us = tp->srtt_us >> 2;
if (tp->packets_out == 1)
- timeout += TCP_RTO_MIN;
+ timeout_us += tcp_rto_min_us(sk);
else
- timeout += TCP_TIMEOUT_MIN;
+ timeout_us += TCP_TIMEOUT_MIN_US;
+ timeout = usecs_to_jiffies(timeout_us);
} else {
timeout = TCP_TIMEOUT_INIT;
}
@@ -2796,7 +2833,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
@@ -2836,7 +2873,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
struct tcp_sock *tp = tcp_sk(sk);
unsigned int cur_mss;
int diff, len, err;
-
+ int avail_wnd;
/* Inconclusive MTU probe */
if (icsk->icsk_mtup.probe_size)
@@ -2853,7 +2890,13 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
if (skb_still_in_host_queue(sk, skb))
return -EBUSY;
+start:
if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) {
+ if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) {
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
+ TCP_SKB_CB(skb)->seq++;
+ goto start;
+ }
if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) {
WARN_ON_ONCE(1);
return -EINVAL;
@@ -2866,17 +2909,25 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
return -EHOSTUNREACH; /* Routing failure or similar. */
cur_mss = tcp_current_mss(sk);
+ avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
/* If receiver has shrunk his window, and skb is out of
* new window, do not retransmit it. The exception is the
* case, when window is shrunk to zero. In this case
- * our retransmit serves as a zero window probe.
+ * our retransmit of one segment serves as a zero window probe.
*/
- if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) &&
- TCP_SKB_CB(skb)->seq != tp->snd_una)
- return -EAGAIN;
+ if (avail_wnd <= 0) {
+ if (TCP_SKB_CB(skb)->seq != tp->snd_una)
+ return -EAGAIN;
+ avail_wnd = cur_mss;
+ }
len = cur_mss * segs;
+ if (len > avail_wnd) {
+ len = rounddown(avail_wnd, cur_mss);
+ if (!len)
+ len = avail_wnd;
+ }
if (skb->len > len) {
if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len,
cur_mss, GFP_ATOMIC))
@@ -2890,8 +2941,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
diff -= tcp_skb_pcount(skb);
if (diff)
tcp_adjust_pcount(sk, skb, diff);
- if (skb->len < cur_mss)
- tcp_retrans_try_collapse(sk, skb, cur_mss);
+ avail_wnd = min_t(int, avail_wnd, cur_mss);
+ if (skb->len < avail_wnd)
+ tcp_retrans_try_collapse(sk, skb, avail_wnd);
}
/* RFC3168, section 6.1.1.1. ECN fallback */
@@ -2954,13 +3006,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
#endif
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
tp->retrans_out += tcp_skb_pcount(skb);
-
- /* Save stamp of the first retransmit. */
- if (!tp->retrans_stamp)
- tp->retrans_stamp = tcp_skb_timestamp(skb);
-
}
+ /* Save stamp of the first (attempted) retransmit. */
+ if (!tp->retrans_stamp)
+ tp->retrans_stamp = tcp_skb_timestamp(skb);
+
if (tp->undo_retrans < 0)
tp->undo_retrans = 0;
tp->undo_retrans += tcp_skb_pcount(skb);
@@ -3051,11 +3102,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
*/
void sk_forced_mem_schedule(struct sock *sk, int size)
{
- int amt;
+ int delta, amt;
- if (size <= sk->sk_forward_alloc)
+ delta = size - sk->sk_forward_alloc;
+ if (delta <= 0)
return;
- amt = sk_mem_pages(size);
+ amt = sk_mem_pages(delta);
sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
sk_memory_allocated_add(sk, amt);
@@ -3273,7 +3325,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
@@ -3378,7 +3430,7 @@ static void tcp_connect_init(struct sock *sk)
else
tp->rcv_tstamp = tcp_jiffies32;
tp->rcv_wup = tp->rcv_nxt;
- tp->copied_seq = tp->rcv_nxt;
+ WRITE_ONCE(tp->copied_seq, tp->rcv_nxt);
inet_csk(sk)->icsk_rto = tcp_timeout_init(sk);
inet_csk(sk)->icsk_retransmits = 0;
@@ -3394,7 +3446,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
__skb_header_release(skb);
sk->sk_wmem_queued += skb->truesize;
sk_mem_charge(sk, skb->truesize);
- tp->write_seq = tcb->end_seq;
+ WRITE_ONCE(tp->write_seq, tcb->end_seq);
tp->packets_out += tcp_skb_pcount(skb);
}
@@ -3407,6 +3459,7 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
*/
static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
+ struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
int space, err = 0;
@@ -3421,8 +3474,10 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
* private TCP options. The cost is reduced data space in SYN :(
*/
tp->rx_opt.mss_clamp = tcp_mss_clamp(tp, tp->rx_opt.mss_clamp);
+ /* Sync mss_cache after updating the mss_clamp */
+ tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
- space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
+ space = __tcp_mtu_to_mss(sk, icsk->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
space = min_t(size_t, space, fo->size);
@@ -3754,7 +3809,7 @@ void tcp_send_probe0(struct sock *sk)
}
if (err <= 0) {
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
icsk->icsk_probes_out++;
probe_max = TCP_RTO_MAX;
@@ -3783,8 +3838,8 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
tcp_rsk(req)->txhash = net_tx_rndhash();
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL);
if (!res) {
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
if (unlikely(tcp_passive_fastopen(sk)))
tcp_sk(sk)->total_retrans++;
trace_tcp_retransmit_synack(sk, req);
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index c81aadff769b..844ff390f726 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -33,7 +33,8 @@ static u32 tcp_rack_reo_wnd(const struct sock *sk)
return 0;
if (tp->sacked_out >= tp->reordering &&
- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_NO_DUPTHRESH))
return 0;
}
@@ -109,22 +110,23 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
}
}
-void tcp_rack_mark_lost(struct sock *sk)
+bool tcp_rack_mark_lost(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout;
if (!tp->rack.advanced)
- return;
+ return false;
/* Reset the advanced flag to avoid unnecessary queue scanning */
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
- timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
+ timeout = usecs_to_jiffies(timeout + TCP_TIMEOUT_MIN_US);
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
}
+ return !!timeout;
}
/* Record the most recently (re)sent time among the (s)acked packets
@@ -202,7 +204,8 @@ void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 681882a40968..d8d28ba169b4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -22,33 +22,21 @@
#include <linux/gfp.h>
#include <net/tcp.h>
-static u32 tcp_retransmit_stamp(const struct sock *sk)
-{
- u32 start_ts = tcp_sk(sk)->retrans_stamp;
-
- if (unlikely(!start_ts)) {
- struct sk_buff *head = tcp_rtx_queue_head(sk);
-
- if (!head)
- return 0;
- start_ts = tcp_skb_timestamp(head);
- }
- return start_ts;
-}
-
static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
+ s32 remaining;
- start_ts = tcp_retransmit_stamp(sk);
- if (!icsk->icsk_user_timeout || !start_ts)
+ start_ts = tcp_sk(sk)->retrans_stamp;
+ if (!icsk->icsk_user_timeout)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- if (elapsed >= icsk->icsk_user_timeout)
+ remaining = icsk->icsk_user_timeout - elapsed;
+ if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
- else
- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
/**
@@ -136,7 +124,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
*/
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
@@ -156,7 +144,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
int mss;
/* Black hole detection */
- if (!net->ipv4.sysctl_tcp_mtu_probing)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
return;
if (!icsk->icsk_mtup.enabled) {
@@ -172,7 +160,20 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
+static unsigned int tcp_model_timeout(struct sock *sk,
+ unsigned int boundary,
+ unsigned int rto_base)
+{
+ unsigned int linear_backoff_thresh, timeout;
+ linear_backoff_thresh = ilog2(TCP_RTO_MAX / rto_base);
+ if (boundary <= linear_backoff_thresh)
+ timeout = ((2 << boundary) - 1) * rto_base;
+ else
+ timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
+ (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+ return jiffies_to_msecs(timeout);
+}
/**
* retransmits_timed_out() - returns true if this connection has timed out
* @sk: The current socket
@@ -190,27 +191,21 @@ static bool retransmits_timed_out(struct sock *sk,
unsigned int boundary,
unsigned int timeout)
{
- const unsigned int rto_base = TCP_RTO_MIN;
- unsigned int linear_backoff_thresh, start_ts;
+ unsigned int start_ts;
if (!inet_csk(sk)->icsk_retransmits)
return false;
- start_ts = tcp_retransmit_stamp(sk);
- if (!start_ts)
- return false;
-
+ start_ts = tcp_sk(sk)->retrans_stamp;
if (likely(timeout == 0)) {
- linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
-
- if (boundary <= linear_backoff_thresh)
- timeout = ((2 << boundary) - 1) * rto_base;
- else
- timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
- (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
- timeout = jiffies_to_msecs(timeout);
+ unsigned int rto_base = TCP_RTO_MIN;
+
+ if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV))
+ rto_base = tcp_timeout_init(sk);
+ timeout = tcp_model_timeout(sk, boundary, rto_base);
}
- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
/* A write timeout has occurred. Process the after effects. */
@@ -231,7 +226,7 @@ static int tcp_write_timeout(struct sock *sk)
retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
@@ -240,7 +235,7 @@ static int tcp_write_timeout(struct sock *sk)
sk_rethink_txhash(sk);
}
- retry_until = net->ipv4.sysctl_tcp_retries2;
+ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
@@ -367,7 +362,7 @@ static void tcp_probe_timer(struct sock *sk)
(s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout)
goto abort;
- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
@@ -550,18 +545,20 @@ out_reset_timer:
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
- icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ icsk->icsk_rto = clamp(__tcp_set_rto(tp),
+ tcp_rto_min(sk),
+ TCP_RTO_MAX);
} else {
/* Use normal (exponential) backoff */
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
out:;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2aacf2b34834..a6048cc7fc35 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -798,7 +798,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
return -EINVAL;
}
@@ -935,7 +935,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
__be16 dport;
u8 tos;
int err, is_udplite = IS_UDPLITE(sk);
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
struct sk_buff *skb;
struct ip_options_data opt_copy;
@@ -997,7 +997,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
ipcm_init_sk(&ipc, inet);
- ipc.gso_size = up->gso_size;
+ ipc.gso_size = READ_ONCE(up->gso_size);
if (msg->msg_controllen) {
err = udp_cmsg_send(sk, msg, &ipc.gso_size);
@@ -1243,7 +1243,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
}
up->len += size;
- if (!(up->corkflag || (flags&MSG_MORE)))
+ if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
ret = udp_push_pending_frames(sk);
if (!ret)
ret = size;
@@ -1458,7 +1458,7 @@ drop:
}
EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
-void udp_destruct_sock(struct sock *sk)
+void udp_destruct_common(struct sock *sk)
{
/* reclaim completely the forward allocated memory */
struct udp_sock *up = udp_sk(sk);
@@ -1471,10 +1471,14 @@ void udp_destruct_sock(struct sock *sk)
kfree_skb(skb);
}
udp_rmem_release(sk, total, 0, true);
+}
+EXPORT_SYMBOL_GPL(udp_destruct_common);
+static void udp_destruct_sock(struct sock *sk)
+{
+ udp_destruct_common(sk);
inet_sock_destruct(sk);
}
-EXPORT_SYMBOL_GPL(udp_destruct_sock);
int udp_init_sock(struct sock *sk)
{
@@ -1482,7 +1486,6 @@ int udp_init_sock(struct sock *sk)
sk->sk_destruct = udp_destruct_sock;
return 0;
}
-EXPORT_SYMBOL_GPL(udp_init_sock);
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
{
@@ -2050,7 +2053,7 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
struct dst_entry *old;
if (dst_hold_safe(dst)) {
- old = xchg(&sk->sk_rx_dst, dst);
+ old = xchg((__force struct dst_entry **)&sk->sk_rx_dst, dst);
dst_release(old);
return old != dst;
}
@@ -2240,7 +2243,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
struct dst_entry *dst = skb_dst(skb);
int ret;
- if (unlikely(sk->sk_rx_dst != dst))
+ if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
udp_sk_rx_dst_set(sk, dst);
ret = udp_unicast_rcv_skb(sk, skb, uh);
@@ -2398,7 +2401,7 @@ int udp_v4_early_demux(struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_efree;
- dst = READ_ONCE(sk->sk_rx_dst);
+ dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
@@ -2416,7 +2419,8 @@ int udp_v4_early_demux(struct sk_buff *skb)
*/
if (!inet_sk(sk)->inet_daddr && in_dev)
return ip_mc_validate_source(skb, iph->daddr,
- iph->saddr, iph->tos,
+ iph->saddr,
+ iph->tos & IPTOS_RT_MASK,
skb->dev, in_dev, &itag);
}
return 0;
@@ -2431,6 +2435,9 @@ void udp_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
bool slow = lock_sock_fast(sk);
+
+ /* protects from races with udp_abort() */
+ sock_set_flag(sk, SOCK_DEAD);
udp_flush_pending_frames(sk);
unlock_sock_fast(sk, slow);
if (static_branch_unlikely(&udp_encap_needed_key) && up->encap_type) {
@@ -2464,9 +2471,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
if (val != 0) {
- up->corkflag = 1;
+ WRITE_ONCE(up->corkflag, 1);
} else {
- up->corkflag = 0;
+ WRITE_ONCE(up->corkflag, 0);
lock_sock(sk);
push_pending_frames(sk);
release_sock(sk);
@@ -2501,7 +2508,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case UDP_SEGMENT:
if (val < 0 || val > USHRT_MAX)
return -EINVAL;
- up->gso_size = val;
+ WRITE_ONCE(up->gso_size, val);
break;
/*
@@ -2579,7 +2586,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case UDP_CORK:
- val = up->corkflag;
+ val = READ_ONCE(up->corkflag);
break;
case UDP_ENCAP:
@@ -2595,7 +2602,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
break;
case UDP_SEGMENT:
- val = up->gso_size;
+ val = READ_ONCE(up->gso_size);
break;
/* The following two cannot be changed on UDP sockets, the return is
@@ -2672,10 +2679,17 @@ int udp_abort(struct sock *sk, int err)
{
lock_sock(sk);
+ /* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
+ * with close()
+ */
+ if (sock_flag(sk, SOCK_DEAD))
+ goto out;
+
sk->sk_err = err;
sk->sk_error_report(sk);
__udp_disconnect(sk, 0);
+out:
release_sock(sk);
return 0;
@@ -2834,7 +2848,7 @@ int udp4_seq_show(struct seq_file *seq, void *v)
{
seq_setwidth(seq, 127);
if (v == SEQ_START_TOKEN)
- seq_puts(seq, " sl local_address rem_address st tx_queue "
+ seq_puts(seq, " sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
"inode ref pointer drops");
else {
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index aa343654abfc..2d22d39952da 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -359,7 +359,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
struct sock *sk;
if (NAPI_GRO_CB(skb)->encap_mark ||
- (skb->ip_summed != CHECKSUM_PARTIAL &&
+ (uh->check && skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid))
goto out;
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 6539ff15e9a3..d03d74388870 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -186,6 +186,7 @@ EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb);
void udp_tunnel_sock_release(struct socket *sock)
{
rcu_assign_sk_user_data(sock->sk, NULL);
+ synchronize_rcu();
kernel_sock_shutdown(sock, SHUT_RDWR);
sock_release(sock);
}
diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c
index 8545457752fb..27173549b000 100644
--- a/net/ipv4/udplite.c
+++ b/net/ipv4/udplite.c
@@ -20,6 +20,14 @@
struct udp_table udplite_table __read_mostly;
EXPORT_SYMBOL(udplite_table);
+/* Designate sk as UDP-Lite socket */
+static int udplite_sk_init(struct sock *sk)
+{
+ udp_init_sock(sk);
+ udp_sk(sk)->pcflag = UDPLITE_BIT;
+ return 0;
+}
+
static int udplite_rcv(struct sk_buff *skb)
{
return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
@@ -56,6 +64,8 @@ struct proto udplite_prot = {
.get_port = udp_v4_get_port,
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp_sock),
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 1e5e2e4be0b2..e85b5f57d3e9 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -17,6 +17,7 @@
#include <net/xfrm.h>
#include <net/ip.h>
#include <net/l3mdev.h>
+#include <net/inet_ecn.h>
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
int tos, int oif,
@@ -126,7 +127,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
fl4->flowi4_proto = iph->protocol;
fl4->daddr = reverse ? iph->saddr : iph->daddr;
fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos;
+ fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
if (!ip_is_fragment(iph)) {
switch (iph->protocol) {
diff --git a/net/ipv4/xfrm4_protocol.c b/net/ipv4/xfrm4_protocol.c
index 8dd0e6ab8606..0e1f5dc2766b 100644
--- a/net/ipv4/xfrm4_protocol.c
+++ b/net/ipv4/xfrm4_protocol.c
@@ -297,4 +297,3 @@ void __init xfrm4_protocol_init(void)
{
xfrm_input_register_afinfo(&xfrm4_input_afinfo);
}
-EXPORT_SYMBOL(xfrm4_protocol_init);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 627cd24b7c0d..5ffa8777ab09 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -316,9 +316,8 @@ static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
static void addrconf_mod_rs_timer(struct inet6_dev *idev,
unsigned long when)
{
- if (!timer_pending(&idev->rs_timer))
+ if (!mod_timer(&idev->rs_timer, jiffies + when))
in6_dev_hold(idev);
- mod_timer(&idev->rs_timer, jiffies + when);
}
static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
@@ -739,6 +738,7 @@ static void dev_forward_change(struct inet6_dev *idev)
{
struct net_device *dev;
struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
if (!idev)
return;
@@ -757,14 +757,24 @@ static void dev_forward_change(struct inet6_dev *idev)
}
}
+ read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa->flags&IFA_F_TENTATIVE)
continue;
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ }
+ read_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
if (idev->cnf.forwarding)
addrconf_join_anycast(ifa);
else
addrconf_leave_anycast(ifa);
}
+
inet6_netconf_notify_devconf(dev_net(dev), RTM_NEWNETCONF,
NETCONFA_FORWARDING,
dev->ifindex, &idev->cnf);
@@ -1311,7 +1321,7 @@ retry:
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
- max_desync_factor = min_t(__u32,
+ max_desync_factor = min_t(long,
idev->cnf.max_desync_factor,
cnf_temp_preferred_lft - regen_advance);
@@ -2395,8 +2405,9 @@ static void addrconf_add_mroute(struct net_device *dev)
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
.fc_flags = RTF_UP,
- .fc_type = RTN_UNICAST,
+ .fc_type = RTN_MULTICAST,
.fc_nlinfo.nl_net = dev_net(dev),
+ .fc_protocol = RTPROT_KERNEL,
};
ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0);
@@ -2477,12 +2488,18 @@ static void manage_tempaddrs(struct inet6_dev *idev,
ipv6_ifa_notify(0, ift);
}
- if ((create || list_empty(&idev->tempaddr_list)) &&
- idev->cnf.use_tempaddr > 0) {
+ /* Also create a temporary address if it's enabled but no temporary
+ * address currently exists.
+ * However, we get called with valid_lft == 0, prefered_lft == 0, create == false
+ * as part of cleanup (ie. deleting the mngtmpaddr).
+ * We don't want that to result in creating a new temporary ip address.
+ */
+ if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
+ create = true;
+
+ if (create && idev->cnf.use_tempaddr > 0) {
/* When a new public address is created as described
* in [ADDRCONF], also create a new temporary address.
- * Also create a temporary address if it's enabled but
- * no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
ipv6_create_tempaddr(ifp, NULL, false);
@@ -3053,6 +3070,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
if (idev->dev->flags&IFF_POINTOPOINT) {
+ if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
+ return;
+
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
plen = 64;
@@ -3654,7 +3674,8 @@ static int addrconf_ifdown(struct net_device *dev, int how)
unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
- struct inet6_ifaddr *ifa, *tmp;
+ struct inet6_ifaddr *ifa;
+ LIST_HEAD(tmp_addr_list);
bool keep_addr = false;
int state, i;
@@ -3742,16 +3763,23 @@ restart:
write_lock_bh(&idev->lock);
}
- list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) {
+ list_for_each_entry(ifa, &idev->addr_list, if_list)
+ list_add_tail(&ifa->if_list_aux, &tmp_addr_list);
+ write_unlock_bh(&idev->lock);
+
+ while (!list_empty(&tmp_addr_list)) {
struct fib6_info *rt = NULL;
bool keep;
+ ifa = list_first_entry(&tmp_addr_list,
+ struct inet6_ifaddr, if_list_aux);
+ list_del(&ifa->if_list_aux);
+
addrconf_del_dad_work(ifa);
keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) &&
!addr_is_local(&ifa->addr);
- write_unlock_bh(&idev->lock);
spin_lock_bh(&ifa->lock);
if (keep) {
@@ -3782,15 +3810,14 @@ restart:
addrconf_leave_solict(ifa->idev, &ifa->addr);
}
- write_lock_bh(&idev->lock);
if (!keep) {
+ write_lock_bh(&idev->lock);
list_del_rcu(&ifa->if_list);
+ write_unlock_bh(&idev->lock);
in6_ifa_put(ifa);
}
}
- write_unlock_bh(&idev->lock);
-
/* Step 5: Discard anycast and multicast list */
if (how) {
ipv6_ac_destroy_dev(idev);
@@ -4121,7 +4148,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
send_rs = send_mld &&
ipv6_accept_ra(ifp->idev) &&
ifp->idev->cnf.rtr_solicits != 0 &&
- (dev->flags&IFF_LOOPBACK) == 0;
+ (dev->flags & IFF_LOOPBACK) == 0 &&
+ (dev->type != ARPHRD_TUNNEL);
read_unlock_bh(&ifp->idev->lock);
/* While dad is in progress mld report's source address is in6_addrany.
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 1d6ced37ad71..7fdd433b968e 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net,
/* add default label */
static int __net_init ip6addrlbl_net_init(struct net *net)
{
- int err = 0;
+ struct ip6addrlbl_entry *p = NULL;
+ struct hlist_node *n;
+ int err;
int i;
ADDRLABEL(KERN_DEBUG "%s\n", __func__);
@@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net)
INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head);
for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
- int ret = ip6addrlbl_add(net,
- ip6addrlbl_init_table[i].prefix,
- ip6addrlbl_init_table[i].prefixlen,
- 0,
- ip6addrlbl_init_table[i].label, 0);
- /* XXX: should we free all rules when we catch an error? */
- if (ret && (!err || err != -ENOMEM))
- err = ret;
+ err = ip6addrlbl_add(net,
+ ip6addrlbl_init_table[i].prefix,
+ ip6addrlbl_init_table[i].prefixlen,
+ 0,
+ ip6addrlbl_init_table[i].label, 0);
+ if (err)
+ goto err_ip6addrlbl_add;
+ }
+ return 0;
+
+err_ip6addrlbl_add:
+ hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) {
+ hlist_del_rcu(&p->list);
+ kfree_rcu(p, rcu);
}
return err;
}
@@ -429,6 +437,7 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
{
struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
ifal->ifal_family = AF_INET6;
+ ifal->__ifal_reserved = 0;
ifal->ifal_prefixlen = prefixlen;
ifal->ifal_flags = 0;
ifal->ifal_index = ifindex;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 5c2351deedc8..c8f39d61b51e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -107,6 +107,13 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+void inet6_sock_destruct(struct sock *sk)
+{
+ inet6_cleanup_sock(sk);
+ inet_sock_destruct(sk);
+}
+EXPORT_SYMBOL_GPL(inet6_sock_destruct);
+
static int inet6_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
@@ -199,7 +206,7 @@ lookup_protocol:
inet->hdrincl = 1;
}
- sk->sk_destruct = inet_sock_destruct;
+ sk->sk_destruct = inet6_sock_destruct;
sk->sk_family = PF_INET6;
sk->sk_protocol = protocol;
@@ -502,6 +509,12 @@ void inet6_destroy_sock(struct sock *sk)
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
+void inet6_cleanup_sock(struct sock *sk)
+{
+ inet6_destroy_sock(sk);
+}
+EXPORT_SYMBOL_GPL(inet6_cleanup_sock);
+
/*
* This does both peername and sockname.
*/
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 78c974391567..2b68bd7c83c4 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -600,7 +600,8 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
memset(ah->auth_data, 0, ahp->icv_trunc_len);
- if (ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN))
+ err = ipv6_clear_mutable_options(ip6h, hdr_len, XFRM_POLICY_IN);
+ if (err)
goto out_free;
ip6h->priority = 0;
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 70611784c071..afc76062e1a1 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -97,6 +97,9 @@ struct calipso_map_cache_entry {
static struct calipso_map_cache_bkt *calipso_cache;
+static void calipso_cache_invalidate(void);
+static void calipso_doi_putdef(struct calipso_doi *doi_def);
+
/* Label Mapping Cache Functions
*/
@@ -458,15 +461,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info)
ret_val = -ENOENT;
goto doi_remove_return;
}
- if (!refcount_dec_and_test(&doi_def->refcount)) {
- spin_unlock(&calipso_doi_list_lock);
- ret_val = -EBUSY;
- goto doi_remove_return;
- }
list_del_rcu(&doi_def->list);
spin_unlock(&calipso_doi_list_lock);
- call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
+ calipso_doi_putdef(doi_def);
ret_val = 0;
doi_remove_return:
@@ -522,10 +520,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def)
if (!refcount_dec_and_test(&doi_def->refcount))
return;
- spin_lock(&calipso_doi_list_lock);
- list_del_rcu(&doi_def->list);
- spin_unlock(&calipso_doi_list_lock);
+ calipso_cache_invalidate();
call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
}
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 727f958dd869..45ece6a898bf 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -54,7 +54,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
fl6->flowi6_mark = sk->sk_mark;
fl6->fl6_dport = inet->inet_dport;
fl6->fl6_sport = inet->inet_sport;
- fl6->flowlabel = np->flow_label;
+ fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6->flowi6_uid = sk->sk_uid;
if (!fl6->flowi6_oif)
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index a7d996148eed..6529e46ad091 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -237,12 +237,15 @@ static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
{
u8 *tail;
- u8 *vaddr;
int nfrags;
struct page *page;
struct sk_buff *trailer;
int tailen = esp->tailen;
+ if (ALIGN(tailen, L1_CACHE_BYTES) > PAGE_SIZE ||
+ ALIGN(skb->data_len, L1_CACHE_BYTES) > PAGE_SIZE)
+ goto cow;
+
if (!skb_cloned(skb)) {
if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
@@ -270,14 +273,10 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
page = pfrag->page;
get_page(page);
- vaddr = kmap_atomic(page);
-
- tail = vaddr + pfrag->offset;
+ tail = page_address(page) + pfrag->offset;
esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
- kunmap_atomic(vaddr);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -518,7 +517,9 @@ static inline int esp_remove_trailer(struct sk_buff *skb)
skb->csum = csum_block_sub(skb->csum, csumdiff,
skb->len - trimlen);
}
- pskb_trim(skb, skb->len - trimlen);
+ ret = pskb_trim(skb, skb->len - trimlen);
+ if (unlikely(ret))
+ return ret;
ret = nexthdr[1];
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index eeee64a8a72c..69313ec24264 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -272,6 +272,9 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
secpath_reset(skb);
+ if (skb_needs_linearize(skb, skb->dev->features) &&
+ __skb_linearize(skb))
+ return -ENOMEM;
return 0;
}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 20291c2036fc..fe2497ae4523 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -138,18 +138,23 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
len -= 2;
while (len > 0) {
- int optlen = nh[off + 1] + 2;
- int i;
+ int optlen, i;
- switch (nh[off]) {
- case IPV6_TLV_PAD1:
- optlen = 1;
+ if (nh[off] == IPV6_TLV_PAD1) {
padlen++;
if (padlen > 7)
goto bad;
- break;
+ off++;
+ len--;
+ continue;
+ }
+ if (len < 2)
+ goto bad;
+ optlen = nh[off + 1] + 2;
+ if (optlen > len)
+ goto bad;
- case IPV6_TLV_PADN:
+ if (nh[off] == IPV6_TLV_PADN) {
/* RFC 2460 states that the purpose of PadN is
* to align the containing header to multiples
* of 8. 7 is therefore the highest valid value.
@@ -166,12 +171,7 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
if (nh[off + i] != 0)
goto bad;
}
- break;
-
- default: /* Other TLV code so scan list */
- if (optlen > len)
- goto bad;
-
+ } else {
tlv_count++;
if (tlv_count > max_count)
goto bad;
@@ -191,7 +191,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
return false;
padlen = 0;
- break;
}
off += optlen;
len -= optlen;
@@ -309,7 +308,7 @@ fail_and_free:
#endif
if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
- init_net.ipv6.sysctl.max_dst_opts_cnt)) {
+ net->ipv6.sysctl.max_dst_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -848,7 +847,7 @@ fail_and_free:
opt->flags |= IP6SKB_HOPBYHOP;
if (ip6_parse_tlv(tlvprochopopt_lst, skb,
- init_net.ipv6.sysctl.max_hbh_opts_cnt)) {
+ net->ipv6.sysctl.max_hbh_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index ae365df8abf7..f356d3049143 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -142,6 +142,8 @@ int ipv6_find_tlv(const struct sk_buff *skb, int offset, int type)
optlen = 1;
break;
default:
+ if (len < 2)
+ goto bad;
optlen = nh[offset + 1] + 2;
if (optlen > len)
goto bad;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6d14cbe443f8..bfafd7649ccb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -309,10 +309,9 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
}
#if IS_ENABLED(CONFIG_IPV6_MIP6)
-static void mip6_addr_swap(struct sk_buff *skb)
+static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
{
struct ipv6hdr *iph = ipv6_hdr(skb);
- struct inet6_skb_parm *opt = IP6CB(skb);
struct ipv6_destopt_hao *hao;
struct in6_addr tmp;
int off;
@@ -329,7 +328,7 @@ static void mip6_addr_swap(struct sk_buff *skb)
}
}
#else
-static inline void mip6_addr_swap(struct sk_buff *skb) {}
+static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
#endif
static struct dst_entry *icmpv6_route_lookup(struct net *net,
@@ -396,30 +395,39 @@ relookup_failed:
return ERR_PTR(err);
}
-static int icmp6_iif(const struct sk_buff *skb)
+static struct net_device *icmp6_dev(const struct sk_buff *skb)
{
- int iif = skb->dev->ifindex;
+ struct net_device *dev = skb->dev;
/* for local traffic to local address, skb dev is the loopback
* device. Check if there is a dst attached to the skb and if so
* get the real device index. Same is needed for replies to a link
* local address on a device enslaved to an L3 master device
*/
- if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
+ if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
const struct rt6_info *rt6 = skb_rt6_info(skb);
- if (rt6)
- iif = rt6->rt6i_idev->dev->ifindex;
+ /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
+ * and ip6_null_entry could be set to skb if no route is found.
+ */
+ if (rt6 && rt6->rt6i_idev)
+ dev = rt6->rt6i_idev->dev;
}
- return iif;
+ return dev;
+}
+
+static int icmp6_iif(const struct sk_buff *skb)
+{
+ return icmp6_dev(skb)->ifindex;
}
/*
* Send an ICMP message in response to a packet in error
*/
-static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
- const struct in6_addr *force_saddr)
+void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct in6_addr *force_saddr,
+ const struct inet6_skb_parm *parm)
{
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -512,7 +520,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
if (!(skb->dev->flags&IFF_LOOPBACK) && !icmpv6_global_allow(type))
goto out_bh_enable;
- mip6_addr_swap(skb);
+ mip6_addr_swap(skb, parm);
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_ICMPV6;
@@ -592,12 +600,13 @@ out:
out_bh_enable:
local_bh_enable();
}
+EXPORT_SYMBOL(icmp6_send);
/* Slightly more convenient version of icmp6_send.
*/
void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
{
- icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
+ icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
kfree_skb(skb);
}
@@ -654,10 +663,10 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
}
if (type == ICMP_TIME_EXCEEDED)
icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
- info, &temp_saddr);
+ info, &temp_saddr, IP6CB(skb2));
else
icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
- info, &temp_saddr);
+ info, &temp_saddr, IP6CB(skb2));
if (rt)
ip6_rt_put(rt);
@@ -799,7 +808,7 @@ out:
static int icmpv6_rcv(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
- struct net_device *dev = skb->dev;
+ struct net_device *dev = icmp6_dev(skb);
struct inet6_dev *idev = __in6_dev_get(dev);
const struct in6_addr *saddr, *daddr;
struct icmp6hdr *hdr;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 7858fa9ea103..87744eb8d0c4 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -480,6 +480,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
rcu_read_lock();
+ ret = -ESRCH;
ila = ila_lookup_by_params(&xp, ilan);
if (ila) {
ret = ila_dump_info(ila,
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index d9e2575dad94..d8391921363f 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -311,7 +311,7 @@ not_unique:
return -EADDRNOTAVAIL;
}
-static u32 inet6_sk_port_offset(const struct sock *sk)
+static u64 inet6_sk_port_offset(const struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
@@ -323,7 +323,7 @@ static u32 inet6_sk_port_offset(const struct sock *sk)
int inet6_hash_connect(struct inet_timewait_death_row *death_row,
struct sock *sk)
{
- u32 port_offset = 0;
+ u64 port_offset = 0;
if (!inet_sk(sk)->inet_num)
port_offset = inet6_sk_port_offset(sk);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b924941b96a3..92bc56028b8b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
fn = rcu_dereference_protected(f6i->fib6_node,
lockdep_is_held(&f6i->fib6_table->tb6_lock));
if (fn)
- fn->fn_sernum = fib6_new_sernum(net);
+ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
}
/*
@@ -544,12 +544,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
spin_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = READ_ONCE(w->root->fn_sernum);
}
} else {
- if (cb->args[5] != w->root->fn_sernum) {
+ int sernum = READ_ONCE(w->root->fn_sernum);
+ if (cb->args[5] != sernum) {
/* Begin at the root if the tree changed */
- cb->args[5] = w->root->fn_sernum;
+ cb->args[5] = sernum;
w->state = FWS_INIT;
w->node = w->root;
w->skip = w->count;
@@ -906,6 +907,8 @@ static void fib6_purge_rt(struct fib6_info *rt, struct fib6_node *fn,
{
struct fib6_table *table = rt->fib6_table;
+ /* Flush all cached dst in exception table */
+ rt6_flush_exceptions(rt);
if (rt->rt6i_pcpu)
fib6_drop_pcpu_from(rt, table);
@@ -1201,7 +1204,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
/* paired with smp_rmb() in rt6_get_cookie_safe() */
smp_wmb();
while (fn) {
- fn->fn_sernum = sernum;
+ WRITE_ONCE(fn->fn_sernum, sernum);
fn = rcu_dereference_protected(fn->parent,
lockdep_is_held(&rt->fib6_table->tb6_lock));
}
@@ -1348,13 +1351,9 @@ out:
if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
pn_leaf = fib6_find_prefix(info->nl_net, table,
pn);
-#if RT6_DEBUG >= 2
- if (!pn_leaf) {
- WARN_ON(!pn_leaf);
+ if (!pn_leaf)
pn_leaf =
info->nl_net->ipv6.fib6_null_entry;
- }
-#endif
fib6_info_hold(pn_leaf);
rcu_assign_pointer(pn->leaf, pn_leaf);
}
@@ -1756,9 +1755,6 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
- /* Flush all cached dst in exception table */
- rt6_flush_exceptions(rt);
-
/* Reset round-robin state, if necessary */
if (rcu_access_pointer(fn->rr_ptr) == rt)
fn->rr_ptr = NULL;
@@ -1984,8 +1980,8 @@ static int fib6_clean_node(struct fib6_walker *w)
};
if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
- w->node->fn_sernum != c->sernum)
- w->node->fn_sernum = c->sernum;
+ READ_ONCE(w->node->fn_sernum) != c->sernum)
+ WRITE_ONCE(w->node->fn_sernum, c->sernum);
if (!c->func) {
WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
@@ -2333,7 +2329,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
iter->w.args = iter;
- iter->sernum = iter->w.root->fn_sernum;
+ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
INIT_LIST_HEAD(&iter->w.lh);
fib6_walker_link(net, &iter->w);
}
@@ -2361,8 +2357,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
{
- if (iter->sernum != iter->w.root->fn_sernum) {
- iter->sernum = iter->w.root->fn_sernum;
+ int sernum = READ_ONCE(iter->w.root->fn_sernum);
+
+ if (iter->sernum != sernum) {
+ iter->sernum = sernum;
iter->w.state = FWS_INIT;
iter->w.node = iter->w.root;
WARN_ON(iter->w.skip);
@@ -2417,8 +2415,10 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
iter->skip = *pos;
if (iter->tbl) {
+ loff_t p = 0;
+
ipv6_route_seq_setup_walk(iter, net);
- return ipv6_route_seq_next(seq, NULL, pos);
+ return ipv6_route_seq_next(seq, NULL, &p);
} else {
return NULL;
}
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index f994f50e1516..1858cf783a4f 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -518,7 +518,7 @@ int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
{
- int uninitialized_var(err);
+ int err;
struct net *net = sock_net(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_flowlabel_req freq;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 7cc9cd83ecb5..aa8ada354a39 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -392,7 +392,6 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
if (!(nt->parms.o_flags & TUNNEL_SEQ))
dev->features |= NETIF_F_LLTX;
- dev_hold(dev);
ip6gre_tunnel_link(ign, nt);
return nt;
@@ -732,6 +731,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
{
struct ip6_tnl *tunnel = netdev_priv(dev);
__be16 protocol;
+ __be16 flags;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -741,16 +741,13 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
- return -ENOMEM;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
if (tunnel->parms.collect_md) {
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
- __be16 flags;
+ int tun_hlen;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info ||
@@ -768,21 +765,27 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
dsfield = key->tos;
flags = key->tun_flags &
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
- tunnel->tun_hlen = gre_calc_hlen(flags);
+ tun_hlen = gre_calc_hlen(flags);
- gre_build_header(skb, tunnel->tun_hlen,
+ if (skb_cow_head(skb, dev->needed_headroom ?: tun_hlen + tunnel->encap_hlen))
+ return -ENOMEM;
+
+ gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
: 0);
} else {
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
+ if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen))
+ return -ENOMEM;
+
+ flags = tunnel->parms.o_flags;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- htonl(tunnel->o_seqno));
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -946,7 +949,6 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
__be16 proto;
__u32 mtu;
int nhoff;
- int thoff;
if (!pskb_inet_may_pull(skb))
goto tx_err;
@@ -958,19 +960,26 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto tx_err;
truncate = true;
}
- nhoff = skb_network_header(skb) - skb_mac_header(skb);
+ nhoff = skb_network_offset(skb);
if (skb->protocol == htons(ETH_P_IP) &&
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_offset(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
@@ -1013,12 +1022,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
ntohl(tun_id),
ntohl(md->u.index), truncate,
false);
+ proto = htons(ETH_P_ERSPAN);
} else if (md->version == 2) {
erspan_build_header_v2(skb,
ntohl(tun_id),
md->u.md2.dir,
get_hwid(&md->u.md2),
truncate, false);
+ proto = htons(ETH_P_ERSPAN2);
} else {
goto tx_err;
}
@@ -1041,25 +1052,26 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
break;
}
- if (t->parms.erspan_ver == 1)
+ if (t->parms.erspan_ver == 1) {
erspan_build_header(skb, ntohl(t->parms.o_key),
t->parms.index,
truncate, false);
- else if (t->parms.erspan_ver == 2)
+ proto = htons(ETH_P_ERSPAN);
+ } else if (t->parms.erspan_ver == 2) {
erspan_build_header_v2(skb, ntohl(t->parms.o_key),
t->parms.dir,
t->parms.hwid,
truncate, false);
- else
+ proto = htons(ETH_P_ERSPAN2);
+ } else {
goto tx_err;
+ }
fl6.daddr = t->parms.raddr;
}
/* Push GRE header. */
- proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
- : htons(ETH_P_ERSPAN2);
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1140,18 +1152,25 @@ static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu,
return;
if (rt->dst.dev) {
- dev->needed_headroom = rt->dst.dev->hard_header_len +
- t_hlen;
+ unsigned short dst_len = rt->dst.dev->hard_header_len +
+ t_hlen;
+
+ if (t->dev->header_ops)
+ dev->hard_header_len = dst_len;
+ else
+ dev->needed_headroom = dst_len;
if (set_mtu) {
- dev->mtu = rt->dst.dev->mtu - t_hlen;
+ int mtu = rt->dst.dev->mtu - t_hlen;
+
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
+ mtu -= 8;
if (dev->type == ARPHRD_ETHER)
- dev->mtu -= ETH_HLEN;
+ mtu -= ETH_HLEN;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
}
ip6_rt_put(rt);
@@ -1166,7 +1185,12 @@ static int ip6gre_calc_hlen(struct ip6_tnl *tunnel)
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
- tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen;
+
+ if (tunnel->dev->header_ops)
+ tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ else
+ tunnel->dev->needed_headroom = LL_MAX_HEADER + t_hlen;
+
return t_hlen;
}
@@ -1493,6 +1517,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
}
ip6gre_tnl_init_features(dev);
+ dev_hold(dev);
return 0;
cleanup_dst_cache_init:
@@ -1535,8 +1560,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
strcpy(tunnel->parms.name, dev->name);
tunnel->hlen = sizeof(struct ipv6hdr) + 4;
-
- dev_hold(dev);
}
static struct inet6_protocol ip6gre_protocol __read_mostly = {
@@ -1886,6 +1909,7 @@ static int ip6erspan_tap_init(struct net_device *dev)
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip6erspan_tnl_link_config(tunnel, 1);
+ dev_hold(dev);
return 0;
cleanup_dst_cache_init:
@@ -1991,8 +2015,6 @@ static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev,
if (tb[IFLA_MTU])
ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
- dev_hold(dev);
-
out:
return err;
}
diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c
index 02045494c24c..9e3574880cb0 100644
--- a/net/ipv6/ip6_icmp.c
+++ b/net/ipv6/ip6_icmp.c
@@ -9,6 +9,8 @@
#if IS_ENABLED(CONFIG_IPV6)
+#if !IS_BUILTIN(CONFIG_IPV6)
+
static ip6_icmp_send_t __rcu *ip6_icmp_send;
int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
@@ -31,18 +33,52 @@ int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
}
EXPORT_SYMBOL(inet6_unregister_icmp_sender);
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+ const struct inet6_skb_parm *parm)
{
ip6_icmp_send_t *send;
rcu_read_lock();
send = rcu_dereference(ip6_icmp_send);
+ if (send)
+ send(skb, type, code, info, NULL, parm);
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(__icmpv6_send);
+#endif
+
+#if IS_ENABLED(CONFIG_NF_NAT)
+#include <net/netfilter/nf_conntrack.h>
+void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+ struct inet6_skb_parm parm = { 0 };
+ struct sk_buff *cloned_skb = NULL;
+ enum ip_conntrack_info ctinfo;
+ struct in6_addr orig_ip;
+ struct nf_conn *ct;
- if (!send)
+ ct = nf_ct_get(skb_in, &ctinfo);
+ if (!ct || !(ct->status & IPS_SRC_NAT)) {
+ __icmpv6_send(skb_in, type, code, info, &parm);
+ return;
+ }
+
+ if (skb_shared(skb_in))
+ skb_in = cloned_skb = skb_clone(skb_in, GFP_ATOMIC);
+
+ if (unlikely(!skb_in || skb_network_header(skb_in) < skb_in->head ||
+ (skb_network_header(skb_in) + sizeof(struct ipv6hdr)) >
+ skb_tail_pointer(skb_in) || skb_ensure_writable(skb_in,
+ skb_network_offset(skb_in) + sizeof(struct ipv6hdr))))
goto out;
- send(skb, type, code, info, NULL);
+
+ orig_ip = ipv6_hdr(skb_in)->saddr;
+ ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
+ __icmpv6_send(skb_in, type, code, info, &parm);
+ ipv6_hdr(skb_in)->saddr = orig_ip;
out:
- rcu_read_unlock();
+ consume_skb(cloned_skb);
}
-EXPORT_SYMBOL(icmpv6_send);
+EXPORT_SYMBOL(icmpv6_ndo_send);
+#endif
#endif
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index acf0749ee5bb..2bdb03a45baf 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,18 +47,25 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
+void udp_v6_early_demux(struct sk_buff *);
+void tcp_v6_early_demux(struct sk_buff *);
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- void (*edemux)(struct sk_buff *skb);
-
- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
- const struct inet6_protocol *ipprot;
-
- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- edemux(skb);
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) && !skb->sk) {
+ switch (ipv6_hdr(skb)->nexthdr) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+ tcp_v6_early_demux(skb);
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+ udp_v6_early_demux(skb);
+ break;
+ }
}
+
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
@@ -222,16 +229,6 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
if (ipv6_addr_is_multicast(&hdr->saddr))
goto err;
- /* While RFC4291 is not explicit about v4mapped addresses
- * in IPv6 headers, it seems clear linux dual-stack
- * model can not deal properly with these.
- * Security models could be fooled by ::ffff:127.0.0.1 for example.
- *
- * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02
- */
- if (ipv6_addr_v4mapped(&hdr->saddr))
- goto err;
-
skb->transport_header = skb->network_header + sizeof(*hdr);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index c7e495f12011..6c47cd0ef240 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -98,6 +98,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
segs = ops->callbacks.gso_segment(skb, features);
+ if (!segs)
+ skb->network_header = skb_mac_header(skb) + nhoff - skb->head;
}
if (IS_ERR_OR_NULL(segs))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 22665e3638ac..0872df066a4e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -106,7 +106,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
if (lwtunnel_xmit_redirect(dst->lwtstate)) {
int res = lwtunnel_xmit(skb);
- if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ if (res != LWTUNNEL_XMIT_CONTINUE)
return res;
}
@@ -128,8 +128,48 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
return -EINVAL;
}
+static int
+ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
+ struct sk_buff *skb, unsigned int mtu)
+{
+ struct sk_buff *segs, *nskb;
+ netdev_features_t features;
+ int ret = 0;
+
+ /* Please see corresponding comment in ip_finish_output_gso
+ * describing the cases where GSO segment length exceeds the
+ * egress MTU.
+ */
+ features = netif_skb_features(skb);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ skb_list_walk_safe(segs, segs, nskb) {
+ int err;
+
+ skb_mark_not_on_list(segs);
+ /* Last GSO segment can be smaller than gso_size (and MTU).
+ * Adding a fragment header would produce an "atomic fragment",
+ * which is considered harmful (RFC-8021). Avoid that.
+ */
+ err = segs->len > mtu ?
+ ip6_fragment(net, sk, segs, ip6_finish_output2) :
+ ip6_finish_output2(net, sk, segs);
+ if (err && ret == 0)
+ ret = err;
+ }
+
+ return ret;
+}
+
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
+ unsigned int mtu;
int ret;
ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
@@ -141,12 +181,16 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
- IPCB(skb)->flags |= IPSKB_REROUTED;
+ IP6CB(skb)->flags |= IP6SKB_REROUTED;
return dst_output(net, sk, skb);
}
#endif
- if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+ mtu = ip6_skb_dst_mtu(skb);
+ if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
+ return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);
+
+ if ((skb->len > mtu && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(net, sk, skb, ip6_finish_output2);
@@ -421,7 +465,9 @@ int ip6_forward(struct sk_buff *skb)
if (skb_warn_if_lro(skb))
goto drop;
- if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
+ if (!net->ipv6.devconf_all->disable_policy &&
+ (!idev || !idev->cnf.disable_policy) &&
+ !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -694,6 +740,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
ipv6_hdr(skb)->payload_len = htons(first_len -
sizeof(struct ipv6hdr));
+ /* We prevent @rt from being freed. */
+ rcu_read_lock();
+
for (;;) {
/* Prepare header of the next frame,
* before previous one went down. */
@@ -736,6 +785,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
if (err == 0) {
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGOKS);
+ rcu_read_unlock();
return 0;
}
@@ -743,6 +793,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGFAILS);
+ rcu_read_unlock();
return err;
slow_path_clean:
@@ -1219,8 +1270,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (np->frag_size)
mtu = np->frag_size;
}
- if (mtu < IPV6_MIN_MTU)
- return -EINVAL;
cork->base.fragsize = mtu;
cork->base.gso_size = ipc6->gso_size;
cork->base.tx_flags = 0;
@@ -1280,8 +1329,6 @@ static int __ip6_append_data(struct sock *sk,
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
(opt ? opt->opt_nflen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
- sizeof(struct frag_hdr);
headersize = sizeof(struct ipv6hdr) +
(opt ? opt->opt_flen + opt->opt_nflen : 0) +
@@ -1289,6 +1336,13 @@ static int __ip6_append_data(struct sock *sk,
sizeof(struct frag_hdr) : 0) +
rt->rt6i_nfheader_len;
+ if (mtu <= fragheaderlen ||
+ ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
+ goto emsgsize;
+
+ maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
+ sizeof(struct frag_hdr);
+
/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
* the first fragment
*/
@@ -1356,7 +1410,7 @@ emsgsize:
unsigned int datalen;
unsigned int fraglen;
unsigned int fraggap;
- unsigned int alloclen;
+ unsigned int alloclen, alloc_extra;
unsigned int pagedlen;
alloc_new_skb:
/* There's no room in the current skb */
@@ -1383,17 +1437,28 @@ alloc_new_skb:
fraglen = datalen + fragheaderlen;
pagedlen = 0;
+ alloc_extra = hh_len;
+ alloc_extra += dst_exthdrlen;
+ alloc_extra += rt->dst.trailer_len;
+
+ /* We just reserve space for fragment header.
+ * Note: this may be overallocation if the message
+ * (without MSG_MORE) fits into the MTU.
+ */
+ alloc_extra += sizeof(struct frag_hdr);
+
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
alloclen = mtu;
- else if (!paged)
+ else if (!paged &&
+ (fraglen + alloc_extra < SKB_MAX_ALLOC ||
+ !(rt->dst.dev->features & NETIF_F_SG)))
alloclen = fraglen;
else {
alloclen = min_t(int, fraglen, MAX_HEADER);
pagedlen = fraglen - alloclen;
}
-
- alloclen += dst_exthdrlen;
+ alloclen += alloc_extra;
if (datalen != length + fraggap) {
/*
@@ -1403,30 +1468,21 @@ alloc_new_skb:
datalen += rt->dst.trailer_len;
}
- alloclen += rt->dst.trailer_len;
fraglen = datalen + fragheaderlen;
- /*
- * We just reserve space for fragment header.
- * Note: this may be overallocation if the message
- * (without MSG_MORE) fits into the MTU.
- */
- alloclen += sizeof(struct frag_hdr);
-
copy = datalen - transhdrlen - fraggap - pagedlen;
if (copy < 0) {
err = -EINVAL;
goto error;
}
if (transhdrlen) {
- skb = sock_alloc_send_skb(sk,
- alloclen + hh_len,
+ skb = sock_alloc_send_skb(sk, alloclen,
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = alloc_skb(alloclen + hh_len,
+ skb = alloc_skb(alloclen,
sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
@@ -1680,8 +1736,13 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+ u8 icmp6_type;
- ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ if (sk->sk_socket->type == SOCK_RAW && !inet_sk(sk)->hdrincl)
+ icmp6_type = fl6->fl6_icmp_type;
+ else
+ icmp6_type = icmp6_hdr(skb)->icmp6_type;
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index b825ac025d5b..56309c851928 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -272,7 +272,6 @@ static int ip6_tnl_create2(struct net_device *dev)
strcpy(t->parms.name, dev->name);
- dev_hold(dev);
ip6_tnl_link(ip6n, t);
return 0;
@@ -405,7 +404,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
unsigned int nhoff = raw - skb->data;
unsigned int off = nhoff + sizeof(*ipv6h);
- u8 next, nexthdr = ipv6h->nexthdr;
+ u8 nexthdr = ipv6h->nexthdr;
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
struct ipv6_opt_hdr *hdr;
@@ -416,25 +415,25 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
hdr = (struct ipv6_opt_hdr *)(skb->data + off);
if (nexthdr == NEXTHDR_FRAGMENT) {
- struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
- if (frag_hdr->frag_off)
- break;
optlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
optlen = (hdr->hdrlen + 2) << 2;
} else {
optlen = ipv6_optlen(hdr);
}
- /* cache hdr->nexthdr, since pskb_may_pull() might
- * invalidate hdr
- */
- next = hdr->nexthdr;
- if (nexthdr == NEXTHDR_DEST) {
- u16 i = 2;
- /* Remember : hdr is no longer valid at this point. */
- if (!pskb_may_pull(skb, off + optlen))
+ if (!pskb_may_pull(skb, off + optlen))
+ break;
+
+ hdr = (struct ipv6_opt_hdr *)(skb->data + off);
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr;
+
+ if (frag_hdr->frag_off)
break;
+ }
+ if (nexthdr == NEXTHDR_DEST) {
+ u16 i = 2;
while (1) {
struct ipv6_tlv_tnl_enc_lim *tel;
@@ -455,7 +454,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
i++;
}
}
- nexthdr = next;
+ nexthdr = hdr->nexthdr;
off += optlen;
}
return 0;
@@ -1006,14 +1005,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Local address not yet configured!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
+ p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
true, 0, IFA_F_TENTATIVE)))
- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
- p->name);
+ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
+ p->name);
else
ret = 1;
rcu_read_unlock();
@@ -1207,8 +1206,8 @@ route_lookup:
*/
max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
+ dst->header_len + t->hlen;
- if (max_headroom > dev->needed_headroom)
- dev->needed_headroom = max_headroom;
+ if (max_headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, max_headroom);
err = ip6_tnl_encap(skb, t, &proto, fl6);
if (err)
@@ -1436,6 +1435,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct __ip6_tnl_parm *p = &t->parms;
struct flowi6 *fl6 = &t->fl.u.ip6;
int t_hlen;
+ int mtu;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -1478,12 +1478,13 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
dev->hard_header_len = rt->dst.dev->hard_header_len +
t_hlen;
- dev->mtu = rt->dst.dev->mtu - t_hlen;
+ mtu = rt->dst.dev->mtu - t_hlen;
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- dev->mtu -= 8;
+ mtu -= 8;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
}
ip6_rt_put(rt);
}
@@ -1866,6 +1867,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
dev->min_mtu = ETH_MIN_MTU;
dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
+ dev_hold(dev);
return 0;
destroy_dst:
@@ -1909,7 +1911,6 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
t->parms.proto = IPPROTO_IPV6;
- dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
@@ -2222,6 +2223,16 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head
t = rtnl_dereference(t->next);
}
}
+
+ t = rtnl_dereference(ip6n->tnls_wc[0]);
+ while (t) {
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, list);
+ t = rtnl_dereference(t->next);
+ }
}
static int __net_init ip6_tnl_init_net(struct net *net)
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 94f16e82a458..a64050e77588 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -196,7 +196,6 @@ static int vti6_tnl_create2(struct net_device *dev)
strcpy(t->parms.name, dev->name);
- dev_hold(dev);
vti6_tnl_link(ip6n, t);
return 0;
@@ -563,12 +562,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
vti6_addr_conflict(t, ipv6_hdr(skb)))
goto tx_err;
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
break;
default:
goto tx_err;
@@ -800,6 +799,8 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
struct net *net = dev_net(dev);
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ memset(&p1, 0, sizeof(p1));
+
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
@@ -925,6 +926,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev)
dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
if (!dev->tstats)
return -ENOMEM;
+ dev_hold(dev);
return 0;
}
@@ -956,7 +958,6 @@ static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev)
struct vti6_net *ip6n = net_generic(net, vti6_net_id);
t->parms.proto = IPPROTO_IPV6;
- dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t);
return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 35e7092eceb3..329bad6cbb76 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -245,7 +245,9 @@ static int __net_init ip6mr_rules_init(struct net *net)
return 0;
err2:
+ rtnl_lock();
ip6mr_free_table(mrt);
+ rtnl_unlock();
err1:
fib_rules_unregister(ops);
return err;
@@ -1062,7 +1064,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
And all this only to mangle msg->im6_msgtype and
to set msg->im6_mbz to "mbz" :-)
*/
- skb_push(skb, -skb_network_offset(pkt));
+ __skb_pull(skb, skb_network_offset(pkt));
skb_push(skb, sizeof(*msg));
skb_reset_transport_header(skb);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4e1da6cb9ed7..1c155e610c06 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -166,15 +166,18 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
rtnl_lock();
lock_sock(sk);
+ /* Another thread has converted the socket into IPv4 with
+ * IPV6_ADDRFORM concurrently.
+ */
+ if (unlikely(sk->sk_family != AF_INET6))
+ goto unlock;
+
switch (optname) {
case IPV6_ADDRFORM:
if (optlen < sizeof(int))
goto e_inval;
if (val == PF_INET) {
- struct ipv6_txoptions *opt;
- struct sk_buff *pktopt;
-
if (sk->sk_type == SOCK_RAW)
break;
@@ -205,7 +208,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
}
- fl6_free_socklist(sk);
__ipv6_sock_mc_close(sk);
__ipv6_sock_ac_close(sk);
@@ -240,14 +242,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg((__force struct ipv6_txoptions **)&np->opt,
- NULL);
- if (opt) {
- atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
- txopt_put(opt);
- }
- pktopt = xchg(&np->pktoptions, NULL);
- kfree_skb(pktopt);
+
+ /* Disable all options not to allocate memory anymore,
+ * but there is still a race. See the lockless path
+ * in udpv6_sendmsg() and ipv6_local_rxpmtu().
+ */
+ np->rxopt.all = 0;
+
+ inet6_cleanup_sock(sk);
/*
* ... and add it to the refcnt debug socks count
@@ -913,6 +915,7 @@ pref_skip_coa:
break;
}
+unlock:
release_sock(sk);
if (needs_rtnl)
rtnl_unlock();
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index f2f8551416c3..3d048401141f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1606,10 +1606,7 @@ static struct sk_buff *mld_newpack(struct inet6_dev *idev, unsigned int mtu)
IPV6_TLV_PADN, 0 };
/* we assume size > sizeof(ra) here */
- /* limit our allocations to order-0 page */
- size = min_t(int, size, SKB_MAX_ORDER(0, 0));
skb = sock_alloc_send_skb(sk, size, 1, &err);
-
if (!skb)
return NULL;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 673a4a932f2a..a640deb9ab14 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -195,7 +195,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
static inline int ndisc_is_useropt(const struct net_device *dev,
struct nd_opt_hdr *opt)
{
- return opt->nd_opt_type == ND_OPT_RDNSS ||
+ return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+ opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
ndisc_ops_is_useropt(dev, opt->nd_opt_type);
}
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 6d0b1f3e927b..5679fa3f696a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -17,10 +17,10 @@
#include <net/xfrm.h>
#include <net/netfilter/nf_queue.h>
-int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
+int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff *skb)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct sock *sk = sk_to_full_sk(skb->sk);
+ struct sock *sk = sk_to_full_sk(sk_partial);
unsigned int hh_len;
struct dst_entry *dst;
int strict = (ipv6_addr_type(&iph->daddr) &
@@ -81,7 +81,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
- return ip6_route_me_harder(entry->state.net, skb);
+ return ip6_route_me_harder(entry->state.net, entry->state.sk, skb);
}
return 0;
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index daf2e9e9193d..d93490ac8275 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -276,6 +276,7 @@ ip6t_do_table(struct sk_buff *skb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
+ acpar.fragoff = 0;
acpar.hotdrop = false;
acpar.state = state;
@@ -1448,6 +1449,8 @@ translate_compat_table(struct net *net,
if (!newinfo)
goto out_unlock;
+ memset(newinfo->entries, 0, size);
+
newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
newinfo->hook_entry[i] = compatr->hook_entry[i];
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index b0524b18c4fb..acba3757ff60 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -60,7 +60,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
- err = ip6_route_me_harder(state->net, skb);
+ err = ip6_route_me_harder(state->net, state->sk, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index c6bf580d0f33..c456e2f902b9 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -300,9 +300,11 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m,
switch (dev->type) {
case ARPHRD_ETHER:
- nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
- eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
- ntohs(eth_hdr(skb)->h_proto));
+ nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ",
+ eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest);
+ nf_log_dump_vlan(m, skb);
+ nf_log_buf_add(m, "MACPROTO=%04x ",
+ ntohs(eth_hdr(skb)->h_proto));
return;
default:
break;
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index ca6d38698b1a..2b6a3b27f670 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -352,7 +352,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
- err = ip6_route_me_harder(state->net, skb);
+ err = ip6_route_me_harder(state->net, state->sk, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index f14de4b6d639..5e5463459563 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -102,9 +102,9 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff,
struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
const struct net_device *indev)
{
- __be16 uninitialized_var(dport), uninitialized_var(sport);
+ __be16 dport, sport;
const struct in6_addr *daddr = NULL, *saddr = NULL;
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var;
struct sk_buff *data_skb = NULL;
int doff = 0;
int thoff = 0, tproto;
@@ -134,8 +134,6 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
- struct ipv6hdr ipv6_var;
-
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport, &ipv6_var))
return NULL;
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index da3f1f8cb325..afe79cb46e63 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -52,7 +52,7 @@ static unsigned int nf_route_table_hook(void *priv,
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
- err = ip6_route_me_harder(state->net, skb);
+ err = ip6_route_me_harder(state->net, state->sk, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index d8b5b60b7d53..d8bb7c85287c 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -16,8 +16,8 @@
#include <net/netfilter/ipv6/nf_dup_ipv6.h>
struct nft_dup_ipv6 {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_dev:8;
+ u8 sreg_addr;
+ u8 sreg_dev;
};
static void nft_dup_ipv6_eval(const struct nft_expr *expr,
@@ -41,16 +41,16 @@ static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_ADDR] == NULL)
return -EINVAL;
- priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, sizeof(struct in6_addr));
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_ADDR], &priv->sreg_addr,
+ sizeof(struct in6_addr));
if (err < 0)
return err;
- if (tb[NFTA_DUP_SREG_DEV] != NULL) {
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
- }
- return 0;
+ if (tb[NFTA_DUP_SREG_DEV])
+ err = nft_parse_register_load(tb[NFTA_DUP_SREG_DEV],
+ &priv->sreg_dev, sizeof(int));
+
+ return err;
}
static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 36be3cf0adef..fa71e40789ed 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -41,6 +41,9 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev);
+ } else if ((priv->flags & NFTA_FIB_F_IIF) &&
+ (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) {
+ fl6->flowi6_oif = dev->ifindex;
}
if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST)
@@ -189,7 +192,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev)
+ if (oif && oif != rt->rt6i_idev->dev &&
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
goto put_rt_err;
switch (priv->result) {
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 868ae23dbae1..3829b565c645 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -14,29 +14,11 @@ static u32 __ipv6_select_ident(struct net *net,
const struct in6_addr *dst,
const struct in6_addr *src)
{
- const struct {
- struct in6_addr dst;
- struct in6_addr src;
- } __aligned(SIPHASH_ALIGNMENT) combined = {
- .dst = *dst,
- .src = *src,
- };
- u32 hash, id;
-
- /* Note the following code is not safe, but this is okay. */
- if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key)))
- get_random_bytes(&net->ipv4.ip_id_key,
- sizeof(net->ipv4.ip_id_key));
-
- hash = siphash(&combined, sizeof(combined), &net->ipv4.ip_id_key);
-
- /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve,
- * set the hight order instead thus minimizing possible future
- * collisions.
- */
- id = ip_idents_reserve(hash, 1);
- if (unlikely(!id))
- id = 1 << 31;
+ u32 id;
+
+ do {
+ id = prandom_u32();
+ } while (!id);
return id;
}
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index 5c9be8594483..e065f49a4ae3 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -101,7 +101,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
addr_type = ipv6_addr_type(daddr);
if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
(addr_type & IPV6_ADDR_MAPPED) ||
- (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if))
+ (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if &&
+ l3mdev_master_ifindex_by_index(sock_net(sk), oif) != sk->sk_bound_dev_if))
return -EINVAL;
/* TODO: use ip6_datagram_send_ctl to get options from cmsg */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8d19729f8516..31aad22c59fc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -303,7 +303,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST) &&
- !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) {
+ !ipv6_can_nonlocal_bind(sock_net(sk), inet)) {
err = -EADDRNOTAVAIL;
if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
dev, 0)) {
@@ -544,6 +544,7 @@ csum_copy_err:
static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
struct raw6_sock *rp)
{
+ struct ipv6_txoptions *opt;
struct sk_buff *skb;
int err = 0;
int offset;
@@ -561,6 +562,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
offset = rp->offset;
total_len = inet_sk(sk)->cork.base.length;
+ opt = inet6_sk(sk)->cork.opt;
+ total_len -= opt ? opt->opt_flen : 0;
+
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
@@ -660,7 +664,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb->ip_summed = CHECKSUM_NONE;
- sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc->tsflags);
if (flags & MSG_CONFIRM)
skb_set_dst_pending_confirm(skb, 1);
@@ -828,7 +832,8 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!proto)
proto = inet->inet_num;
- else if (proto != inet->inet_num)
+ else if (proto != inet->inet_num &&
+ inet->inet_num != IPPROTO_RAW)
return -EINVAL;
if (proto > 255)
@@ -1255,8 +1260,6 @@ static void raw6_destroy(struct sock *sk)
lock_sock(sk);
ip6_flush_pending_frames(sk);
release_sock(sk);
-
- inet6_destroy_sock(sk);
}
static int rawv6_init_sk(struct sock *sk)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index c6132e39ab16..b596727f0497 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -302,7 +302,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
skb_network_header_len(skb));
rcu_read_lock();
- __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
+ __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
@@ -317,7 +317,7 @@ out_oom:
net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
out_fail:
rcu_read_lock();
- __IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
+ __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
rcu_read_unlock();
inet_frag_kill(&fq->q);
return -1;
@@ -347,7 +347,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (!(fhdr->frag_off & htons(0xFFF9))) {
+ if (!(fhdr->frag_off & htons(IP6_OFFSET | IP6_MF))) {
/* It is not a fragmented frame */
skb->transport_header += sizeof(struct frag_hdr);
__IP6_INC_STATS(net,
@@ -355,6 +355,8 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(skb)->frag_max_size = ntohs(hdr->payload_len) +
+ sizeof(struct ipv6hdr);
return 1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index dad35cd48807..9dbc9c0cbc5a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -45,6 +45,7 @@
#include <linux/nsproxy.h>
#include <linux/slab.h>
#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <net/net_namespace.h>
#include <net/snmp.h>
#include <net/ipv6.h>
@@ -91,7 +92,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev, int how);
-static int ip6_dst_gc(struct dst_ops *ops);
+static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -1337,17 +1338,24 @@ static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket)
static u32 rt6_exception_hash(const struct in6_addr *dst,
const struct in6_addr *src)
{
- static u32 seed __read_mostly;
- u32 val;
+ static siphash_key_t rt6_exception_key __read_mostly;
+ struct {
+ struct in6_addr dst;
+ struct in6_addr src;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .dst = *dst,
+ };
+ u64 val;
- net_get_random_once(&seed, sizeof(seed));
- val = jhash(dst, sizeof(*dst), seed);
+ net_get_random_once(&rt6_exception_key, sizeof(rt6_exception_key));
#ifdef CONFIG_IPV6_SUBTREES
if (src)
- val = jhash(src, sizeof(*src), val);
+ combined.src = *src;
#endif
- return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
+ val = siphash(&combined, sizeof(combined), &rt6_exception_key);
+
+ return hash_64(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT);
}
/* Helper function to find the cached rt in the hash table
@@ -1446,6 +1454,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
struct rt6_exception_bucket *bucket;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
+ int max_depth;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
@@ -1507,7 +1516,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
bucket->depth++;
net->ipv6.rt6_stats->fib_rt_cache++;
- if (bucket->depth > FIB6_MAX_DEPTH)
+ /* Randomize max depth to avoid some side channels attacks. */
+ max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH);
+ while (bucket->depth > max_depth)
rt6_exception_remove_oldest(bucket);
out:
@@ -2309,7 +2320,7 @@ static void ip6_link_failure(struct sk_buff *skb)
if (from) {
fn = rcu_dereference(from->fib6_node);
if (fn && (rt->rt6i_flags & RTF_DEFAULT))
- fn->fn_sernum = -1;
+ WRITE_ONCE(fn->fn_sernum, -1);
}
}
rcu_read_unlock();
@@ -2756,29 +2767,30 @@ out:
return dst;
}
-static int ip6_dst_gc(struct dst_ops *ops)
+static void ip6_dst_gc(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
- int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
+ unsigned int val;
int entries;
entries = dst_entries_get_fast(ops);
- if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
- entries <= rt_max_size)
+ if (entries > ops->gc_thresh)
+ entries = dst_entries_get_slow(ops);
+
+ if (time_after(rt_last_gc + rt_min_interval, jiffies))
goto out;
- net->ipv6.ip6_rt_gc_expire++;
- fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);
+ fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
- net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, rt_gc_timeout >> 1);
out:
- net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
- return entries > rt_max_size;
+ val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
}
static int ip6_convert_metrics(struct net *net, struct fib6_info *rt,
@@ -4402,6 +4414,19 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
+static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla,
+ struct netlink_ext_ack *extack)
+{
+ if (nla_len(nla) < sizeof(*gw)) {
+ NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY");
+ return -EINVAL;
+ }
+
+ *gw = nla_get_in6_addr(nla);
+
+ return 0;
+}
+
static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -4442,7 +4467,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
- r_cfg.fc_gateway = nla_get_in6_addr(nla);
+ err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
+ extack);
+ if (err)
+ goto cleanup;
+
r_cfg.fc_flags |= RTF_GATEWAY;
}
r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
@@ -4514,9 +4543,11 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
* nexthops have been replaced by first new, the rest should
* be added to it.
*/
- cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
- NLM_F_REPLACE);
- cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
+ if (cfg->fc_nlinfo.nlh) {
+ cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+ NLM_F_REPLACE);
+ cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
+ }
nhn++;
}
@@ -4574,7 +4605,13 @@ static int ip6_route_multipath_del(struct fib6_config *cfg,
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
if (nla) {
- nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+ err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla,
+ extack);
+ if (err) {
+ last_err = err;
+ goto next_rtnh;
+ }
+
r_cfg.fc_flags |= RTF_GATEWAY;
}
}
@@ -4582,6 +4619,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg,
if (err)
last_err = err;
+next_rtnh:
rtnh = rtnh_next(rtnh, &remaining);
}
@@ -5295,7 +5333,7 @@ static int __net_init ip6_route_net_init(struct net *net)
#endif
net->ipv6.sysctl.flush_delay = 0;
- net->ipv6.sysctl.ip6_rt_max_size = 4096;
+ net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
@@ -5303,7 +5341,7 @@ static int __net_init ip6_route_net_init(struct net *net)
net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ;
net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
- net->ipv6.ip6_rt_gc_expire = 30*HZ;
+ atomic_set(&net->ipv6.ip6_rt_gc_expire, 30*HZ);
ret = 0;
out:
@@ -5337,10 +5375,16 @@ static void __net_exit ip6_route_net_exit(struct net *net)
static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
- proc_create_net("ipv6_route", 0, net->proc_net, &ipv6_route_seq_ops,
- sizeof(struct ipv6_route_iter));
- proc_create_net_single("rt6_stats", 0444, net->proc_net,
- rt6_stats_seq_show, NULL);
+ if (!proc_create_net("ipv6_route", 0, net->proc_net,
+ &ipv6_route_seq_ops,
+ sizeof(struct ipv6_route_iter)))
+ return -ENOMEM;
+
+ if (!proc_create_net_single("rt6_stats", 0444, net->proc_net,
+ rt6_stats_seq_show, NULL)) {
+ remove_proc_entry("ipv6_route", net->proc_net);
+ return -ENOMEM;
+ }
#endif
return 0;
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 9b2f272ca164..89d55770ac74 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -130,6 +130,11 @@ static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info)
goto out_unlock;
}
+ if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
if (hinfo) {
err = seg6_hmac_info_del(net, hmackeyid);
if (err)
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 8546f94f30d4..b801283da28d 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -406,7 +406,6 @@ int __init seg6_hmac_init(void)
{
return seg6_hmac_init_algo();
}
-EXPORT_SYMBOL(seg6_hmac_init);
int __net_init seg6_hmac_net_init(struct net *net)
{
@@ -416,7 +415,6 @@ int __net_init seg6_hmac_net_init(struct net *net)
return 0;
}
-EXPORT_SYMBOL(seg6_hmac_net_init);
void seg6_hmac_exit(void)
{
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index ee5403cbe655..2e90672852c8 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -148,6 +148,14 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
+ /* the control block has been erased, so we have to set the
+ * iif once again.
+ * We read the receiving interface index directly from the
+ * skb->skb_iif as it is done in the IPv4 receiving path (i.e.:
+ * ip_rcv_core(...)).
+ */
+ IP6CB(skb)->iif = skb->skb_iif;
}
hdr->nexthdr = NEXTHDR_ROUTING;
@@ -168,6 +176,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, tot_len);
return 0;
@@ -220,6 +230,8 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
return 0;
@@ -281,7 +293,6 @@ static int seg6_do_srh(struct sk_buff *skb)
break;
}
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return 0;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 607709a8847c..18970f6a68c6 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -415,7 +415,6 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
@@ -447,7 +446,6 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index bfed7508ba19..ec1de1e6b8e3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -215,8 +215,6 @@ static int ipip6_tunnel_create(struct net_device *dev)
ipip6_tunnel_clone_6rd(dev, sitn);
- dev_hold(dev);
-
ipip6_tunnel_link(sitn, t);
return 0;
@@ -316,9 +314,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
NULL;
- rcu_read_lock();
-
- ca = t->prl_count < cmax ? t->prl_count : cmax;
+ ca = min(t->prl_count, cmax);
if (!kp) {
/* We don't try hard to allocate much memory for
@@ -333,7 +329,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
}
}
- c = 0;
+ rcu_read_lock();
for_each_prl_rcu(t->prl) {
if (c >= cmax)
break;
@@ -345,7 +341,7 @@ static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
if (kprl.addr != htonl(INADDR_ANY))
break;
}
-out:
+
rcu_read_unlock();
len = sizeof(*kp) * c;
@@ -354,7 +350,7 @@ out:
ret = -EFAULT;
kfree(kp);
-
+out:
return ret;
}
@@ -1057,12 +1053,13 @@ tx_err:
static void ipip6_tunnel_bind_dev(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen = tunnel->hlen + sizeof(struct iphdr);
struct net_device *tdev = NULL;
- struct ip_tunnel *tunnel;
+ int hlen = LL_MAX_HEADER;
const struct iphdr *iph;
struct flowi4 fl4;
- tunnel = netdev_priv(dev);
iph = &tunnel->parms.iph;
if (iph->daddr) {
@@ -1085,13 +1082,15 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev)
tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
if (tdev && !netif_is_l3_master(tdev)) {
- int t_hlen = tunnel->hlen + sizeof(struct iphdr);
+ int mtu;
- dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
- dev->mtu = tdev->mtu - t_hlen;
- if (dev->mtu < IPV6_MIN_MTU)
- dev->mtu = IPV6_MIN_MTU;
+ mtu = tdev->mtu - t_hlen;
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ WRITE_ONCE(dev->mtu, mtu);
+ hlen = tdev->hard_header_len + tdev->needed_headroom;
}
+ dev->needed_headroom = t_hlen + hlen;
}
static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p,
@@ -1377,7 +1376,6 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->priv_destructor = ipip6_dev_free;
dev->type = ARPHRD_SIT;
- dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
dev->min_mtu = IPV6_MIN_MTU;
dev->max_mtu = IP6_MAX_MTU - t_hlen;
@@ -1409,7 +1407,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
dev->tstats = NULL;
return err;
}
-
+ dev_hold(dev);
return 0;
}
@@ -1425,7 +1423,6 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
iph->ihl = 5;
iph->ttl = 64;
- dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
}
@@ -1598,8 +1595,11 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
}
#ifdef CONFIG_IPV6_SIT_6RD
- if (ipip6_netlink_6rd_parms(data, &ip6rd))
+ if (ipip6_netlink_6rd_parms(data, &ip6rd)) {
err = ipip6_tunnel_update_6rd(nt, &ip6rd);
+ if (err < 0)
+ unregister_netdevice_queue(dev, NULL);
+ }
#endif
return err;
@@ -1817,9 +1817,9 @@ static void __net_exit sit_destroy_tunnels(struct net *net,
if (dev->rtnl_link_ops == &sit_link_ops)
unregister_netdevice_queue(dev, head);
- for (prio = 1; prio < 4; prio++) {
+ for (prio = 0; prio < 4; prio++) {
int h;
- for (h = 0; h < IP6_SIT_HASH_SIZE; h++) {
+ for (h = 0; h < (prio ? IP6_SIT_HASH_SIZE : 1); h++) {
struct ip_tunnel *t;
t = rtnl_dereference(sitn->tunnels[prio][h]);
@@ -1877,7 +1877,6 @@ static int __net_init sit_init_net(struct net *net)
return 0;
err_reg_dev:
- ipip6_dev_free(sitn->fb_tunnel_dev);
free_netdev(sitn->fb_tunnel_dev);
err_alloc_dev:
return err;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index a377be8a9fb4..ab073ac3d7ac 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -141,7 +141,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
__u32 cookie = ntohl(th->ack_seq) - 1;
struct sock *ret = sk;
struct request_sock *req;
- int mss;
+ int full_space, mss;
struct dst_entry *dst;
__u8 rcv_wscale;
u32 tsoff = 0;
@@ -181,16 +181,18 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq = inet_rsk(req);
treq = tcp_rsk(req);
+ treq->af_specific = &tcp_request_sock_ipv6_ops;
treq->tfo_listener = false;
- if (security_inet_conn_request(sk, skb, req))
- goto out_free;
-
req->mss = mss;
ireq->ir_rmt_port = th->source;
ireq->ir_num = ntohs(th->dest);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+
+ if (security_inet_conn_request(sk, skb, req))
+ goto out_free;
+
if (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -246,7 +248,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
}
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
- tcp_select_initial_window(sk, tcp_full_space(sk), req->mss,
+ /* limit the window selection if the user enforce a smaller rx buffer */
+ full_space = tcp_full_space(sk);
+ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
+
+ tcp_select_initial_window(sk, full_space, req->mss,
&req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2e76ebfdc907..033cf81f3483 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -97,7 +97,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- sk->sk_rx_dst = dst;
+ rcu_assign_pointer(sk->sk_rx_dst, dst);
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
}
@@ -206,7 +206,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
!ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
- tp->write_seq = 0;
+ WRITE_ONCE(tp->write_seq, 0);
}
sk->sk_v6_daddr = usin->sin6_addr;
@@ -257,6 +257,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
+ fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
@@ -304,10 +305,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (likely(!tp->repair)) {
if (!tp->write_seq)
- tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
- sk->sk_v6_daddr.s6_addr32,
- inet->inet_sport,
- inet->inet_dport);
+ WRITE_ONCE(tp->write_seq,
+ secure_tcpv6_seq(np->saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32,
+ inet->inet_sport,
+ inet->inet_dport));
tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
@@ -326,6 +328,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
late_failure:
tcp_set_state(sk, TCP_CLOSE);
+ if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
+ inet_reset_saddr(sk);
failure:
inet->inet_dport = 0;
sk->sk_route_caps = 0;
@@ -335,11 +339,20 @@ failure:
static void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
+ u32 mtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
+
+ /* Drop requests trying to increase our current mss.
+ * Check done in __ip6_rt_update_pmtu() is too late.
+ */
+ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, mtu);
if (!dst)
return;
@@ -418,6 +431,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (type == ICMPV6_PKT_TOOBIG) {
+ u32 mtu = ntohl(info);
+
/* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs send out by Linux are always <576bytes so
* they should go through unfragmented).
@@ -428,7 +443,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
- tp->mtu_info = ntohl(info);
+ if (mtu < IPV6_MIN_MTU)
+ goto out;
+
+ WRITE_ONCE(tp->mtu_info, mtu);
+
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
@@ -502,7 +521,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
+ np->tclass);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -772,7 +792,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
@@ -1038,6 +1058,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
+ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+ __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+ return 0;
+ }
+
return tcp_conn_request(&tcp6_request_sock_ops,
&tcp_request_sock_ipv6_ops, sk, skb);
@@ -1068,6 +1093,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct ipv6_txoptions *opt;
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
+ bool found_dup_sk = false;
struct tcp_sock *newtp;
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
@@ -1236,20 +1262,27 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
tcp_done(newsk);
goto out;
}
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
+ &found_dup_sk);
if (*own_req) {
tcp_move_syn(newtp, req);
/* Clone pktoptions received with SYN, if we own the req */
if (ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts,
- sk_gfp_mask(sk, GFP_ATOMIC));
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions) {
+ if (newnp->pktoptions)
tcp_v6_restore_cb(newnp->pktoptions);
- skb_set_owner_r(newnp->pktoptions, newsk);
- }
+ }
+ } else {
+ if (!req_unhash && found_dup_sk) {
+ /* This code path should only be executed in the
+ * syncookie case only
+ */
+ bh_unlock_sock(newsk);
+ sock_put(newsk);
+ newsk = NULL;
}
}
@@ -1308,18 +1341,21 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- struct dst_entry *dst = sk->sk_rx_dst;
+ struct dst_entry *dst;
+
+ dst = rcu_dereference_protected(sk->sk_rx_dst,
+ lockdep_sock_is_held(sk));
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
dst_release(dst);
- sk->sk_rx_dst = NULL;
}
}
@@ -1387,7 +1423,6 @@ ipv6_pktoptions:
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
@@ -1637,7 +1672,7 @@ do_time_wait:
goto discard_it;
}
-static void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
@@ -1664,7 +1699,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk_fullsock(sk)) {
- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
+ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
@@ -1757,12 +1792,6 @@ static int tcp_v6_init_sock(struct sock *sk)
return 0;
}
-static void tcp_v6_destroy_sock(struct sock *sk)
-{
- tcp_v4_destroy_sock(sk);
- inet6_destroy_sock(sk);
-}
-
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
@@ -1839,7 +1868,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
* we might find a transient negative value.
*/
rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
- tp->copied_seq, 0);
+ READ_ONCE(tp->copied_seq), 0);
seq_printf(seq,
"%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
@@ -1850,7 +1879,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
dest->s6_addr32[0], dest->s6_addr32[1],
dest->s6_addr32[2], dest->s6_addr32[3], destp,
state,
- tp->write_seq - tp->snd_una,
+ READ_ONCE(tp->write_seq) - tp->snd_una,
rx_queue,
timer_active,
jiffies_delta_to_clock_t(timer_expires - jiffies),
@@ -1955,7 +1984,7 @@ struct proto tcpv6_prot = {
.accept = inet_csk_accept,
.ioctl = tcp_ioctl,
.init = tcp_v6_init_sock,
- .destroy = tcp_v6_destroy_sock,
+ .destroy = tcp_v4_destroy_sock,
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
@@ -1992,12 +2021,7 @@ struct proto tcpv6_prot = {
.diag_destroy = tcp_abort,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 6799ad462be3..cf0bbe2e3a79 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -66,6 +66,19 @@ static bool udp6_lib_exact_dif_match(struct net *net, struct sk_buff *skb)
return false;
}
+static void udpv6_destruct_sock(struct sock *sk)
+{
+ udp_destruct_common(sk);
+ inet6_sock_destruct(sk);
+}
+
+int udpv6_init_sock(struct sock *sk)
+{
+ skb_queue_head_init(&udp_sk(sk)->reader_queue);
+ sk->sk_destruct = udpv6_destruct_sock;
+ return 0;
+}
+
static u32 udp6_ehashfn(const struct net *net,
const struct in6_addr *laddr,
const u16 lport,
@@ -86,7 +99,7 @@ static u32 udp6_ehashfn(const struct net *net,
fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret);
return __inet6_ehashfn(lhash, lport, fhash, fport,
- udp_ipv6_hash_secret + net_hash_mix(net));
+ udp6_ehash_secret + net_hash_mix(net));
}
int udp_v6_get_port(struct sock *sk, unsigned short snum)
@@ -828,7 +841,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
struct dst_entry *dst = skb_dst(skb);
int ret;
- if (unlikely(sk->sk_rx_dst != dst))
+ if (unlikely(rcu_dereference(sk->sk_rx_dst) != dst))
udp6_sk_rx_dst_set(sk, dst);
if (!uh->check && !udp_sk(sk)->no_check6_rx) {
@@ -912,7 +925,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
return NULL;
}
-static void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
@@ -940,7 +953,7 @@ static void udp_v6_early_demux(struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_efree;
- dst = READ_ONCE(sk->sk_rx_dst);
+ dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
@@ -1069,7 +1082,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
kfree_skb(skb);
return -EINVAL;
}
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1169,13 +1182,13 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int addr_len = msg->msg_namelen;
bool connected = false;
int ulen = len;
- int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int err;
int is_udplite = IS_UDPLITE(sk);
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
ipcm6_init(&ipc6);
- ipc6.gso_size = up->gso_size;
+ ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = sk->sk_tsflags;
/* destination address check */
@@ -1219,9 +1232,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
msg->msg_name = &sin;
msg->msg_namelen = sizeof(sin);
do_udp_sendmsg:
- if (__ipv6_only_sock(sk))
- return -ENETUNREACH;
- return udp_sendmsg(sk, msg, len);
+ err = __ipv6_only_sock(sk) ?
+ -ENETUNREACH : udp_sendmsg(sk, msg, len);
+ msg->msg_name = sin6;
+ msg->msg_namelen = addr_len;
+ return err;
}
}
@@ -1476,6 +1491,9 @@ void udpv6_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
lock_sock(sk);
+
+ /* protects from races with udp_abort() */
+ sock_set_flag(sk, SOCK_DEAD);
udp_v6_flush_pending_frames(sk);
release_sock(sk);
@@ -1485,8 +1503,6 @@ void udpv6_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
-
- inet6_destroy_sock(sk);
}
/*
@@ -1530,12 +1546,7 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
}
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
- .early_demux = udp_v6_early_demux,
- .early_demux_handler = udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
@@ -1595,7 +1606,7 @@ struct proto udpv6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
- .init = udp_init_sock,
+ .init = udpv6_init_sock,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 7903e21c178b..e5d067b09ccf 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -12,6 +12,7 @@ int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int);
void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
__be32, struct udp_table *);
+int udpv6_init_sock(struct sock *sk);
int udp_v6_get_port(struct sock *sk, unsigned short snum);
int udpv6_getsockopt(struct sock *sk, int level, int optname,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 5000ad6878e6..a26a4b5da09c 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -15,6 +15,13 @@
#include <linux/proc_fs.h>
#include "udp_impl.h"
+static int udplitev6_sk_init(struct sock *sk)
+{
+ udpv6_init_sock(sk);
+ udp_sk(sk)->pcflag = UDPLITE_BIT;
+ return 0;
+}
+
static int udplitev6_rcv(struct sk_buff *skb)
{
return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE);
@@ -40,7 +47,7 @@ struct proto udplitev6_prot = {
.connect = ip6_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
- .init = udplite_sk_init,
+ .init = udplitev6_sk_init,
.destroy = udpv6_destroy_sock,
.setsockopt = udpv6_setsockopt,
.getsockopt = udpv6_getsockopt,
@@ -51,6 +58,8 @@ struct proto udplitev6_prot = {
.get_port = udp_v6_get_port,
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
.obj_size = sizeof(struct udp6_sock),
.h.udp_table = &udplite_table,
#ifdef CONFIG_COMPAT
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 71d022704923..fbcec4827071 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -142,11 +142,24 @@ static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buf
return x->outer_mode->afinfo->output_finish(sk, skb);
}
+static int xfrm6_noneed_fragment(struct sk_buff *skb)
+{
+ struct frag_hdr *fh;
+ u8 prevhdr = ipv6_hdr(skb)->nexthdr;
+
+ if (prevhdr != NEXTHDR_FRAGMENT)
+ return 0;
+ fh = (struct frag_hdr *)(skb->data + sizeof(struct ipv6hdr));
+ if (fh->nexthdr == NEXTHDR_ESP || fh->nexthdr == NEXTHDR_AUTH)
+ return 1;
+ return 0;
+}
+
static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
- int mtu;
+ unsigned int mtu;
bool toobig;
#ifdef CONFIG_NETFILTER
@@ -170,6 +183,9 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
xfrm6_local_rxpmtu(skb, mtu);
kfree_skb(skb);
return -EMSGSIZE;
+ } else if (toobig && xfrm6_noneed_fragment(skb)) {
+ skb->ignore_df = 1;
+ goto skip_frag;
} else if (!skb->ignore_df && toobig && skb->sk) {
xfrm_local_error(skb, mtu);
kfree_skb(skb);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 30232591cf2b..a1dfe4f5ed3a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -243,11 +243,11 @@ static void xfrm6_dst_destroy(struct dst_entry *dst)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
- if (likely(xdst->u.rt6.rt6i_idev))
- in6_dev_put(xdst->u.rt6.rt6i_idev);
dst_destroy_metrics_generic(dst);
if (xdst->u.rt6.rt6i_uncached_list)
rt6_uncached_list_del(&xdst->u.rt6);
+ if (likely(xdst->u.rt6.rt6i_idev))
+ in6_dev_put(xdst->u.rt6.rt6i_idev);
xfrm_dst_destroy(xdst);
}
@@ -416,9 +416,13 @@ int __init xfrm6_init(void)
if (ret)
goto out_state;
- register_pernet_subsys(&xfrm6_net_ops);
+ ret = register_pernet_subsys(&xfrm6_net_ops);
+ if (ret)
+ goto out_protocol;
out:
return ret;
+out_protocol:
+ xfrm6_protocol_fini();
out_state:
xfrm6_state_fini();
out_policy:
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 23a1002ed86d..d59f2341bfec 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1571,7 +1571,8 @@ static int iucv_sock_shutdown(struct socket *sock, int how)
break;
}
- if (how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) {
+ if ((how == SEND_SHUTDOWN || how == SHUTDOWN_MASK) &&
+ sk->sk_state == IUCV_CONNECTED) {
if (iucv->transport == AF_IUCV_TRANS_IUCV) {
txmsg.class = 0;
txmsg.tag = 0;
@@ -1781,7 +1782,7 @@ static int iucv_callback_connreq(struct iucv_path *path,
}
/* Create the new socket */
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
+ nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
if (!nsk) {
err = pr_iucv->path_sever(path, user_data);
iucv_path_free(path);
@@ -1991,7 +1992,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb)
goto out;
}
- nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0);
+ nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0);
bh_lock_sock(sk);
if ((sk->sk_state != IUCV_LISTEN) ||
sk_acceptq_is_full(sk) ||
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index eb502c6290c2..aacaa5119b45 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -119,7 +119,7 @@ struct iucv_irq_data {
u16 ippathid;
u8 ipflags1;
u8 iptype;
- u32 res2[8];
+ u32 res2[9];
};
struct iucv_irq_list {
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index b919db02c7f9..a82892c28860 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -164,7 +164,8 @@ static void kcm_rcv_ready(struct kcm_sock *kcm)
/* Buffer limit is okay now, add to ready list */
list_add_tail(&kcm->wait_rx_list,
&kcm->mux->kcm_rx_waiters);
- kcm->rx_wait = true;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, true);
}
static void kcm_rfree(struct sk_buff *skb)
@@ -180,7 +181,7 @@ static void kcm_rfree(struct sk_buff *skb)
/* For reading rx_wait and rx_psock without holding lock */
smp_mb__after_atomic();
- if (!kcm->rx_wait && !kcm->rx_psock &&
+ if (!READ_ONCE(kcm->rx_wait) && !READ_ONCE(kcm->rx_psock) &&
sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
spin_lock_bh(&mux->rx_lock);
kcm_rcv_ready(kcm);
@@ -223,7 +224,7 @@ static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
struct sk_buff *skb;
struct kcm_sock *kcm;
- while ((skb = __skb_dequeue(head))) {
+ while ((skb = skb_dequeue(head))) {
/* Reset destructor to avoid calling kcm_rcv_ready */
skb->destructor = sock_rfree;
skb_orphan(skb);
@@ -239,7 +240,8 @@ try_again:
if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
/* Should mean socket buffer full */
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
/* Commit rx_wait to read in kcm_free */
smp_wmb();
@@ -282,10 +284,12 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
kcm = list_first_entry(&mux->kcm_rx_waiters,
struct kcm_sock, wait_rx_list);
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
psock->rx_kcm = kcm;
- kcm->rx_psock = psock;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_psock, psock);
spin_unlock_bh(&mux->rx_lock);
@@ -312,7 +316,8 @@ static void unreserve_rx_kcm(struct kcm_psock *psock,
spin_lock_bh(&mux->rx_lock);
psock->rx_kcm = NULL;
- kcm->rx_psock = NULL;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_psock, NULL);
/* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
* kcm_rfree
@@ -1060,15 +1065,18 @@ partial_message:
out_error:
kcm_push(kcm);
- if (copied && sock->type == SOCK_SEQPACKET) {
+ if (sock->type == SOCK_SEQPACKET) {
/* Wrote some bytes before encountering an
* error, return partial success.
*/
- goto partial_message;
- }
-
- if (head != kcm->seq_skb)
+ if (copied)
+ goto partial_message;
+ if (head != kcm->seq_skb)
+ kfree_skb(head);
+ } else {
kfree_skb(head);
+ kcm->seq_skb = NULL;
+ }
err = sk_stream_error(sk, msg->msg_flags, err);
@@ -1080,53 +1088,18 @@ out_error:
return err;
}
-static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
- long timeo, int *err)
-{
- struct sk_buff *skb;
-
- while (!(skb = skb_peek(&sk->sk_receive_queue))) {
- if (sk->sk_err) {
- *err = sock_error(sk);
- return NULL;
- }
-
- if (sock_flag(sk, SOCK_DONE))
- return NULL;
-
- if ((flags & MSG_DONTWAIT) || !timeo) {
- *err = -EAGAIN;
- return NULL;
- }
-
- sk_wait_data(sk, &timeo, NULL);
-
- /* Handle signals */
- if (signal_pending(current)) {
- *err = sock_intr_errno(timeo);
- return NULL;
- }
- }
-
- return skb;
-}
-
static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
+ int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct kcm_sock *kcm = kcm_sk(sk);
int err = 0;
- long timeo;
struct strp_msg *stm;
int copied = 0;
struct sk_buff *skb;
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- lock_sock(sk);
-
- skb = kcm_wait_data(sk, flags, timeo, &err);
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
goto out;
@@ -1157,14 +1130,11 @@ msg_finished:
/* Finished with message */
msg->msg_flags |= MSG_EOR;
KCM_STATS_INCR(kcm->stats.rx_msgs);
- skb_unlink(skb, &sk->sk_receive_queue);
- kfree_skb(skb);
}
}
out:
- release_sock(sk);
-
+ skb_free_datagram(sk, skb);
return copied ? : err;
}
@@ -1172,9 +1142,9 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
{
+ int noblock = flags & MSG_DONTWAIT;
struct sock *sk = sock->sk;
struct kcm_sock *kcm = kcm_sk(sk);
- long timeo;
struct strp_msg *stm;
int err = 0;
ssize_t copied;
@@ -1182,11 +1152,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
/* Only support splice for SOCKSEQPACKET */
- timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
-
- lock_sock(sk);
-
- skb = kcm_wait_data(sk, flags, timeo, &err);
+ skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
goto err_out;
@@ -1214,13 +1180,11 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
* finish reading the message.
*/
- release_sock(sk);
-
+ skb_free_datagram(sk, skb);
return copied;
err_out:
- release_sock(sk);
-
+ skb_free_datagram(sk, skb);
return err;
}
@@ -1240,7 +1204,8 @@ static void kcm_recv_disable(struct kcm_sock *kcm)
if (!kcm->rx_psock) {
if (kcm->rx_wait) {
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
}
requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
@@ -1412,12 +1377,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
psock->sk = csk;
psock->bpf_prog = prog;
- err = strp_init(&psock->strp, csk, &cb);
- if (err) {
- kmem_cache_free(kcm_psockp, psock);
- goto out;
- }
-
write_lock_bh(&csk->sk_callback_lock);
/* Check if sk_user_data is aready by KCM or someone else.
@@ -1425,13 +1384,18 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
*/
if (csk->sk_user_data) {
write_unlock_bh(&csk->sk_callback_lock);
- strp_stop(&psock->strp);
- strp_done(&psock->strp);
kmem_cache_free(kcm_psockp, psock);
err = -EALREADY;
goto out;
}
+ err = strp_init(&psock->strp, csk, &cb);
+ if (err) {
+ write_unlock_bh(&csk->sk_callback_lock);
+ kmem_cache_free(kcm_psockp, psock);
+ goto out;
+ }
+
psock->save_data_ready = csk->sk_data_ready;
psock->save_write_space = csk->sk_write_space;
psock->save_state_change = csk->sk_state_change;
@@ -1794,7 +1758,8 @@ static void kcm_done(struct kcm_sock *kcm)
if (kcm->rx_wait) {
list_del(&kcm->wait_rx_list);
- kcm->rx_wait = false;
+ /* paired with lockless reads in kcm_rfree() */
+ WRITE_ONCE(kcm->rx_wait, false);
}
/* Move any pending receive messages to other kcm sockets */
requeue_rx_msgs(mux, &sk->sk_receive_queue);
@@ -1839,10 +1804,10 @@ static int kcm_release(struct socket *sock)
kcm = kcm_sk(sk);
mux = kcm->mux;
+ lock_sock(sk);
sock_orphan(sk);
kfree_skb(kcm->seq_skb);
- lock_sock(sk);
/* Purge queue under lock to avoid race condition with tx_work trying
* to act when queue is nonempty. If tx_work runs after this point
* it will just return.
@@ -2021,6 +1986,8 @@ static __net_exit void kcm_exit_net(struct net *net)
* that all multiplexors and psocks have been destroyed.
*/
WARN_ON(!list_empty(&knet->mux_list));
+
+ mutex_destroy(&knet->mutex);
}
static struct pernet_operations kcm_net_ops = {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index e340e97224c3..47ffa69ca6f6 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1707,9 +1707,12 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
pfk->registered |= (1<<hdr->sadb_msg_satype);
}
+ mutex_lock(&pfkey_mutex);
xfrm_probe_algs();
- supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
+ supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO);
+ mutex_unlock(&pfkey_mutex);
+
if (!supp_skb) {
if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
pfk->registered &= ~(1<<hdr->sadb_msg_satype);
@@ -1855,9 +1858,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
- if ((xfilter->sadb_x_filter_splen >=
+ if ((xfilter->sadb_x_filter_splen >
(sizeof(xfrm_address_t) << 3)) ||
- (xfilter->sadb_x_filter_dplen >=
+ (xfilter->sadb_x_filter_dplen >
(sizeof(xfrm_address_t) << 3))) {
mutex_unlock(&pfk->dump_lock);
return -EINVAL;
@@ -1947,7 +1950,8 @@ static u32 gen_reqid(struct net *net)
}
static int
-parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
+parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol,
+ struct sadb_x_ipsecrequest *rq)
{
struct net *net = xp_net(xp);
struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr;
@@ -1965,9 +1969,12 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0)
return -EINVAL;
t->mode = mode;
- if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE)
+ if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) {
+ if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) &&
+ pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND)
+ return -EINVAL;
t->optional = 1;
- else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
+ } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
t->reqid = rq->sadb_x_ipsecrequest_reqid;
if (t->reqid > IPSEC_MANUAL_REQID_MAX)
t->reqid = 0;
@@ -2009,7 +2016,7 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
rq->sadb_x_ipsecrequest_len < sizeof(*rq))
return -EINVAL;
- if ((err = parse_ipsecrequest(xp, rq)) < 0)
+ if ((err = parse_ipsecrequest(xp, pol, rq)) < 0)
return err;
len -= rq->sadb_x_ipsecrequest_len;
rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len);
@@ -2413,7 +2420,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa
return err;
}
- xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+ xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
pol->sadb_x_policy_dir - 1, &sel, pol_ctx,
1, &err);
security_xfrm_policy_free(pol_ctx);
@@ -2633,7 +2640,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb,
}
return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i,
- kma ? &k : NULL, net, NULL);
+ kma ? &k : NULL, net, NULL, 0);
out:
return err;
@@ -2664,7 +2671,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_
return -EINVAL;
delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2);
- xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN,
+ xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN,
dir, pol->sadb_x_policy_id, delete, &err);
if (xp == NULL)
return -ENOENT;
@@ -2836,6 +2843,10 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
+ /* Non-zero return value of pfkey_broadcast() does not always signal
+ * an error and even on an actual error we may still want to process
+ * the message so rather ignore the return value.
+ */
pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
@@ -2908,7 +2919,7 @@ static int count_ah_combs(const struct xfrm_tmpl *t)
break;
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
return sz + sizeof(struct sadb_prop);
@@ -2926,7 +2937,7 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!ealg->pfkey_supported)
continue;
- if (!(ealg_tmpl_set(t, ealg) && ealg->available))
+ if (!(ealg_tmpl_set(t, ealg)))
continue;
for (k = 1; ; k++) {
@@ -2937,16 +2948,17 @@ static int count_esp_combs(const struct xfrm_tmpl *t)
if (!aalg->pfkey_supported)
continue;
- if (aalg_tmpl_set(t, aalg) && aalg->available)
+ if (aalg_tmpl_set(t, aalg))
sz += sizeof(struct sadb_comb);
}
}
return sz + sizeof(struct sadb_prop);
}
-static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
{
struct sadb_prop *p;
+ int sz = 0;
int i;
p = skb_put(skb, sizeof(struct sadb_prop));
@@ -2974,13 +2986,17 @@ static void dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
c->sadb_comb_soft_addtime = 20*60*60;
c->sadb_comb_hard_usetime = 8*60*60;
c->sadb_comb_soft_usetime = 7*60*60;
+ sz += sizeof(*c);
}
}
+
+ return sz + sizeof(*p);
}
-static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
+static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
{
struct sadb_prop *p;
+ int sz = 0;
int i, k;
p = skb_put(skb, sizeof(struct sadb_prop));
@@ -3022,8 +3038,11 @@ static void dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t)
c->sadb_comb_soft_addtime = 20*60*60;
c->sadb_comb_hard_usetime = 8*60*60;
c->sadb_comb_soft_usetime = 7*60*60;
+ sz += sizeof(*c);
}
}
+
+ return sz + sizeof(*p);
}
static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c)
@@ -3153,6 +3172,7 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
struct sadb_x_sec_ctx *sec_ctx;
struct xfrm_sec_ctx *xfrm_ctx;
int ctx_size = 0;
+ int alg_size = 0;
sockaddr_size = pfkey_sockaddr_size(x->props.family);
if (!sockaddr_size)
@@ -3164,16 +3184,16 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
sizeof(struct sadb_x_policy);
if (x->id.proto == IPPROTO_AH)
- size += count_ah_combs(t);
+ alg_size = count_ah_combs(t);
else if (x->id.proto == IPPROTO_ESP)
- size += count_esp_combs(t);
+ alg_size = count_esp_combs(t);
if ((xfrm_ctx = x->security)) {
ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
}
- skb = alloc_skb(size + 16, GFP_ATOMIC);
+ skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC);
if (skb == NULL)
return -ENOMEM;
@@ -3227,10 +3247,13 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
pol->sadb_x_policy_priority = xp->priority;
/* Set sadb_comb's. */
+ alg_size = 0;
if (x->id.proto == IPPROTO_AH)
- dump_ah_combs(skb, t);
+ alg_size = dump_ah_combs(skb, t);
else if (x->id.proto == IPPROTO_ESP)
- dump_esp_combs(skb, t);
+ alg_size = dump_esp_combs(skb, t);
+
+ hdr->sadb_msg_len += alg_size / 8;
/* security context */
if (xfrm_ctx) {
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 9abdc0a04d99..bf2a53d455a1 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -889,8 +889,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
}
if (tunnel->version == L2TP_HDR_VER_3 &&
- l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
+ l2tp_session_dec_refcount(session);
goto error;
+ }
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 2ff25c445b82..7342344d99a9 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -272,8 +272,6 @@ static void l2tp_ip6_destroy_sock(struct sock *sk)
if (tunnel)
l2tp_tunnel_delete(tunnel);
-
- inet6_destroy_sock(sk);
}
static int l2tp_ip6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
@@ -519,13 +517,13 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int transhdrlen = 4; /* zero session-id */
- int ulen = len + transhdrlen;
+ int ulen;
int err;
/* Rough check on arithmetic overflow,
better check is made in ip6_append_data().
*/
- if (len > INT_MAX)
+ if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
/* Mirror BSD error message compatibility */
@@ -650,6 +648,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
+ ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/lapb/lapb_out.c b/net/lapb/lapb_out.c
index eda726e22f64..621c66f00117 100644
--- a/net/lapb/lapb_out.c
+++ b/net/lapb/lapb_out.c
@@ -87,7 +87,8 @@ void lapb_kick(struct lapb_cb *lapb)
skb = skb_dequeue(&lapb->write_queue);
do {
- if ((skbn = skb_clone(skb, GFP_ATOMIC)) == NULL) {
+ skbn = skb_copy(skb, GFP_ATOMIC);
+ if (!skbn) {
skb_queue_head(&lapb->write_queue, skb);
break;
}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index bcba579e292f..5cba9199c3c9 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -98,8 +98,16 @@ static inline u8 llc_ui_header_len(struct sock *sk, struct sockaddr_llc *addr)
{
u8 rc = LLC_PDU_LEN_U;
- if (addr->sllc_test || addr->sllc_xid)
+ if (addr->sllc_test)
rc = LLC_PDU_LEN_U;
+ else if (addr->sllc_xid)
+ /* We need to expand header to sizeof(struct llc_xid_info)
+ * since llc_pdu_init_as_xid_cmd() sets 4,5,6 bytes of LLC header
+ * as XID PDU. In llc_ui_sendmsg() we reserved header size and then
+ * filled all other space with user data. If we won't reserve this
+ * bytes, llc_pdu_init_as_xid_cmd() will overwrite user data
+ */
+ rc = LLC_PDU_LEN_U_XID;
else if (sk->sk_type == SOCK_STREAM)
rc = LLC_PDU_LEN_I;
return rc;
@@ -268,6 +276,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
{
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
+ struct net_device *dev = NULL;
struct llc_sap *sap;
int rc = -EINVAL;
@@ -279,14 +288,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
goto out;
rc = -ENODEV;
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
- if (llc->dev && addr->sllc_arphrd != llc->dev->type) {
- dev_put(llc->dev);
- llc->dev = NULL;
+ dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);
+ if (dev && addr->sllc_arphrd != dev->type) {
+ dev_put(dev);
+ dev = NULL;
}
} else
- llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
- if (!llc->dev)
+ dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd);
+ if (!dev)
goto out;
rc = -EUSERS;
llc->laddr.lsap = llc_ui_autoport();
@@ -296,6 +305,11 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
sap = llc_sap_open(llc->laddr.lsap, NULL);
if (!sap)
goto out;
+
+ /* Note: We do not expect errors from this point. */
+ llc->dev = dev;
+ dev = NULL;
+
memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
/* assign new connection to its SAP */
@@ -303,6 +317,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr)
sock_reset_flag(sk, SOCK_ZAPPED);
rc = 0;
out:
+ dev_put(dev);
return rc;
}
@@ -325,6 +340,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr;
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
+ struct net_device *dev = NULL;
struct llc_sap *sap;
int rc = -EINVAL;
@@ -341,25 +357,26 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
rc = -ENODEV;
rcu_read_lock();
if (sk->sk_bound_dev_if) {
- llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
- if (llc->dev) {
+ dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if);
+ if (dev) {
if (is_zero_ether_addr(addr->sllc_mac))
- memcpy(addr->sllc_mac, llc->dev->dev_addr,
+ memcpy(addr->sllc_mac, dev->dev_addr,
IFHWADDRLEN);
- if (addr->sllc_arphrd != llc->dev->type ||
+ if (addr->sllc_arphrd != dev->type ||
!ether_addr_equal(addr->sllc_mac,
- llc->dev->dev_addr)) {
+ dev->dev_addr)) {
rc = -EINVAL;
- llc->dev = NULL;
+ dev = NULL;
}
}
- } else
- llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
+ } else {
+ dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd,
addr->sllc_mac);
- if (llc->dev)
- dev_hold(llc->dev);
+ }
+ if (dev)
+ dev_hold(dev);
rcu_read_unlock();
- if (!llc->dev)
+ if (!dev)
goto out;
if (!addr->sllc_sap) {
rc = -EUSERS;
@@ -392,6 +409,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
goto out_put;
}
}
+
+ /* Note: We do not expect errors from this point. */
+ llc->dev = dev;
+ dev = NULL;
+
llc->laddr.lsap = addr->sllc_sap;
memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN);
memcpy(&llc->addr, addr, sizeof(llc->addr));
@@ -402,6 +424,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
out_put:
llc_sap_put(sap);
out:
+ dev_put(dev);
release_sock(sk);
return rc;
}
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index 82cb93f66b9b..f4fb309185ce 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -127,8 +127,14 @@ static inline int llc_fixup_skb(struct sk_buff *skb)
skb->transport_header += llc_len;
skb_pull(skb, llc_len);
if (skb->protocol == htons(ETH_P_802_2)) {
- __be16 pdulen = eth_hdr(skb)->h_proto;
- s32 data_size = ntohs(pdulen) - llc_len;
+ __be16 pdulen;
+ s32 data_size;
+
+ if (skb->mac_len < ETH_HLEN)
+ return 0;
+
+ pdulen = eth_hdr(skb)->h_proto;
+ data_size = ntohs(pdulen) - llc_len;
if (data_size < 0 ||
!pskb_may_pull(skb, data_size))
@@ -162,9 +168,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
void (*sta_handler)(struct sk_buff *skb);
void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
- if (!net_eq(dev_net(dev), &init_net))
- goto drop;
-
/*
* When the interface is in promisc. mode, drop all the crap that it
* receives, do not try to analyse it.
diff --git a/net/llc/llc_s_ac.c b/net/llc/llc_s_ac.c
index 7ae4cc684d3a..df26557a0244 100644
--- a/net/llc/llc_s_ac.c
+++ b/net/llc/llc_s_ac.c
@@ -79,7 +79,7 @@ int llc_sap_action_send_xid_c(struct llc_sap *sap, struct sk_buff *skb)
struct llc_sap_state_ev *ev = llc_sap_ev(skb);
int rc;
- llc_pdu_header_init(skb, LLC_PDU_TYPE_U, ev->saddr.lsap,
+ llc_pdu_header_init(skb, LLC_PDU_TYPE_U_XID, ev->saddr.lsap,
ev->daddr.lsap, LLC_PDU_CMD);
llc_pdu_init_as_xid_cmd(skb, LLC_XID_NULL_CLASS_2, 0);
rc = llc_mac_hdr_init(skb, ev->saddr.mac, ev->daddr.mac);
@@ -153,6 +153,9 @@ int llc_sap_action_send_test_r(struct llc_sap *sap, struct sk_buff *skb)
int rc = 1;
u32 data_size;
+ if (skb->mac_len < ETH_HLEN)
+ return 1;
+
llc_pdu_decode_sa(skb, mac_da);
llc_pdu_decode_da(skb, mac_sa);
llc_pdu_decode_ssap(skb, &dsap);
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index c29170e767a8..64e2c67e16ba 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -77,6 +77,9 @@ static int llc_station_ac_send_test_r(struct sk_buff *skb)
u32 data_size;
struct sk_buff *nskb;
+ if (skb->mac_len < ETH_HLEN)
+ goto out;
+
/* The test request command is type U (llc_len = 3) */
data_size = ntohs(eth_hdr(skb)->h_proto) - 3;
nskb = llc_alloc_frame(NULL, skb->dev, LLC_PDU_TYPE_U, data_size);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 54821fb1a960..79138225e880 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -109,7 +109,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.addba_req.start_seq_num =
cpu_to_le16(start_seq_num << 4);
- ieee80211_tx_skb(sdata, skb);
+ ieee80211_tx_skb_tid(sdata, skb, tid);
}
void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn)
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b6670e74aeb7..77d8ed184c1c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -664,7 +664,8 @@ void sta_set_rate_info_tx(struct sta_info *sta,
u16 brate;
sband = ieee80211_get_sband(sta->sdata);
- if (sband) {
+ WARN_ON_ONCE(sband && !sband->bitrates);
+ if (sband && sband->bitrates) {
brate = sband->bitrates[rate->idx].bitrate;
rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift);
}
@@ -1547,8 +1548,10 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- sta->sdata->u.vlan.sta)
+ sta->sdata->u.vlan.sta) {
+ ieee80211_clear_fast_rx(sta);
RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
+ }
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
ieee80211_vif_dec_num_mcast(sta->sdata);
@@ -1820,13 +1823,11 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
const struct mesh_setup *setup)
{
u8 *new_ie;
- const u8 *old_ie;
struct ieee80211_sub_if_data *sdata = container_of(ifmsh,
struct ieee80211_sub_if_data, u.mesh);
/* allocate information elements */
new_ie = NULL;
- old_ie = ifmsh->ie;
if (setup->ie_len) {
new_ie = kmemdup(setup->ie, setup->ie_len,
@@ -1836,7 +1837,6 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
}
ifmsh->ie_len = setup->ie_len;
ifmsh->ie = new_ie;
- kfree(old_ie);
/* now copy the rest of the setup parameters */
ifmsh->mesh_id_len = setup->mesh_id_len;
@@ -2452,6 +2452,10 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy,
else
*dbm = sdata->vif.bss_conf.txpower;
+ /* INT_MIN indicates no power level was set yet */
+ if (*dbm == INT_MIN)
+ return -EINVAL;
+
return 0;
}
@@ -2776,14 +2780,14 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
continue;
for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) {
- if (~sdata->rc_rateidx_mcs_mask[i][j]) {
+ if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) {
sdata->rc_has_mcs_mask[i] = true;
break;
}
}
for (j = 0; j < NL80211_VHT_NSS_MAX; j++) {
- if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) {
+ if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) {
sdata->rc_has_vht_mcs_mask[i] = true;
break;
}
@@ -3136,9 +3140,6 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
case NL80211_IFTYPE_MESH_POINT: {
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- if (params->chandef.width != sdata->vif.bss_conf.chandef.width)
- return -EINVAL;
-
/* changes into another band are not supported */
if (sdata->vif.bss_conf.chandef.chan->band !=
params->chandef.chan->band)
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index d9558ffb8acf..6d95ce1c4a27 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1638,12 +1638,9 @@ int ieee80211_vif_use_reserved_context(struct ieee80211_sub_if_data *sdata)
if (new_ctx->replace_state == IEEE80211_CHANCTX_REPLACE_NONE) {
if (old_ctx)
- err = ieee80211_vif_use_reserved_reassign(sdata);
- else
- err = ieee80211_vif_use_reserved_assign(sdata);
+ return ieee80211_vif_use_reserved_reassign(sdata);
- if (err)
- return err;
+ return ieee80211_vif_use_reserved_assign(sdata);
}
/*
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 4105081dc1df..6f390c2e4c8e 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -80,6 +80,7 @@ static const char * const sta_flag_names[] = {
FLAG(MPSP_OWNER),
FLAG(MPSP_RECIPIENT),
FLAG(PS_DELIVER),
+ FLAG(USES_ENCRYPTION),
#undef FLAG
};
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index f783d1377d9a..9f0f437a09b9 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -128,8 +128,11 @@ int drv_sta_state(struct ieee80211_local *local,
} else if (old_state == IEEE80211_STA_AUTH &&
new_state == IEEE80211_STA_ASSOC) {
ret = drv_sta_add(local, sdata, &sta->sta);
- if (ret == 0)
+ if (ret == 0) {
sta->uploaded = true;
+ if (rcu_access_pointer(sta->sta.rates))
+ drv_sta_rate_tbl_update(local, sdata, &sta->sta);
+ }
} else if (old_state == IEEE80211_STA_ASSOC &&
new_state == IEEE80211_STA_AUTH) {
drv_sta_remove(local, sdata, &sta->sta);
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f0f5fedb8caa..38383b7e4419 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -544,6 +544,10 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
sdata_assert_lock(sdata);
+ /* When not connected/joined, sending CSA doesn't make sense. */
+ if (ifibss->state != IEEE80211_IBSS_MLME_JOINED)
+ return -ENOLINK;
+
/* update cfg80211 bss information with the new channel */
if (!is_zero_ether_addr(ifibss->bssid)) {
cbss = cfg80211_get_bss(sdata->local->hw.wiphy,
@@ -1861,6 +1865,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
/* remove beacon */
kfree(sdata->u.ibss.ie);
+ sdata->u.ibss.ie = NULL;
+ sdata->u.ibss.ie_len = 0;
/* on the next join, re-program HT parameters */
memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa));
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index a879d8071712..c5e5e978d3ed 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -53,12 +53,6 @@ struct ieee80211_local;
#define IEEE80211_ENCRYPT_HEADROOM 8
#define IEEE80211_ENCRYPT_TAILROOM 18
-/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
- * reception of at least three fragmented frames. This limit can be increased
- * by changing this define, at the cost of slower frame reassembly and
- * increased memory use (about 2 kB of RAM per entry). */
-#define IEEE80211_FRAGMENT_MAX 4
-
/* power level hasn't been configured (or set to automatic) */
#define IEEE80211_UNSET_POWER_LEVEL INT_MIN
@@ -91,18 +85,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS];
#define IEEE80211_MAX_NAN_INSTANCE_ID 255
-struct ieee80211_fragment_entry {
- struct sk_buff_head skb_list;
- unsigned long first_frag_time;
- u16 seq;
- u16 extra_len;
- u16 last_frag;
- u8 rx_queue;
- bool check_sequential_pn; /* needed for CCMP/GCMP */
- u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
-};
-
-
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -243,8 +225,15 @@ struct ieee80211_rx_data {
*/
int security_idx;
- u32 tkip_iv32;
- u16 tkip_iv16;
+ union {
+ struct {
+ u32 iv32;
+ u16 iv16;
+ } tkip;
+ struct {
+ u8 pn[IEEE80211_CCMP_PN_LEN];
+ } ccm_gcm;
+ };
};
struct ieee80211_csa_settings {
@@ -638,6 +627,26 @@ struct mesh_csa_settings {
struct cfg80211_csa_settings settings;
};
+/**
+ * struct mesh_table
+ *
+ * @known_gates: list of known mesh gates and their mpaths by the station. The
+ * gate's mpath may or may not be resolved and active.
+ * @gates_lock: protects updates to known_gates
+ * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
+ * @walk_head: linked list containing all mesh_path objects
+ * @walk_lock: lock protecting walk_head
+ * @entries: number of entries in the table
+ */
+struct mesh_table {
+ struct hlist_head known_gates;
+ spinlock_t gates_lock;
+ struct rhashtable rhead;
+ struct hlist_head walk_head;
+ spinlock_t walk_lock;
+ atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */
+};
+
struct ieee80211_if_mesh {
struct timer_list housekeeping_timer;
struct timer_list mesh_path_timer;
@@ -712,8 +721,8 @@ struct ieee80211_if_mesh {
/* offset from skb->data while building IE */
int meshconf_offset;
- struct mesh_table *mesh_paths;
- struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */
+ struct mesh_table mesh_paths;
+ struct mesh_table mpp_paths; /* Store paths for MPP&MAP */
int mesh_paths_generation;
int mpp_paths_generation;
};
@@ -884,9 +893,7 @@ struct ieee80211_sub_if_data {
char name[IFNAMSIZ];
- /* Fragment table for host-based reassembly */
- struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
- unsigned int fragment_next;
+ struct ieee80211_fragment_cache frags;
/* TID bitmap for NoAck policy */
u16 noack_map;
@@ -1051,6 +1058,7 @@ enum queue_stop_reason {
IEEE80211_QUEUE_STOP_REASON_FLUSH,
IEEE80211_QUEUE_STOP_REASON_TDLS_TEARDOWN,
IEEE80211_QUEUE_STOP_REASON_RESERVE_TID,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE,
IEEE80211_QUEUE_STOP_REASONS,
};
@@ -1084,6 +1092,9 @@ struct tpt_led_trigger {
* a scan complete for an aborted scan.
* @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
* cancelled.
+ * @SCAN_BEACON_WAIT: Set whenever we're passive scanning because of radar/no-IR
+ * and could send a probe request after receiving a beacon.
+ * @SCAN_BEACON_DONE: Beacon received, we can now send a probe request
*/
enum {
SCAN_SW_SCANNING,
@@ -1092,6 +1103,8 @@ enum {
SCAN_COMPLETED,
SCAN_ABORTED,
SCAN_HW_CANCELLED,
+ SCAN_BEACON_WAIT,
+ SCAN_BEACON_DONE,
};
/**
@@ -1410,7 +1423,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata)
rcu_read_lock();
chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
- if (WARN_ON_ONCE(!chanctx_conf)) {
+ if (!chanctx_conf) {
rcu_read_unlock();
return NULL;
}
@@ -2203,4 +2216,7 @@ extern const struct ethtool_ops ieee80211_ethtool_ops;
#define debug_noinline
#endif
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache);
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache);
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 152d4365f961..358028a09ce4 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -7,7 +7,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1111,16 +1111,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
*/
static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata)
{
- int i;
-
/* free extra data */
ieee80211_free_keys(sdata, false);
ieee80211_debugfs_remove_netdev(sdata);
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- __skb_queue_purge(&sdata->fragments[i].skb_list);
- sdata->fragment_next = 0;
+ ieee80211_destroy_frag_cache(&sdata->frags);
if (ieee80211_vif_is_mesh(&sdata->vif))
ieee80211_mesh_teardown_sdata(sdata);
@@ -1542,6 +1538,10 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
if (ret)
return ret;
+ ieee80211_stop_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
+ synchronize_net();
+
ieee80211_do_stop(sdata, false);
ieee80211_teardown_sdata(sdata);
@@ -1562,6 +1562,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
err = ieee80211_do_open(&sdata->wdev, false);
WARN(err, "type change: do_open returned %d", err);
+ ieee80211_wake_vif_queues(local, sdata,
+ IEEE80211_QUEUE_STOP_REASON_IFTYPE_CHANGE);
return ret;
}
@@ -1826,8 +1828,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sdata->wdev.wiphy = local->hw.wiphy;
sdata->local = local;
- for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++)
- skb_queue_head_init(&sdata->fragments[i].skb_list);
+ ieee80211_init_frag_cache(&sdata->frags);
INIT_LIST_HEAD(&sdata->key_list);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index f20bb39f492d..7fc55177db84 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -341,6 +341,7 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
if (sta) {
if (pairwise) {
rcu_assign_pointer(sta->ptk[idx], new);
+ set_sta_flag(sta, WLAN_STA_USES_ENCRYPTION);
sta->ptk_idx = idx;
ieee80211_check_fast_xmit(sta);
} else {
@@ -653,6 +654,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
+ static atomic_t key_color = ATOMIC_INIT(0);
struct ieee80211_local *local = sdata->local;
struct ieee80211_key *old_key;
int idx = key->conf.keyidx;
@@ -688,6 +690,12 @@ int ieee80211_key_link(struct ieee80211_key *key,
key->sdata = sdata;
key->sta = sta;
+ /*
+ * Assign a unique ID to every key so we can easily prevent mixed
+ * key and fragment cache attacks.
+ */
+ key->color = atomic_inc_return(&key_color);
+
increment_tailroom_need_count(sdata);
ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index ebdb80b85dc3..d8e187bcb751 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -127,6 +127,8 @@ struct ieee80211_key {
} debugfs;
#endif
+ unsigned int color;
+
/*
* key config, must be last because it contains key
* material as variable length member
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 68db2a356443..e8c4e9c0c5a0 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -931,8 +931,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
continue;
if (!dflt_chandef.chan) {
+ /*
+ * Assign the first enabled channel to dflt_chandef
+ * from the list of channels
+ */
+ for (i = 0; i < sband->n_channels; i++)
+ if (!(sband->channels[i].flags &
+ IEEE80211_CHAN_DISABLED))
+ break;
+ /* if none found then use the first anyway */
+ if (i == sband->n_channels)
+ i = 0;
cfg80211_chandef_create(&dflt_chandef,
- &sband->channels[0],
+ &sband->channels[i],
NL80211_CHAN_NO_HT);
/* init channel we're on */
if (!local->use_chanctx && !local->_oper_chandef.chan) {
@@ -1069,8 +1080,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
if (local->hw.wiphy->max_scan_ie_len)
local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
- WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes,
- local->hw.n_cipher_schemes));
+ if (WARN_ON(!ieee80211_cs_list_valid(local->hw.cipher_schemes,
+ local->hw.n_cipher_schemes))) {
+ result = -EINVAL;
+ goto fail_workqueue;
+ }
result = ieee80211_init_cipher_suites(local);
if (result < 0)
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index e84103b40534..e60444039e76 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -128,26 +128,6 @@ struct mesh_path {
bool is_gate;
};
-/**
- * struct mesh_table
- *
- * @known_gates: list of known mesh gates and their mpaths by the station. The
- * gate's mpath may or may not be resolved and active.
- * @gates_lock: protects updates to known_gates
- * @rhead: the rhashtable containing struct mesh_paths, keyed by dest addr
- * @walk_head: linked list containging all mesh_path objects
- * @walk_lock: lock protecting walk_head
- * @entries: number of entries in the table
- */
-struct mesh_table {
- struct hlist_head known_gates;
- spinlock_t gates_lock;
- struct rhashtable rhead;
- struct hlist_head walk_head;
- spinlock_t walk_lock;
- atomic_t entries; /* Up to MAX_MESH_NEIGHBOURS */
-};
-
/* Recent multicast cache */
/* RMC_BUCKETS must be a power of 2, maximum 256 */
#define RMC_BUCKETS 256
@@ -300,7 +280,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta);
void mesh_path_flush_pending(struct mesh_path *mpath);
void mesh_path_tx_pending(struct mesh_path *mpath);
-int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata);
+void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata);
void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata);
int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr);
void mesh_path_timer(struct timer_list *t);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 6219b6b0c7e1..18158855d98c 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -355,7 +355,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
*/
tx_time = (device_constant + 10 * test_frame_len / rate);
estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
- result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
+ result = ((u64)tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
return (u32)result;
}
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 4fc720c77e37..8efb2bf08bf4 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -50,28 +50,24 @@ static void mesh_path_rht_free(void *ptr, void *tblptr)
mesh_path_free_rcu(tbl, mpath);
}
-static struct mesh_table *mesh_table_alloc(void)
+static void mesh_table_init(struct mesh_table *tbl)
{
- struct mesh_table *newtbl;
+ INIT_HLIST_HEAD(&tbl->known_gates);
+ INIT_HLIST_HEAD(&tbl->walk_head);
+ atomic_set(&tbl->entries, 0);
+ spin_lock_init(&tbl->gates_lock);
+ spin_lock_init(&tbl->walk_lock);
- newtbl = kmalloc(sizeof(struct mesh_table), GFP_ATOMIC);
- if (!newtbl)
- return NULL;
-
- INIT_HLIST_HEAD(&newtbl->known_gates);
- INIT_HLIST_HEAD(&newtbl->walk_head);
- atomic_set(&newtbl->entries, 0);
- spin_lock_init(&newtbl->gates_lock);
- spin_lock_init(&newtbl->walk_lock);
-
- return newtbl;
+ /* rhashtable_init() may fail only in case of wrong
+ * mesh_rht_params
+ */
+ WARN_ON(rhashtable_init(&tbl->rhead, &mesh_rht_params));
}
static void mesh_table_free(struct mesh_table *tbl)
{
rhashtable_free_and_destroy(&tbl->rhead,
mesh_path_rht_free, tbl);
- kfree(tbl);
}
/**
@@ -239,13 +235,13 @@ static struct mesh_path *mpath_lookup(struct mesh_table *tbl, const u8 *dst,
struct mesh_path *
mesh_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst)
{
- return mpath_lookup(sdata->u.mesh.mesh_paths, dst, sdata);
+ return mpath_lookup(&sdata->u.mesh.mesh_paths, dst, sdata);
}
struct mesh_path *
mpp_path_lookup(struct ieee80211_sub_if_data *sdata, const u8 *dst)
{
- return mpath_lookup(sdata->u.mesh.mpp_paths, dst, sdata);
+ return mpath_lookup(&sdata->u.mesh.mpp_paths, dst, sdata);
}
static struct mesh_path *
@@ -282,7 +278,7 @@ __mesh_path_lookup_by_idx(struct mesh_table *tbl, int idx)
struct mesh_path *
mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
{
- return __mesh_path_lookup_by_idx(sdata->u.mesh.mesh_paths, idx);
+ return __mesh_path_lookup_by_idx(&sdata->u.mesh.mesh_paths, idx);
}
/**
@@ -297,7 +293,7 @@ mesh_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
struct mesh_path *
mpp_path_lookup_by_idx(struct ieee80211_sub_if_data *sdata, int idx)
{
- return __mesh_path_lookup_by_idx(sdata->u.mesh.mpp_paths, idx);
+ return __mesh_path_lookup_by_idx(&sdata->u.mesh.mpp_paths, idx);
}
/**
@@ -310,7 +306,7 @@ int mesh_path_add_gate(struct mesh_path *mpath)
int err;
rcu_read_lock();
- tbl = mpath->sdata->u.mesh.mesh_paths;
+ tbl = &mpath->sdata->u.mesh.mesh_paths;
spin_lock_bh(&mpath->state_lock);
if (mpath->is_gate) {
@@ -420,7 +416,7 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
if (!new_mpath)
return ERR_PTR(-ENOMEM);
- tbl = sdata->u.mesh.mesh_paths;
+ tbl = &sdata->u.mesh.mesh_paths;
spin_lock_bh(&tbl->walk_lock);
do {
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
@@ -469,7 +465,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
return -ENOMEM;
memcpy(new_mpath->mpp, mpp, ETH_ALEN);
- tbl = sdata->u.mesh.mpp_paths;
+ tbl = &sdata->u.mesh.mpp_paths;
spin_lock_bh(&tbl->walk_lock);
ret = rhashtable_lookup_insert_fast(&tbl->rhead,
@@ -498,7 +494,7 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
void mesh_plink_broken(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
+ struct mesh_table *tbl = &sdata->u.mesh.mesh_paths;
static const u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
struct mesh_path *mpath;
@@ -557,7 +553,7 @@ static void __mesh_path_del(struct mesh_table *tbl, struct mesh_path *mpath)
void mesh_path_flush_by_nexthop(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- struct mesh_table *tbl = sdata->u.mesh.mesh_paths;
+ struct mesh_table *tbl = &sdata->u.mesh.mesh_paths;
struct mesh_path *mpath;
struct hlist_node *n;
@@ -572,7 +568,7 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta)
static void mpp_flush_by_proxy(struct ieee80211_sub_if_data *sdata,
const u8 *proxy)
{
- struct mesh_table *tbl = sdata->u.mesh.mpp_paths;
+ struct mesh_table *tbl = &sdata->u.mesh.mpp_paths;
struct mesh_path *mpath;
struct hlist_node *n;
@@ -606,8 +602,8 @@ static void table_flush_by_iface(struct mesh_table *tbl)
*/
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
{
- table_flush_by_iface(sdata->u.mesh.mesh_paths);
- table_flush_by_iface(sdata->u.mesh.mpp_paths);
+ table_flush_by_iface(&sdata->u.mesh.mesh_paths);
+ table_flush_by_iface(&sdata->u.mesh.mpp_paths);
}
/**
@@ -653,7 +649,7 @@ int mesh_path_del(struct ieee80211_sub_if_data *sdata, const u8 *addr)
/* flush relevant mpp entries first */
mpp_flush_by_proxy(sdata, addr);
- err = table_path_del(sdata->u.mesh.mesh_paths, sdata, addr);
+ err = table_path_del(&sdata->u.mesh.mesh_paths, sdata, addr);
sdata->u.mesh.mesh_paths_generation++;
return err;
}
@@ -691,7 +687,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
struct mesh_path *gate;
bool copy = false;
- tbl = sdata->u.mesh.mesh_paths;
+ tbl = &sdata->u.mesh.mesh_paths;
rcu_read_lock();
hlist_for_each_entry_rcu(gate, &tbl->known_gates, gate_list) {
@@ -727,7 +723,7 @@ int mesh_path_send_to_gates(struct mesh_path *mpath)
void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
- kfree_skb(skb);
+ ieee80211_free_txskb(&sdata->local->hw, skb);
sdata->u.mesh.mshstats.dropped_frames_no_route++;
}
@@ -771,32 +767,10 @@ void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop)
mesh_path_tx_pending(mpath);
}
-int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata)
+void mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata)
{
- struct mesh_table *tbl_path, *tbl_mpp;
- int ret;
-
- tbl_path = mesh_table_alloc();
- if (!tbl_path)
- return -ENOMEM;
-
- tbl_mpp = mesh_table_alloc();
- if (!tbl_mpp) {
- ret = -ENOMEM;
- goto free_path;
- }
-
- rhashtable_init(&tbl_path->rhead, &mesh_rht_params);
- rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params);
-
- sdata->u.mesh.mesh_paths = tbl_path;
- sdata->u.mesh.mpp_paths = tbl_mpp;
-
- return 0;
-
-free_path:
- mesh_table_free(tbl_path);
- return ret;
+ mesh_table_init(&sdata->u.mesh.mesh_paths);
+ mesh_table_init(&sdata->u.mesh.mpp_paths);
}
static
@@ -818,12 +792,12 @@ void mesh_path_tbl_expire(struct ieee80211_sub_if_data *sdata,
void mesh_path_expire(struct ieee80211_sub_if_data *sdata)
{
- mesh_path_tbl_expire(sdata, sdata->u.mesh.mesh_paths);
- mesh_path_tbl_expire(sdata, sdata->u.mesh.mpp_paths);
+ mesh_path_tbl_expire(sdata, &sdata->u.mesh.mesh_paths);
+ mesh_path_tbl_expire(sdata, &sdata->u.mesh.mpp_paths);
}
void mesh_pathtbl_unregister(struct ieee80211_sub_if_data *sdata)
{
- mesh_table_free(sdata->u.mesh.mesh_paths);
- mesh_table_free(sdata->u.mesh.mpp_paths);
+ mesh_table_free(&sdata->u.mesh.mesh_paths);
+ mesh_table_free(&sdata->u.mesh.mpp_paths);
}
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 5b5b0f95ffd1..c7f47dba884e 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1022,8 +1022,8 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
case WLAN_SP_MESH_PEERING_OPEN:
if (!matches_local)
event = OPN_RJCT;
- if (!mesh_plink_free_count(sdata) ||
- (sta->mesh->plid && sta->mesh->plid != plid))
+ else if (!mesh_plink_free_count(sdata) ||
+ (sta->mesh->plid && sta->mesh->plid != plid))
event = OPN_IGNR;
else
event = OPN_ACPT;
@@ -1031,9 +1031,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
case WLAN_SP_MESH_PEERING_CONFIRM:
if (!matches_local)
event = CNF_RJCT;
- if (!mesh_plink_free_count(sdata) ||
- sta->mesh->llid != llid ||
- (sta->mesh->plid && sta->mesh->plid != plid))
+ else if (!mesh_plink_free_count(sdata) ||
+ sta->mesh->llid != llid ||
+ (sta->mesh->plid && sta->mesh->plid != plid))
event = CNF_IGNR;
else
event = CNF_ACPT;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c53a332f7d65..8603168b70e4 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1185,6 +1185,11 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata)
sdata->vif.csa_active = false;
ifmgd->csa_waiting_bcn = false;
+ /*
+ * If the CSA IE is still present on the beacon after the switch,
+ * we need to consider it as a new CSA (possibly to self).
+ */
+ ifmgd->beacon_crc_valid = false;
ret = drv_post_channel_switch(sdata);
if (ret) {
@@ -2346,11 +2351,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata,
u16 tx_time)
{
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
- u16 tid = ieee80211_get_tid(hdr);
- int ac = ieee80211_ac_from_tid(tid);
- struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac];
+ u16 tid;
+ int ac;
+ struct ieee80211_sta_tx_tspec *tx_tspec;
unsigned long now = jiffies;
+ if (!ieee80211_is_data_qos(hdr->frame_control))
+ return;
+
+ tid = ieee80211_get_tid(hdr);
+ ac = ieee80211_ac_from_tid(tid);
+ tx_tspec = &ifmgd->tx_tspec[ac];
+
if (likely(!tx_tspec->admitted_time))
return;
@@ -4776,7 +4788,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
*/
if (new_sta) {
u32 rates = 0, basic_rates = 0;
- bool have_higher_than_11mbit;
+ bool have_higher_than_11mbit = false;
int min_rate = INT_MAX, min_rate_index = -1;
const struct cfg80211_bss_ies *ies;
int shift = ieee80211_vif_get_shift(&sdata->vif);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 76f303fda3ed..954b932fd7b8 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -941,7 +941,8 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
if (old)
kfree_rcu(old, rcu_head);
- drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
+ if (sta->uploaded)
+ drv_sta_rate_tbl_update(hw_to_local(hw), sta->sdata, pubsta);
ieee80211_sta_set_expected_throughput(pubsta, sta_get_expected_throughput(sta));
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 07fb219327d6..1f3c420c01c7 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -276,7 +276,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
success = !!(info->flags & IEEE80211_TX_STAT_ACK);
for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
- if (ar[i].idx < 0)
+ if (ar[i].idx < 0 || !ar[i].count)
break;
ndx = rix_to_ndx(mi, ar[i].idx);
@@ -289,12 +289,6 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband,
mi->r[ndx].stats.success += success;
}
- if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0))
- mi->sample_packets++;
-
- if (mi->sample_deferred > 0)
- mi->sample_deferred--;
-
if (time_after(jiffies, mi->last_stats_update +
(mp->update_interval * HZ) / 1000))
minstrel_update_stats(mp, mi);
@@ -373,7 +367,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
return;
delta = (mi->total_packets * sampling_ratio / 100) -
- (mi->sample_packets + mi->sample_deferred / 2);
+ mi->sample_packets;
/* delta < 0: no sampling required */
prev_sample = mi->prev_sample;
@@ -382,7 +376,6 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
return;
if (mi->total_packets >= 10000) {
- mi->sample_deferred = 0;
mi->sample_packets = 0;
mi->total_packets = 0;
} else if (delta > mi->n_rates * 2) {
@@ -407,19 +400,8 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
* rate sampling method should be used.
* Respect such rates that are not sampled for 20 interations.
*/
- if (mrr_capable &&
- msr->perfect_tx_time > mr->perfect_tx_time &&
- msr->stats.sample_skipped < 20) {
- /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark
- * packets that have the sampling rate deferred to the
- * second MRR stage. Increase the sample counter only
- * if the deferred sample rate was actually used.
- * Use the sample_deferred counter to make sure that
- * the sampling is not done in large bursts */
- info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
- rate++;
- mi->sample_deferred++;
- } else {
+ if (msr->perfect_tx_time < mr->perfect_tx_time ||
+ msr->stats.sample_skipped >= 20) {
if (!msr->sample_limit)
return;
@@ -439,6 +421,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
rate->idx = mi->r[ndx].rix;
rate->count = minstrel_get_retry_count(&mi->r[ndx], info);
+ info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
}
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index be6c3f35f48b..d60413adb215 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -98,7 +98,6 @@ struct minstrel_sta_info {
u8 max_prob_rate;
unsigned int total_packets;
unsigned int sample_packets;
- int sample_deferred;
unsigned int sample_row;
unsigned int sample_column;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5e56719f999c..3598ebe52d08 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -5,7 +5,7 @@
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1322,8 +1322,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
goto dont_reorder;
/* not part of a BA session */
- if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
- ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+ if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_NOACK)
goto dont_reorder;
/* new, potentially un-ordered, ampdu frame - process it */
@@ -2009,19 +2008,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
return result;
}
+void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ skb_queue_head_init(&cache->entries[i].skb_list);
+}
+
+void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->entries); i++)
+ __skb_queue_purge(&cache->entries[i].skb_list);
+}
+
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq, int rx_queue,
struct sk_buff **skb)
{
struct ieee80211_fragment_entry *entry;
- entry = &sdata->fragments[sdata->fragment_next++];
- if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX)
- sdata->fragment_next = 0;
+ entry = &cache->entries[cache->next++];
+ if (cache->next >= IEEE80211_FRAGMENT_MAX)
+ cache->next = 0;
- if (!skb_queue_empty(&entry->skb_list))
- __skb_queue_purge(&entry->skb_list);
+ __skb_queue_purge(&entry->skb_list);
__skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */
*skb = NULL;
@@ -2036,14 +2050,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata,
}
static inline struct ieee80211_fragment_entry *
-ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
+ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache,
unsigned int frag, unsigned int seq,
int rx_queue, struct ieee80211_hdr *hdr)
{
struct ieee80211_fragment_entry *entry;
int i, idx;
- idx = sdata->fragment_next;
+ idx = cache->next;
for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) {
struct ieee80211_hdr *f_hdr;
@@ -2051,7 +2065,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
if (idx < 0)
idx = IEEE80211_FRAGMENT_MAX - 1;
- entry = &sdata->fragments[idx];
+ entry = &cache->entries[idx];
if (skb_queue_empty(&entry->skb_list) || entry->seq != seq ||
entry->rx_queue != rx_queue ||
entry->last_frag + 1 != frag)
@@ -2078,15 +2092,27 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata,
return NULL;
}
+static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc)
+{
+ return rx->key &&
+ (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
+ rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
+ ieee80211_has_protected(fc);
+}
+
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
{
+ struct ieee80211_fragment_cache *cache = &rx->sdata->frags;
struct ieee80211_hdr *hdr;
u16 sc;
__le16 fc;
unsigned int frag, seq;
struct ieee80211_fragment_entry *entry;
struct sk_buff *skb;
+ struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
hdr = (struct ieee80211_hdr *)rx->skb->data;
fc = hdr->frame_control;
@@ -2097,14 +2123,15 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sc = le16_to_cpu(hdr->seq_ctrl);
frag = sc & IEEE80211_SCTL_FRAG;
- if (is_multicast_ether_addr(hdr->addr1)) {
- I802_DEBUG_INC(rx->local->dot11MulticastReceivedFrameCount);
- goto out_no_led;
- }
+ if (rx->sta)
+ cache = &rx->sta->frags;
if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
goto out;
+ if (is_multicast_ether_addr(hdr->addr1))
+ return RX_DROP_MONITOR;
+
I802_DEBUG_INC(rx->local->rx_handlers_fragments);
if (skb_linearize(rx->skb))
@@ -2120,20 +2147,17 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (frag == 0) {
/* This is the first fragment of a new frame. */
- entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_add(cache, frag, seq,
rx->seqno_idx, &(rx->skb));
- if (rx->key &&
- (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP ||
- rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) &&
- ieee80211_has_protected(fc)) {
+ if (requires_sequential_pn(rx, fc)) {
int queue = rx->security_idx;
/* Store CCMP/GCMP PN so that we can verify that the
* next fragment has a sequential PN value.
*/
entry->check_sequential_pn = true;
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
memcpy(entry->last_pn,
rx->key->u.ccmp.rx_pn[queue],
IEEE80211_CCMP_PN_LEN);
@@ -2145,6 +2169,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
sizeof(rx->key->u.gcmp.rx_pn[queue]));
BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN !=
IEEE80211_GCMP_PN_LEN);
+ } else if (rx->key &&
+ (ieee80211_has_protected(fc) ||
+ (status->flag & RX_FLAG_DECRYPTED))) {
+ entry->is_protected = true;
+ entry->key_color = rx->key->color;
}
return RX_QUEUED;
}
@@ -2152,7 +2181,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
/* This is a fragment for a frame that should already be pending in
* fragment cache. Add this fragment to the end of the pending entry.
*/
- entry = ieee80211_reassemble_find(rx->sdata, frag, seq,
+ entry = ieee80211_reassemble_find(cache, frag, seq,
rx->seqno_idx, hdr);
if (!entry) {
I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag);
@@ -2167,25 +2196,39 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
if (entry->check_sequential_pn) {
int i;
u8 pn[IEEE80211_CCMP_PN_LEN], *rpn;
- int queue;
- if (!rx->key ||
- (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP &&
- rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256))
+ if (!requires_sequential_pn(rx, fc))
+ return RX_DROP_UNUSABLE;
+
+ /* Prevent mixed key and fragment cache attacks */
+ if (entry->key_color != rx->key->color)
return RX_DROP_UNUSABLE;
+
memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN);
for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) {
pn[i]++;
if (pn[i])
break;
}
- queue = rx->security_idx;
- rpn = rx->key->u.ccmp.rx_pn[queue];
+
+ rpn = rx->ccm_gcm.pn;
if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN))
return RX_DROP_UNUSABLE;
memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN);
+ } else if (entry->is_protected &&
+ (!rx->key ||
+ (!ieee80211_has_protected(fc) &&
+ !(status->flag & RX_FLAG_DECRYPTED)) ||
+ rx->key->color != entry->key_color)) {
+ /* Drop this as a mixed key or fragment cache attack, even
+ * if for TKIP Michael MIC should protect us, and WEP is a
+ * lost cause anyway.
+ */
+ return RX_DROP_UNUSABLE;
+ } else if (entry->is_protected && rx->key &&
+ entry->key_color != rx->key->color &&
+ (status->flag & RX_FLAG_DECRYPTED)) {
+ return RX_DROP_UNUSABLE;
}
skb_pull(rx->skb, ieee80211_hdrlen(fc));
@@ -2214,7 +2257,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
out:
ieee80211_led_rx(rx->local);
- out_no_led:
if (rx->sta)
rx->sta->rx_stats.packets++;
return RX_CONTINUE;
@@ -2371,13 +2413,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
/*
- * Allow EAPOL frames to us/the PAE group address regardless
- * of whether the frame was encrypted or not.
+ * Allow EAPOL frames to us/the PAE group address regardless of
+ * whether the frame was encrypted or not, and always disallow
+ * all other destination addresses for them.
*/
- if (ehdr->h_proto == rx->sdata->control_port_protocol &&
- (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
- ether_addr_equal(ehdr->h_dest, pae_group_addr)))
- return true;
+ if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol))
+ return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) ||
+ ether_addr_equal(ehdr->h_dest, pae_group_addr);
if (ieee80211_802_1x_port_control(rx) ||
ieee80211_drop_unencrypted(rx, fc))
@@ -2401,8 +2443,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
cfg80211_rx_control_port(dev, skb, noencrypt);
dev_kfree_skb(skb);
} else {
+ struct ethhdr *ehdr = (void *)skb_mac_header(skb);
+
memset(skb->cb, 0, sizeof(skb->cb));
+ /*
+ * 802.1X over 802.11 requires that the authenticator address
+ * be used for EAPOL frames. However, 802.1X allows the use of
+ * the PAE group address instead. If the interface is part of
+ * a bridge and we pass the frame with the PAE group address,
+ * then the bridge will forward it to the network (even if the
+ * client was not associated yet), which isn't supposed to
+ * happen.
+ * To avoid that, rewrite the destination address to our own
+ * address, so that the authenticator (e.g. hostapd) will see
+ * the frame, but bridge won't forward it anywhere else. Note
+ * that due to earlier filtering, the only other address can
+ * be the PAE group address.
+ */
+ if (unlikely(skb->protocol == sdata->control_port_protocol &&
+ !ether_addr_equal(ehdr->h_dest, sdata->vif.addr)))
+ ether_addr_copy(ehdr->h_dest, sdata->vif.addr);
+
/* deliver to local stack */
if (rx->napi)
napi_gro_receive(rx->napi, skb);
@@ -2442,6 +2504,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
if ((sdata->vif.type == NL80211_IFTYPE_AP ||
sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
!(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
+ ehdr->h_proto != rx->sdata->control_port_protocol &&
(sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
if (is_multicast_ether_addr(ehdr->h_dest) &&
ieee80211_vif_get_num_mcast_if(sdata) != 0) {
@@ -2550,7 +2613,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
if (ieee80211_data_to_8023_exthdr(skb, &ethhdr,
rx->sdata->vif.addr,
rx->sdata->vif.type,
- data_offset))
+ data_offset, true))
return RX_DROP_UNUSABLE;
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
@@ -2607,6 +2670,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (is_multicast_ether_addr(hdr->addr1))
return RX_DROP_UNUSABLE;
+ if (rx->key) {
+ /*
+ * We should not receive A-MSDUs on pre-HT connections,
+ * and HT connections cannot use old ciphers. Thus drop
+ * them, as in those cases we couldn't even have SPP
+ * A-MSDUs or such.
+ */
+ switch (rx->key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ case WLAN_CIPHER_SUITE_TKIP:
+ return RX_DROP_UNUSABLE;
+ default:
+ break;
+ }
+ }
+
return __ieee80211_rx_h_amsdu(rx, 0);
}
@@ -2693,13 +2773,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
ether_addr_equal(sdata->vif.addr, hdr->addr3))
return RX_CONTINUE;
- ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+ ac = ieee802_1d_to_ac[skb->priority];
q = sdata->vif.hw_queue[ac];
if (ieee80211_queue_stopped(&local->hw, q)) {
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
return RX_DROP_MONITOR;
}
- skb_set_queue_mapping(skb, q);
+ skb_set_queue_mapping(skb, ac);
if (!--mesh_hdr->ttl) {
if (!is_multicast_ether_addr(hdr->addr1))
@@ -3794,7 +3874,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
if (!bssid)
return false;
if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
- ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) ||
+ !is_valid_ether_addr(hdr->addr2))
return false;
if (ieee80211_is_beacon(hdr->frame_control))
return true;
@@ -4003,6 +4084,8 @@ void ieee80211_check_fast_rx(struct sta_info *sta)
rcu_read_lock();
key = rcu_dereference(sta->ptk[sta->ptk_idx]);
+ if (!key)
+ key = rcu_dereference(sdata->default_unicast_key);
if (key) {
switch (key->conf.cipher) {
case WLAN_CIPHER_SUITE_TKIP:
@@ -4536,7 +4619,7 @@ void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
goto drop;
break;
case RX_ENC_VHT:
- if (WARN_ONCE(status->rate_idx > 9 ||
+ if (WARN_ONCE(status->rate_idx > 11 ||
!status->nss ||
status->nss > 8,
"Rate marked as a VHT rate but data is invalid: MCS: %d, NSS: %d\n",
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5d2a11777718..e3d8be4feea5 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -222,6 +222,16 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
if (likely(!sdata1 && !sdata2))
return;
+ if (test_and_clear_bit(SCAN_BEACON_WAIT, &local->scanning)) {
+ /*
+ * we were passive scanning because of radar/no-IR, but
+ * the beacon/proberesp rx gives us an opportunity to upgrade
+ * to active scan
+ */
+ set_bit(SCAN_BEACON_DONE, &local->scanning);
+ ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
+ }
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -402,10 +412,6 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
scan_req = rcu_dereference_protected(local->scan_req,
lockdep_is_held(&local->mtx));
- if (scan_req != local->int_scan_req) {
- local->scan_info.aborted = aborted;
- cfg80211_scan_done(scan_req, &local->scan_info);
- }
RCU_INIT_POINTER(local->scan_req, NULL);
scan_sdata = rcu_dereference_protected(local->scan_sdata,
@@ -415,6 +421,13 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
local->scanning = 0;
local->scan_chandef.chan = NULL;
+ synchronize_rcu();
+
+ if (scan_req != local->int_scan_req) {
+ local->scan_info.aborted = aborted;
+ cfg80211_scan_done(scan_req, &local->scan_info);
+ }
+
/* Set power back to normal operating levels. */
ieee80211_hw_config(local, 0);
@@ -706,6 +719,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
IEEE80211_CHAN_RADAR)) ||
!req->n_ssids) {
next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
+ if (req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
} else {
ieee80211_scan_state_send_probe(local, &next_delay);
next_delay = IEEE80211_CHANNEL_TIME;
@@ -886,6 +901,8 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
!scan_req->n_ssids) {
*next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
local->next_scan_state = SCAN_DECISION;
+ if (scan_req->n_ssids)
+ set_bit(SCAN_BEACON_WAIT, &local->scanning);
return;
}
@@ -978,6 +995,8 @@ void ieee80211_scan_work(struct work_struct *work)
goto out;
}
+ clear_bit(SCAN_BEACON_WAIT, &local->scanning);
+
/*
* as long as no delay is required advance immediately
* without scheduling a new work
@@ -988,6 +1007,10 @@ void ieee80211_scan_work(struct work_struct *work)
goto out_complete;
}
+ if (test_and_clear_bit(SCAN_BEACON_DONE, &local->scanning) &&
+ local->next_scan_state == SCAN_DECISION)
+ local->next_scan_state = SCAN_SEND_PROBE;
+
switch (local->next_scan_state) {
case SCAN_DECISION:
/* if no more bands/channels left, complete scan */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 2a82d438991b..5c209f72de70 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -3,7 +3,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2020 Intel Corporation
+ * Copyright (C) 2018-2021 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -244,6 +244,24 @@ struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
*/
void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
{
+ /*
+ * If we had used sta_info_pre_move_state() then we might not
+ * have gone through the state transitions down again, so do
+ * it here now (and warn if it's inserted).
+ *
+ * This will clear state such as fast TX/RX that may have been
+ * allocated during state transitions.
+ */
+ while (sta->sta_state > IEEE80211_STA_NONE) {
+ int ret;
+
+ WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED));
+
+ ret = sta_info_move_state(sta, sta->sta_state - 1);
+ if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret))
+ break;
+ }
+
if (sta->rate_ctrl)
rate_control_free_sta(sta);
@@ -351,6 +369,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
u64_stats_init(&sta->rx_stats.syncp);
+ ieee80211_init_frag_cache(&sta->frags);
+
sta->sta_state = IEEE80211_STA_NONE;
/* Mark TID as unreserved */
@@ -616,7 +636,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
out_drop_sta:
local->num_sta--;
synchronize_net();
- __cleanup_single_sta(sta);
+ cleanup_single_sta(sta);
out_err:
mutex_unlock(&local->sta_mtx);
kfree(sinfo);
@@ -635,19 +655,13 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
err = sta_info_insert_check(sta);
if (err) {
+ sta_info_free(local, sta);
mutex_unlock(&local->sta_mtx);
rcu_read_lock();
- goto out_free;
+ return err;
}
- err = sta_info_insert_finish(sta);
- if (err)
- goto out_free;
-
- return 0;
- out_free:
- sta_info_free(local, sta);
- return err;
+ return sta_info_insert_finish(sta);
}
int sta_info_insert(struct sta_info *sta)
@@ -955,7 +969,8 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
list_del_rcu(&sta->list);
sta->removed = true;
- drv_sta_pre_rcu_remove(local, sta->sdata, sta);
+ if (sta->uploaded)
+ drv_sta_pre_rcu_remove(local, sta->sdata, sta);
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
rcu_access_pointer(sdata->u.vlan.sta) == sta)
@@ -1020,6 +1035,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
rate_control_remove_sta_debugfs(sta);
ieee80211_sta_debugfs_remove(sta);
+ ieee80211_destroy_frag_cache(&sta->frags);
+
cleanup_single_sta(sta);
}
@@ -1328,11 +1345,6 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid,
struct ieee80211_tx_info *info;
struct ieee80211_chanctx_conf *chanctx_conf;
- /* Don't send NDPs when STA is connected HE */
- if (sdata->vif.type == NL80211_IFTYPE_STATION &&
- !(sdata->u.mgd.flags & IEEE80211_STA_DISABLE_HE))
- return;
-
if (qos) {
fc = cpu_to_le16(IEEE80211_FTYPE_DATA |
IEEE80211_STYPE_QOS_NULLFUNC |
@@ -2009,6 +2021,10 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
int rate_idx = STA_STATS_GET(LEGACY_IDX, rate);
sband = local->hw.wiphy->bands[band];
+
+ if (WARN_ON_ONCE(!sband->bitrates))
+ break;
+
brate = sband->bitrates[rate_idx].bitrate;
if (rinfo->bw == RATE_INFO_BW_5)
shift = 2;
@@ -2032,7 +2048,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{
- u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+ u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
return -EINVAL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 9a04327d71d1..75d982ff7f3d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -2,6 +2,7 @@
* Copyright 2002-2005, Devicescape Software, Inc.
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright(c) 2020-2021 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -101,6 +102,7 @@ enum ieee80211_sta_info_flags {
WLAN_STA_MPSP_OWNER,
WLAN_STA_MPSP_RECIPIENT,
WLAN_STA_PS_DELIVER,
+ WLAN_STA_USES_ENCRYPTION,
NUM_WLAN_STA_FLAGS,
};
@@ -412,6 +414,34 @@ struct ieee80211_sta_rx_stats {
};
/*
+ * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent
+ * reception of at least one MSDU per access category per associated STA"
+ * on APs, or "at least one MSDU per access category" on other interface types.
+ *
+ * This limit can be increased by changing this define, at the cost of slower
+ * frame reassembly and increased memory use while fragments are pending.
+ */
+#define IEEE80211_FRAGMENT_MAX 4
+
+struct ieee80211_fragment_entry {
+ struct sk_buff_head skb_list;
+ unsigned long first_frag_time;
+ u16 seq;
+ u16 extra_len;
+ u16 last_frag;
+ u8 rx_queue;
+ u8 check_sequential_pn:1, /* needed for CCMP/GCMP */
+ is_protected:1;
+ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
+ unsigned int key_color;
+};
+
+struct ieee80211_fragment_cache {
+ struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX];
+ unsigned int next;
+};
+
+/*
* The bandwidth threshold below which the per-station CoDel parameters will be
* scaled to be more lenient (to prevent starvation of slow stations). This
* value will be scaled by the number of active stations when it is being
@@ -482,6 +512,7 @@ struct ieee80211_sta_rx_stats {
* @pcpu_rx_stats: per-CPU RX statistics, assigned only if the driver needs
* this (by advertising the USES_RSS hw flag)
* @status_stats: TX status statistics
+ * @frags: fragment cache
*/
struct sta_info {
/* General information, mostly static */
@@ -583,6 +614,8 @@ struct sta_info {
struct cfg80211_chan_def tdls_chandef;
+ struct ieee80211_fragment_cache frags;
+
/* keep last! */
struct ieee80211_sta sta;
};
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3160ffd93a15..3a0aadf881fc 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -593,10 +593,13 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
- if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT))
+ if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) {
tx->key = NULL;
- else if (tx->sta &&
- (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
+ return TX_CONTINUE;
+ }
+
+ if (tx->sta &&
+ (key = rcu_dereference(tx->sta->ptk[tx->sta->ptk_idx])))
tx->key = key;
else if (ieee80211_is_group_privacy_action(tx->skb) &&
(key = rcu_dereference(tx->sdata->default_multicast_key)))
@@ -651,12 +654,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
}
if (unlikely(tx->key && tx->key->flags & KEY_FLAG_TAINTED &&
- !ieee80211_is_deauth(hdr->frame_control)))
+ !ieee80211_is_deauth(hdr->frame_control)) &&
+ tx->skb->protocol != tx->sdata->control_port_protocol)
return TX_DROP;
if (!skip_hw && tx->key &&
tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)
info->control.hw_key = &tx->key->conf;
+ } else if (!ieee80211_is_mgmt(hdr->frame_control) && tx->sta &&
+ test_sta_flag(tx->sta, WLAN_STA_USES_ENCRYPTION)) {
+ return TX_DROP;
}
return TX_CONTINUE;
@@ -1908,19 +1915,24 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata,
/* device xmit handlers */
+enum ieee80211_encrypt {
+ ENCRYPT_NO,
+ ENCRYPT_MGMT,
+ ENCRYPT_DATA,
+};
+
static int ieee80211_skb_resize(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
- int head_need, bool may_encrypt)
+ int head_need,
+ enum ieee80211_encrypt encrypt)
{
struct ieee80211_local *local = sdata->local;
- struct ieee80211_hdr *hdr;
bool enc_tailroom;
int tail_need = 0;
- hdr = (struct ieee80211_hdr *) skb->data;
- enc_tailroom = may_encrypt &&
- (sdata->crypto_tx_tailroom_needed_cnt ||
- ieee80211_is_mgmt(hdr->frame_control));
+ enc_tailroom = encrypt == ENCRYPT_MGMT ||
+ (encrypt == ENCRYPT_DATA &&
+ sdata->crypto_tx_tailroom_needed_cnt);
if (enc_tailroom) {
tail_need = IEEE80211_ENCRYPT_TAILROOM;
@@ -1952,23 +1964,29 @@ void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
- struct ieee80211_hdr *hdr;
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
int headroom;
- bool may_encrypt;
+ enum ieee80211_encrypt encrypt;
- may_encrypt = !(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT);
+ if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)
+ encrypt = ENCRYPT_NO;
+ else if (ieee80211_is_mgmt(hdr->frame_control))
+ encrypt = ENCRYPT_MGMT;
+ else
+ encrypt = ENCRYPT_DATA;
headroom = local->tx_headroom;
- if (may_encrypt)
+ if (encrypt != ENCRYPT_NO)
headroom += sdata->encrypt_headroom;
headroom -= skb_headroom(skb);
headroom = max_t(int, 0, headroom);
- if (ieee80211_skb_resize(sdata, skb, headroom, may_encrypt)) {
+ if (ieee80211_skb_resize(sdata, skb, headroom, encrypt)) {
ieee80211_free_txskb(&local->hw, skb);
return;
}
+ /* reload after potential resize */
hdr = (struct ieee80211_hdr *) skb->data;
info->control.vif = &sdata->vif;
@@ -2109,7 +2127,11 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
}
vht_mcs = iterator.this_arg[4] >> 4;
+ if (vht_mcs > 11)
+ vht_mcs = 0;
vht_nss = iterator.this_arg[4] & 0xF;
+ if (!vht_nss || vht_nss > 8)
+ vht_nss = 1;
break;
/*
@@ -2751,7 +2773,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
head_need += sdata->encrypt_headroom;
head_need += local->tx_headroom;
head_need = max_t(int, 0, head_need);
- if (ieee80211_skb_resize(sdata, skb, head_need, true)) {
+ if (ieee80211_skb_resize(sdata, skb, head_need, ENCRYPT_DATA)) {
ieee80211_free_txskb(&local->hw, skb);
skb = NULL;
return ERR_PTR(-ENOMEM);
@@ -3125,7 +3147,9 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
return true;
- if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr)))
+ if (!ieee80211_amsdu_realloc_pad(local, skb,
+ sizeof(*amsdu_hdr) +
+ local->hw.extra_tx_headroom))
return false;
data = skb_push(skb, sizeof(*amsdu_hdr));
@@ -3245,6 +3269,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
goto out;
+ /* If n == 2, the "while (*frag_tail)" loop above didn't execute
+ * and frag_tail should be &skb_shinfo(head)->frag_list.
+ * However, ieee80211_amsdu_prepare_head() can reallocate it.
+ * Reload frag_tail to have it pointing to the correct place.
+ */
+ if (n == 2)
+ frag_tail = &skb_shinfo(head)->frag_list;
+
/*
* Pad out the previous subframe to a multiple of 4 by adding the
* padding to the next one, that's being added. Note that head->len
@@ -3414,7 +3446,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
if (unlikely(ieee80211_skb_resize(sdata, skb,
max_t(int, extra_head + hw_headroom -
skb_headroom(skb), 0),
- false))) {
+ ENCRYPT_NO))) {
kfree_skb(skb);
return true;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 7fa9871b1db9..9c1a20ca6344 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1102,6 +1102,8 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action,
elems->max_idle_period_ie = (void *)pos;
break;
case WLAN_EID_EXTENSION:
+ if (!elen)
+ break;
if (pos[0] == WLAN_EID_EXT_HE_MU_EDCA &&
elen >= (sizeof(*elems->mu_edca_param_set) + 1)) {
elems->mu_edca_param_set = (void *)&pos[1];
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 4d154efb80c8..d691c2f2e92e 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -421,12 +421,18 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
* IEEE80211-2016 specification makes higher bandwidth operation
* possible on the TDLS link if the peers have wider bandwidth
* capability.
+ *
+ * However, in this case, and only if the TDLS peer is authorized,
+ * limit to the tdls_chandef so that the configuration here isn't
+ * wider than what's actually requested on the channel context.
*/
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
- test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
- return bw;
-
- bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
+ test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) &&
+ test_sta_flag(sta, WLAN_STA_AUTHORIZED) &&
+ sta->tdls_chandef.chan)
+ bw = min(bw, ieee80211_chan_width_to_rx_bw(sta->tdls_chandef.width));
+ else
+ bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
return bw;
}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 5dd48f0a4b1b..b1d2e2b54f85 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -2,6 +2,7 @@
* Copyright 2002-2004, Instant802 Networks, Inc.
* Copyright 2008, Jouni Malinen <j@w1.fi>
* Copyright (C) 2016-2017 Intel Deutschland GmbH
+ * Copyright (C) 2020-2021 Intel Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -170,8 +171,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
update_iv:
/* update IV in key information to be able to detect replays */
- rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32;
- rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16;
+ rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32;
+ rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16;
return RX_CONTINUE;
@@ -297,8 +298,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
key, skb->data + hdrlen,
skb->len - hdrlen, rx->sta->sta.addr,
hdr->addr1, hwaccel, rx->security_idx,
- &rx->tkip_iv32,
- &rx->tkip_iv16);
+ &rx->tkip.iv32,
+ &rx->tkip.iv16);
if (res != TKIP_DECRYPT_OK)
return RX_DROP_UNUSABLE;
@@ -522,6 +523,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
@@ -556,6 +560,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
}
memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove CCMP header and MIC */
@@ -749,6 +755,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
}
+ /* reload hdr - skb might have been reallocated */
+ hdr = (void *)rx->skb->data;
+
data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len;
if (!rx->sta || data_len < 0)
return RX_DROP_UNUSABLE;
@@ -784,6 +793,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
}
memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
+ if (unlikely(ieee80211_is_frag(hdr)))
+ memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN);
}
/* Remove GCMP header and MIC */
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index bd88a9b80773..8c2aedf3fa74 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -669,6 +669,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
sdata->dev = ndev;
sdata->wpan_dev.wpan_phy = local->hw.phy;
sdata->local = local;
+ INIT_LIST_HEAD(&sdata->wpan_dev.list);
/* setup type-dependent data */
ret = ieee802154_setup_sdata(sdata, type);
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 2fb703d70803..d742e635ad07 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -160,7 +160,7 @@ err_tfm0:
crypto_free_skcipher(key->tfm0);
err_tfm:
for (i = 0; i < ARRAY_SIZE(key->tfm); i++)
- if (key->tfm[i])
+ if (!IS_ERR_OR_NULL(key->tfm[i]))
crypto_free_aead(key->tfm[i]);
kzfree(key);
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index 4dcf6e18563a..dc1a384bc137 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -52,7 +52,7 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
switch (mac_cb(skb)->dest.mode) {
case IEEE802154_ADDR_NONE:
- if (mac_cb(skb)->dest.mode != IEEE802154_ADDR_NONE)
+ if (hdr->source.mode != IEEE802154_ADDR_NONE)
/* FIXME: check if we are PAN coordinator */
skb->pkt_type = PACKET_OTHERHOST;
else
@@ -140,7 +140,7 @@ static int
ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
int hlen;
- struct ieee802154_mac_cb *cb = mac_cb_init(skb);
+ struct ieee802154_mac_cb *cb = mac_cb(skb);
skb_reset_mac_header(skb);
@@ -302,8 +302,9 @@ void
ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi)
{
struct ieee802154_local *local = hw_to_local(hw);
+ struct ieee802154_mac_cb *cb = mac_cb_init(skb);
- mac_cb(skb)->lqi = lqi;
+ cb->lqi = lqi;
skb->pkt_type = IEEE802154_RX_MSG;
skb_queue_tail(&local->skb_queue, skb);
tasklet_schedule(&local->tasklet);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7623d9aec636..40b3e6a52f92 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1375,6 +1375,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
free:
kfree(table);
out:
+ mdev->sysctl = NULL;
return -ENOBUFS;
}
@@ -1384,6 +1385,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
struct net *net = dev_net(dev);
struct ctl_table *table;
+ if (!mdev->sysctl)
+ return;
+
table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl);
kfree(table);
@@ -1438,22 +1442,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head)
kfree(mdev);
}
-static void mpls_ifdown(struct net_device *dev, int event)
+static int mpls_ifdown(struct net_device *dev, int event)
{
struct mpls_route __rcu **platform_label;
struct net *net = dev_net(dev);
- u8 alive, deleted;
unsigned index;
platform_label = rtnl_dereference(net->mpls.platform_label);
for (index = 0; index < net->mpls.platform_labels; index++) {
struct mpls_route *rt = rtnl_dereference(platform_label[index]);
+ bool nh_del = false;
+ u8 alive = 0;
if (!rt)
continue;
- alive = 0;
- deleted = 0;
+ if (event == NETDEV_UNREGISTER) {
+ u8 deleted = 0;
+
+ for_nexthops(rt) {
+ struct net_device *nh_dev =
+ rtnl_dereference(nh->nh_dev);
+
+ if (!nh_dev || nh_dev == dev)
+ deleted++;
+ if (nh_dev == dev)
+ nh_del = true;
+ } endfor_nexthops(rt);
+
+ /* if there are no more nexthops, delete the route */
+ if (deleted == rt->rt_nhn) {
+ mpls_route_update(net, index, NULL, NULL);
+ continue;
+ }
+
+ if (nh_del) {
+ size_t size = sizeof(*rt) + rt->rt_nhn *
+ rt->rt_nh_size;
+ struct mpls_route *orig = rt;
+
+ rt = kmalloc(size, GFP_KERNEL);
+ if (!rt)
+ return -ENOMEM;
+ memcpy(rt, orig, size);
+ }
+ }
+
change_nexthops(rt) {
unsigned int nh_flags = nh->nh_flags;
@@ -1477,16 +1511,15 @@ static void mpls_ifdown(struct net_device *dev, int event)
next:
if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)))
alive++;
- if (!rtnl_dereference(nh->nh_dev))
- deleted++;
} endfor_nexthops(rt);
WRITE_ONCE(rt->rt_nhn_alive, alive);
- /* if there are no more nexthops, delete the route */
- if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn)
- mpls_route_update(net, index, NULL, NULL);
+ if (nh_del)
+ mpls_route_update(net, index, rt, NULL);
}
+
+ return 0;
}
static void mpls_ifup(struct net_device *dev, unsigned int flags)
@@ -1554,8 +1587,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
return NOTIFY_OK;
switch (event) {
+ int err;
+
case NETDEV_DOWN:
- mpls_ifdown(dev, event);
+ err = mpls_ifdown(dev, event);
+ if (err)
+ return notifier_from_errno(err);
break;
case NETDEV_UP:
flags = dev_get_flags(dev);
@@ -1566,13 +1603,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
break;
case NETDEV_CHANGE:
flags = dev_get_flags(dev);
- if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+ if (flags & (IFF_RUNNING | IFF_LOWER_UP)) {
mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN);
- else
- mpls_ifdown(dev, event);
+ } else {
+ err = mpls_ifdown(dev, event);
+ if (err)
+ return notifier_from_errno(err);
+ }
break;
case NETDEV_UNREGISTER:
- mpls_ifdown(dev, event);
+ err = mpls_ifdown(dev, event);
+ if (err)
+ return notifier_from_errno(err);
mdev = mpls_dev_get(dev);
if (mdev) {
mpls_dev_sysctl_unregister(dev, mdev);
@@ -1583,8 +1625,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
case NETDEV_CHANGENAME:
mdev = mpls_dev_get(dev);
if (mdev) {
- int err;
-
mpls_dev_sysctl_unregister(dev, mdev);
err = mpls_dev_sysctl_register(dev, mdev);
if (err)
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index b4da6d8e8632..2129856b5933 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -18,6 +18,7 @@
#include <linux/netdev_features.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <net/mpls.h>
static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
netdev_features_t features)
@@ -31,6 +32,8 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
skb_reset_network_header(skb);
mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb);
+ if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN))
+ goto out;
if (unlikely(!pskb_may_pull(skb, mpls_hlen)))
goto out;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 8055e3965cef..2477caf9c967 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -69,9 +69,12 @@ enum {
};
struct ncsi_channel_version {
- u32 version; /* Supported BCD encoded NCSI version */
- u32 alpha2; /* Supported BCD encoded NCSI version */
- u8 fw_name[12]; /* Firware name string */
+ u8 major; /* NCSI version major */
+ u8 minor; /* NCSI version minor */
+ u8 update; /* NCSI version update */
+ char alpha1; /* NCSI version alpha1 */
+ char alpha2; /* NCSI version alpha2 */
+ u8 fw_name[12]; /* Firmware name string */
u32 fw_version; /* Firmware version */
u16 pci_ids[4]; /* PCI identification */
u32 mf_id; /* Manufacture ID */
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 091284760d21..9fd20fa90000 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -84,13 +84,20 @@ static void ncsi_channel_monitor(struct timer_list *t)
monitor_state = nc->monitor.state;
spin_unlock_irqrestore(&nc->lock, flags);
- if (!enabled || chained) {
- ncsi_stop_channel_monitor(nc);
- return;
- }
+ if (!enabled)
+ return; /* expected race disabling timer */
+ if (WARN_ON_ONCE(chained))
+ goto bad_state;
+
if (state != NCSI_CHANNEL_INACTIVE &&
state != NCSI_CHANNEL_ACTIVE) {
- ncsi_stop_channel_monitor(nc);
+bad_state:
+ netdev_warn(ndp->ndev.dev,
+ "Bad NCSI monitor state channel %d 0x%x %s queue\n",
+ nc->id, state, chained ? "on" : "off");
+ spin_lock_irqsave(&nc->lock, flags);
+ nc->monitor.enabled = false;
+ spin_unlock_irqrestore(&nc->lock, flags);
return;
}
@@ -117,10 +124,9 @@ static void ncsi_channel_monitor(struct timer_list *t)
ndp->flags |= NCSI_DEV_RESHUFFLE;
}
- ncsi_stop_channel_monitor(nc);
-
ncm = &nc->modes[NCSI_MODE_LINK];
spin_lock_irqsave(&nc->lock, flags);
+ nc->monitor.enabled = false;
nc->state = NCSI_CHANNEL_INVISIBLE;
ncm->data[2] &= ~0x1;
spin_unlock_irqrestore(&nc->lock, flags);
@@ -1484,9 +1490,6 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
ndp->ptype.dev = dev;
dev_add_pack(&ndp->ptype);
- /* Set up generic netlink interface */
- ncsi_init_netlink(dev);
-
return nd;
}
EXPORT_SYMBOL_GPL(ncsi_register_dev);
@@ -1566,8 +1569,6 @@ void ncsi_unregister_dev(struct ncsi_dev *nd)
#endif
spin_unlock_irqrestore(&ncsi_dev_lock, flags);
- ncsi_unregister_netlink(nd->dev);
-
kfree(ndp);
}
EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index 45f33d6dedf7..d0169bf0fcce 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
if (ndp->force_channel == nc)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor);
nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
vid_nest = nla_nest_start(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
@@ -397,24 +397,8 @@ static struct genl_family ncsi_genl_family __ro_after_init = {
.n_ops = ARRAY_SIZE(ncsi_ops),
};
-int ncsi_init_netlink(struct net_device *dev)
+static int __init ncsi_init_netlink(void)
{
- int rc;
-
- rc = genl_register_family(&ncsi_genl_family);
- if (rc)
- netdev_err(dev, "ncsi: failed to register netlink family\n");
-
- return rc;
-}
-
-int ncsi_unregister_netlink(struct net_device *dev)
-{
- int rc;
-
- rc = genl_unregister_family(&ncsi_genl_family);
- if (rc)
- netdev_err(dev, "ncsi: failed to unregister netlink family\n");
-
- return rc;
+ return genl_register_family(&ncsi_genl_family);
}
+subsys_initcall(ncsi_init_netlink);
diff --git a/net/ncsi/ncsi-netlink.h b/net/ncsi/ncsi-netlink.h
index 91a5c256f8c4..6c55a6775157 100644
--- a/net/ncsi/ncsi-netlink.h
+++ b/net/ncsi/ncsi-netlink.h
@@ -14,7 +14,4 @@
#include "internal.h"
-int ncsi_init_netlink(struct net_device *dev);
-int ncsi_unregister_netlink(struct net_device *dev);
-
#endif /* __NCSI_NETLINK_H__ */
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 91b4b66438df..0bf62b4883d4 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -164,9 +164,12 @@ struct ncsi_rsp_gls_pkt {
/* Get Version ID */
struct ncsi_rsp_gvi_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 ncsi_version; /* NCSI version */
+ unsigned char major; /* NCSI version major */
+ unsigned char minor; /* NCSI version minor */
+ unsigned char update; /* NCSI version update */
+ unsigned char alpha1; /* NCSI version alpha1 */
unsigned char reserved[3]; /* Reserved */
- unsigned char alpha2; /* NCSI version */
+ unsigned char alpha2; /* NCSI version alpha2 */
unsigned char fw_name[12]; /* f/w name string */
__be32 fw_version; /* f/w version */
__be16 pci_ids[4]; /* PCI IDs */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 930c1d3796f0..05dea43bbc66 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -20,6 +20,19 @@
#include "internal.h"
#include "ncsi-pkt.h"
+/* Nibbles within [0xA, 0xF] add zero "0" to the returned value.
+ * Optional fields (encoded as 0xFF) will default to zero.
+ */
+static u8 decode_bcd_u8(u8 x)
+{
+ int lo = x & 0xF;
+ int hi = x >> 4;
+
+ lo = lo < 0xA ? lo : 0;
+ hi = hi < 0xA ? hi : 0;
+ return lo + hi * 10;
+}
+
static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
unsigned short payload)
{
@@ -611,9 +624,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
if (!nc)
return -ENODEV;
- /* Update to channel's version info */
+ /* Update channel's version info
+ *
+ * Major, minor, and update fields are supposed to be
+ * unsigned integers encoded as packed BCD.
+ *
+ * Alpha1 and alpha2 are ISO/IEC 8859-1 characters.
+ */
ncv = &nc->version;
- ncv->version = ntohl(rsp->ncsi_version);
+ ncv->major = decode_bcd_u8(rsp->major);
+ ncv->minor = decode_bcd_u8(rsp->minor);
+ ncv->update = decode_bcd_u8(rsp->update);
+ ncv->alpha1 = rsp->alpha1;
ncv->alpha2 = rsp->alpha2;
memcpy(ncv->fw_name, rsp->fw_name, 12);
ncv->fw_version = ntohl(rsp->fw_version);
@@ -949,7 +971,7 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
int payload, i, ret;
/* Find the NCSI device */
- nd = ncsi_find_dev(dev);
+ nd = ncsi_find_dev(orig_dev);
ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL;
if (!ndp)
return -ENODEV;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e0fb56d67d42..92e0514f624f 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -93,7 +93,7 @@ config NF_CONNTRACK_MARK
config NF_CONNTRACK_SECMARK
bool 'Connection tracking security mark support'
depends on NETWORK_SECMARK
- default m if NETFILTER_ADVANCED=n
+ default y if NETFILTER_ADVANCED=n
help
This option enables security markings to be applied to
connections. Typically they are copied to connections from
@@ -117,7 +117,6 @@ config NF_CONNTRACK_ZONES
config NF_CONNTRACK_PROCFS
bool "Supply CT list in procfs (OBSOLETE)"
- default y
depends on PROC_FS
---help---
This option enables for the list of known conntrack entries
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 93aaec3a54ec..0c6540780cb4 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -289,12 +289,6 @@ nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
return NULL;
return net->nf.hooks_ipv6 + hooknum;
-#if IS_ENABLED(CONFIG_DECNET)
- case NFPROTO_DECNET:
- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
- return NULL;
- return net->nf.hooks_decnet + hooknum;
-#endif
default:
WARN_ON_ONCE(1);
return NULL;
@@ -335,14 +329,15 @@ static int __nf_register_net_hook(struct net *net, int pf,
p = nf_entry_dereference(*pp);
new_hooks = nf_hook_entries_grow(p, reg);
- if (!IS_ERR(new_hooks))
+ if (!IS_ERR(new_hooks)) {
+ hooks_validate(new_hooks);
rcu_assign_pointer(*pp, new_hooks);
+ }
mutex_unlock(&nf_hook_mutex);
if (IS_ERR(new_hooks))
return PTR_ERR(new_hooks);
- hooks_validate(new_hooks);
#ifdef CONFIG_NETFILTER_INGRESS
if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
net_inc_ingress_queue();
@@ -645,10 +640,6 @@ static int __net_init netfilter_net_init(struct net *net)
#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
__netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
#endif
-#if IS_ENABLED(CONFIG_DECNET)
- __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
-#endif
-
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
net->proc_net);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index e3257077158f..1b9df64c6236 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -299,8 +299,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
- hosts = 2 << (32 - netmask - 1);
- elements = 2 << (netmask - mask_bits - 1);
+ hosts = 2U << (32 - netmask - 1);
+ elements = 2UL << (netmask - mask_bits - 1);
}
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 36ebc40a4313..031bb83aed70 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -64,6 +64,8 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
ip_set_dereference((inst)->ip_set_list)[id]
#define ip_set_ref_netlink(inst,id) \
rcu_dereference_raw((inst)->ip_set_list)[id]
+#define ip_set_dereference_nfnl(p) \
+ rcu_dereference_check(p, lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET))
/* The set types are implemented in modules and registered set types
* can be found in ip_set_type_list. Adding/deleting types is
@@ -488,13 +490,14 @@ ip_set_match_extensions(struct ip_set *set, const struct ip_set_ext *ext,
if (SET_WITH_COUNTER(set)) {
struct ip_set_counter *counter = ext_counter(data, set);
+ ip_set_update_counter(counter, ext, flags);
+
if (flags & IPSET_FLAG_MATCH_COUNTERS &&
!(ip_set_match_counter(ip_set_get_packets(counter),
mext->packets, mext->packets_op) &&
ip_set_match_counter(ip_set_get_bytes(counter),
mext->bytes, mext->bytes_op)))
return false;
- ip_set_update_counter(counter, ext, flags);
}
if (SET_WITH_SKBINFO(set))
ip_set_get_skbinfo(ext_skbinfo(data, set),
@@ -551,15 +554,10 @@ __ip_set_put_netlink(struct ip_set *set)
static inline struct ip_set *
ip_set_rcu_get(struct net *net, ip_set_id_t index)
{
- struct ip_set *set;
struct ip_set_net *inst = ip_set_pernet(net);
- rcu_read_lock();
- /* ip_set_list itself needs to be protected */
- set = rcu_dereference(inst->ip_set_list)[index];
- rcu_read_unlock();
-
- return set;
+ /* ip_set_list and the set pointer need to be protected */
+ return ip_set_dereference_nfnl(inst->ip_set_list)[index];
}
int
@@ -790,20 +788,9 @@ static struct nlmsghdr *
start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
enum ipset_cmd cmd)
{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
-
- nlh = nlmsg_put(skb, portid, seq, nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd),
- sizeof(*nfmsg), flags);
- if (!nlh)
- return NULL;
-
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = NFPROTO_IPV4;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- return nlh;
+ return nfnl_msg_put(skb, portid, seq,
+ nfnl_msg_type(NFNL_SUBSYS_IPSET, cmd), flags,
+ NFPROTO_IPV4, NFNETLINK_V0, 0);
}
/* Create a set */
@@ -1237,6 +1224,9 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
+ /* Make sure all readers of the old set pointers are completed. */
+ synchronize_rcu();
+
return 0;
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index ddfe06d7530b..b4b474556339 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -104,31 +104,17 @@ htable_size(u8 hbits)
{
size_t hsize;
- /* We must fit both into u32 in jhash and size_t */
+ /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */
if (hbits > 31)
return 0;
hsize = jhash_size(hbits);
- if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *)
+ if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *)
< hsize)
return 0;
return hsize * sizeof(struct hbucket *) + sizeof(struct htable);
}
-/* Compute htable_bits from the user input parameter hashsize */
-static u8
-htable_bits(u32 hashsize)
-{
- /* Assume that hashsize == 2^htable_bits */
- u8 bits = fls(hashsize - 1);
-
- if (jhash_size(bits) != hashsize)
- /* Round up to the first 2^n value */
- bits = fls(hashsize);
-
- return bits;
-}
-
#ifdef IP_SET_HASH_WITH_NETS
#if IPSET_NET_COUNT > 1
#define __CIDR(cidr, i) (cidr[i])
@@ -1287,7 +1273,11 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set,
if (!h)
return -ENOMEM;
- hbits = htable_bits(hashsize);
+ /* Compute htable_bits from the user input parameter hashsize.
+ * Assume that hashsize == 2^htable_bits,
+ * otherwise round up to the first 2^n value.
+ */
+ hbits = fls(hashsize - 1);
hsize = htable_size(hbits);
if (hsize == 0) {
kfree(h);
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 613e18e720a4..9290a4d7b862 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -39,6 +39,7 @@ MODULE_ALIAS("ip_set_hash:net,port,net");
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS
#define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
/* IPv4 variant */
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 80759aadd3e0..21149f4e0b6e 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -604,13 +604,19 @@ static const struct seq_operations ip_vs_app_seq_ops = {
int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
INIT_LIST_HEAD(&ipvs->app_list);
- proc_create_net("ip_vs_app", 0, ipvs->net->proc_net, &ip_vs_app_seq_ops,
- sizeof(struct seq_net_private));
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_app", 0, ipvs->net->proc_net,
+ &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private)))
+ return -ENOMEM;
+#endif
return 0;
}
void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
unregister_ip_vs_app(ipvs, NULL /* all */);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_app", ipvs->net->proc_net);
+#endif
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 5b2b17867cb1..51679d1e2d7d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1230,8 +1230,8 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
* The drop rate array needs tuning for real environments.
* Called from timer bh only => no locking
*/
- static const char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
- static char todrop_counter[9] = {0};
+ static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8};
+ static signed char todrop_counter[9] = {0};
int i;
/* if the conn entry hasn't lasted for 60 seconds, don't drop it.
@@ -1378,20 +1378,36 @@ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
atomic_set(&ipvs->conn_count, 0);
- proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
- &ip_vs_conn_seq_ops, sizeof(struct ip_vs_iter_state));
- proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
- &ip_vs_conn_sync_seq_ops,
- sizeof(struct ip_vs_iter_state));
+#ifdef CONFIG_PROC_FS
+ if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net,
+ &ip_vs_conn_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn;
+
+ if (!proc_create_net("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state)))
+ goto err_conn_sync;
+#endif
+
return 0;
+
+#ifdef CONFIG_PROC_FS
+err_conn_sync:
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+err_conn:
+ return -ENOMEM;
+#endif
}
void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
{
/* flush all the connection entries first */
ip_vs_conn_flush(ipvs);
+#ifdef CONFIG_PROC_FS
remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
+#endif
}
int __init ip_vs_conn_init(void)
@@ -1399,6 +1415,10 @@ int __init ip_vs_conn_init(void)
int idx;
/* Compute size and mask */
+ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
+ pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
+ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
+ }
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
@@ -1422,7 +1442,7 @@ int __init ip_vs_conn_init(void)
pr_info("Connection hash table configured "
"(size=%d, memory=%ldKbytes)\n",
ip_vs_conn_tab_size,
- (long)(ip_vs_conn_tab_size*sizeof(struct list_head))/1024);
+ (long)(ip_vs_conn_tab_size*sizeof(*ip_vs_conn_tab))/1024);
IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n",
sizeof(struct ip_vs_conn));
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d5e4329579e2..f20b08db9fe9 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -725,12 +725,12 @@ static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
struct dst_entry *dst = skb_dst(skb);
if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
- ip6_route_me_harder(ipvs->net, skb) != 0)
+ ip6_route_me_harder(ipvs->net, skb->sk, skb) != 0)
return 1;
} else
#endif
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
- ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
+ ip_route_me_harder(ipvs->net, skb->sk, skb, RTN_LOCAL) != 0)
return 1;
return 0;
@@ -1850,7 +1850,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, pkts;
- int conn_reuse_mode;
struct sock *sk;
/* Already marked as IPVS request or reply? */
@@ -1926,15 +1925,16 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
*/
cp = pp->conn_in_get(ipvs, af, skb, &iph);
- conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
- if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) {
+ int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
bool old_ct = false, resched = false;
if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest &&
unlikely(!atomic_read(&cp->dest->weight))) {
resched = true;
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
- } else if (is_new_conn_expected(cp, conn_reuse_mode)) {
+ } else if (conn_reuse_mode &&
+ is_new_conn_expected(cp, conn_reuse_mode)) {
old_ct = ip_vs_conn_uses_old_conntrack(cp, skb);
if (!atomic_read(&cp->n_control)) {
resched = true;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c339b5e386b7..0909f32eabfd 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1269,7 +1269,7 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
svc->port = u->port;
svc->fwmark = u->fwmark;
- svc->flags = u->flags;
+ svc->flags = u->flags & ~IP_VS_SVC_F_HASHED;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
svc->ipvs = ipvs;
@@ -1656,6 +1656,7 @@ static int ip_vs_zero_all(struct netns_ipvs *ipvs)
#ifdef CONFIG_SYSCTL
static int zero;
+static int one = 1;
static int three = 3;
static int
@@ -1667,12 +1668,18 @@ proc_do_defense_mode(struct ctl_table *table, int write,
int val = *valp;
int rc;
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write && (*valp != val)) {
- if ((*valp < 0) || (*valp > 3)) {
- /* Restore the correct value */
- *valp = val;
+ if (val < 0 || val > 3) {
+ rc = -EINVAL;
} else {
+ *valp = val;
update_defense_level(ipvs);
}
}
@@ -1683,37 +1690,27 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val[2];
int rc;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = table->maxlen,
+ .mode = table->mode,
+ };
- /* backup the value first */
+ mutex_lock(&ipvs->sync_mutex);
memcpy(val, valp, sizeof(val));
-
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
- if (write && (valp[0] < 0 || valp[1] < 0 ||
- (valp[0] >= valp[1] && valp[1]))) {
- /* Restore the correct value */
- memcpy(valp, val, sizeof(val));
- }
- return rc;
-}
-
-static int
-proc_do_sync_mode(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- int *valp = table->data;
- int val = *valp;
- int rc;
-
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
- if (write && (*valp != val)) {
- if ((*valp < 0) || (*valp > 1)) {
- /* Restore the correct value */
- *valp = val;
- }
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+ if (write) {
+ if (val[0] < 0 || val[1] < 0 ||
+ (val[0] >= val[1] && val[1]))
+ rc = -EINVAL;
+ else
+ memcpy(valp, val, sizeof(val));
}
+ mutex_unlock(&ipvs->sync_mutex);
return rc;
}
@@ -1725,12 +1722,18 @@ proc_do_sync_ports(struct ctl_table *table, int write,
int val = *valp;
int rc;
- rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(int),
+ .mode = table->mode,
+ };
+
+ rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write && (*valp != val)) {
- if (*valp < 1 || !is_power_of_2(*valp)) {
- /* Restore the correct value */
+ if (val < 1 || !is_power_of_2(val))
+ rc = -EINVAL;
+ else
*valp = val;
- }
}
return rc;
}
@@ -1790,7 +1793,9 @@ static struct ctl_table vs_vars[] = {
.procname = "sync_version",
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_do_sync_mode,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
},
{
.procname = "sync_ports",
@@ -2393,6 +2398,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
/* Set timeout values for (tcp tcpfin udp) */
ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
+ } else if (!len) {
+ /* No more commands with len == 0 below */
+ ret = -EINVAL;
+ goto out_unlock;
}
usvc_compat = (struct ip_vs_service_user *)arg;
@@ -2469,9 +2478,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
break;
case IP_VS_SO_SET_DELDEST:
ret = ip_vs_del_dest(svc, &udest);
- break;
- default:
- ret = -EINVAL;
}
out_unlock:
@@ -3941,6 +3947,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx].extra2 = ipvs;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
@@ -3954,6 +3961,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Global sysctls must be ro in non-init netns */
+ if (!net_eq(net, &init_net))
+ tbl[idx++].mode = 0444;
+#endif
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f6af13c16cf5..c133ce825c2d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1444,7 +1444,7 @@ static int bind_mcastif_addr(struct socket *sock, struct net_device *dev)
sin.sin_addr.s_addr = addr;
sin.sin_port = 0;
- return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin));
+ return kernel_bind(sock, (struct sockaddr *)&sin, sizeof(sin));
}
static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
@@ -1510,8 +1510,8 @@ static int make_send_sock(struct netns_ipvs *ipvs, int id,
}
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->mcfg, id);
- result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
- salen, 0);
+ result = kernel_connect(sock, (struct sockaddr *)&mcast_addr,
+ salen, 0);
if (result < 0) {
pr_err("Error connecting to the multicast addr\n");
goto error;
@@ -1551,7 +1551,7 @@ static int make_receive_sock(struct netns_ipvs *ipvs, int id,
get_mcast_sockaddr(&mcast_addr, &salen, &ipvs->bcfg, id);
sock->sk->sk_bound_dev_if = dev->ifindex;
- result = sock->ops->bind(sock, (struct sockaddr *)&mcast_addr, salen);
+ result = kernel_bind(sock, (struct sockaddr *)&mcast_addr, salen);
if (result < 0) {
pr_err("Error binding to the multicast addr\n");
goto error;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 3f75cd947045..e47d1a29c140 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -272,7 +272,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED,
ICMPV6_EXC_HOPLIMIT, 0);
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
return false;
}
@@ -287,7 +287,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
{
if (ip_hdr(skb)->ttl <= 1) {
/* Tell the sender its packet died... */
- __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
return false;
}
@@ -586,6 +586,8 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
if (ret == NF_ACCEPT) {
nf_reset(skb);
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
}
return ret;
}
@@ -626,6 +628,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
@@ -646,6 +650,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (!local) {
ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
+ if (skb->dev)
+ skb->tstamp = 0;
NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
NULL, skb_dst(skb)->dev, dst_output);
} else
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ad1da6b2fb60..a38caf317dbb 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -70,10 +70,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash);
struct conntrack_gc_work {
struct delayed_work dwork;
- u32 last_bucket;
+ u32 next_bucket;
bool exiting;
bool early_drop;
- long next_gc_run;
};
static __read_mostly struct kmem_cache *nf_conntrack_cachep;
@@ -81,12 +80,8 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
-/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
-#define GC_MAX_BUCKETS_DIV 128u
-/* upper bound of full table scan */
-#define GC_MAX_SCAN_JIFFIES (16u * HZ)
-/* desired ratio of entries found to be expired */
-#define GC_EVICT_RATIO 50u
+#define GC_SCAN_INTERVAL (120u * HZ)
+#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
static struct conntrack_gc_work conntrack_gc_work;
@@ -630,8 +625,13 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
return false;
tstamp = nf_conn_tstamp_find(ct);
- if (tstamp && tstamp->stop == 0)
+ if (tstamp) {
+ s32 timeout = ct->timeout - nfct_time_stamp;
+
tstamp->stop = ktime_get_real_ns();
+ if (timeout < 0)
+ tstamp->stop -= jiffies_to_nsecs(-timeout);
+ }
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
portid, report) < 0) {
@@ -1063,7 +1063,8 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
* Let nf_ct_resolve_clash() deal with this later.
*/
if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple))
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL))
continue;
NF_CT_STAT_INC_ATOMIC(net, found);
@@ -1192,17 +1193,13 @@ static void nf_ct_offload_timeout(struct nf_conn *ct)
static void gc_worker(struct work_struct *work)
{
- unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
- unsigned int i, goal, buckets = 0, expired_count = 0;
- unsigned int nf_conntrack_max95 = 0;
+ unsigned long end_time = jiffies + GC_SCAN_MAX_DURATION;
+ unsigned int i, hashsz, nf_conntrack_max95 = 0;
+ unsigned long next_run = GC_SCAN_INTERVAL;
struct conntrack_gc_work *gc_work;
- unsigned int ratio, scanned = 0;
- unsigned long next_run;
-
gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
- goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
- i = gc_work->last_bucket;
+ i = gc_work->next_bucket;
if (gc_work->early_drop)
nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;
@@ -1210,22 +1207,21 @@ static void gc_worker(struct work_struct *work)
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_head *ct_hash;
struct hlist_nulls_node *n;
- unsigned int hashsz;
struct nf_conn *tmp;
- i++;
rcu_read_lock();
nf_conntrack_get_ht(&ct_hash, &hashsz);
- if (i >= hashsz)
- i = 0;
+ if (i >= hashsz) {
+ rcu_read_unlock();
+ break;
+ }
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
struct net *net;
tmp = nf_ct_tuplehash_to_ctrack(h);
- scanned++;
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
continue;
@@ -1233,7 +1229,6 @@ static void gc_worker(struct work_struct *work)
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
- expired_count++;
continue;
}
@@ -1265,7 +1260,14 @@ static void gc_worker(struct work_struct *work)
*/
rcu_read_unlock();
cond_resched();
- } while (++buckets < goal);
+ i++;
+
+ if (time_after(jiffies, end_time) && i < hashsz) {
+ gc_work->next_bucket = i;
+ next_run = 0;
+ break;
+ }
+ } while (i < hashsz);
if (gc_work->exiting)
return;
@@ -1276,40 +1278,17 @@ static void gc_worker(struct work_struct *work)
*
* This worker is only here to reap expired entries when system went
* idle after a busy period.
- *
- * The heuristics below are supposed to balance conflicting goals:
- *
- * 1. Minimize time until we notice a stale entry
- * 2. Maximize scan intervals to not waste cycles
- *
- * Normally, expire ratio will be close to 0.
- *
- * As soon as a sizeable fraction of the entries have expired
- * increase scan frequency.
*/
- ratio = scanned ? expired_count * 100 / scanned : 0;
- if (ratio > GC_EVICT_RATIO) {
- gc_work->next_gc_run = min_interval;
- } else {
- unsigned int max = GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV;
-
- BUILD_BUG_ON((GC_MAX_SCAN_JIFFIES / GC_MAX_BUCKETS_DIV) == 0);
-
- gc_work->next_gc_run += min_interval;
- if (gc_work->next_gc_run > max)
- gc_work->next_gc_run = max;
+ if (next_run) {
+ gc_work->early_drop = false;
+ gc_work->next_bucket = 0;
}
-
- next_run = gc_work->next_gc_run;
- gc_work->last_bucket = i;
- gc_work->early_drop = false;
queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
- gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index efc14c7b4f8e..c2fece0593ea 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -383,7 +383,7 @@ static int help(struct sk_buff *skb,
int ret;
u32 seq;
int dir = CTINFO2DIR(ctinfo);
- unsigned int uninitialized_var(matchlen), uninitialized_var(matchoff);
+ unsigned int matchlen, matchoff;
struct nf_ct_ftp_master *ct_ftp_info = nfct_help_data(ct);
struct nf_conntrack_expect *exp;
union nf_inet_addr *daddr;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index e24b762ffa1d..06c70d4584cf 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -400,6 +400,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+ if (!nf_ct_helper_hash)
+ return -ENOENT;
+
if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
return -EINVAL;
@@ -570,4 +573,5 @@ void nf_conntrack_helper_fini(void)
{
nf_ct_extend_unregister(&helper_extend);
kvfree(nf_ct_helper_hash);
+ nf_ct_helper_hash = NULL;
}
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 4099f4d79bae..23ead02c6aa5 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -150,15 +150,37 @@ static int help(struct sk_buff *skb, unsigned int protoff,
data = ib_ptr;
data_limit = ib_ptr + skb->len - dataoff;
- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
- while (data < data_limit - (19 + MINMATCHLEN)) {
- if (memcmp(data, "\1DCC ", 5)) {
+ /* Skip any whitespace */
+ while (data < data_limit - 10) {
+ if (*data == ' ' || *data == '\r' || *data == '\n')
+ data++;
+ else
+ break;
+ }
+
+ /* strlen("PRIVMSG x ")=10 */
+ if (data < data_limit - 10) {
+ if (strncasecmp("PRIVMSG ", data, 8))
+ goto out;
+ data += 8;
+ }
+
+ /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26
+ * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26
+ */
+ while (data < data_limit - (21 + MINMATCHLEN)) {
+ /* Find first " :", the start of message */
+ if (memcmp(data, " :", 2)) {
data++;
continue;
}
+ data += 2;
+
+ /* then check that place only for the DCC command */
+ if (memcmp(data, "\1DCC ", 5))
+ goto out;
data += 5;
- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
+ /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */
iph = ip_hdr(skb);
pr_debug("DCC found in master %pI4:%u %pI4:%u\n",
@@ -174,7 +196,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
pr_debug("DCC %s detected\n", dccprotos[i]);
/* we have at least
- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
+ * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid
* data left (== 14/13 bytes) */
if (parse_dcc(data, data_limit, &dcc_ip,
&dcc_port, &addr_beg_p, &addr_end_p)) {
@@ -187,8 +209,9 @@ static int help(struct sk_buff *skb, unsigned int protoff,
/* dcc_ip can be the internal OR external (NAT'ed) IP */
tuple = &ct->tuplehash[dir].tuple;
- if (tuple->src.u3.ip != dcc_ip &&
- tuple->dst.u3.ip != dcc_ip) {
+ if ((tuple->src.u3.ip != dcc_ip &&
+ ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) ||
+ dcc_port == 0) {
net_warn_ratelimited("Forged DCC command from %pI4: %pI4:%u\n",
&tuple->src.u3.ip,
&dcc_ip, dcc_port);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0b89609a6e9d..83e8566ec3f0 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -213,6 +213,7 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
if (!help)
return 0;
+ rcu_read_lock();
helper = rcu_dereference(help->helper);
if (!helper)
goto out;
@@ -228,9 +229,11 @@ static int ctnetlink_dump_helpinfo(struct sk_buff *skb,
nla_nest_end(skb, nest_helper);
out:
+ rcu_read_unlock();
return 0;
nla_put_failure:
+ rcu_read_unlock();
return -1;
}
@@ -514,20 +517,15 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
{
const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, nf_ct_l3num(ct),
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = nf_ct_l3num(ct);
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -684,7 +682,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
const struct nf_conntrack_zone *zone;
struct net *net;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
struct nf_conn *ct = item->ct;
struct sk_buff *skb;
@@ -714,15 +711,11 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto errout;
type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, type);
- nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, item->portid, 0, type, flags, nf_ct_l3num(ct),
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = nf_ct_l3num(ct);
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
zone = nf_ct_zone(ct);
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
@@ -1213,9 +1206,6 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
{
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
- return 0;
-
return ctnetlink_filter_match(ct, data);
}
@@ -1277,11 +1267,6 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl,
ct = nf_ct_tuplehash_to_ctrack(h);
- if (test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
- nf_ct_put(ct);
- return -EBUSY;
- }
-
if (cda[CTA_ID]) {
__be32 id = nla_get_be32(cda[CTA_ID]);
@@ -2053,12 +2038,15 @@ ctnetlink_create_conntrack(struct net *net,
err = nf_conntrack_hash_check_insert(ct);
if (err < 0)
- goto err2;
+ goto err3;
rcu_read_unlock();
return ct;
+err3:
+ if (ct->master)
+ nf_ct_put(ct->master);
err2:
rcu_read_unlock();
err1:
@@ -2172,20 +2160,15 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
__u16 cpu, const struct ip_conntrack_stat *st)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
IPCTNL_MSG_CT_GET_STATS_CPU);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, htons(cpu));
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(cpu);
-
if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
@@ -2256,20 +2239,15 @@ ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct net *net)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
unsigned int nr_conntracks = atomic_read(&net->ct.count);
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_GET_STATS);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_be32(skb, CTA_STATS_GLOBAL_ENTRIES, htonl(nr_conntracks)))
goto nla_put_failure;
@@ -2656,6 +2634,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
memset(&m, 0xFF, sizeof(m));
memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
m.src.u.all = mask->src.u.all;
+ m.src.l3num = tuple->src.l3num;
m.dst.protonum = tuple->dst.protonum;
nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK | NLA_F_NESTED);
@@ -2682,7 +2661,9 @@ nla_put_failure:
return -1;
}
+#if IS_ENABLED(CONFIG_NF_NAT)
static const union nf_inet_addr any_addr;
+#endif
static __be32 nf_expect_get_id(const struct nf_conntrack_expect *exp)
{
@@ -2779,19 +2760,14 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
int event, const struct nf_conntrack_expect *exp)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags,
+ exp->tuple.src.l3num, NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = exp->tuple.src.l3num;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
@@ -2811,7 +2787,6 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
struct nf_conntrack_expect *exp = item->exp;
struct net *net = nf_ct_exp_net(exp);
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct sk_buff *skb;
unsigned int type, group;
int flags = 0;
@@ -2834,15 +2809,11 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
goto errout;
type = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_EXP, type);
- nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, item->portid, 0, type, flags,
+ exp->tuple.src.l3num, NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = exp->tuple.src.l3num;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
@@ -3182,10 +3153,12 @@ ctnetlink_change_expect(struct nf_conntrack_expect *x,
return 0;
}
+#if IS_ENABLED(CONFIG_NF_NAT)
static const struct nla_policy exp_nat_nla_policy[CTA_EXPECT_NAT_MAX+1] = {
[CTA_EXPECT_NAT_DIR] = { .type = NLA_U32 },
[CTA_EXPECT_NAT_TUPLE] = { .type = NLA_NESTED },
};
+#endif
static int
ctnetlink_parse_expect_nat(const struct nlattr *attr,
@@ -3410,20 +3383,15 @@ ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu,
const struct ip_conntrack_stat *st)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0, event;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK,
IPCTNL_MSG_EXP_GET_STATS_CPU);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, htons(cpu));
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(cpu);
-
if (nla_put_be32(skb, CTA_STATS_EXP_NEW, htonl(st->expect_new)) ||
nla_put_be32(skb, CTA_STATS_EXP_CREATE, htonl(st->expect_create)) ||
nla_put_be32(skb, CTA_STATS_EXP_DELETE, htonl(st->expect_delete)))
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index a937d4f75613..8453e92936ac 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -58,8 +58,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
[SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
- [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
- [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
[SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
@@ -119,7 +119,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
-/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
+/* init */ {sCW, sCW, sCW, sCE, sES, sCL, sCL, sSA, sCW, sHA},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
@@ -317,22 +317,29 @@ static int sctp_packet(struct nf_conn *ct,
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
+ /* (A) vtag MUST be zero */
if (sh->vtag != 0)
goto out_unlock;
} else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir])
+ /* (B) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir] &&
- sch->flags & SCTP_CHUNK_FLAG_T)
+ /* (C) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
+ /* (D) vtag must be same as init_vtag as found in INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
@@ -394,6 +401,15 @@ static int sctp_packet(struct nf_conn *ct,
pr_debug("Setting vtag %x for dir %d\n",
ih->init_tag, !dir);
ct->proto.sctp.vtag[!dir] = ih->init_tag;
+
+ /* don't renew timeout on init retransmit so
+ * port reuse by client or NAT middlebox cannot
+ * keep entry alive indefinitely (incl. nat info).
+ */
+ if (new_state == SCTP_CONNTRACK_CLOSED &&
+ old_state == SCTP_CONNTRACK_CLOSED &&
+ nf_ct_is_confirmed(ct))
+ ignore = true;
}
ct->proto.sctp.state = new_state;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 7011ab27c437..955b73a9a05e 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -362,8 +362,8 @@ static void tcp_options(const struct sk_buff *skb,
length, buff);
BUG_ON(ptr == NULL);
- state->td_scale =
- state->flags = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
while (length > 0) {
int opcode=*ptr++;
@@ -549,13 +549,20 @@ static bool tcp_in_window(const struct nf_conn *ct,
swin = win << sender->td_scale;
sender->td_maxwin = (swin == 0 ? 1 : swin);
sender->td_maxend = end + sender->td_maxwin;
- /*
- * We haven't seen traffic in the other direction yet
- * but we have to tweak window tracking to pass III
- * and IV until that happens.
- */
- if (receiver->td_maxwin == 0)
+ if (receiver->td_maxwin == 0) {
+ /* We haven't seen traffic in the other
+ * direction yet but we have to tweak window
+ * tracking to pass III and IV until that
+ * happens.
+ */
receiver->td_end = receiver->td_maxend = sack;
+ } else if (sack == receiver->td_end + 1) {
+ /* Likely a reply to a keepalive.
+ * Needed for III.
+ */
+ receiver->td_end++;
+ }
+
}
} else if (((state->state == TCP_CONNTRACK_SYN_SENT
&& dir == IP_CT_DIR_ORIGINAL)
@@ -777,6 +784,16 @@ static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
test_bit(IPS_ASSURED_BIT, &ct->status);
}
+static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
+{
+ state->td_end = 0;
+ state->td_maxend = 0;
+ state->td_maxwin = 0;
+ state->td_maxack = 0;
+ state->td_scale = 0;
+ state->flags &= IP_CT_TCP_FLAG_BE_LIBERAL;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
static int tcp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
@@ -875,8 +892,7 @@ static int tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
ct->proto.tcp.last_flags;
- memset(&ct->proto.tcp.seen[dir], 0,
- sizeof(struct ip_ct_tcp_state));
+ nf_ct_tcp_state_reset(&ct->proto.tcp.seen[dir]);
break;
}
ct->proto.tcp.last_index = index;
@@ -1078,6 +1094,16 @@ static int tcp_packet(struct nf_conn *ct,
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_ACCEPT;
}
+
+ if (index == TCP_SYN_SET && old_state == TCP_CONNTRACK_SYN_SENT) {
+ /* do not renew timeout on SYN retransmit.
+ *
+ * Else port reuse by client or NAT middlebox can keep
+ * entry alive indefinitely (including nat info).
+ */
+ return NF_ACCEPT;
+ }
+
/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
* pickup with loose=1. Avoid large ESTABLISHED timeout.
*/
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c8d2b6688a2a..d16aa43ebd4d 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -471,7 +471,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
return ret;
if (ret == 0)
break;
- dataoff += *matchoff;
+ dataoff = *matchoff;
}
*in_header = 0;
}
@@ -483,7 +483,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr,
break;
if (ret == 0)
return ret;
- dataoff += *matchoff;
+ dataoff = *matchoff;
}
if (in_header)
@@ -605,7 +605,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
start += strlen(name);
*val = simple_strtoul(start, &end, 0);
if (start == end)
- return 0;
+ return -1;
if (matchoff && matchlen) {
*matchoff = start - dptr;
*matchlen = end - start;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 4c94f3ba2ae4..da0c9fa381d2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -262,6 +262,7 @@ static const char* l4proto_name(u16 proto)
case IPPROTO_GRE: return "gre";
case IPPROTO_SCTP: return "sctp";
case IPPROTO_UDPLITE: return "udplite";
+ case IPPROTO_ICMPV6: return "icmpv6";
}
return "unknown";
@@ -500,6 +501,9 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
{
int ret;
+ /* module_param hashsize could have changed value */
+ nf_conntrack_htable_size_user = nf_conntrack_htable_size;
+
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (ret < 0 || !write)
return ret;
@@ -590,8 +594,11 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
- if (!net_eq(&init_net, net))
+ if (!net_eq(&init_net, net)) {
+ table[0].mode = 0444;
table[2].mode = 0444;
+ table[5].mode = 0444;
+ }
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.sysctl_header)
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index f4a566e67213..98d117f3340c 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -21,6 +21,7 @@ static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
skb_push(skb, skb->mac_len);
skb->dev = dev;
+ skb->tstamp = 0;
dev_queue_xmit(skb);
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 890799c16aa4..b3957fe7eced 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -360,7 +360,7 @@ static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
return -1;
tcph = (void *)(skb_network_header(skb) + thoff);
- inet_proto_csum_replace2(&tcph->check, skb, port, new_port, true);
+ inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
return 0;
}
@@ -377,7 +377,7 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
udph = (void *)(skb_network_header(skb) + thoff);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace2(&udph->check, skb, port,
- new_port, true);
+ new_port, false);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a8c5c846aec1..b164a0e1e053 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -176,6 +176,18 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
}
EXPORT_SYMBOL_GPL(nf_log_dump_packet_common);
+void nf_log_dump_vlan(struct nf_log_buf *m, const struct sk_buff *skb)
+{
+ u16 vid;
+
+ if (!skb_vlan_tag_present(skb))
+ return;
+
+ vid = skb_vlan_tag_get(skb);
+ nf_log_buf_add(m, "VPROTO=%04x VID=%u ", ntohs(skb->vlan_proto), vid);
+}
+EXPORT_SYMBOL_GPL(nf_log_dump_vlan);
+
/* bridge and netdev logging families share this code. */
void nf_log_l2packet(struct net *net, u_int8_t pf,
__be16 protocol,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2268b10a9dcf..c31df6a76504 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1068,6 +1068,7 @@ static int __init nf_nat_init(void)
ret = register_pernet_subsys(&nat_net_ops);
if (ret < 0) {
nf_ct_extend_unregister(&nat_extend);
+ kvfree(nf_nat_bysource);
return ret;
}
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
index 5d849d835561..234f535d350e 100644
--- a/net/netfilter/nf_nat_proto_common.c
+++ b/net/netfilter/nf_nat_proto_common.c
@@ -38,12 +38,12 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range,
enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct,
- u16 *rover)
+ const struct nf_conn *ct)
{
- unsigned int range_size, min, max, i;
+ unsigned int range_size, min, max, i, attempts;
__be16 *portptr;
- u_int16_t off;
+ u16 off;
+ static const unsigned int max_attempts = 128;
if (maniptype == NF_NAT_MANIP_SRC)
portptr = &tuple->src.u.all;
@@ -86,18 +86,31 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
} else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
off = (ntohs(*portptr) - ntohs(range->base_proto.all));
} else {
- off = *rover;
+ off = prandom_u32();
}
- for (i = 0; ; ++off) {
+ attempts = range_size;
+ if (attempts > max_attempts)
+ attempts = max_attempts;
+
+ /* We are in softirq; doing a search of the entire range risks
+ * soft lockup when all tuples are already used.
+ *
+ * If we can't find any free port from first offset, pick a new
+ * one and try again, with ever smaller search window.
+ */
+another_round:
+ for (i = 0; i < attempts; i++, off++) {
*portptr = htons(min + off % range_size);
- if (++i != range_size && nf_nat_used_tuple(tuple, ct))
- continue;
- if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
- NF_NAT_RANGE_PROTO_OFFSET)))
- *rover = off;
- return;
+ if (!nf_nat_used_tuple(tuple, ct))
+ return;
}
+
+ if (attempts >= range_size || attempts < 16)
+ return;
+ attempts /= 2;
+ off = prandom_u32();
+ goto another_round;
}
EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index 67ea0d83aa5a..7d4d2c124990 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -18,8 +18,6 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
-static u_int16_t dccp_port_rover;
-
static void
dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
@@ -27,8 +25,7 @@ dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &dccp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
}
static bool
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
index 1c5d9b65fbba..f05ad8fa7b20 100644
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ b/net/netfilter/nf_nat_proto_sctp.c
@@ -12,8 +12,6 @@
#include <net/netfilter/nf_nat_l4proto.h>
-static u_int16_t nf_sctp_port_rover;
-
static void
sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
@@ -21,8 +19,7 @@ sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &nf_sctp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
}
static bool
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index f15fcd475f98..c312e6b3e2ea 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -18,8 +18,6 @@
#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
-static u16 tcp_port_rover;
-
static void
tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
@@ -27,8 +25,7 @@ tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &tcp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
}
static bool
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index d85c31c2433c..357539d15849 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -17,8 +17,6 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/netfilter/nf_nat_l4proto.h>
-static u16 udp_port_rover;
-
static void
udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
struct nf_conntrack_tuple *tuple,
@@ -26,8 +24,7 @@ udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &udp_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
}
static void
@@ -78,8 +75,6 @@ static bool udp_manip_pkt(struct sk_buff *skb,
}
#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
-static u16 udplite_port_rover;
-
static bool udplite_manip_pkt(struct sk_buff *skb,
const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
@@ -103,8 +98,7 @@ udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &udplite_port_rover);
+ nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct);
}
const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index ee6d98355081..b3a0385290a1 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -46,6 +46,15 @@ void nf_unregister_queue_handler(struct net *net)
}
EXPORT_SYMBOL(nf_unregister_queue_handler);
+static void nf_queue_sock_put(struct sock *sk)
+{
+#ifdef CONFIG_INET
+ sock_gen_put(sk);
+#else
+ sock_put(sk);
+#endif
+}
+
void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
@@ -56,7 +65,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
if (state->out)
dev_put(state->out);
if (state->sk)
- sock_put(state->sk);
+ nf_queue_sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct net_device *physdev;
@@ -73,16 +82,17 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
-void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
+ if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
+ return false;
+
if (state->in)
dev_hold(state->in);
if (state->out)
dev_hold(state->out);
- if (state->sk)
- sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
struct net_device *physdev;
@@ -95,6 +105,7 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(physdev);
}
#endif
+ return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -186,7 +197,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
.size = sizeof(*entry) + route_key_size,
};
- nf_queue_entry_get_refs(entry);
+ if (!nf_queue_entry_get_refs(entry)) {
+ kfree(entry);
+ return -ENOTCONN;
+ }
switch (entry->state.pf) {
case AF_INET:
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 353a2aa80c3c..04b07b63c540 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -34,6 +34,9 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
int length = (th->doff * 4) - sizeof(*th);
u8 buf[40], *ptr;
+ if (unlikely(length < 0))
+ return false;
+
ptr = skb_header_pointer(skb, doff + sizeof(*th), length, buf);
if (ptr == NULL)
return false;
@@ -50,6 +53,8 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff,
length--;
continue;
default:
+ if (length < 2)
+ return true;
opsize = *ptr++;
if (opsize < 2)
return true;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5b4632826dc6..e0c224dea316 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -22,10 +22,13 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
+unsigned int nf_tables_net_id __read_mostly;
+
static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
@@ -53,7 +56,9 @@ static const struct rhashtable_params nft_chain_ht_params = {
static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{
- switch (net->nft.validate_state) {
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+ switch (nft_net->validate_state) {
case NFT_VALIDATE_SKIP:
WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO);
break;
@@ -64,7 +69,7 @@ static void nft_validate_state_update(struct net *net, u8 new_validate_state)
return;
}
- net->nft.validate_state = new_validate_state;
+ nft_net->validate_state = new_validate_state;
}
static void nft_ctx_init(struct nft_ctx *ctx,
@@ -96,6 +101,8 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
if (trans == NULL)
return NULL;
+ INIT_LIST_HEAD(&trans->list);
+ INIT_LIST_HEAD(&trans->binding_list);
trans->msg_type = msg_type;
trans->ctx = *ctx;
@@ -108,34 +115,68 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
-static void nft_trans_destroy(struct nft_trans *trans)
+static void nft_trans_list_del(struct nft_trans *trans)
{
list_del(&trans->list);
+ list_del(&trans->binding_list);
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ nft_trans_list_del(trans);
kfree(trans);
}
-static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set,
+ bool bind)
{
+ struct nftables_pernet *nft_net;
struct net *net = ctx->net;
struct nft_trans *trans;
if (!nft_set_is_anonymous(set))
return;
- list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
+ nft_net = net_generic(net, nf_tables_net_id);
+ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (nft_trans_set(trans) == set)
- nft_trans_set_bound(trans) = true;
+ nft_trans_set_bound(trans) = bind;
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans) == set)
- nft_trans_elem_set_bound(trans) = true;
+ nft_trans_elem_set_bound(trans) = bind;
break;
}
}
}
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, true);
+}
+
+static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, false);
+}
+
+static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
+{
+ struct nftables_pernet *nft_net;
+
+ nft_net = net_generic(net, nf_tables_net_id);
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (nft_set_is_anonymous(nft_trans_set(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ }
+
+ list_add_tail(&trans->list, &nft_net->commit_list);
+}
+
static int nf_tables_register_hook(struct net *net,
const struct nft_table *table,
struct nft_chain *chain)
@@ -186,7 +227,7 @@ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
if (msg_type == NFT_MSG_NEWTABLE)
nft_activate_next(ctx->net, ctx->table);
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
@@ -213,7 +254,7 @@ static int nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
if (msg_type == NFT_MSG_NEWCHAIN)
nft_activate_next(ctx->net, ctx->chain);
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
@@ -225,7 +266,7 @@ static int nft_delchain(struct nft_ctx *ctx)
if (err < 0)
return err;
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nft_deactivate_next(ctx->net, ctx->chain);
return err;
@@ -266,7 +307,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
/* You cannot delete the same rule twice */
if (nft_is_active_next(ctx->net, rule)) {
nft_deactivate_next(ctx->net, rule);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
return 0;
}
return -ENOENT;
@@ -286,7 +327,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID]));
}
nft_trans_rule(trans) = rule;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return trans;
}
@@ -341,7 +382,7 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
@@ -355,7 +396,7 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
return err;
nft_deactivate_next(ctx->net, set);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -373,7 +414,7 @@ static int nft_trans_obj_add(struct nft_ctx *ctx, int msg_type,
nft_activate_next(ctx->net, obj);
nft_trans_obj(trans) = obj;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
@@ -387,7 +428,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
nft_deactivate_next(ctx->net, obj);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -406,7 +447,7 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
nft_activate_next(ctx->net, flowtable);
nft_trans_flowtable(trans) = flowtable;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
@@ -421,7 +462,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
return err;
nft_deactivate_next(ctx->net, flowtable);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -434,12 +475,14 @@ static struct nft_table *nft_table_lookup(const struct net *net,
const struct nlattr *nla,
u8 family, u8 genmask)
{
+ struct nftables_pernet *nft_net;
struct nft_table *table;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ nft_net = net_generic(net, nf_tables_net_id);
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (!nla_strcmp(nla, table->name) &&
table->family == family &&
nft_active_genmask(table, genmask))
@@ -453,9 +496,11 @@ static struct nft_table *nft_table_lookup_byhandle(const struct net *net,
const struct nlattr *nla,
u8 genmask)
{
+ struct nftables_pernet *nft_net;
struct nft_table *table;
- list_for_each_entry(table, &net->nft.tables, list) {
+ nft_net = net_generic(net, nf_tables_net_id);
+ list_for_each_entry(table, &nft_net->tables, list) {
if (be64_to_cpu(nla_get_be64(nla)) == table->handle &&
nft_active_genmask(table, genmask))
return table;
@@ -508,11 +553,13 @@ __nf_tables_chain_type_lookup(const struct nlattr *nla, u8 family)
static void nft_request_module(struct net *net, const char *fmt, ...)
{
char module_name[MODULE_NAME_LEN];
+ struct nftables_pernet *nft_net;
LIST_HEAD(commit_list);
va_list args;
int ret;
- list_splice_init(&net->nft.commit_list, &commit_list);
+ nft_net = net_generic(net, nf_tables_net_id);
+ list_splice_init(&nft_net->commit_list, &commit_list);
va_start(args, fmt);
ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args);
@@ -520,19 +567,20 @@ static void nft_request_module(struct net *net, const char *fmt, ...)
if (ret >= MODULE_NAME_LEN)
return;
- mutex_unlock(&net->nft.commit_mutex);
+ mutex_unlock(&nft_net->commit_mutex);
request_module("%s", module_name);
- mutex_lock(&net->nft.commit_mutex);
+ mutex_lock(&nft_net->commit_mutex);
- WARN_ON_ONCE(!list_empty(&net->nft.commit_list));
- list_splice(&commit_list, &net->nft.commit_list);
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+ list_splice(&commit_list, &nft_net->commit_list);
}
#endif
static void lockdep_nfnl_nft_mutex_not_held(void)
{
#ifdef CONFIG_PROVE_LOCKING
- WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+ if (debug_locks)
+ WARN_ON_ONCE(lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
#endif
}
@@ -559,6 +607,13 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
return ERR_PTR(-ENOENT);
}
+static __be16 nft_base_seq(const struct net *net)
+{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+ return htons(nft_net->base_seq & 0xffff);
+}
+
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
[NFTA_TABLE_NAME] = { .type = NLA_STRING,
.len = NFT_TABLE_MAXNAMELEN - 1 },
@@ -571,18 +626,13 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
int family, const struct nft_table *table)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_TABLE_NAME, table->name) ||
nla_put_be32(skb, NFTA_TABLE_FLAGS, htonl(table->flags)) ||
nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)) ||
@@ -629,15 +679,17 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ struct nftables_pernet *nft_net;
const struct nft_table *table;
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
rcu_read_lock();
- cb->seq = net->nft.base_seq;
+ nft_net = net_generic(net, nf_tables_net_id);
+ cb->seq = nft_net->base_seq;
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
@@ -741,7 +793,7 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt)
if (cnt && i++ == cnt)
break;
- nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+ nf_tables_unregister_hook(net, table, chain);
}
}
@@ -756,7 +808,7 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table)
if (!nft_is_base_chain(chain))
continue;
- err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err;
@@ -800,17 +852,18 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
nft_trans_table_enable(trans) = false;
} else if (!(flags & NFT_TABLE_F_DORMANT) &&
ctx->table->flags & NFT_TABLE_F_DORMANT) {
+ ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
ret = nf_tables_table_enable(ctx->net, ctx->table);
- if (ret >= 0) {
- ctx->table->flags &= ~NFT_TABLE_F_DORMANT;
+ if (ret >= 0)
nft_trans_table_enable(trans) = true;
- }
+ else
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
}
if (ret < 0)
goto err;
nft_trans_table_update(trans) = true;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err:
nft_trans_destroy(trans);
@@ -840,11 +893,36 @@ static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
return strcmp(chain->name, name);
}
+static bool nft_supported_family(u8 family)
+{
+ return false
+#ifdef CONFIG_NF_TABLES_INET
+ || family == NFPROTO_INET
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+ || family == NFPROTO_IPV4
+#endif
+#ifdef CONFIG_NF_TABLES_ARP
+ || family == NFPROTO_ARP
+#endif
+#ifdef CONFIG_NF_TABLES_NETDEV
+ || family == NFPROTO_NETDEV
+#endif
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
+ || family == NFPROTO_BRIDGE
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+ || family == NFPROTO_IPV6
+#endif
+ ;
+}
+
static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
@@ -854,7 +932,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct nft_ctx ctx;
int err;
- lockdep_assert_held(&net->nft.commit_mutex);
+ if (!nft_supported_family(family))
+ return -EOPNOTSUPP;
+
+ lockdep_assert_held(&nft_net->commit_mutex);
attr = nla[NFTA_TABLE_NAME];
table = nft_table_lookup(net, attr, family, genmask);
if (IS_ERR(table)) {
@@ -904,7 +985,7 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (err < 0)
goto err_trans;
- list_add_tail_rcu(&table->list, &net->nft.tables);
+ list_add_tail_rcu(&table->list, &nft_net->tables);
return 0;
err_trans:
rhltable_destroy(&table->chains_ht);
@@ -939,8 +1020,7 @@ static int nft_flush_table(struct nft_ctx *ctx)
if (!nft_is_active_next(ctx->net, set))
continue;
- if (nft_set_is_anonymous(set) &&
- !list_empty(&set->bindings))
+ if (nft_set_is_anonymous(set))
continue;
err = nft_delset(ctx, set);
@@ -984,11 +1064,12 @@ out:
static int nft_flush(struct nft_ctx *ctx, int family)
{
+ struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
struct nft_table *table, *nt;
const struct nlattr * const *nla = ctx->nla;
int err = 0;
- list_for_each_entry_safe(table, nt, &ctx->net->nft.tables, list) {
+ list_for_each_entry_safe(table, nt, &nft_net->tables, list) {
if (family != AF_UNSPEC && table->family != family)
continue;
@@ -1102,7 +1183,9 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
static bool lockdep_commit_lock_is_held(struct net *net)
{
#ifdef CONFIG_PROVE_LOCKING
- return lockdep_is_held(&net->nft.commit_mutex);
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+ return lockdep_is_held(&nft_net->commit_mutex);
#else
return true;
#endif
@@ -1205,18 +1288,13 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
const struct nft_chain *chain)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_CHAIN_TABLE, table->name))
goto nla_put_failure;
if (nla_put_be64(skb, NFTA_CHAIN_HANDLE, cpu_to_be64(chain->handle),
@@ -1304,11 +1382,13 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nftables_pernet *nft_net;
rcu_read_lock();
- cb->seq = net->nft.base_seq;
+ nft_net = net_generic(net, nf_tables_net_id);
+ cb->seq = nft_net->base_seq;
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
@@ -1501,12 +1581,13 @@ static int nft_chain_parse_hook(struct net *net,
struct nft_chain_hook *hook, u8 family,
bool autoload)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nlattr *ha[NFTA_HOOK_MAX + 1];
const struct nft_chain_type *type;
struct net_device *dev;
int err;
- lockdep_assert_held(&net->nft.commit_mutex);
+ lockdep_assert_held(&nft_net->commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK],
@@ -1605,9 +1686,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_rule **rules;
int err;
- if (table->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_chain_hook hook;
struct nf_hook_ops *ops;
@@ -1679,6 +1757,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err1;
+ if (!nft_use_inc(&table->use)) {
+ err = -EMFILE;
+ goto err_use;
+ }
+
err = rhltable_insert_key(&table->chains_ht, chain->name,
&chain->rhlhead, nft_chain_ht_params);
if (err)
@@ -1691,11 +1774,12 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err2;
}
- table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
return 0;
err2:
+ nft_use_dec_restore(&table->use);
+err_use:
nf_tables_unregister_hook(net, table, chain);
err1:
nf_tables_chain_destroy(ctx);
@@ -1775,6 +1859,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy)
if (nla[NFTA_CHAIN_HANDLE] &&
nla[NFTA_CHAIN_NAME]) {
+ struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
struct nft_trans *tmp;
char *name;
@@ -1784,7 +1869,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy)
goto err;
err = -EEXIST;
- list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) {
+ list_for_each_entry(tmp, &nft_net->commit_list, list) {
if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
tmp->ctx.table == table &&
nft_trans_chain_update(tmp) &&
@@ -1797,7 +1882,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy)
nft_trans_chain_name(trans) = name;
}
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err:
@@ -1811,6 +1896,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
int family = nfmsg->nfgen_family;
@@ -1821,7 +1907,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
struct nft_ctx ctx;
u64 handle = 0;
- lockdep_assert_held(&net->nft.commit_mutex);
+ lockdep_assert_held(&nft_net->commit_mutex);
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
if (IS_ERR(table)) {
@@ -2166,27 +2252,31 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
err = nf_tables_expr_parse(ctx, nla, &info);
if (err < 0)
- goto err1;
+ goto err_expr_parse;
+
+ err = -EOPNOTSUPP;
+ if (!(info.ops->type->flags & NFT_EXPR_STATEFUL))
+ goto err_expr_stateful;
err = -ENOMEM;
expr = kzalloc(info.ops->size, GFP_KERNEL);
if (expr == NULL)
- goto err2;
+ goto err_expr_stateful;
err = nf_tables_newexpr(ctx, &info, expr);
if (err < 0)
- goto err3;
+ goto err_expr_new;
return expr;
-err3:
+err_expr_new:
kfree(expr);
-err2:
+err_expr_stateful:
owner = info.ops->type->owner;
if (info.ops->type->release_ops)
info.ops->type->release_ops(info.ops);
module_put(owner);
-err1:
+err_expr_parse:
return ERR_PTR(err);
}
@@ -2245,21 +2335,16 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
const struct nft_rule *rule)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
const struct nft_expr *expr, *next;
struct nlattr *list;
const struct nft_rule *prule;
u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, type, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
+ nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_RULE_TABLE, table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_RULE_CHAIN, chain->name))
@@ -2345,11 +2430,13 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
unsigned int idx = 0, s_idx = cb->args[0];
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nftables_pernet *nft_net;
rcu_read_lock();
- cb->seq = net->nft.base_seq;
+ nft_net = net_generic(net, nf_tables_net_id);
+ cb->seq = nft_net->base_seq;
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
@@ -2502,7 +2589,6 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
{
struct nft_expr *expr, *next;
- lockdep_assert_held(&ctx->net->nft.commit_mutex);
/*
* Careful: some expressions might not be initialized in case this
* is called on error from nf_tables_newrule().
@@ -2568,6 +2654,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
err = nft_chain_validate(&ctx, chain);
if (err < 0)
return err;
+
+ cond_resched();
}
return 0;
@@ -2580,6 +2668,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_next(net);
struct nft_expr_info *info = NULL;
@@ -2596,7 +2685,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
int err, rem;
u64 handle, pos_handle;
- lockdep_assert_held(&net->nft.commit_mutex);
+ lockdep_assert_held(&nft_net->commit_mutex);
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
if (IS_ERR(table)) {
@@ -2632,9 +2721,6 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
return -EINVAL;
handle = nf_tables_alloc_handle(table);
- if (chain->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
@@ -2710,23 +2796,28 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
expr = nft_expr_next(expr);
}
+ if (!nft_use_inc(&chain->use)) {
+ err = -EMFILE;
+ goto err2;
+ }
+
if (nlh->nlmsg_flags & NLM_F_REPLACE) {
trans = nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule);
if (trans == NULL) {
err = -ENOMEM;
- goto err2;
+ goto err_destroy_flow_rule;
}
err = nft_delrule(&ctx, old_rule);
if (err < 0) {
nft_trans_destroy(trans);
- goto err2;
+ goto err_destroy_flow_rule;
}
list_add_tail_rcu(&rule->list, &old_rule->list);
} else {
if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) {
err = -ENOMEM;
- goto err2;
+ goto err_destroy_flow_rule;
}
if (nlh->nlmsg_flags & NLM_F_APPEND) {
@@ -2742,14 +2833,17 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
}
kvfree(info);
- chain->use++;
- if (net->nft.validate_state == NFT_VALIDATE_DO)
+ if (nft_net->validate_state == NFT_VALIDATE_DO)
return nft_table_validate(net, table);
return 0;
+
+err_destroy_flow_rule:
+ nft_use_dec_restore(&chain->use);
err2:
- nf_tables_rule_release(&ctx, rule);
+ nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR);
+ nf_tables_rule_destroy(&ctx, rule);
err1:
for (i = 0; i < n; i++) {
if (info[i].ops) {
@@ -2763,15 +2857,18 @@ err1:
}
static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
u32 id = ntohl(nla_get_be32(nla));
struct nft_trans *trans;
- list_for_each_entry(trans, &net->nft.commit_list, list) {
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
struct nft_rule *rule = nft_trans_rule(trans);
if (trans->msg_type == NFT_MSG_NEWRULE &&
+ trans->ctx.chain == chain &&
id == nft_trans_rule_id(trans))
return rule;
}
@@ -2818,7 +2915,7 @@ static int nf_tables_delrule(struct net *net, struct sock *nlsk,
err = nft_delrule(&ctx, rule);
} else if (nla[NFTA_RULE_ID]) {
- rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]);
+ rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]);
return PTR_ERR(rule);
@@ -2886,12 +2983,13 @@ nft_select_set_ops(const struct nft_ctx *ctx,
const struct nft_set_desc *desc,
enum nft_set_policies policy)
{
+ struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
const struct nft_set_ops *ops, *bops;
struct nft_set_estimate est, best;
const struct nft_set_type *type;
u32 flags = 0;
- lockdep_assert_held(&ctx->net->nft.commit_mutex);
+ lockdep_assert_held(&nft_net->commit_mutex);
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
if (list_empty(&nf_tables_set_types)) {
@@ -3034,16 +3132,19 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table,
}
static struct nft_set *nft_set_lookup_byid(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nft_trans *trans;
u32 id = ntohl(nla_get_be32(nla));
- list_for_each_entry(trans, &net->nft.commit_list, list) {
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
if (trans->msg_type == NFT_MSG_NEWSET) {
struct nft_set *set = nft_trans_set(trans);
if (id == nft_trans_set_id(trans) &&
+ set->table == table &&
nft_active_genmask(set, genmask))
return set;
}
@@ -3064,7 +3165,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
if (!nla_set_id)
return set;
- set = nft_set_lookup_byid(net, nla_set_id, genmask);
+ set = nft_set_lookup_byid(net, table, nla_set_id, genmask);
}
return set;
}
@@ -3090,7 +3191,7 @@ cont:
list_for_each_entry(i, &ctx->table->sets, list) {
int tmp;
- if (!nft_is_active_next(ctx->net, set))
+ if (!nft_is_active_next(ctx->net, i))
continue;
if (!sscanf(i->name, name, &tmp))
continue;
@@ -3148,23 +3249,17 @@ static __be64 nf_jiffies64_to_msecs(u64 input)
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct nlattr *desc;
u32 portid = ctx->portid;
u32 seq = ctx->seq;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
- flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+ NFNETLINK_V0, nft_base_seq(ctx->net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
@@ -3260,14 +3355,16 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
struct nft_table *table, *cur_table = (struct nft_table *)cb->args[2];
struct net *net = sock_net(skb->sk);
struct nft_ctx *ctx = cb->data, ctx_set;
+ struct nftables_pernet *nft_net;
if (cb->args[1])
return skb->len;
rcu_read_lock();
- cb->seq = net->nft.base_seq;
+ nft_net = net_generic(net, nf_tables_net_id);
+ cb->seq = nft_net->base_seq;
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (ctx->family != NFPROTO_UNSPEC &&
ctx->family != table->family)
continue;
@@ -3561,10 +3658,15 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
+ if (!nft_use_inc(&table->use)) {
+ err = -EMFILE;
+ goto err1;
+ }
+
set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
if (!set) {
err = -ENOMEM;
- goto err1;
+ goto err_alloc;
}
name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL);
@@ -3611,7 +3713,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
goto err4;
list_add_tail_rcu(&set->list, &table->sets);
- table->use++;
+
return 0;
err4:
@@ -3620,6 +3722,8 @@ err3:
kfree(set->name);
err2:
kvfree(set);
+err_alloc:
+ nft_use_dec_restore(&table->use);
err1:
module_put(to_set_type(ops)->owner);
return err;
@@ -3679,6 +3783,12 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
return nft_delset(&ctx, set);
}
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len);
+
static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -3700,9 +3810,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
- if (set->use == UINT_MAX)
- return -EOVERFLOW;
-
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -3727,10 +3834,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
return iter.err;
}
bind:
+ if (!nft_use_inc(&set->use))
+ return -EMFILE;
+
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
- set->use++;
return 0;
}
@@ -3750,17 +3859,38 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
+void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ if (nft_set_is_anonymous(set))
+ nft_clear(ctx->net, set);
+
+ nft_use_inc_restore(&set->use);
+}
+EXPORT_SYMBOL_GPL(nf_tables_activate_set);
+
void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *binding,
enum nft_trans_phase phase)
{
switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ nft_set_trans_unbind(ctx, set);
+ if (nft_set_is_anonymous(set))
+ nft_deactivate_next(ctx->net, set);
+ else
+ list_del_rcu(&binding->list);
+
+ nft_use_dec(&set->use);
+ break;
case NFT_TRANS_PREPARE:
- set->use--;
+ if (nft_set_is_anonymous(set))
+ nft_deactivate_next(ctx->net, set);
+
+ nft_use_dec(&set->use);
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
- set->use--;
+ nft_use_dec(&set->use);
/* fall through */
default:
nf_tables_unbind_set(ctx, set, binding,
@@ -3956,18 +4086,19 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
{
struct nft_set_dump_ctx *dump_ctx = cb->data;
struct net *net = sock_net(skb->sk);
+ struct nftables_pernet *nft_net;
struct nft_table *table;
struct nft_set *set;
struct nft_set_dump_args args;
bool set_found = false;
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct nlattr *nest;
u32 portid, seq;
int event;
rcu_read_lock();
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ nft_net = net_generic(net, nf_tables_net_id);
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
dump_ctx->ctx.family != table->family)
continue;
@@ -3993,16 +4124,11 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
portid = NETLINK_CB(cb->skb).portid;
seq = cb->nlh->nlmsg_seq;
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
- NLM_F_MULTI);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
+ table->family, NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = table->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_TABLE, table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_ELEM_LIST_SET, set->name))
@@ -4059,22 +4185,16 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct nlattr *nest;
int err;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg),
- flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
+ NFNETLINK_V0, nft_base_seq(ctx->net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = ctx->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(ctx->net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_SET_TABLE, ctx->table->name))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_SET_NAME, set->name))
@@ -4114,11 +4234,54 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
return 0;
}
+static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_data *key, struct nlattr *attr)
+{
+ struct nft_data_desc desc;
+ int err;
+
+ err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr);
+ if (err < 0)
+ return err;
+
+ if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
+ nft_data_release(key, desc.type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_data_desc *desc,
+ struct nft_data *data,
+ struct nlattr *attr)
+{
+ u32 dtype;
+ int err;
+
+ err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
+ if (err < 0)
+ return err;
+
+ if (set->dtype == NFT_DATA_VERDICT)
+ dtype = NFT_DATA_VERDICT;
+ else
+ dtype = NFT_DATA_VALUE;
+
+ if (dtype != desc->type ||
+ set->dlen != desc->len) {
+ nft_data_release(data, desc->type);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
- struct nft_data_desc desc;
struct nft_set_elem elem;
struct sk_buff *skb;
uint32_t flags = 0;
@@ -4137,17 +4300,11 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
return err;
- err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
- nla[NFTA_SET_ELEM_KEY]);
+ err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
return err;
- err = -EINVAL;
- if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
- nft_data_release(&elem.key.val, desc.type);
- return err;
- }
-
priv = set->ops->get(ctx->net, set, &elem, flags);
if (IS_ERR(priv))
return PTR_ERR(priv);
@@ -4315,7 +4472,7 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
}
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
@@ -4338,14 +4495,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
u8 genmask = nft_genmask_next(ctx->net);
- struct nft_data_desc d1, d2;
struct nft_set_ext_tmpl tmpl;
struct nft_set_ext *ext, *ext2;
struct nft_set_elem elem;
struct nft_set_binding *binding;
struct nft_object *obj = NULL;
struct nft_userdata *udata;
- struct nft_data data;
+ struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
u32 flags = 0;
@@ -4378,6 +4534,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return -EINVAL;
}
+ if (set->flags & NFT_SET_OBJECT) {
+ if (!nla[NFTA_SET_ELEM_OBJREF] &&
+ !(flags & NFT_SET_ELEM_INTERVAL_END))
+ return -EINVAL;
+ } else {
+ if (nla[NFTA_SET_ELEM_OBJREF])
+ return -EINVAL;
+ }
+
if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
(nla[NFTA_SET_ELEM_DATA] ||
nla[NFTA_SET_ELEM_OBJREF] ||
@@ -4399,15 +4564,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
timeout = set->timeout;
}
- err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
- nla[NFTA_SET_ELEM_KEY]);
+ err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
- err = -EINVAL;
- if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
- goto err2;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
if (timeout > 0) {
nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
if (timeout != set->timeout)
@@ -4415,29 +4577,29 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
- if (!(set->flags & NFT_SET_OBJECT)) {
- err = -EINVAL;
- goto err2;
- }
obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
set->objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
+ obj = NULL;
+ goto err2;
+ }
+
+ if (!nft_use_inc(&obj->use)) {
+ err = -EMFILE;
+ obj = NULL;
goto err2;
}
+
nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
}
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
- err = nft_data_init(ctx, &data, sizeof(data), &d2,
- nla[NFTA_SET_ELEM_DATA]);
+ err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val,
+ nla[NFTA_SET_ELEM_DATA]);
if (err < 0)
goto err2;
- err = -EINVAL;
- if (set->dtype != NFT_DATA_VERDICT && d2.len != set->dlen)
- goto err3;
-
dreg = nft_type_to_reg(set->dtype);
list_for_each_entry(binding, &set->bindings, list) {
struct nft_ctx bind_ctx = {
@@ -4451,19 +4613,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
continue;
err = nft_validate_register_store(&bind_ctx, dreg,
- &data,
- d2.type, d2.len);
+ &elem.data.val,
+ desc.type, desc.len);
if (err < 0)
goto err3;
- if (d2.type == NFT_DATA_VERDICT &&
- (data.verdict.code == NFT_GOTO ||
- data.verdict.code == NFT_JUMP))
+ if (desc.type == NFT_DATA_VERDICT &&
+ (elem.data.val.verdict.code == NFT_GOTO ||
+ elem.data.val.verdict.code == NFT_JUMP))
nft_validate_state_update(ctx->net,
NFT_VALIDATE_NEED);
}
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
}
/* The full maximum length of userdata can exceed the maximum
@@ -4479,7 +4641,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
err = -ENOMEM;
- elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
+ elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
+ elem.data.val.data,
timeout, GFP_KERNEL);
if (elem.priv == NULL)
goto err3;
@@ -4492,10 +4655,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
udata->len = ulen - 1;
nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
- if (obj) {
+ if (obj)
*nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
@@ -4533,7 +4694,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
nft_trans_elem(trans) = elem;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err6:
@@ -4541,14 +4702,15 @@ err6:
err5:
kfree(trans);
err4:
- if (obj)
- obj->use--;
kfree(elem.priv);
err3:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
- nft_data_release(&data, d2.type);
+ nft_data_release(&elem.data.val, desc.type);
err2:
- nft_data_release(&elem.key.val, d1.type);
+ if (obj)
+ nft_use_dec_restore(&obj->use);
+
+ nft_data_release(&elem.key.val, NFT_DATA_VALUE);
err1:
return err;
}
@@ -4558,6 +4720,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
const struct nlattr * const nla[],
struct netlink_ext_ack *extack)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
u8 genmask = nft_genmask_next(net);
const struct nlattr *attr;
struct nft_set *set;
@@ -4577,7 +4740,8 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
@@ -4586,7 +4750,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
return err;
}
- if (net->nft.validate_state == NFT_VALIDATE_DO)
+ if (nft_net->validate_state == NFT_VALIDATE_DO)
return nft_table_validate(net, ctx.table);
return 0;
@@ -4605,11 +4769,14 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
*/
void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
+ struct nft_chain *chain;
+
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- data->verdict.chain->use++;
+ chain = data->verdict.chain;
+ nft_use_inc_restore(&chain->use);
break;
}
}
@@ -4624,7 +4791,7 @@ static void nft_set_elem_activate(const struct net *net,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use++;
+ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
static void nft_set_elem_deactivate(const struct net *net,
@@ -4636,7 +4803,7 @@ static void nft_set_elem_deactivate(const struct net *net,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
@@ -4644,7 +4811,6 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_set_ext_tmpl tmpl;
- struct nft_data_desc desc;
struct nft_set_elem elem;
struct nft_set_ext *ext;
struct nft_trans *trans;
@@ -4655,11 +4821,10 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy, NULL);
if (err < 0)
- goto err1;
+ return err;
- err = -EINVAL;
if (nla[NFTA_SET_ELEM_KEY] == NULL)
- goto err1;
+ return -EINVAL;
nft_set_ext_prepare(&tmpl);
@@ -4669,37 +4834,31 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
if (flags != 0)
nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
- err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
- nla[NFTA_SET_ELEM_KEY]);
+ err = nft_setelem_parse_key(ctx, set, &elem.key.val,
+ nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
- goto err1;
-
- err = -EINVAL;
- if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
- goto err2;
+ return err;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, desc.len);
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
err = -ENOMEM;
elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, NULL, 0,
GFP_KERNEL);
if (elem.priv == NULL)
- goto err2;
+ goto fail_elem;
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
*nft_set_ext_flags(ext) = flags;
trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
- if (trans == NULL) {
- err = -ENOMEM;
- goto err3;
- }
+ if (trans == NULL)
+ goto fail_trans;
priv = set->ops->deactivate(ctx->net, set, &elem);
if (priv == NULL) {
err = -ENOENT;
- goto err4;
+ goto fail_ops;
}
kfree(elem.priv);
elem.priv = priv;
@@ -4707,16 +4866,15 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_set_elem_deactivate(ctx->net, set, &elem);
nft_trans_elem(trans) = elem;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
-err4:
+fail_ops:
kfree(trans);
-err3:
+fail_trans:
kfree(elem.priv);
-err2:
- nft_data_release(&elem.key.val, desc.type);
-err1:
+fail_elem:
+ nft_data_release(&elem.key.val, NFT_DATA_VALUE);
return err;
}
@@ -4742,7 +4900,7 @@ static int nft_flush_set(const struct nft_ctx *ctx,
nft_set_elem_deactivate(ctx->net, set, elem);
nft_trans_elem_set(trans) = set;
nft_trans_elem(trans) = *elem;
- list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+ nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err1:
@@ -4769,7 +4927,11 @@ static int nf_tables_delsetelem(struct net *net, struct sock *nlsk,
set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
+ if (!list_empty(&set->bindings) && (set->flags & NFT_SET_CONSTANT))
return -EBUSY;
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) {
@@ -5040,9 +5202,14 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
type = nft_obj_type_get(net, objtype);
- if (IS_ERR(type))
- return PTR_ERR(type);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err_type;
+ }
obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
@@ -5063,7 +5230,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
goto err3;
list_add_tail_rcu(&obj->list, &table->objects);
- table->use++;
+
return 0;
err3:
kfree(obj->name);
@@ -5073,6 +5240,9 @@ err2:
kfree(obj);
err1:
module_put(type->owner);
+err_type:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -5081,19 +5251,14 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
int family, const struct nft_table *table,
struct nft_object *obj, bool reset)
{
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
@@ -5124,6 +5289,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
struct nft_obj_filter *filter = cb->data;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
+ struct nftables_pernet *nft_net;
struct nft_object *obj;
bool reset = false;
@@ -5131,9 +5297,10 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
reset = true;
rcu_read_lock();
- cb->seq = net->nft.base_seq;
+ nft_net = net_generic(net, nf_tables_net_id);
+ cb->seq = nft_net->base_seq;
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
@@ -5411,6 +5578,23 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
}
EXPORT_SYMBOL_GPL(nft_flowtable_lookup);
+void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
+ struct nft_flowtable *flowtable,
+ enum nft_trans_phase phase)
+{
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ case NFT_TRANS_PREPARE:
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+ nft_use_dec(&flowtable->use);
+ /* fall through */
+ default:
+ return;
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_flowtable);
+
static struct nft_flowtable *
nft_flowtable_lookup_byhandle(const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
@@ -5610,9 +5794,14 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL);
- if (!flowtable)
- return -ENOMEM;
+ if (!flowtable) {
+ err = -ENOMEM;
+ goto flowtable_alloc;
+ }
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
@@ -5666,7 +5855,6 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
goto err6;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
- table->use++;
return 0;
err6:
@@ -5684,6 +5872,9 @@ err2:
kfree(flowtable->name);
err1:
kfree(flowtable);
+flowtable_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -5741,20 +5932,15 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
struct nft_flowtable *flowtable)
{
struct nlattr *nest, *nest_devs;
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
int i;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
if (nla_put_string(skb, NFTA_FLOWTABLE_TABLE, flowtable->table->name) ||
nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
@@ -5804,12 +5990,14 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
struct nft_flowtable *flowtable;
+ struct nftables_pernet *nft_net;
const struct nft_table *table;
rcu_read_lock();
- cb->seq = net->nft.base_seq;
+ nft_net = net_generic(net, nf_tables_net_id);
+ cb->seq = nft_net->base_seq;
- list_for_each_entry_rcu(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
continue;
@@ -5979,21 +6167,17 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
-
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) ||
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) ||
nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
@@ -6026,6 +6210,7 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct nft_flowtable *flowtable;
+ struct nftables_pernet *nft_net;
struct nft_table *table;
struct net *net;
@@ -6033,13 +6218,14 @@ static int nf_tables_flowtable_event(struct notifier_block *this,
return 0;
net = dev_net(dev);
- mutex_lock(&net->nft.commit_mutex);
- list_for_each_entry(table, &net->nft.tables, list) {
+ nft_net = net_generic(net, nf_tables_net_id);
+ mutex_lock(&nft_net->commit_mutex);
+ list_for_each_entry(table, &nft_net->tables, list) {
list_for_each_entry(flowtable, &table->flowtables, list) {
nft_flowtable_event(event, dev, flowtable);
}
}
- mutex_unlock(&net->nft.commit_mutex);
+ mutex_unlock(&nft_net->commit_mutex);
return NOTIFY_DONE;
}
@@ -6220,19 +6406,22 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
static int nf_tables_validate(struct net *net)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nft_table *table;
- switch (net->nft.validate_state) {
+ switch (nft_net->validate_state) {
case NFT_VALIDATE_SKIP:
break;
case NFT_VALIDATE_NEED:
nft_validate_state_update(net, NFT_VALIDATE_DO);
/* fall through */
case NFT_VALIDATE_DO:
- list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_validate(net, table) < 0)
return -EAGAIN;
}
+
+ nft_validate_state_update(net, NFT_VALIDATE_SKIP);
break;
}
@@ -6304,15 +6493,16 @@ static void nft_commit_release(struct nft_trans *trans)
static void nf_tables_commit_release(struct net *net)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nft_trans *trans, *next;
- if (list_empty(&net->nft.commit_list))
+ if (list_empty(&nft_net->commit_list))
return;
synchronize_rcu();
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
- list_del(&trans->list);
+ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
+ nft_trans_list_del(trans);
nft_commit_release(trans);
}
}
@@ -6350,9 +6540,10 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
static void nf_tables_commit_chain_prepare_cancel(struct net *net)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nft_trans *trans, *next;
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
struct nft_chain *chain = trans->ctx.chain;
if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -6444,17 +6635,30 @@ static void nft_chain_del(struct nft_chain *chain)
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
+ list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !nft_trans_set_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound set\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
/* 0. Validate ruleset, otherwise roll back for error reporting. */
if (nf_tables_validate(net) < 0)
return -EAGAIN;
/* 1. Allocate space for next generation rules_gen_X[] */
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
int ret;
if (trans->msg_type == NFT_MSG_NEWRULE ||
@@ -6470,7 +6674,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
/* step 2. Make rules_gen_X visible to packet path */
- list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry(table, &nft_net->tables, list) {
list_for_each_entry(chain, &table->chains, list)
nf_tables_commit_chain(net, chain);
}
@@ -6479,12 +6683,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
* Bump generation counter, invalidate any dump in progress.
* Cannot fail after this point.
*/
- while (++net->nft.base_seq == 0);
+ while (++nft_net->base_seq == 0)
+ ;
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
- list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) {
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@@ -6544,7 +6749,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
*/
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ nft_use_dec(&trans->ctx.table->use);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
@@ -6605,7 +6810,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_commit_release(net);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
- mutex_unlock(&net->nft.commit_mutex);
+ mutex_unlock(&nft_net->commit_mutex);
return 0;
}
@@ -6641,10 +6846,11 @@ static void nf_tables_abort_release(struct nft_trans *trans)
static int __nf_tables_abort(struct net *net)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
+ list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
@@ -6669,7 +6875,7 @@ static int __nf_tables_abort(struct net *net)
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
@@ -6677,25 +6883,25 @@ static int __nf_tables_abort(struct net *net)
}
break;
case NFT_MSG_DELCHAIN:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, trans->ctx.chain);
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
- trans->ctx.chain->use--;
+ nft_use_dec_restore(&trans->ctx.chain->use);
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
NFT_TRANS_ABORT);
break;
case NFT_MSG_DELRULE:
- trans->ctx.chain->use++;
+ nft_use_inc_restore(&trans->ctx.chain->use);
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
@@ -6703,7 +6909,7 @@ static int __nf_tables_abort(struct net *net)
list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_set(trans));
nft_trans_destroy(trans);
break;
@@ -6726,22 +6932,22 @@ static int __nf_tables_abort(struct net *net)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWOBJ:
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
list_del_rcu(&nft_trans_obj(trans)->list);
break;
case NFT_MSG_DELOBJ:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWFLOWTABLE:
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
nft_trans_flowtable(trans));
break;
case NFT_MSG_DELFLOWTABLE:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
nft_trans_destroy(trans);
break;
@@ -6751,37 +6957,34 @@ static int __nf_tables_abort(struct net *net)
synchronize_rcu();
list_for_each_entry_safe_reverse(trans, next,
- &net->nft.commit_list, list) {
- list_del(&trans->list);
+ &nft_net->commit_list, list) {
+ nft_trans_list_del(trans);
nf_tables_abort_release(trans);
}
return 0;
}
-static void nf_tables_cleanup(struct net *net)
-{
- nft_validate_state_update(net, NFT_VALIDATE_SKIP);
-}
-
static int nf_tables_abort(struct net *net, struct sk_buff *skb)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
int ret = __nf_tables_abort(net);
- mutex_unlock(&net->nft.commit_mutex);
+ mutex_unlock(&nft_net->commit_mutex);
return ret;
}
static bool nf_tables_valid_genid(struct net *net, u32 genid)
{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
bool genid_ok;
- mutex_lock(&net->nft.commit_mutex);
+ mutex_lock(&nft_net->commit_mutex);
- genid_ok = genid == 0 || net->nft.base_seq == genid;
+ genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)
- mutex_unlock(&net->nft.commit_mutex);
+ mutex_unlock(&nft_net->commit_mutex);
/* else, commit mutex has to be released by commit or abort function */
return genid_ok;
@@ -6794,7 +6997,6 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
.cb = nf_tables_cb,
.commit = nf_tables_commit,
.abort = nf_tables_abort,
- .cleanup = nf_tables_cleanup,
.valid_genid = nf_tables_valid_genid,
.owner = THIS_MODULE,
};
@@ -6956,28 +7158,24 @@ int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
}
EXPORT_SYMBOL_GPL(nft_parse_u32_check);
-/**
- * nft_parse_register - parse a register value from a netlink attribute
- *
- * @attr: netlink attribute
- *
- * Parse and translate a register value from a netlink attribute.
- * Registers used to be 128 bit wide, these register numbers will be
- * mapped to the corresponding 32 bit register numbers.
- */
-unsigned int nft_parse_register(const struct nlattr *attr)
+static int nft_parse_register(const struct nlattr *attr, u32 *preg)
{
unsigned int reg;
reg = ntohl(nla_get_be32(attr));
switch (reg) {
case NFT_REG_VERDICT...NFT_REG_4:
- return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ *preg = reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+ break;
+ case NFT_REG32_00...NFT_REG32_15:
+ *preg = reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ break;
default:
- return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+ return -ERANGE;
}
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(nft_parse_register);
/**
* nft_dump_register - dump a register value to a netlink attribute
@@ -7010,7 +7208,7 @@ EXPORT_SYMBOL_GPL(nft_dump_register);
* Validate that the input register is one of the general purpose
* registers and that the length of the load is within the bounds.
*/
-int nft_validate_register_load(enum nft_registers reg, unsigned int len)
+static int nft_validate_register_load(enum nft_registers reg, unsigned int len)
{
if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
return -EINVAL;
@@ -7021,7 +7219,24 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len)
return 0;
}
-EXPORT_SYMBOL_GPL(nft_validate_register_load);
+
+int nft_parse_register_load(const struct nlattr *attr, u8 *sreg, u32 len)
+{
+ u32 reg;
+ int err;
+
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_register_load(reg, len);
+ if (err < 0)
+ return err;
+
+ *sreg = reg;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_load);
/**
* nft_validate_register_store - validate an expressions' register store
@@ -7037,10 +7252,11 @@ EXPORT_SYMBOL_GPL(nft_validate_register_load);
* A value of NULL for the data means that its runtime gathered
* data.
*/
-int nft_validate_register_store(const struct nft_ctx *ctx,
- enum nft_registers reg,
- const struct nft_data *data,
- enum nft_data_types type, unsigned int len)
+static int nft_validate_register_store(const struct nft_ctx *ctx,
+ enum nft_registers reg,
+ const struct nft_data *data,
+ enum nft_data_types type,
+ unsigned int len)
{
int err;
@@ -7072,7 +7288,27 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
return 0;
}
}
-EXPORT_SYMBOL_GPL(nft_validate_register_store);
+
+int nft_parse_register_store(const struct nft_ctx *ctx,
+ const struct nlattr *attr, u8 *dreg,
+ const struct nft_data *data,
+ enum nft_data_types type, unsigned int len)
+{
+ int err;
+ u32 reg;
+
+ err = nft_parse_register(attr, &reg);
+ if (err < 0)
+ return err;
+
+ err = nft_validate_register_store(ctx, reg, data, type, len);
+ if (err < 0)
+ return err;
+
+ *dreg = reg;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_register_store);
static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = {
[NFTA_VERDICT_CODE] = { .type = NLA_U32 },
@@ -7095,6 +7331,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
+
+ /* zero padding hole for memcmp */
+ memset(data, 0, sizeof(*data));
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
@@ -7122,8 +7361,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
return PTR_ERR(chain);
if (nft_is_base_chain(chain))
return -EOPNOTSUPP;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
- chain->use++;
data->verdict.chain = chain;
break;
}
@@ -7135,10 +7375,13 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
+ struct nft_chain *chain;
+
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- data->verdict.chain->use--;
+ chain = data->verdict.chain;
+ nft_use_dec(&chain->use);
break;
}
}
@@ -7291,21 +7534,20 @@ int __nft_release_basechain(struct nft_ctx *ctx)
nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
nf_tables_rule_release(ctx, rule);
}
nft_chain_del(ctx->chain);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nf_tables_chain_destroy(ctx);
return 0;
}
EXPORT_SYMBOL_GPL(__nft_release_basechain);
-static void __nft_release_tables(struct net *net)
+static void __nft_release_table(struct net *net, struct nft_table *table)
{
struct nft_flowtable *flowtable, *nf;
- struct nft_table *table, *nt;
struct nft_chain *chain, *nc;
struct nft_object *obj, *ne;
struct nft_rule *rule, *nr;
@@ -7315,71 +7557,85 @@ static void __nft_release_tables(struct net *net)
.family = NFPROTO_NETDEV,
};
- list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
- ctx.family = table->family;
+ ctx.family = table->family;
- list_for_each_entry(chain, &table->chains, list)
- nf_tables_unregister_hook(net, table, chain);
- /* No packets are walking on these chains anymore. */
- ctx.table = table;
- list_for_each_entry(chain, &table->chains, list) {
- ctx.chain = chain;
- list_for_each_entry_safe(rule, nr, &chain->rules, list) {
- list_del(&rule->list);
- chain->use--;
- nf_tables_rule_release(&ctx, rule);
- }
- }
- list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
- list_del(&flowtable->list);
- table->use--;
- nf_tables_flowtable_destroy(flowtable);
- }
- list_for_each_entry_safe(set, ns, &table->sets, list) {
- list_del(&set->list);
- table->use--;
- nft_set_destroy(set);
- }
- list_for_each_entry_safe(obj, ne, &table->objects, list) {
- list_del(&obj->list);
- table->use--;
- nft_obj_destroy(&ctx, obj);
- }
- list_for_each_entry_safe(chain, nc, &table->chains, list) {
- ctx.chain = chain;
- nft_chain_del(chain);
- table->use--;
- nf_tables_chain_destroy(&ctx);
+ list_for_each_entry(chain, &table->chains, list)
+ nf_tables_unregister_hook(net, table, chain);
+ /* No packets are walking on these chains anymore. */
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+ nft_use_dec(&chain->use);
+ nf_tables_rule_release(&ctx, rule);
}
- list_del(&table->list);
- nf_tables_table_destroy(&ctx);
}
+ list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
+ list_del(&flowtable->list);
+ nft_use_dec(&table->use);
+ nf_tables_flowtable_destroy(flowtable);
+ }
+ list_for_each_entry_safe(set, ns, &table->sets, list) {
+ list_del(&set->list);
+ nft_use_dec(&table->use);
+ nft_set_destroy(set);
+ }
+ list_for_each_entry_safe(obj, ne, &table->objects, list) {
+ list_del(&obj->list);
+ nft_use_dec(&table->use);
+ nft_obj_destroy(&ctx, obj);
+ }
+ list_for_each_entry_safe(chain, nc, &table->chains, list) {
+ ctx.chain = chain;
+ nft_chain_del(chain);
+ nft_use_dec(&table->use);
+ nf_tables_chain_destroy(&ctx);
+ }
+ list_del(&table->list);
+ nf_tables_table_destroy(&ctx);
+}
+
+static void __nft_release_tables(struct net *net)
+{
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+ struct nft_table *table, *nt;
+
+ list_for_each_entry_safe(table, nt, &nft_net->tables, list)
+ __nft_release_table(net, table);
}
static int __net_init nf_tables_init_net(struct net *net)
{
- INIT_LIST_HEAD(&net->nft.tables);
- INIT_LIST_HEAD(&net->nft.commit_list);
- mutex_init(&net->nft.commit_mutex);
- net->nft.base_seq = 1;
- net->nft.validate_state = NFT_VALIDATE_SKIP;
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+ INIT_LIST_HEAD(&nft_net->tables);
+ INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->binding_list);
+ mutex_init(&nft_net->commit_mutex);
+ nft_net->base_seq = 1;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
return 0;
}
static void __net_exit nf_tables_exit_net(struct net *net)
{
- mutex_lock(&net->nft.commit_mutex);
- if (!list_empty(&net->nft.commit_list))
+ struct nftables_pernet *nft_net = net_generic(net, nf_tables_net_id);
+
+ mutex_lock(&nft_net->commit_mutex);
+ if (!list_empty(&nft_net->commit_list))
__nf_tables_abort(net);
__nft_release_tables(net);
- mutex_unlock(&net->nft.commit_mutex);
- WARN_ON_ONCE(!list_empty(&net->nft.tables));
+ mutex_unlock(&nft_net->commit_mutex);
+ WARN_ON_ONCE(!list_empty(&nft_net->tables));
}
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.exit = nf_tables_exit_net,
+ .id = &nf_tables_net_id,
+ .size = sizeof(struct nftables_pernet),
};
static int __init nf_tables_module_init(void)
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index a3850414dba2..7dfaad783cd5 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -144,7 +144,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
struct nft_rule *const *rules;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
- struct nft_regs regs;
+ struct nft_regs regs = {};
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
bool genbit = READ_ONCE(net->nft.gencursor);
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index e1dc527a493b..7a19c517b191 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -186,7 +186,6 @@ static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info)
void nft_trace_notify(struct nft_traceinfo *info)
{
const struct nft_pktinfo *pkt = info->pkt;
- struct nfgenmsg *nfmsg;
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
@@ -222,15 +221,11 @@ void nft_trace_notify(struct nft_traceinfo *info)
return;
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_TRACE);
- nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
+ nlh = nfnl_msg_put(skb, 0, 0, event, 0, info->basechain->type->family,
+ NFNETLINK_V0, 0);
if (!nlh)
goto nla_put_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = info->basechain->type->family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(nft_pf(pkt))))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9bacddc761ba..0267be2e9cfe 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -452,7 +452,8 @@ ack:
* processed, this avoids that the same error is
* reported several times when replaying the batch.
*/
- if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
+ if (err == -ENOMEM ||
+ nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
/* We failed to enqueue an error, reset the
* list of errors and send OOM to userspace
* pointing to the batch header.
@@ -495,8 +496,6 @@ done:
} else {
ss->abort(net, oskb);
}
- if (ss->cleanup)
- ss->cleanup(net);
nfnl_err_deliver(&err_list, oskb);
kfree_skb(skb);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 8fa8bf7c48e6..7c5f428dc5c9 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -135,21 +135,16 @@ nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
int event, struct nf_acct *acct)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
u64 pkts, bytes;
u32 old_flags;
event = nfnl_msg_type(NFNL_SUBSYS_ACCT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, NFACCT_NAME, acct->name))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index ddcb1b607474..720177721e3c 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -381,10 +381,14 @@ static int
nfnl_cthelper_update(const struct nlattr * const tb[],
struct nf_conntrack_helper *helper)
{
+ u32 size;
int ret;
- if (tb[NFCTH_PRIV_DATA_LEN])
- return -EBUSY;
+ if (tb[NFCTH_PRIV_DATA_LEN]) {
+ size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
+ if (size != helper->data_len)
+ return -EBUSY;
+ }
if (tb[NFCTH_POLICY]) {
ret = nfnl_cthelper_update_policy(helper, tb[NFCTH_POLICY]);
@@ -528,20 +532,15 @@ nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
int event, struct nf_conntrack_helper *helper)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
int status;
event = nfnl_msg_type(NFNL_SUBSYS_CTHELPER, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, NFCTH_NAME, helper->name))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 70a7382b9787..ae01e9ad5546 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -164,20 +164,15 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
int event, struct ctnl_timeout *timeout)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
const struct nf_conntrack_l4proto *l4proto = timeout->timeout.l4proto;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, CTA_TIMEOUT_NAME, timeout->name) ||
nla_put_be16(skb, CTA_TIMEOUT_L3PROTO,
htons(timeout->timeout.l3num)) ||
@@ -396,19 +391,14 @@ cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid,
const unsigned int *timeouts)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, AF_UNSPEC,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = AF_UNSPEC;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_be16(skb, CTA_TIMEOUT_L3PROTO, htons(l4proto->l3proto)) ||
nla_put_u8(skb, CTA_TIMEOUT_L4PROTO, l4proto->l4proto))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 25298b3eb854..1735bcb07381 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -404,20 +404,15 @@ __build_packet_message(struct nfnl_log_net *log,
{
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
- nlh = nlmsg_put(inst->skb, 0, 0,
- nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
- sizeof(struct nfgenmsg), 0);
+ nlh = nfnl_msg_put(inst->skb, 0, 0,
+ nfnl_msg_type(NFNL_SUBSYS_ULOG, NFULNL_MSG_PACKET),
+ 0, pf, NFNETLINK_V0, htons(inst->group_num));
if (!nlh)
return -1;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = pf;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(inst->group_num);
memset(&pmsg, 0, sizeof(pmsg));
pmsg.hw_protocol = skb->protocol;
@@ -509,7 +504,8 @@ __build_packet_message(struct nfnl_log_net *log,
goto nla_put_failure;
if (indev && skb->dev &&
- skb->mac_header != skb->network_header) {
+ skb_mac_header_was_set(skb) &&
+ skb_mac_header_len(skb) != 0) {
struct nfulnl_msg_packet_hw phw;
int len;
@@ -635,8 +631,8 @@ nfulnl_log_packet(struct net *net,
unsigned int plen = 0;
struct nfnl_log_net *log = nfnl_log_pernet(net);
const struct nfnl_ct_hook *nfnl_ct = NULL;
+ enum ip_conntrack_info ctinfo = 0;
struct nf_conn *ct = NULL;
- enum ip_conntrack_info uninitialized_var(ctinfo);
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 131f9f8c0b09..f3676238e64f 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -191,6 +191,8 @@ static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx,
ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
sizeof(struct tcphdr), ctx->optsize, opts);
+ if (!ctx->optp)
+ return NULL;
}
return tcp;
@@ -316,6 +318,14 @@ static int nfnl_osf_add_callback(struct net *net, struct sock *ctnl,
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+ if (f->opt_num > ARRAY_SIZE(f->opt))
+ return -EINVAL;
+
+ if (!memchr(f->genre, 0, MAXGENRELEN) ||
+ !memchr(f->subtype, 0, MAXGENRELEN) ||
+ !memchr(f->version, 0, MAXGENRELEN))
+ return -EINVAL;
+
kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
if (!kf)
return -ENOMEM;
@@ -440,3 +450,4 @@ module_init(nfnl_osf_init);
module_exit(nfnl_osf_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f81a3ce0fe48..1aacc31a6bf9 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -387,12 +387,11 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct nlattr *nla;
struct nfqnl_msg_packet_hdr *pmsg;
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
struct sk_buff *entskb = entry->skb;
struct net_device *indev;
struct net_device *outdev;
struct nf_conn *ct = NULL;
- enum ip_conntrack_info uninitialized_var(ctinfo);
+ enum ip_conntrack_info ctinfo;
struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;
@@ -473,18 +472,15 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
goto nlmsg_failure;
}
- nlh = nlmsg_put(skb, 0, 0,
- nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
- sizeof(struct nfgenmsg), 0);
+ nlh = nfnl_msg_put(skb, 0, 0,
+ nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
+ 0, entry->state.pf, NFNETLINK_V0,
+ htons(queue->queue_num));
if (!nlh) {
skb_tx_error(entskb);
kfree_skb(skb);
goto nlmsg_failure;
}
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = entry->state.pf;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = htons(queue->queue_num);
nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
pmsg = nla_data(nla);
@@ -566,7 +562,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
goto nla_put_failure;
if (indev && entskb->dev &&
- entskb->mac_header != entskb->network_header) {
+ skb_mac_header_was_set(entskb) &&
+ skb_mac_header_len(entskb) != 0) {
struct nfqnl_msg_packet_hw phw;
int len;
@@ -715,9 +712,15 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry)
- nf_queue_entry_get_refs(entry);
- return entry;
+
+ if (!entry)
+ return NULL;
+
+ if (nf_queue_entry_get_refs(entry))
+ return entry;
+
+ kfree(entry);
+ return NULL;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
@@ -843,11 +846,16 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
}
static int
-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
+nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
{
struct sk_buff *nskb;
if (diff < 0) {
+ unsigned int min_len = skb_transport_offset(e->skb);
+
+ if (data_len < min_len)
+ return -EINVAL;
+
if (pskb_trim(e->skb, data_len))
return -ENOMEM;
} else if (diff > 0) {
@@ -1179,7 +1187,7 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl,
struct nfqnl_instance *queue;
unsigned int verdict;
struct nf_queue_entry *entry;
- enum ip_conntrack_info uninitialized_var(ctinfo);
+ enum ip_conntrack_info ctinfo;
struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 058ee84ea531..c1055251ebde 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -18,8 +18,8 @@
#include <net/netfilter/nf_tables.h>
struct nft_bitwise {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
u8 len;
struct nft_data mask;
struct nft_data xor;
@@ -68,14 +68,14 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
priv->len = len;
- priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
- err = nft_validate_register_load(priv->sreg, priv->len);
+ err = nft_parse_register_load(tb[NFTA_BITWISE_SREG], &priv->sreg,
+ priv->len);
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ err = nft_parse_register_store(ctx, tb[NFTA_BITWISE_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 13d4e421a6b3..dba16126c7ee 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -19,8 +19,8 @@
#include <net/netfilter/nf_tables.h>
struct nft_byteorder {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
enum nft_byteorder_ops op:8;
u8 len;
u8 size;
@@ -33,11 +33,11 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
const struct nft_byteorder *priv = nft_expr_priv(expr);
u32 *src = &regs->data[priv->sreg];
u32 *dst = &regs->data[priv->dreg];
- union { u32 u32; u16 u16; } *s, *d;
+ u16 *s16, *d16;
unsigned int i;
- s = (void *)src;
- d = (void *)dst;
+ s16 = (void *)src;
+ d16 = (void *)dst;
switch (priv->size) {
case 8: {
@@ -63,11 +63,11 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = ntohl((__force __be32)s[i].u32);
+ dst[i] = ntohl((__force __be32)src[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = (__force __u32)htonl(s[i].u32);
+ dst[i] = (__force __u32)htonl(src[i]);
break;
}
break;
@@ -75,11 +75,11 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = ntohs((__force __be16)s[i].u16);
+ d16[i] = ntohs((__force __be16)s16[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = (__force __u16)htons(s[i].u16);
+ d16[i] = (__force __u16)htons(s16[i]);
break;
}
break;
@@ -133,20 +133,20 @@ static int nft_byteorder_init(const struct nft_ctx *ctx,
return -EINVAL;
}
- priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len);
if (err < 0)
return err;
priv->len = len;
- err = nft_validate_register_load(priv->sreg, priv->len);
+ err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg,
+ priv->len);
if (err < 0)
return err;
- priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 3fd540b2c6ba..a308d45ee95e 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -2,6 +2,7 @@
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/netfilter/nf_tables.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
@@ -10,6 +11,8 @@
#include <net/netfilter/nf_tables_ipv4.h>
#include <net/netfilter/nf_tables_ipv6.h>
+extern unsigned int nf_tables_net_id;
+
#ifdef CONFIG_NF_TABLES_IPV4
static unsigned int nft_do_chain_ipv4(void *priv,
struct sk_buff *skb,
@@ -315,6 +318,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nftables_pernet *nft_net;
struct nft_table *table;
struct nft_chain *chain, *nr;
struct nft_ctx ctx = {
@@ -325,8 +329,9 @@ static int nf_tables_netdev_event(struct notifier_block *this,
event != NETDEV_CHANGENAME)
return NOTIFY_DONE;
- mutex_lock(&ctx.net->nft.commit_mutex);
- list_for_each_entry(table, &ctx.net->nft.tables, list) {
+ nft_net = net_generic(ctx.net, nf_tables_net_id);
+ mutex_lock(&nft_net->commit_mutex);
+ list_for_each_entry(table, &nft_net->tables, list) {
if (table->family != NFPROTO_NETDEV)
continue;
@@ -340,7 +345,7 @@ static int nf_tables_netdev_event(struct notifier_block *this,
nft_netdev_event(event, dev, &ctx);
}
}
- mutex_unlock(&ctx.net->nft.commit_mutex);
+ mutex_unlock(&nft_net->commit_mutex);
return NOTIFY_DONE;
}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 7007045c0849..36bf64ebc892 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -19,7 +19,7 @@
struct nft_cmp_expr {
struct nft_data data;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 len;
enum nft_cmp_ops op:8;
};
@@ -88,8 +88,7 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return err;
}
- priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
- err = nft_validate_register_load(priv->sreg, desc.len);
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
@@ -139,8 +138,7 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
- err = nft_validate_register_load(priv->sreg, desc.len);
+ err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 469f9da5073b..2846d64659f2 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -575,19 +575,14 @@ nfnl_compat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
int rev, int target)
{
struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
unsigned int flags = portid ? NLM_F_MULTI : 0;
event = nfnl_msg_type(NFNL_SUBSYS_NFT_COMPAT, event);
- nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
- if (nlh == NULL)
+ nlh = nfnl_msg_put(skb, portid, seq, event, flags, family,
+ NFNETLINK_V0, 0);
+ if (!nlh)
goto nlmsg_failure;
- nfmsg = nlmsg_data(nlh);
- nfmsg->nfgen_family = family;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
if (nla_put_string(skb, NFTA_COMPAT_NAME, name) ||
nla_put_be32(skb, NFTA_COMPAT_REV, htonl(rev)) ||
nla_put_be32(skb, NFTA_COMPAT_TYPE, htonl(target)))
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 5dd87748afa8..f29f02805bcc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -29,8 +29,8 @@ struct nft_ct {
enum nft_ct_keys key:8;
enum ip_conntrack_dir dir:8;
union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
};
};
@@ -486,9 +486,8 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
}
}
- priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ err = nft_parse_register_store(ctx, tb[NFTA_CT_DREG], &priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
if (err < 0)
return err;
@@ -581,8 +580,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
}
}
- priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
- err = nft_validate_register_load(priv->sreg, len);
+ err = nft_parse_register_load(tb[NFTA_CT_SREG], &priv->sreg, len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c
index 2cc1e0ef56e8..e862f916efa0 100644
--- a/net/netfilter/nft_dup_netdev.c
+++ b/net/netfilter/nft_dup_netdev.c
@@ -16,7 +16,7 @@
#include <net/netfilter/nf_dup_netdev.h>
struct nft_dup_netdev {
- enum nft_registers sreg_dev:8;
+ u8 sreg_dev;
};
static void nft_dup_netdev_eval(const struct nft_expr *expr,
@@ -42,8 +42,8 @@ static int nft_dup_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_DUP_SREG_DEV] == NULL)
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ return nft_parse_register_load(tb[NFTA_DUP_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
}
static const struct nft_expr_ops nft_dup_netdev_ingress_ops;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index eb7f9a5f2aeb..a4c6aba7da7e 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -15,13 +15,16 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
+#include <net/netns/generic.h>
+
+extern unsigned int nf_tables_net_id;
struct nft_dynset {
struct nft_set *set;
struct nft_set_ext_tmpl tmpl;
enum nft_dynset_ops op:8;
- enum nft_registers sreg_key:8;
- enum nft_registers sreg_data:8;
+ u8 sreg_key;
+ u8 sreg_data;
bool invert;
u64 timeout;
struct nft_expr *expr;
@@ -112,13 +115,14 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
+ struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
struct nft_dynset *priv = nft_expr_priv(expr);
u8 genmask = nft_genmask_next(ctx->net);
struct nft_set *set;
u64 timeout;
int err;
- lockdep_assert_held(&ctx->net->nft.commit_mutex);
+ lockdep_assert_held(&nft_net->commit_mutex);
if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
tb[NFTA_DYNSET_OP] == NULL ||
@@ -140,6 +144,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_OBJECT)
+ return -EOPNOTSUPP;
+
if (set->ops->update == NULL)
return -EOPNOTSUPP;
@@ -166,8 +173,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
tb[NFTA_DYNSET_TIMEOUT])));
}
- priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]);
- err = nft_validate_register_load(priv->sreg_key, set->klen);
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_KEY], &priv->sreg_key,
+ set->klen);
if (err < 0)
return err;
@@ -177,8 +184,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (set->dtype == NFT_DATA_VERDICT)
return -EOPNOTSUPP;
- priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]);
- err = nft_validate_register_load(priv->sreg_data, set->dlen);
+ err = nft_parse_register_load(tb[NFTA_DYNSET_SREG_DATA],
+ &priv->sreg_data, set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
@@ -193,9 +200,6 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
return PTR_ERR(priv->expr);
err = -EOPNOTSUPP;
- if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
- goto err1;
-
if (priv->expr->ops->type->flags & NFT_EXPR_GC) {
if (set->flags & NFT_SET_TIMEOUT)
goto err1;
@@ -213,8 +217,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR,
priv->expr->ops->size);
if (set->flags & NFT_SET_TIMEOUT) {
- if (timeout || set->timeout)
+ if (timeout || set->timeout) {
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_TIMEOUT);
nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+ }
}
priv->timeout = timeout;
@@ -249,7 +255,7 @@ static void nft_dynset_activate(const struct nft_ctx *ctx,
{
struct nft_dynset *priv = nft_expr_priv(expr);
- priv->set->use++;
+ nf_tables_activate_set(ctx, priv->set);
}
static void nft_dynset_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a940c9fd9045..8d0f14cd7cc3 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -22,8 +22,8 @@ struct nft_exthdr {
u8 offset;
u8 len;
u8 op;
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
u8 flags;
};
@@ -45,6 +45,9 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
unsigned int offset = 0;
int err;
+ if (pkt->skb->protocol != htons(ETH_P_IPV6))
+ goto err;
+
err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = (err >= 0);
@@ -134,7 +137,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
unsigned int i, optl, tcphdr_len, offset;
struct tcphdr *tcph;
u8 *opt;
- u32 src;
tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
if (!tcph)
@@ -143,7 +145,6 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
opt = (u8 *)tcph;
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
union {
- u8 octet;
__be16 v16;
__be32 v32;
} old, new;
@@ -164,13 +165,13 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
if (!tcph)
return;
- src = regs->data[priv->sreg];
offset = i + priv->offset;
switch (priv->len) {
case 2:
old.v16 = get_unaligned((u16 *)(opt + offset));
- new.v16 = src;
+ new.v16 = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg]);
switch (priv->type) {
case TCPOPT_MSS:
@@ -188,7 +189,7 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
old.v16, new.v16, false);
break;
case 4:
- new.v32 = src;
+ new.v32 = regs->data[priv->sreg];
old.v32 = get_unaligned((u32 *)(opt + offset));
if (old.v32 == new.v32)
@@ -257,12 +258,12 @@ static int nft_exthdr_init(const struct nft_ctx *ctx,
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
- priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]);
priv->flags = flags;
priv->op = op;
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
@@ -307,11 +308,11 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
priv->len = len;
- priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]);
priv->flags = flags;
priv->op = op;
- return nft_validate_register_load(priv->sreg, priv->len);
+ return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
+ priv->len);
}
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 21df8cccea65..ce6891337304 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -88,7 +88,6 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
priv->result = ntohl(nla_get_be32(tb[NFTA_FIB_RESULT]));
- priv->dreg = nft_parse_register(tb[NFTA_FIB_DREG]);
switch (priv->result) {
case NFT_FIB_RESULT_OIF:
@@ -108,8 +107,8 @@ int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
return -EINVAL;
}
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ err = nft_parse_register_store(ctx, tb[NFTA_FIB_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 166edea0e452..ec35a41c7262 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -169,18 +169,34 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
+ if (!nft_use_inc(&flowtable->use))
+ return -EMFILE;
+
priv->flowtable = flowtable;
- flowtable->use++;
return nf_ct_netns_get(ctx->net, ctx->family);
}
-static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+static void nft_flow_offload_deactivate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ enum nft_trans_phase phase)
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
- priv->flowtable->use--;
+ nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase);
+}
+
+static void nft_flow_offload_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_flow_offload *priv = nft_expr_priv(expr);
+
+ nft_use_inc_restore(&priv->flowtable->use);
+}
+
+static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
nf_ct_netns_put(ctx->net, ctx->family);
}
@@ -203,6 +219,8 @@ static const struct nft_expr_ops nft_flow_offload_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
.eval = nft_flow_offload_eval,
.init = nft_flow_offload_init,
+ .activate = nft_flow_offload_activate,
+ .deactivate = nft_flow_offload_deactivate,
.destroy = nft_flow_offload_destroy,
.validate = nft_flow_offload_validate,
.dump = nft_flow_offload_dump,
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 649edbe77a20..2efbe78de3b2 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -20,7 +20,7 @@
#include <net/ip.h>
struct nft_fwd_netdev {
- enum nft_registers sreg_dev:8;
+ u8 sreg_dev;
};
static void nft_fwd_netdev_eval(const struct nft_expr *expr,
@@ -49,8 +49,8 @@ static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
if (tb[NFTA_FWD_SREG_DEV] == NULL)
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
- return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
}
static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
@@ -69,8 +69,8 @@ nla_put_failure:
}
struct nft_fwd_neigh {
- enum nft_registers sreg_dev:8;
- enum nft_registers sreg_addr:8;
+ u8 sreg_dev;
+ u8 sreg_addr;
u8 nfproto;
};
@@ -129,6 +129,7 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr,
return;
skb->dev = dev;
+ skb->tstamp = 0;
neigh_xmit(neigh_table, dev, addr, skb);
out:
regs->verdict.code = verdict;
@@ -147,8 +148,6 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
!tb[NFTA_FWD_NFPROTO])
return -EINVAL;
- priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
- priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]);
priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO]));
switch (priv->nfproto) {
@@ -162,11 +161,13 @@ static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- err = nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ err = nft_parse_register_load(tb[NFTA_FWD_SREG_DEV], &priv->sreg_dev,
+ sizeof(int));
if (err < 0)
return err;
- return nft_validate_register_load(priv->sreg_addr, addr_len);
+ return nft_parse_register_load(tb[NFTA_FWD_SREG_ADDR], &priv->sreg_addr,
+ addr_len);
}
static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index b8f23f75aea6..513419aca9c6 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -18,8 +18,8 @@
#include <linux/jhash.h>
struct nft_jhash {
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
u8 len;
bool autogen_seed:1;
u32 modulus;
@@ -65,7 +65,7 @@ static void nft_jhash_map_eval(const struct nft_expr *expr,
}
struct nft_symhash {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
u32 offset;
struct nft_set *map;
@@ -136,9 +136,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
if (tb[NFTA_HASH_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
- priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
- priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
-
err = nft_parse_u32_check(tb[NFTA_HASH_LEN], U8_MAX, &len);
if (err < 0)
return err;
@@ -147,6 +144,10 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
priv->len = len;
+ err = nft_parse_register_load(tb[NFTA_HASH_SREG], &priv->sreg, len);
+ if (err < 0)
+ return err;
+
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
if (priv->modulus < 1)
return -ERANGE;
@@ -161,9 +162,8 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
get_random_bytes(&priv->seed, sizeof(priv->seed));
}
- return nft_validate_register_load(priv->sreg, len) &&
- nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_jhash_map_init(const struct nft_ctx *ctx,
@@ -193,8 +193,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
if (tb[NFTA_HASH_OFFSET])
priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
- priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
-
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
if (priv->modulus < 1)
return -ERANGE;
@@ -202,8 +200,9 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_HASH_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ sizeof(u32));
}
static int nft_symhash_map_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 3f6d1d2a6281..af4e2a4bce93 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -50,9 +50,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
priv->dlen = desc.len;
- priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, &priv->data,
- desc.type, desc.len);
+ err = nft_parse_register_store(ctx, tb[NFTA_IMMEDIATE_DREG],
+ &priv->dreg, &priv->data, desc.type,
+ desc.len);
if (err < 0)
goto err1;
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 72f13a1144dd..a7bdc532479a 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -79,13 +79,13 @@ static int nft_limit_init(struct nft_limit *limit,
return -EOVERFLOW;
if (pkts) {
- tokens = div_u64(limit->nsecs, limit->rate) * limit->burst;
+ tokens = div64_u64(limit->nsecs, limit->rate) * limit->burst;
} else {
/* The token bucket size limits the number of tokens can be
* accumulated. tokens_max specifies the bucket size.
* tokens_max = unit * (rate + burst) / rate.
*/
- tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
+ tokens = div64_u64(limit->nsecs * (limit->rate + limit->burst),
limit->rate);
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 55754d9939b5..3c380fb32651 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -20,8 +20,8 @@
struct nft_lookup {
struct nft_set *set;
- enum nft_registers sreg:8;
- enum nft_registers dreg:8;
+ u8 sreg;
+ u8 dreg;
bool invert;
struct nft_set_binding binding;
};
@@ -76,8 +76,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
- priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
- err = nft_validate_register_load(priv->sreg, set->klen);
+ err = nft_parse_register_load(tb[NFTA_LOOKUP_SREG], &priv->sreg,
+ set->klen);
if (err < 0)
return err;
@@ -100,9 +100,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_MAP))
return -EINVAL;
- priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- set->dtype, set->dlen);
+ err = nft_parse_register_store(ctx, tb[NFTA_LOOKUP_DREG],
+ &priv->dreg, NULL, set->dtype,
+ set->dlen);
if (err < 0)
return err;
} else if (set->flags & NFT_SET_MAP)
@@ -132,7 +132,7 @@ static void nft_lookup_activate(const struct nft_ctx *ctx,
{
struct nft_lookup *priv = nft_expr_priv(expr);
- priv->set->use++;
+ nf_tables_activate_set(ctx, priv->set);
}
static void nft_lookup_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9d8655bc1bea..4ecfebc2fdc4 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -53,19 +53,15 @@ int nft_masq_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_MASQ_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_MASQ_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_MASQ_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_MASQ_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 297fe7d97c18..061a29bd3066 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -30,8 +30,8 @@
struct nft_meta {
enum nft_meta_keys key:8;
union {
- enum nft_registers dreg:8;
- enum nft_registers sreg:8;
+ u8 dreg;
+ u8 sreg;
};
};
@@ -358,9 +358,8 @@ static int nft_meta_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_META_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_META_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_meta_get_validate(const struct nft_ctx *ctx,
@@ -448,8 +447,7 @@ static int nft_meta_set_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->sreg = nft_parse_register(tb[NFTA_META_SREG]);
- err = nft_validate_register_load(priv->sreg, len);
+ err = nft_parse_register_load(tb[NFTA_META_SREG], &priv->sreg, len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 3e82a7d0df2a..aa6149cc8c87 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -27,10 +27,10 @@
#include <net/ip.h>
struct nft_nat {
- enum nft_registers sreg_addr_min:8;
- enum nft_registers sreg_addr_max:8;
- enum nft_registers sreg_proto_min:8;
- enum nft_registers sreg_proto_max:8;
+ u8 sreg_addr_min;
+ u8 sreg_addr_max;
+ u8 sreg_proto_min;
+ u8 sreg_proto_max;
enum nf_nat_manip_type type:8;
u8 family;
u16 flags;
@@ -153,23 +153,22 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6);
break;
default:
- return -EAFNOSUPPORT;
+ if (tb[NFTA_NAT_REG_ADDR_MIN])
+ return -EAFNOSUPPORT;
+ break;
}
priv->family = family;
if (tb[NFTA_NAT_REG_ADDR_MIN]) {
- priv->sreg_addr_min =
- nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]);
- err = nft_validate_register_load(priv->sreg_addr_min, alen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MIN],
+ &priv->sreg_addr_min, alen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_ADDR_MAX]) {
- priv->sreg_addr_max =
- nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]);
-
- err = nft_validate_register_load(priv->sreg_addr_max,
- alen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_ADDR_MAX],
+ &priv->sreg_addr_max,
+ alen);
if (err < 0)
return err;
} else {
@@ -179,19 +178,15 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_NAT_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 3cc1b3dc3c3c..8ff82f17ecba 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -20,7 +20,7 @@
static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
struct nft_ng_inc {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
atomic_t counter;
u32 offset;
@@ -70,11 +70,10 @@ static int nft_ng_inc_init(const struct nft_ctx *ctx,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
atomic_set(&priv->counter, priv->modulus - 1);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
@@ -104,7 +103,7 @@ static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr)
}
struct nft_ng_random {
- enum nft_registers dreg:8;
+ u8 dreg;
u32 modulus;
u32 offset;
};
@@ -144,10 +143,8 @@ static int nft_ng_random_init(const struct nft_ctx *ctx,
prandom_init_once(&nft_numgen_prandom_state);
- priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
-
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, sizeof(u32));
+ return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, sizeof(u32));
}
static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr)
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index bf92a40dd1b2..2401e9fa17c4 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -43,8 +43,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
if (IS_ERR(obj))
return -ENOENT;
+ if (!nft_use_inc(&obj->use))
+ return -EMFILE;
+
nft_objref_priv(expr) = obj;
- obj->use++;
return 0;
}
@@ -73,7 +75,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
if (phase == NFT_TRANS_COMMIT)
return;
- obj->use--;
+ nft_use_dec(&obj->use);
}
static void nft_objref_activate(const struct nft_ctx *ctx,
@@ -81,7 +83,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->use++;
+ nft_use_inc_restore(&obj->use);
}
static struct nft_expr_type nft_objref_type;
@@ -97,7 +99,7 @@ static const struct nft_expr_ops nft_objref_ops = {
struct nft_objref_map {
struct nft_set *set;
- enum nft_registers sreg:8;
+ u8 sreg;
struct nft_set_binding binding;
};
@@ -139,8 +141,8 @@ static int nft_objref_map_init(const struct nft_ctx *ctx,
if (!(set->flags & NFT_SET_OBJECT))
return -EINVAL;
- priv->sreg = nft_parse_register(tb[NFTA_OBJREF_SET_SREG]);
- err = nft_validate_register_load(priv->sreg, set->klen);
+ err = nft_parse_register_load(tb[NFTA_OBJREF_SET_SREG], &priv->sreg,
+ set->klen);
if (err < 0)
return err;
@@ -182,7 +184,7 @@ static void nft_objref_map_activate(const struct nft_ctx *ctx,
{
struct nft_objref_map *priv = nft_expr_priv(expr);
- priv->set->use++;
+ nf_tables_activate_set(ctx, priv->set);
}
static void nft_objref_map_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c
index a003533ff4d9..af2ce7a8c587 100644
--- a/net/netfilter/nft_osf.c
+++ b/net/netfilter/nft_osf.c
@@ -5,7 +5,7 @@
#include <linux/netfilter/nfnetlink_osf.h>
struct nft_osf {
- enum nft_registers dreg:8;
+ u8 dreg;
};
static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = {
@@ -22,6 +22,11 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct tcphdr _tcph;
const char *os_name;
+ if (pkt->tprot != IPPROTO_TCP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
tcp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(struct tcphdr), &_tcph);
if (!tcp) {
@@ -50,9 +55,9 @@ static int nft_osf_init(const struct nft_ctx *ctx,
if (!tb[NFTA_OSF_DREG])
return -EINVAL;
- priv->dreg = nft_parse_register(tb[NFTA_OSF_DREG]);
- err = nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN);
+ err = nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE,
+ NFT_OSF_MAXGENRELEN);
if (err < 0)
return err;
@@ -76,9 +81,21 @@ static int nft_osf_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
- return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_FORWARD));
+ unsigned int hooks;
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
}
static struct nft_expr_type nft_osf_type;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index b1a9f330a51f..0ef51c81ec94 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -84,7 +84,7 @@ static void nft_payload_eval(const struct nft_expr *expr,
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
- if (!skb_mac_header_was_set(skb))
+ if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) == 0)
goto err;
if (skb_vlan_tag_present(skb)) {
@@ -135,10 +135,10 @@ static int nft_payload_init(const struct nft_ctx *ctx,
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, priv->len);
+ return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG],
+ &priv->dreg, NULL, NFT_DATA_VALUE,
+ priv->len);
}
static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -194,6 +194,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt,
struct sk_buff *skb,
unsigned int *l4csum_offset)
{
+ if (pkt->xt.fragoff)
+ return -1;
+
switch (pkt->tprot) {
case IPPROTO_TCP:
*l4csum_offset = offsetof(struct tcphdr, check);
@@ -329,18 +332,23 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload_set *priv = nft_expr_priv(expr);
+ u32 csum_offset, csum_type = NFT_PAYLOAD_CSUM_NONE;
+ int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
- priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]);
if (tb[NFTA_PAYLOAD_CSUM_TYPE])
- priv->csum_type =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
- if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
- priv->csum_offset =
- ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+ csum_type = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET]) {
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_CSUM_OFFSET], U8_MAX,
+ &csum_offset);
+ if (err < 0)
+ return err;
+
+ priv->csum_offset = csum_offset;
+ }
if (tb[NFTA_PAYLOAD_CSUM_FLAGS]) {
u32 flags;
@@ -351,15 +359,17 @@ static int nft_payload_set_init(const struct nft_ctx *ctx,
priv->csum_flags = flags;
}
- switch (priv->csum_type) {
+ switch (csum_type) {
case NFT_PAYLOAD_CSUM_NONE:
case NFT_PAYLOAD_CSUM_INET:
break;
default:
return -EOPNOTSUPP;
}
+ priv->csum_type = csum_type;
- return nft_validate_register_load(priv->sreg, priv->len);
+ return nft_parse_register_load(tb[NFTA_PAYLOAD_SREG], &priv->sreg,
+ priv->len);
}
static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -395,6 +405,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
{
enum nft_payload_bases base;
unsigned int offset, len;
+ int err;
if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
@@ -420,8 +431,13 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_PAYLOAD_DREG] == NULL)
return ERR_PTR(-EINVAL);
- offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_LEN], U8_MAX, &len);
+ if (err < 0)
+ return ERR_PTR(err);
if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
base != NFT_PAYLOAD_LL_HEADER)
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 98613658d4ac..de5f1bda9d6f 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -22,10 +22,10 @@
static u32 jhash_initval __read_mostly;
struct nft_queue {
- enum nft_registers sreg_qnum:8;
- u16 queuenum;
- u16 queues_total;
- u16 flags;
+ u8 sreg_qnum;
+ u16 queuenum;
+ u16 queues_total;
+ u16 flags;
};
static void nft_queue_eval(const struct nft_expr *expr,
@@ -114,8 +114,8 @@ static int nft_queue_sreg_init(const struct nft_ctx *ctx,
struct nft_queue *priv = nft_expr_priv(expr);
int err;
- priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]);
- err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32));
+ err = nft_parse_register_load(tb[NFTA_QUEUE_SREG_QNUM],
+ &priv->sreg_qnum, sizeof(u32));
if (err < 0)
return err;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 2e1d2ec2f52a..a5f74e5b8184 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -18,7 +18,7 @@
struct nft_range_expr {
struct nft_data data_from;
struct nft_data data_to;
- enum nft_registers sreg:8;
+ u8 sreg;
u8 len;
enum nft_range_ops op:8;
};
@@ -90,8 +90,8 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
goto err2;
}
- priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]);
- err = nft_validate_register_load(priv->sreg, desc_from.len);
+ err = nft_parse_register_load(tb[NFTA_RANGE_SREG], &priv->sreg,
+ desc_from.len);
if (err < 0)
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index c64cbe78dee7..08a05bd1e817 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -49,19 +49,15 @@ int nft_redir_init(const struct nft_ctx *ctx,
plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
- priv->sreg_proto_min =
- nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
-
- err = nft_validate_register_load(priv->sreg_proto_min, plen);
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
+ &priv->sreg_proto_min, plen);
if (err < 0)
return err;
if (tb[NFTA_REDIR_REG_PROTO_MAX]) {
- priv->sreg_proto_max =
- nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]);
-
- err = nft_validate_register_load(priv->sreg_proto_max,
- plen);
+ err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MAX],
+ &priv->sreg_proto_max,
+ plen);
if (err < 0)
return err;
} else {
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 76dba9f6b6f6..edce109ef4b0 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -18,7 +18,7 @@
struct nft_rt {
enum nft_rt_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
};
static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst)
@@ -134,9 +134,8 @@ static int nft_rt_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_RT_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_RT_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_rt_get_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 05118e03c3e4..0b8510a4185d 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -145,6 +145,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
/* Another cpu may race to insert the element with the same key */
if (prev) {
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
he = prev;
}
@@ -154,6 +155,7 @@ out:
err2:
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
err1:
return false;
}
@@ -392,9 +394,17 @@ static void nft_rhash_destroy(const struct nft_set *set)
(void *)set);
}
+/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
+#define NFT_MAX_BUCKETS (1U << 31)
+
static u32 nft_hash_buckets(u32 size)
{
- return roundup_pow_of_two(size * 4 / 3);
+ u64 val = div_u64((u64)size * 4, 3);
+
+ if (val >= NFT_MAX_BUCKETS)
+ return NFT_MAX_BUCKETS;
+
+ return roundup_pow_of_two(val);
}
static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 84d317418d18..9c7ec2ec1fcf 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -326,6 +326,8 @@ static void *nft_rbtree_deactivate(const struct net *net,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
+ } else if (nft_set_elem_expired(&rbe->ext)) {
+ break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
continue;
@@ -375,23 +377,37 @@ static void nft_rbtree_gc(struct work_struct *work)
struct nft_rbtree *priv;
struct rb_node *node;
struct nft_set *set;
+ struct net *net;
+ u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ genmask = nft_genmask_cur(net);
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* elements are reversed in the rbtree for historical reasons,
+ * from highest to lowest value, that is why end element is
+ * always visited before the start element.
+ */
if (nft_rbtree_interval_end(rbe)) {
rbe_end = rbe;
continue;
}
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext))
+
+ if (nft_set_elem_mark_busy(&rbe->ext)) {
+ rbe_end = NULL;
continue;
+ }
if (rbe_prev) {
rb_erase(&rbe_prev->node, &priv->root);
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 637ce3e8c575..7e4f7063f481 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -10,10 +10,36 @@
struct nft_socket {
enum nft_socket_keys key:8;
union {
- enum nft_registers dreg:8;
+ u8 dreg;
};
};
+static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
+{
+ const struct net_device *indev = nft_in(pkt);
+ const struct sk_buff *skb = pkt->skb;
+ struct sock *sk = NULL;
+
+ if (!indev)
+ return NULL;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, indev);
+ break;
+#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, indev);
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return sk;
+}
+
static void nft_socket_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -27,20 +53,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
sk = NULL;
if (!sk)
- switch(nft_pf(pkt)) {
- case NFPROTO_IPV4:
- sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
- break;
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- case NFPROTO_IPV6:
- sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
- break;
-#endif
- default:
- WARN_ON_ONCE(1);
- regs->verdict.code = NFT_BREAK;
- return;
- }
+ sk = nft_socket_do_lookup(pkt);
if (!sk) {
regs->verdict.code = NFT_BREAK;
@@ -106,9 +119,8 @@ static int nft_socket_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]);
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_SOCKET_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_socket_dump(struct sk_buff *skb,
@@ -123,6 +135,16 @@ static int nft_socket_dump(struct sk_buff *skb,
return 0;
}
+static int nft_socket_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT));
+}
+
static struct nft_expr_type nft_socket_type;
static const struct nft_expr_ops nft_socket_ops = {
.type = &nft_socket_type,
@@ -130,6 +152,7 @@ static const struct nft_expr_ops nft_socket_ops = {
.eval = nft_socket_eval,
.init = nft_socket_init,
.dump = nft_socket_dump,
+ .validate = nft_socket_validate,
};
static struct nft_expr_type nft_socket_type __read_mostly = {
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index 95980154ef02..db780b5985ab 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -13,9 +13,9 @@
#endif
struct nft_tproxy {
- enum nft_registers sreg_addr:8;
- enum nft_registers sreg_port:8;
- u8 family;
+ u8 sreg_addr;
+ u8 sreg_port;
+ u8 family;
};
static void nft_tproxy_eval_v4(const struct nft_expr *expr,
@@ -30,6 +30,12 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
__be16 tport = 0;
struct sock *sk;
+ if (pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
if (!hp) {
regs->verdict.code = NFT_BREAK;
@@ -91,7 +97,8 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
memset(&taddr, 0, sizeof(taddr));
- if (!pkt->tprot_set) {
+ if (pkt->tprot != IPPROTO_TCP &&
+ pkt->tprot != IPPROTO_UDP) {
regs->verdict.code = NFT_BREAK;
return;
}
@@ -247,15 +254,15 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TPROXY_REG_ADDR]) {
- priv->sreg_addr = nft_parse_register(tb[NFTA_TPROXY_REG_ADDR]);
- err = nft_validate_register_load(priv->sreg_addr, alen);
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_ADDR],
+ &priv->sreg_addr, alen);
if (err < 0)
return err;
}
if (tb[NFTA_TPROXY_REG_PORT]) {
- priv->sreg_port = nft_parse_register(tb[NFTA_TPROXY_REG_PORT]);
- err = nft_validate_register_load(priv->sreg_port, sizeof(u16));
+ err = nft_parse_register_load(tb[NFTA_TPROXY_REG_PORT],
+ &priv->sreg_port, sizeof(u16));
if (err < 0)
return err;
}
@@ -282,6 +289,13 @@ static int nft_tproxy_dump(struct sk_buff *skb,
return 0;
}
+static int nft_tproxy_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
+}
+
static struct nft_expr_type nft_tproxy_type;
static const struct nft_expr_ops nft_tproxy_ops = {
.type = &nft_tproxy_type,
@@ -289,6 +303,7 @@ static const struct nft_expr_ops nft_tproxy_ops = {
.eval = nft_tproxy_eval,
.init = nft_tproxy_init,
.dump = nft_tproxy_dump,
+ .validate = nft_tproxy_validate,
};
static struct nft_expr_type nft_tproxy_type __read_mostly = {
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 8ae948fd9dcf..ab69a34210a8 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -14,7 +14,7 @@
struct nft_tunnel {
enum nft_tunnel_keys key:8;
- enum nft_registers dreg:8;
+ u8 dreg;
};
static void nft_tunnel_get_eval(const struct nft_expr *expr,
@@ -72,10 +72,8 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
- priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
-
- return nft_validate_register_store(ctx, priv->dreg, NULL,
- NFT_DATA_VALUE, len);
+ return nft_parse_register_store(ctx, tb[NFTA_TUNNEL_DREG], &priv->dreg,
+ NULL, NFT_DATA_VALUE, len);
}
static int nft_tunnel_get_dump(struct sk_buff *skb,
@@ -104,6 +102,7 @@ static const struct nft_expr_ops nft_tunnel_get_ops = {
static struct nft_expr_type nft_tunnel_type __read_mostly = {
.name = "tunnel",
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_get_ops,
.policy = nft_tunnel_policy,
.maxattr = NFTA_TUNNEL_MAX,
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 3bab89dbc371..5dd6f6ce92e6 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -335,6 +335,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
const struct xt_match *m;
int have_rev = 0;
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(m, &xt[af].match, list) {
if (strcmp(m->name, name) == 0) {
if (m->revision > *bestp)
@@ -343,6 +344,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
have_rev = 1;
}
}
+ mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC && !have_rev)
return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -355,6 +357,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
const struct xt_target *t;
int have_rev = 0;
+ mutex_lock(&xt[af].mutex);
list_for_each_entry(t, &xt[af].target, list) {
if (strcmp(t->name, name) == 0) {
if (t->revision > *bestp)
@@ -363,6 +366,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
have_rev = 1;
}
}
+ mutex_unlock(&xt[af].mutex);
if (af != NFPROTO_UNSPEC && !have_rev)
return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -376,12 +380,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
{
int have_rev, best = -1;
- mutex_lock(&xt[af].mutex);
if (target == 1)
have_rev = target_revfn(af, name, revision, &best);
else
have_rev = match_revfn(af, name, revision, &best);
- mutex_unlock(&xt[af].mutex);
/* Nothing at all? Return 0 to try loading module. */
if (best == -1) {
@@ -736,7 +738,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
- int pad, off = xt_compat_match_offset(match);
+ int off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
char name[sizeof(m->u.user.name)];
@@ -746,9 +748,6 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
match->compat_from_user(m->data, cm->data);
else
memcpy(m->data, cm->data, msize - sizeof(*cm));
- pad = XT_ALIGN(match->matchsize) - match->matchsize;
- if (pad > 0)
- memset(m->data + match->matchsize, 0, pad);
msize += off;
m->u.user.match_size = msize;
@@ -1119,7 +1118,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
{
const struct xt_target *target = t->u.kernel.target;
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
- int pad, off = xt_compat_target_offset(target);
+ int off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
char name[sizeof(t->u.user.name)];
@@ -1129,9 +1128,6 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
target->compat_from_user(t->data, ct->data);
else
memcpy(t->data, ct->data, tsize - sizeof(*ct));
- pad = XT_ALIGN(target->targetsize) - target->targetsize;
- if (pad > 0)
- memset(t->data + target->targetsize, 0, pad);
tsize += off;
t->u.user.target_size = tsize;
@@ -1392,7 +1388,7 @@ xt_replace_table(struct xt_table *table,
table->private = newinfo;
/* make sure all cpus see new ->private value */
- smp_wmb();
+ smp_mb();
/*
* Even though table entries have now been swapped, other CPU's
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 9e05c86ba5c4..932c0ae99bdc 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -118,6 +118,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
} cfg;
int ret;
+ if (strnlen(info->name, sizeof(est->name)) >= sizeof(est->name))
+ return -ENAMETOOLONG;
+
net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
mutex_lock(&xn->hash_lock);
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 4ad5fe27e08b..097534dbc622 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -30,10 +30,9 @@ MODULE_ALIAS("ip6t_SECMARK");
static u8 mode;
static unsigned int
-secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+secmark_tg(struct sk_buff *skb, const struct xt_secmark_target_info_v1 *info)
{
u32 secmark = 0;
- const struct xt_secmark_target_info *info = par->targinfo;
BUG_ON(info->mode != mode);
@@ -49,7 +48,7 @@ secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static int checkentry_lsm(struct xt_secmark_target_info *info)
+static int checkentry_lsm(struct xt_secmark_target_info_v1 *info)
{
int err;
@@ -81,15 +80,15 @@ static int checkentry_lsm(struct xt_secmark_target_info *info)
return 0;
}
-static int secmark_tg_check(const struct xt_tgchk_param *par)
+static int
+secmark_tg_check(const char *table, struct xt_secmark_target_info_v1 *info)
{
- struct xt_secmark_target_info *info = par->targinfo;
int err;
- if (strcmp(par->table, "mangle") != 0 &&
- strcmp(par->table, "security") != 0) {
+ if (strcmp(table, "mangle") != 0 &&
+ strcmp(table, "security") != 0) {
pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
- par->table);
+ table);
return -EINVAL;
}
@@ -124,25 +123,76 @@ static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
}
}
-static struct xt_target secmark_tg_reg __read_mostly = {
- .name = "SECMARK",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = secmark_tg_check,
- .destroy = secmark_tg_destroy,
- .target = secmark_tg,
- .targetsize = sizeof(struct xt_secmark_target_info),
- .me = THIS_MODULE,
+static int secmark_tg_check_v0(const struct xt_tgchk_param *par)
+{
+ struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .mode = info->mode,
+ };
+ int ret;
+
+ memcpy(newinfo.secctx, info->secctx, SECMARK_SECCTX_MAX);
+
+ ret = secmark_tg_check(par->table, &newinfo);
+ info->secid = newinfo.secid;
+
+ return ret;
+}
+
+static unsigned int
+secmark_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .secid = info->secid,
+ };
+
+ return secmark_tg(skb, &newinfo);
+}
+
+static int secmark_tg_check_v1(const struct xt_tgchk_param *par)
+{
+ return secmark_tg_check(par->table, par->targinfo);
+}
+
+static unsigned int
+secmark_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ return secmark_tg(skb, par->targinfo);
+}
+
+static struct xt_target secmark_tg_reg[] __read_mostly = {
+ {
+ .name = "SECMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v0,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v0,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SECMARK",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v1,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v1,
+ .targetsize = sizeof(struct xt_secmark_target_info_v1),
+ .usersize = offsetof(struct xt_secmark_target_info_v1, secid),
+ .me = THIS_MODULE,
+ },
};
static int __init secmark_tg_init(void)
{
- return xt_register_target(&secmark_tg_reg);
+ return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
}
static void __exit secmark_tg_exit(void)
{
- xt_unregister_target(&secmark_tg_reg);
+ xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
}
module_init(secmark_tg_init);
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index bf7bba80e24c..226a317d52a0 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -90,4 +90,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
MODULE_DESCRIPTION("Passive OS fingerprint matching.");
MODULE_ALIAS("ipt_osf");
MODULE_ALIAS("ip6t_osf");
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 46686fb73784..0c101b25cacf 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -76,29 +76,54 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
*/
return false;
- filp = sk->sk_socket->file;
- if (filp == NULL)
+ read_lock_bh(&sk->sk_callback_lock);
+ filp = sk->sk_socket ? sk->sk_socket->file : NULL;
+ if (filp == NULL) {
+ read_unlock_bh(&sk->sk_callback_lock);
return ((info->match ^ info->invert) &
(XT_OWNER_UID | XT_OWNER_GID)) == 0;
+ }
if (info->match & XT_OWNER_UID) {
kuid_t uid_min = make_kuid(net->user_ns, info->uid_min);
kuid_t uid_max = make_kuid(net->user_ns, info->uid_max);
if ((uid_gte(filp->f_cred->fsuid, uid_min) &&
uid_lte(filp->f_cred->fsuid, uid_max)) ^
- !(info->invert & XT_OWNER_UID))
+ !(info->invert & XT_OWNER_UID)) {
+ read_unlock_bh(&sk->sk_callback_lock);
return false;
+ }
}
if (info->match & XT_OWNER_GID) {
+ unsigned int i, match = false;
kgid_t gid_min = make_kgid(net->user_ns, info->gid_min);
kgid_t gid_max = make_kgid(net->user_ns, info->gid_max);
- if ((gid_gte(filp->f_cred->fsgid, gid_min) &&
- gid_lte(filp->f_cred->fsgid, gid_max)) ^
- !(info->invert & XT_OWNER_GID))
+ struct group_info *gi = filp->f_cred->group_info;
+
+ if (gid_gte(filp->f_cred->fsgid, gid_min) &&
+ gid_lte(filp->f_cred->fsgid, gid_max))
+ match = true;
+
+ if (!match && (info->match & XT_OWNER_SUPPL_GROUPS) && gi) {
+ for (i = 0; i < gi->ngroups; ++i) {
+ kgid_t group = gi->gid[i];
+
+ if (gid_gte(group, gid_min) &&
+ gid_lte(group, gid_max)) {
+ match = true;
+ break;
+ }
+ }
+ }
+
+ if (match ^ !(info->invert & XT_OWNER_GID)) {
+ read_unlock_bh(&sk->sk_callback_lock);
return false;
+ }
}
+ read_unlock_bh(&sk->sk_callback_lock);
return true;
}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 570144507df1..2dbf92346a7e 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -155,7 +155,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
/*
* Drop entries with timestamps older then 'time'.
*/
-static void recent_entry_reap(struct recent_table *t, unsigned long time)
+static void recent_entry_reap(struct recent_table *t, unsigned long time,
+ struct recent_entry *working, bool update)
{
struct recent_entry *e;
@@ -165,6 +166,12 @@ static void recent_entry_reap(struct recent_table *t, unsigned long time)
e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
/*
+ * Do not reap the entry which are going to be updated.
+ */
+ if (e == working && update)
+ return;
+
+ /*
* The last time stamp is the most recent.
*/
if (time_after(time, e->stamps[e->index-1]))
@@ -306,7 +313,8 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
/* info->seconds must be non-zero */
if (info->check_set & XT_RECENT_REAP)
- recent_entry_reap(t, time);
+ recent_entry_reap(t, time, e,
+ info->check_set & XT_RECENT_UPDATE && ret);
}
if (info->check_set & XT_RECENT_SET ||
@@ -558,7 +566,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
{
struct recent_table *t = PDE_DATA(file_inode(file));
struct recent_entry *e;
- char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")];
+ char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")];
const char *c = buf;
union nf_inet_addr addr = {};
u_int16_t family;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 2d2fa1d53ea6..05495d3f47b8 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -149,6 +149,8 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_sctp_info *info = par->matchinfo;
+ if (info->flag_count > ARRAY_SIZE(info->flag_info))
+ return -EINVAL;
if (info->flags & ~XT_SCTP_VALID_FLAGS)
return -EINVAL;
if (info->invflags & ~XT_SCTP_VALID_FLAGS)
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index a95b50342dbb..58ba402bc0b0 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -95,11 +95,32 @@ static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
return ret ^ data->invert;
}
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_u32 *data = par->matchinfo;
+ const struct xt_u32_test *ct;
+ unsigned int i;
+
+ if (data->ntests > ARRAY_SIZE(data->tests))
+ return -EINVAL;
+
+ for (i = 0; i < data->ntests; ++i) {
+ ct = &data->tests[i];
+
+ if (ct->nnums > ARRAY_SIZE(ct->location) ||
+ ct->nvalues > ARRAY_SIZE(ct->value))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct xt_match xt_u32_mt_reg __read_mostly = {
.name = "u32",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = u32_mt,
+ .checkentry = u32_mt_checkentry,
.matchsize = sizeof(struct xt_u32),
.me = THIS_MODULE,
};
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index 4d748975117d..a0b7269cf190 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -68,6 +68,28 @@ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
[NLBL_CALIPSO_A_MTYPE] = { .type = NLA_U32 },
};
+static const struct netlbl_calipso_ops *calipso_ops;
+
+/**
+ * netlbl_calipso_ops_register - Register the CALIPSO operations
+ * @ops: ops to register
+ *
+ * Description:
+ * Register the CALIPSO packet engine operations.
+ *
+ */
+const struct netlbl_calipso_ops *
+netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops)
+{
+ return xchg(&calipso_ops, ops);
+}
+EXPORT_SYMBOL(netlbl_calipso_ops_register);
+
+static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
+{
+ return READ_ONCE(calipso_ops);
+}
+
/* NetLabel Command Handlers
*/
/**
@@ -110,16 +132,19 @@ static int netlbl_calipso_add_pass(struct genl_info *info,
*
*/
static int netlbl_calipso_add(struct sk_buff *skb, struct genl_info *info)
-
{
int ret_val = -EINVAL;
struct netlbl_audit audit_info;
+ const struct netlbl_calipso_ops *ops = netlbl_calipso_ops_get();
if (!info->attrs[NLBL_CALIPSO_A_DOI] ||
!info->attrs[NLBL_CALIPSO_A_MTYPE])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ if (!ops)
+ return -EOPNOTSUPP;
+
+ netlbl_netlink_auditinfo(&audit_info);
switch (nla_get_u32(info->attrs[NLBL_CALIPSO_A_MTYPE])) {
case CALIPSO_MAP_PASS:
ret_val = netlbl_calipso_add_pass(info, &audit_info);
@@ -301,7 +326,7 @@ static int netlbl_calipso_remove(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NLBL_CALIPSO_A_DOI])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
cb_arg.doi = nla_get_u32(info->attrs[NLBL_CALIPSO_A_DOI]);
cb_arg.audit_info = &audit_info;
ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
@@ -375,27 +400,6 @@ int __init netlbl_calipso_genl_init(void)
return genl_register_family(&netlbl_calipso_gnl_family);
}
-static const struct netlbl_calipso_ops *calipso_ops;
-
-/**
- * netlbl_calipso_ops_register - Register the CALIPSO operations
- *
- * Description:
- * Register the CALIPSO packet engine operations.
- *
- */
-const struct netlbl_calipso_ops *
-netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops)
-{
- return xchg(&calipso_ops, ops);
-}
-EXPORT_SYMBOL(netlbl_calipso_ops_register);
-
-static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
-{
- return READ_ONCE(calipso_ops);
-}
-
/**
* calipso_doi_add - Add a new DOI to the CALIPSO protocol engine
* @doi_def: the DOI structure
diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c
index 9aacf2da3d98..a0a145db3fc7 100644
--- a/net/netlabel/netlabel_cipso_v4.c
+++ b/net/netlabel/netlabel_cipso_v4.c
@@ -156,8 +156,8 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
return -ENOMEM;
doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL);
if (doi_def->map.std == NULL) {
- ret_val = -ENOMEM;
- goto add_std_failure;
+ kfree(doi_def);
+ return -ENOMEM;
}
doi_def->type = CIPSO_V4_MAP_TRANS;
@@ -198,14 +198,14 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
}
doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
}
doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->lvl.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -273,7 +273,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.local = kcalloc(
doi_def->map.std->cat.local_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.local == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -281,7 +281,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info,
doi_def->map.std->cat.cipso = kcalloc(
doi_def->map.std->cat.cipso_size,
sizeof(u32),
- GFP_KERNEL);
+ GFP_KERNEL | __GFP_NOWARN);
if (doi_def->map.std->cat.cipso == NULL) {
ret_val = -ENOMEM;
goto add_std_failure;
@@ -420,7 +420,7 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info)
!info->attrs[NLBL_CIPSOV4_A_MTYPE])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
switch (nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE])) {
case CIPSO_V4_MAP_TRANS:
ret_val = netlbl_cipsov4_add_std(info, &audit_info);
@@ -581,6 +581,7 @@ list_start:
break;
}
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
genlmsg_end(ans_skb, data);
@@ -589,12 +590,14 @@ list_start:
list_retry:
/* XXX - this limit is a guesstimate */
if (nlsze_mult < 4) {
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
kfree_skb(ans_skb);
nlsze_mult *= 2;
goto list_start;
}
list_failure_lock:
+ cipso_v4_doi_putdef(doi_def);
rcu_read_unlock();
list_failure:
kfree_skb(ans_skb);
@@ -712,7 +715,7 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NLBL_CIPSOV4_A_DOI])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
cb_arg.doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]);
cb_arg.audit_info = &audit_info;
ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain,
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 15fe2120b310..14c3d640f94b 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -871,7 +871,8 @@ int netlbl_catmap_setlong(struct netlbl_lsm_catmap **catmap,
offset -= iter->startbit;
idx = offset / NETLBL_CATMAP_MAPSIZE;
- iter->bitmap[idx] |= bitmap << (offset % NETLBL_CATMAP_MAPSIZE);
+ iter->bitmap[idx] |= (NETLBL_CATMAP_MAPTYPE)bitmap
+ << (offset % NETLBL_CATMAP_MAPSIZE);
return 0;
}
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 21e0095b1d14..43c51242dcd2 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -90,6 +90,7 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = {
static int netlbl_mgmt_add_common(struct genl_info *info,
struct netlbl_audit *audit_info)
{
+ void *pmap = NULL;
int ret_val = -EINVAL;
struct netlbl_domaddr_map *addrmap = NULL;
struct cipso_v4_doi *cipsov4 = NULL;
@@ -189,6 +190,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = -ENOMEM;
goto add_free_addrmap;
}
+ pmap = map;
map->list.addr = addr->s_addr & mask->s_addr;
map->list.mask = mask->s_addr;
map->list.valid = 1;
@@ -197,10 +199,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
map->def.cipso = cipsov4;
ret_val = netlbl_af4list_add(&map->list, &addrmap->list4);
- if (ret_val != 0) {
- kfree(map);
- goto add_free_addrmap;
- }
+ if (ret_val != 0)
+ goto add_free_map;
entry->family = AF_INET;
entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
@@ -237,6 +237,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = -ENOMEM;
goto add_free_addrmap;
}
+ pmap = map;
map->list.addr = *addr;
map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
@@ -249,10 +250,8 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
map->def.calipso = calipso;
ret_val = netlbl_af6list_add(&map->list, &addrmap->list6);
- if (ret_val != 0) {
- kfree(map);
- goto add_free_addrmap;
- }
+ if (ret_val != 0)
+ goto add_free_map;
entry->family = AF_INET6;
entry->def.type = NETLBL_NLTYPE_ADDRSELECT;
@@ -262,10 +261,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
ret_val = netlbl_domhsh_add(entry, audit_info);
if (ret_val != 0)
- goto add_free_addrmap;
+ goto add_free_map;
return 0;
+add_free_map:
+ kfree(pmap);
add_free_addrmap:
kfree(addrmap);
add_doi_put_def:
@@ -446,7 +447,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info)
(info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
return netlbl_mgmt_add_common(info, &audit_info);
}
@@ -469,7 +470,7 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[NLBL_MGMT_A_DOMAIN])
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]);
return netlbl_domhsh_remove(domain, AF_UNSPEC, &audit_info);
@@ -569,7 +570,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info)
(info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL)))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
return netlbl_mgmt_add_common(info, &audit_info);
}
@@ -588,7 +589,7 @@ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info)
{
struct netlbl_audit audit_info;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
return netlbl_domhsh_remove_default(AF_UNSPEC, &audit_info);
}
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index c92894c3e40a..ff52ff2278ed 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -827,7 +827,7 @@ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) {
value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]);
if (value == 1 || value == 0) {
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
netlbl_unlabel_acceptflg_set(value, &audit_info);
return 0;
}
@@ -910,7 +910,7 @@ static int netlbl_unlabel_staticadd(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
@@ -960,7 +960,7 @@ static int netlbl_unlabel_staticadddef(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
@@ -1007,7 +1007,7 @@ static int netlbl_unlabel_staticremove(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
@@ -1047,7 +1047,7 @@ static int netlbl_unlabel_staticremovedef(struct sk_buff *skb,
!info->attrs[NLBL_UNLABEL_A_IPV6MASK])))
return -EINVAL;
- netlbl_netlink_auditinfo(skb, &audit_info);
+ netlbl_netlink_auditinfo(&audit_info);
ret_val = netlbl_unlabel_addrinfo_get(info, &addr, &mask, &addr_len);
if (ret_val != 0)
@@ -1179,12 +1179,13 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
struct netlbl_unlhsh_walk_arg cb_arg;
u32 skip_bkt = cb->args[0];
u32 skip_chain = cb->args[1];
- u32 iter_bkt;
- u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
+ u32 skip_addr4 = cb->args[2];
+ u32 iter_bkt, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0;
struct netlbl_unlhsh_iface *iface;
struct list_head *iter_list;
struct netlbl_af4list *addr4;
#if IS_ENABLED(CONFIG_IPV6)
+ u32 skip_addr6 = cb->args[3];
struct netlbl_af6list *addr6;
#endif
@@ -1195,7 +1196,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
rcu_read_lock();
for (iter_bkt = skip_bkt;
iter_bkt < rcu_dereference(netlbl_unlhsh)->size;
- iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) {
+ iter_bkt++) {
iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt];
list_for_each_entry_rcu(iface, iter_list, list) {
if (!iface->valid ||
@@ -1203,7 +1204,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
continue;
netlbl_af4list_foreach_rcu(addr4,
&iface->addr4_list) {
- if (iter_addr4++ < cb->args[2])
+ if (iter_addr4++ < skip_addr4)
continue;
if (netlbl_unlabel_staticlist_gen(
NLBL_UNLABEL_C_STATICLIST,
@@ -1216,10 +1217,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
goto unlabel_staticlist_return;
}
}
+ iter_addr4 = 0;
+ skip_addr4 = 0;
#if IS_ENABLED(CONFIG_IPV6)
netlbl_af6list_foreach_rcu(addr6,
&iface->addr6_list) {
- if (iter_addr6++ < cb->args[3])
+ if (iter_addr6++ < skip_addr6)
continue;
if (netlbl_unlabel_staticlist_gen(
NLBL_UNLABEL_C_STATICLIST,
@@ -1232,8 +1235,12 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
goto unlabel_staticlist_return;
}
}
+ iter_addr6 = 0;
+ skip_addr6 = 0;
#endif /* IPv6 */
}
+ iter_chain = 0;
+ skip_chain = 0;
}
unlabel_staticlist_return:
diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h
index 4a397cde1a48..2c608677b43b 100644
--- a/net/netlabel/netlabel_user.h
+++ b/net/netlabel/netlabel_user.h
@@ -42,11 +42,9 @@
/**
* netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg
- * @skb: the packet
* @audit_info: NetLabel audit information
*/
-static inline void netlbl_netlink_auditinfo(struct sk_buff *skb,
- struct netlbl_audit *audit_info)
+static inline void netlbl_netlink_auditinfo(struct netlbl_audit *audit_info)
{
security_task_getsecid(current, &audit_info->secid);
audit_info->loginuid = audit_get_loginuid(current);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 1bb9f219f07d..e91489b3274c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -152,6 +152,8 @@ static const struct rhashtable_params netlink_rhashtable_params;
static inline u32 netlink_group_mask(u32 group)
{
+ if (group > 32)
+ return 0;
return group ? 1 << (group - 1) : 0;
}
@@ -461,11 +463,13 @@ void netlink_table_ungrab(void)
static inline void
netlink_lock_table(void)
{
+ unsigned long flags;
+
/* read_lock() synchronizes us to netlink_table_grab */
- read_lock(&nl_table_lock);
+ read_lock_irqsave(&nl_table_lock, flags);
atomic_inc(&nl_table_users);
- read_unlock(&nl_table_lock);
+ read_unlock_irqrestore(&nl_table_lock, flags);
}
static inline void
@@ -574,12 +578,9 @@ static int netlink_insert(struct sock *sk, u32 portid)
if (nlk_sk(sk)->bound)
goto err;
- err = -ENOMEM;
- if (BITS_PER_LONG > 32 &&
- unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX))
- goto err;
+ /* portid can be read locklessly from netlink_getname(). */
+ WRITE_ONCE(nlk_sk(sk)->portid, portid);
- nlk_sk(sk)->portid = portid;
sock_hold(sk);
err = __netlink_insert(table, sk);
@@ -597,7 +598,10 @@ static int netlink_insert(struct sock *sk, u32 portid)
/* We need to ensure that the socket is hashed and visible. */
smp_wmb();
- nlk_sk(sk)->bound = portid;
+ /* Paired with lockless reads from netlink_bind(),
+ * netlink_connect() and netlink_sendmsg().
+ */
+ WRITE_ONCE(nlk_sk(sk)->bound, portid);
err:
release_sock(sk);
@@ -1016,7 +1020,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
else if (nlk->ngroups < 8*sizeof(groups))
groups &= (1UL << nlk->ngroups) - 1;
- bound = nlk->bound;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ bound = READ_ONCE(nlk->bound);
if (bound) {
/* Ensure nlk->portid is up-to-date. */
smp_rmb();
@@ -1025,7 +1030,6 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
}
- netlink_lock_table();
if (nlk->netlink_bind && groups) {
int group;
@@ -1037,13 +1041,14 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr,
if (!err)
continue;
netlink_undo_bind(group, groups, sk);
- goto unlock;
+ return err;
}
}
/* No need for barriers here as we return to user-space without
* using any of the bound attributes.
*/
+ netlink_lock_table();
if (!bound) {
err = nladdr->nl_pid ?
netlink_insert(sk, nladdr->nl_pid) :
@@ -1085,9 +1090,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
if (addr->sa_family == AF_UNSPEC) {
- sk->sk_state = NETLINK_UNCONNECTED;
- nlk->dst_portid = 0;
- nlk->dst_group = 0;
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, 0);
+ WRITE_ONCE(nlk->dst_group, 0);
return 0;
}
if (addr->sa_family != AF_NETLINK)
@@ -1102,14 +1109,17 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
/* No need for barriers here as we return to user-space without
* using any of the bound attributes.
+ * Paired with WRITE_ONCE() in netlink_insert().
*/
- if (!nlk->bound)
+ if (!READ_ONCE(nlk->bound))
err = netlink_autobind(sock);
if (err == 0) {
- sk->sk_state = NETLINK_CONNECTED;
- nlk->dst_portid = nladdr->nl_pid;
- nlk->dst_group = ffs(nladdr->nl_groups);
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid);
+ WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups));
}
return err;
@@ -1126,10 +1136,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_pad = 0;
if (peer) {
- nladdr->nl_pid = nlk->dst_portid;
- nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ nladdr->nl_pid = READ_ONCE(nlk->dst_portid);
+ nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group));
} else {
- nladdr->nl_pid = nlk->portid;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ nladdr->nl_pid = READ_ONCE(nlk->portid);
netlink_lock_table();
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
@@ -1156,8 +1168,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
/* Don't bother queuing skb if kernel socket has no input function */
nlk = nlk_sk(sock);
- if (sock->sk_state == NETLINK_CONNECTED &&
- nlk->dst_portid != nlk_sk(ssk)->portid) {
+ /* dst_portid and sk_state can be changed in netlink_connect() */
+ if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
+ READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
sock_put(sock);
return ERR_PTR(-ECONNREFUSED);
}
@@ -1590,6 +1603,7 @@ out:
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
struct netlink_set_err_data info;
+ unsigned long flags;
struct sock *sk;
int ret = 0;
@@ -1599,12 +1613,12 @@ int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
/* sk->sk_err wants a positive error value */
info.code = -code;
- read_lock(&nl_table_lock);
+ read_lock_irqsave(&nl_table_lock, flags);
sk_for_each_bound(sk, &nl_table[ssk->sk_protocol].mc_list)
ret += do_one_set_err(sk, &info);
- read_unlock(&nl_table_lock);
+ read_unlock_irqrestore(&nl_table_lock, flags);
return ret;
}
EXPORT_SYMBOL(netlink_set_err);
@@ -1725,7 +1739,8 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
- int len, val, err;
+ unsigned int flag;
+ int len, val;
if (level != SOL_NETLINK)
return -ENOPROTOOPT;
@@ -1737,39 +1752,17 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
- if (len < sizeof(int))
- return -EINVAL;
- len = sizeof(int);
- val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
- if (put_user(len, optlen) ||
- put_user(val, optval))
- return -EFAULT;
- err = 0;
+ flag = NETLINK_F_RECV_PKTINFO;
break;
case NETLINK_BROADCAST_ERROR:
- if (len < sizeof(int))
- return -EINVAL;
- len = sizeof(int);
- val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
- if (put_user(len, optlen) ||
- put_user(val, optval))
- return -EFAULT;
- err = 0;
+ flag = NETLINK_F_BROADCAST_SEND_ERROR;
break;
case NETLINK_NO_ENOBUFS:
- if (len < sizeof(int))
- return -EINVAL;
- len = sizeof(int);
- val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
- if (put_user(len, optlen) ||
- put_user(val, optval))
- return -EFAULT;
- err = 0;
+ flag = NETLINK_F_RECV_NO_ENOBUFS;
break;
case NETLINK_LIST_MEMBERSHIPS: {
- int pos, idx, shift;
+ int pos, idx, shift, err = 0;
- err = 0;
netlink_lock_table();
for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
if (len - pos < sizeof(u32))
@@ -1783,34 +1776,32 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
break;
}
}
- if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
+ if (put_user(ALIGN(BITS_TO_BYTES(nlk->ngroups), sizeof(u32)), optlen))
err = -EFAULT;
netlink_unlock_table();
- break;
+ return err;
}
case NETLINK_CAP_ACK:
- if (len < sizeof(int))
- return -EINVAL;
- len = sizeof(int);
- val = nlk->flags & NETLINK_F_CAP_ACK ? 1 : 0;
- if (put_user(len, optlen) ||
- put_user(val, optval))
- return -EFAULT;
- err = 0;
+ flag = NETLINK_F_CAP_ACK;
break;
case NETLINK_EXT_ACK:
- if (len < sizeof(int))
- return -EINVAL;
- len = sizeof(int);
- val = nlk->flags & NETLINK_F_EXT_ACK ? 1 : 0;
- if (put_user(len, optlen) || put_user(val, optval))
- return -EFAULT;
- err = 0;
+ flag = NETLINK_F_EXT_ACK;
break;
default:
- err = -ENOPROTOOPT;
+ return -ENOPROTOOPT;
}
- return err;
+
+ if (len < sizeof(int))
+ return -EINVAL;
+
+ len = sizeof(int);
+ val = nlk->flags & flag ? 1 : 0;
+
+ if (put_user(len, optlen) ||
+ copy_to_user(optval, &val, len))
+ return -EFAULT;
+
+ return 0;
}
static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
@@ -1846,6 +1837,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (msg->msg_flags&MSG_OOB)
return -EOPNOTSUPP;
+ if (len == 0) {
+ pr_warn_once("Zero length message leads to an empty skb\n");
+ return -ENODATA;
+ }
+
err = scm_send(sock, msg, &scm, true);
if (err < 0)
return err;
@@ -1864,11 +1860,13 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
netlink_skb_flags |= NETLINK_SKB_DST;
} else {
- dst_portid = nlk->dst_portid;
- dst_group = nlk->dst_group;
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ dst_portid = READ_ONCE(nlk->dst_portid);
+ dst_group = READ_ONCE(nlk->dst_group);
}
- if (!nlk->bound) {
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ if (!READ_ONCE(nlk->bound)) {
err = netlink_autobind(sock);
if (err)
goto out;
@@ -1963,7 +1961,6 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
copied = len;
}
- skb_reset_transport_header(data_skb);
err = skb_copy_datagram_msg(data_skb, 0, msg, copied);
if (msg->msg_name) {
@@ -1987,7 +1984,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_free_datagram(sk, skb);
- if (nlk->cb_running &&
+ if (READ_ONCE(nlk->cb_running) &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
@@ -2228,6 +2225,13 @@ static int netlink_dump(struct sock *sk)
* single netdev. The outcome is MSG_TRUNC error.
*/
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+
+ /* Make sure malicious BPF programs can not read unitialized memory
+ * from skb->head -> skb->data
+ */
+ skb_reset_network_header(skb);
+ skb_reset_mac_header(skb);
+
netlink_skb_set_owner_r(skb, sk);
if (nlk->dump_done_errno > 0)
@@ -2262,7 +2266,7 @@ static int netlink_dump(struct sock *sk)
if (cb->done)
cb->done(cb);
- nlk->cb_running = false;
+ WRITE_ONCE(nlk->cb_running, false);
module = cb->module;
skb = cb->skb;
mutex_unlock(nlk->cb_mutex);
@@ -2322,7 +2326,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
goto error_put;
}
- nlk->cb_running = true;
+ WRITE_ONCE(nlk->cb_running, true);
nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
@@ -2496,13 +2500,15 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid,
/* errors reported via destination sk->sk_err, but propagate
* delivery errors if NETLINK_BROADCAST_ERROR flag is set */
err = nlmsg_multicast(sk, skb, exclude_portid, group, flags);
+ if (err == -ESRCH)
+ err = 0;
}
if (report) {
int err2;
err2 = nlmsg_unicast(sk, skb, portid);
- if (!err || err == -ESRCH)
+ if (!err)
err = err2;
}
@@ -2626,7 +2632,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
nlk->groups ? (u32)nlk->groups[0] : 0,
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
- nlk->cb_running,
+ READ_ONCE(nlk->cb_running),
refcount_read(&s->sk_refcnt),
atomic_read(&s->sk_drops),
sock_i_ino(s)
diff --git a/net/netlink/diag.c b/net/netlink/diag.c
index 7dda33b9b784..85ee4891c2c7 100644
--- a/net/netlink/diag.c
+++ b/net/netlink/diag.c
@@ -93,6 +93,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net *net = sock_net(skb->sk);
struct netlink_diag_req *req;
struct netlink_sock *nlsk;
+ unsigned long flags;
struct sock *sk;
int num = 2;
int ret = 0;
@@ -151,7 +152,7 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
num++;
mc_list:
- read_lock(&nl_table_lock);
+ read_lock_irqsave(&nl_table_lock, flags);
sk_for_each_bound(sk, &tbl->mc_list) {
if (sk_hashed(sk))
continue;
@@ -166,13 +167,13 @@ mc_list:
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI,
- sock_i_ino(sk)) < 0) {
+ __sock_i_ino(sk)) < 0) {
ret = 1;
break;
}
num++;
}
- read_unlock(&nl_table_lock);
+ read_unlock_irqrestore(&nl_table_lock, flags);
done:
cb->args[0] = num;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index ede73ecfb1f5..f449be93b375 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -961,11 +961,46 @@ static struct genl_family genl_ctrl __ro_after_init = {
.netnsok = true,
};
+static int genl_bind(struct net *net, int group)
+{
+ const struct genl_family *family;
+ unsigned int id;
+ int ret = 0;
+
+ genl_lock_all();
+
+ idr_for_each_entry(&genl_fam_idr, family, id) {
+ const struct genl_multicast_group *grp;
+ int i;
+
+ if (family->n_mcgrps == 0)
+ continue;
+
+ i = group - family->mcgrp_offset;
+ if (i < 0 || i >= family->n_mcgrps)
+ continue;
+
+ grp = &family->mcgrps[i];
+ if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ ret = -EPERM;
+ if (grp->cap_sys_admin &&
+ !ns_capable(net->user_ns, CAP_SYS_ADMIN))
+ ret = -EPERM;
+
+ break;
+ }
+
+ genl_unlock_all();
+ return ret;
+}
+
static int __net_init genl_pernet_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
+ .bind = genl_bind,
};
/* we'll bump the group number right afterwards */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 43910e50752c..146550ce0ac6 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -403,6 +403,11 @@ static int nr_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
@@ -658,6 +663,11 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
goto out_release;
}
+ if (sock->state == SS_CONNECTING) {
+ err = -EALREADY;
+ goto out_release;
+ }
+
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index 029c8bb90f4c..a7d3a265befb 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -126,7 +126,7 @@ void nr_write_internal(struct sock *sk, int frametype)
unsigned char *dptr;
int len, timeout;
- len = NR_NETWORK_LEN + NR_TRANSPORT_LEN;
+ len = NR_TRANSPORT_LEN;
switch (frametype & 0x0F) {
case NR_CONNREQ:
@@ -144,7 +144,8 @@ void nr_write_internal(struct sock *sk, int frametype)
return;
}
- if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
+ skb = alloc_skb(NR_NETWORK_LEN + len, GFP_ATOMIC);
+ if (!skb)
return;
/*
@@ -152,7 +153,7 @@ void nr_write_internal(struct sock *sk, int frametype)
*/
skb_reserve(skb, NR_NETWORK_LEN);
- dptr = skb_put(skb, skb_tailroom(skb));
+ dptr = skb_put(skb, len);
switch (frametype & 0x0F) {
case NR_CONNREQ:
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index 908e53ab47a4..2bf99bd5be58 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -127,8 +127,7 @@ static void nr_heartbeat_expiry(struct timer_list *t)
sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
- sock_put(sk);
- return;
+ goto out;
}
break;
@@ -149,6 +148,8 @@ static void nr_heartbeat_expiry(struct timer_list *t)
nr_start_heartbeat(sk);
bh_unlock_sock(sk);
+out:
+ sock_put(sk);
}
static void nr_t2timer_expiry(struct timer_list *t)
@@ -162,6 +163,7 @@ static void nr_t2timer_expiry(struct timer_list *t)
nr_enquiry_response(sk);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t4timer_expiry(struct timer_list *t)
@@ -172,6 +174,7 @@ static void nr_t4timer_expiry(struct timer_list *t)
bh_lock_sock(sk);
nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY;
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_idletimer_expiry(struct timer_list *t)
@@ -200,6 +203,7 @@ static void nr_idletimer_expiry(struct timer_list *t)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void nr_t1timer_expiry(struct timer_list *t)
@@ -212,8 +216,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_1:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_CONNREQ);
@@ -223,8 +226,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_2:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_write_internal(sk, NR_DISCREQ);
@@ -234,8 +236,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
case NR_STATE_3:
if (nr->n2count == nr->n2) {
nr_disconnect(sk, ETIMEDOUT);
- bh_unlock_sock(sk);
- return;
+ goto out;
} else {
nr->n2count++;
nr_requeue_frames(sk);
@@ -244,5 +245,7 @@ static void nr_t1timer_expiry(struct timer_list *t)
}
nr_start_t1timer(sk);
+out:
bh_unlock_sock(sk);
+ sock_put(sk);
}
diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c
index d3e594eb36d0..adf16ff007cc 100644
--- a/net/nfc/af_nfc.c
+++ b/net/nfc/af_nfc.c
@@ -72,6 +72,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto)
proto_tab[nfc_proto->id] = nfc_proto;
write_unlock(&proto_tab_lock);
+ if (rc)
+ proto_unregister(nfc_proto->proto);
+
return rc;
}
EXPORT_SYMBOL(nfc_proto_register);
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 947a470f929d..dd12ee46ac73 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -50,7 +50,7 @@ int nfc_fw_download(struct nfc_dev *dev, const char *firmware_name)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -106,13 +106,13 @@ int nfc_dev_up(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
- rc = -ERFKILL;
+ if (dev->shutting_down) {
+ rc = -ENODEV;
goto error;
}
- if (!device_is_registered(&dev->dev)) {
- rc = -ENODEV;
+ if (dev->rfkill && rfkill_blocked(dev->rfkill)) {
+ rc = -ERFKILL;
goto error;
}
@@ -154,7 +154,7 @@ int nfc_dev_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -218,7 +218,7 @@ int nfc_start_poll(struct nfc_dev *dev, u32 im_protocols, u32 tm_protocols)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -257,7 +257,7 @@ int nfc_stop_poll(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -302,7 +302,7 @@ int nfc_dep_link_up(struct nfc_dev *dev, int target_index, u8 comm_mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -346,7 +346,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -412,7 +412,7 @@ int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -458,7 +458,7 @@ int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx, u8 mode)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -505,7 +505,7 @@ int nfc_data_exchange(struct nfc_dev *dev, u32 target_idx, struct sk_buff *skb,
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
kfree_skb(skb);
goto error;
@@ -562,7 +562,7 @@ int nfc_enable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -611,7 +611,7 @@ int nfc_disable_se(struct nfc_dev *dev, u32 se_idx)
device_lock(&dev->dev);
- if (!device_is_registered(&dev->dev)) {
+ if (dev->shutting_down) {
rc = -ENODEV;
goto error;
}
@@ -646,7 +646,7 @@ error:
return rc;
}
-int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_set_remote_general_bytes(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
{
pr_debug("dev_name=%s gb_len=%d\n", dev_name(&dev->dev), gb_len);
@@ -675,7 +675,7 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb)
EXPORT_SYMBOL(nfc_tm_data_received);
int nfc_tm_activated(struct nfc_dev *dev, u32 protocol, u8 comm_mode,
- u8 *gb, size_t gb_len)
+ const u8 *gb, size_t gb_len)
{
int rc;
@@ -1130,11 +1130,7 @@ int nfc_register_device(struct nfc_dev *dev)
if (rc)
pr_err("Could not register llcp device\n");
- rc = nfc_genl_device_added(dev);
- if (rc)
- pr_debug("The userspace won't be notified that the device %s was added\n",
- dev_name(&dev->dev));
-
+ device_lock(&dev->dev);
dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev,
RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev);
if (dev->rfkill) {
@@ -1143,6 +1139,13 @@ int nfc_register_device(struct nfc_dev *dev)
dev->rfkill = NULL;
}
}
+ dev->shutting_down = false;
+ device_unlock(&dev->dev);
+
+ rc = nfc_genl_device_added(dev);
+ if (rc)
+ pr_debug("The userspace won't be notified that the device %s was added\n",
+ dev_name(&dev->dev));
return 0;
}
@@ -1159,24 +1162,25 @@ void nfc_unregister_device(struct nfc_dev *dev)
pr_debug("dev_name=%s\n", dev_name(&dev->dev));
+ rc = nfc_genl_device_removed(dev);
+ if (rc)
+ pr_debug("The userspace won't be notified that the device %s "
+ "was removed\n", dev_name(&dev->dev));
+
+ device_lock(&dev->dev);
if (dev->rfkill) {
rfkill_unregister(dev->rfkill);
rfkill_destroy(dev->rfkill);
+ dev->rfkill = NULL;
}
+ dev->shutting_down = true;
+ device_unlock(&dev->dev);
if (dev->ops->check_presence) {
- device_lock(&dev->dev);
- dev->shutting_down = true;
- device_unlock(&dev->dev);
del_timer_sync(&dev->check_pres_timer);
cancel_work_sync(&dev->check_pres_work);
}
- rc = nfc_genl_device_removed(dev);
- if (rc)
- pr_debug("The userspace won't be notified that the device %s "
- "was removed\n", dev_name(&dev->dev));
-
nfc_llcp_unregister_device(dev);
mutex_lock(&nfc_devlist_mutex);
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index ec0a8998e52d..d6258638a07a 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -286,6 +286,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param)
static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
{
struct digital_tg_mdaa_params *params;
+ int rc;
params = kzalloc(sizeof(*params), GFP_KERNEL);
if (!params)
@@ -300,8 +301,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech)
get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2);
params->sc = DIGITAL_SENSF_FELICA_SC;
- return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
- 500, digital_tg_recv_atr_req, NULL);
+ rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params,
+ 500, digital_tg_recv_atr_req, NULL);
+ if (rc)
+ kfree(params);
+
+ return rc;
}
static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech)
diff --git a/net/nfc/digital_dep.c b/net/nfc/digital_dep.c
index 4f9a973988b2..1eed0cf59190 100644
--- a/net/nfc/digital_dep.c
+++ b/net/nfc/digital_dep.c
@@ -1285,6 +1285,8 @@ static void digital_tg_recv_dep_req(struct nfc_digital_dev *ddev, void *arg,
}
rc = nfc_tm_data_received(ddev->nfc_dev, resp);
+ if (rc)
+ resp = NULL;
exit:
kfree_skb(ddev->chaining_skb);
diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c
index 2021d1d58a75..c092b02fde8a 100644
--- a/net/nfc/digital_technology.c
+++ b/net/nfc/digital_technology.c
@@ -474,8 +474,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev,
skb_put_u8(skb, sel_cmd);
skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR);
- return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
- target);
+ rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res,
+ target);
+ if (rc)
+ kfree_skb(skb);
+
+ return rc;
}
static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg,
diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c
index fe988936ad92..e6863c71f566 100644
--- a/net/nfc/hci/llc_shdlc.c
+++ b/net/nfc/hci/llc_shdlc.c
@@ -134,7 +134,7 @@ static bool llc_shdlc_x_lteq_y_lt_z(int x, int y, int z)
return ((y >= x) || (y < z)) ? true : false;
}
-static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
+static struct sk_buff *llc_shdlc_alloc_skb(const struct llc_shdlc *shdlc,
int payload_len)
{
struct sk_buff *skb;
@@ -148,7 +148,7 @@ static struct sk_buff *llc_shdlc_alloc_skb(struct llc_shdlc *shdlc,
}
/* immediately sends an S frame. */
-static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_s_frame(const struct llc_shdlc *shdlc,
enum sframe_type sframe_type, int nr)
{
int r;
@@ -170,7 +170,7 @@ static int llc_shdlc_send_s_frame(struct llc_shdlc *shdlc,
}
/* immediately sends an U frame. skb may contain optional payload */
-static int llc_shdlc_send_u_frame(struct llc_shdlc *shdlc,
+static int llc_shdlc_send_u_frame(const struct llc_shdlc *shdlc,
struct sk_buff *skb,
enum uframe_modifier uframe_modifier)
{
@@ -372,7 +372,7 @@ static void llc_shdlc_connect_complete(struct llc_shdlc *shdlc, int r)
wake_up(shdlc->connect_wq);
}
-static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_initiate(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
@@ -388,7 +388,7 @@ static int llc_shdlc_connect_initiate(struct llc_shdlc *shdlc)
return llc_shdlc_send_u_frame(shdlc, skb, U_FRAME_RSET);
}
-static int llc_shdlc_connect_send_ua(struct llc_shdlc *shdlc)
+static int llc_shdlc_connect_send_ua(const struct llc_shdlc *shdlc)
{
struct sk_buff *skb;
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index 1f68724d44d3..a070a57fc151 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -233,15 +233,15 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock);
/* TLV API */
int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
- u8 *tlv_array, u16 tlv_array_len);
+ const u8 *tlv_array, u16 tlv_array_len);
int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
- u8 *tlv_array, u16 tlv_array_len);
+ const u8 *tlv_array, u16 tlv_array_len);
/* Commands API */
void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length);
struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap);
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
size_t uri_len);
void nfc_llcp_free_sdp_tlv(struct nfc_llcp_sdp_tlv *sdp);
void nfc_llcp_free_sdp_tlv_list(struct hlist_head *sdp_head);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index d1fc019e932e..737c7aa384f4 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -27,7 +27,7 @@
#include "nfc.h"
#include "llcp.h"
-static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
+static const u8 llcp_tlv_length[LLCP_TLV_MAX] = {
0,
1, /* VERSION */
2, /* MIUX */
@@ -41,7 +41,7 @@ static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
};
-static u8 llcp_tlv8(u8 *tlv, u8 type)
+static u8 llcp_tlv8(const u8 *tlv, u8 type)
{
if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
return 0;
@@ -49,7 +49,7 @@ static u8 llcp_tlv8(u8 *tlv, u8 type)
return tlv[2];
}
-static u16 llcp_tlv16(u8 *tlv, u8 type)
+static u16 llcp_tlv16(const u8 *tlv, u8 type)
{
if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
return 0;
@@ -58,37 +58,37 @@ static u16 llcp_tlv16(u8 *tlv, u8 type)
}
-static u8 llcp_tlv_version(u8 *tlv)
+static u8 llcp_tlv_version(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_VERSION);
}
-static u16 llcp_tlv_miux(u8 *tlv)
+static u16 llcp_tlv_miux(const u8 *tlv)
{
return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7ff;
}
-static u16 llcp_tlv_wks(u8 *tlv)
+static u16 llcp_tlv_wks(const u8 *tlv)
{
return llcp_tlv16(tlv, LLCP_TLV_WKS);
}
-static u16 llcp_tlv_lto(u8 *tlv)
+static u16 llcp_tlv_lto(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_LTO);
}
-static u8 llcp_tlv_opt(u8 *tlv)
+static u8 llcp_tlv_opt(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_OPT);
}
-static u8 llcp_tlv_rw(u8 *tlv)
+static u8 llcp_tlv_rw(const u8 *tlv)
{
return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf;
}
-u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
+u8 *nfc_llcp_build_tlv(u8 type, const u8 *value, u8 value_length, u8 *tlv_length)
{
u8 *tlv, length;
@@ -142,7 +142,7 @@ struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdres_tlv(u8 tid, u8 sap)
return sdres;
}
-struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, char *uri,
+struct nfc_llcp_sdp_tlv *nfc_llcp_build_sdreq_tlv(u8 tid, const char *uri,
size_t uri_len)
{
struct nfc_llcp_sdp_tlv *sdreq;
@@ -202,9 +202,10 @@ void nfc_llcp_free_sdp_tlv_list(struct hlist_head *head)
}
int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
- u8 *tlv_array, u16 tlv_array_len)
+ const u8 *tlv_array, u16 tlv_array_len)
{
- u8 *tlv = tlv_array, type, length, offset = 0;
+ const u8 *tlv = tlv_array;
+ u8 type, length, offset = 0;
pr_debug("TLV array length %d\n", tlv_array_len);
@@ -251,9 +252,10 @@ int nfc_llcp_parse_gb_tlv(struct nfc_llcp_local *local,
}
int nfc_llcp_parse_connection_tlv(struct nfc_llcp_sock *sock,
- u8 *tlv_array, u16 tlv_array_len)
+ const u8 *tlv_array, u16 tlv_array_len)
{
- u8 *tlv = tlv_array, type, length, offset = 0;
+ const u8 *tlv = tlv_array;
+ u8 type, length, offset = 0;
pr_debug("TLV array length %d\n", tlv_array_len);
@@ -307,7 +309,7 @@ static struct sk_buff *llcp_add_header(struct sk_buff *pdu,
return pdu;
}
-static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv,
+static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, const u8 *tlv,
u8 tlv_length)
{
/* XXX Add an skb length check */
@@ -401,9 +403,11 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
{
struct nfc_llcp_local *local;
struct sk_buff *skb;
- u8 *service_name_tlv = NULL, service_name_tlv_length;
- u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length, rw;
+ const u8 *service_name_tlv = NULL;
+ const u8 *miux_tlv = NULL;
+ const u8 *rw_tlv = NULL;
+ u8 service_name_tlv_length = 0;
+ u8 miux_tlv_length, rw_tlv_length, rw;
int err;
u16 size = 0;
__be16 miux;
@@ -477,8 +481,9 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
{
struct nfc_llcp_local *local;
struct sk_buff *skb;
- u8 *miux_tlv = NULL, miux_tlv_length;
- u8 *rw_tlv = NULL, rw_tlv_length, rw;
+ const u8 *miux_tlv = NULL;
+ const u8 *rw_tlv = NULL;
+ u8 miux_tlv_length, rw_tlv_length, rw;
int err;
u16 size = 0;
__be16 miux;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 4fa015208aab..a217830f0f34 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -157,6 +157,13 @@ static void nfc_llcp_socket_release(struct nfc_llcp_local *local, bool device,
struct nfc_llcp_local *nfc_llcp_local_get(struct nfc_llcp_local *local)
{
+ /* Since using nfc_llcp_local may result in usage of nfc_dev, whenever
+ * we hold a reference to local, we also need to hold a reference to
+ * the device to avoid UAF.
+ */
+ if (!nfc_get_device(local->dev->idx))
+ return NULL;
+
kref_get(&local->ref);
return local;
@@ -171,6 +178,7 @@ static void local_cleanup(struct nfc_llcp_local *local)
cancel_work_sync(&local->rx_work);
cancel_work_sync(&local->timeout_work);
kfree_skb(local->rx_pending);
+ local->rx_pending = NULL;
del_timer_sync(&local->sdreq_timer);
cancel_work_sync(&local->sdreq_timeout_work);
nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
@@ -189,10 +197,18 @@ static void local_release(struct kref *ref)
int nfc_llcp_local_put(struct nfc_llcp_local *local)
{
+ struct nfc_dev *dev;
+ int ret;
+
if (local == NULL)
return 0;
- return kref_put(&local->ref, local_release);
+ dev = local->dev;
+
+ ret = kref_put(&local->ref, local_release);
+ nfc_put_device(dev);
+
+ return ret;
}
static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
@@ -215,17 +231,13 @@ static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
if (tmp_sock->ssap == ssap && tmp_sock->dsap == dsap) {
llcp_sock = tmp_sock;
+ sock_hold(&llcp_sock->sk);
break;
}
}
read_unlock(&local->sockets.lock);
- if (llcp_sock == NULL)
- return NULL;
-
- sock_hold(&llcp_sock->sk);
-
return llcp_sock;
}
@@ -313,7 +325,7 @@ static char *wks[] = {
"urn:nfc:sn:snep",
};
-static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
+static int nfc_llcp_wks_sap(const char *service_name, size_t service_name_len)
{
int sap, num_wks;
@@ -337,7 +349,8 @@ static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
static
struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
- u8 *sn, size_t sn_len)
+ const u8 *sn, size_t sn_len,
+ bool needref)
{
struct sock *sk;
struct nfc_llcp_sock *llcp_sock, *tmp_sock;
@@ -373,6 +386,8 @@ struct nfc_llcp_sock *nfc_llcp_sock_from_sn(struct nfc_llcp_local *local,
if (memcmp(sn, tmp_sock->service_name, sn_len) == 0) {
llcp_sock = tmp_sock;
+ if (needref)
+ sock_hold(&llcp_sock->sk);
break;
}
}
@@ -414,7 +429,8 @@ u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
* to this service name.
*/
if (nfc_llcp_sock_from_sn(local, sock->service_name,
- sock->service_name_len) != NULL) {
+ sock->service_name_len,
+ false) != NULL) {
mutex_unlock(&local->sdp_lock);
return LLCP_SAP_MAX;
@@ -534,7 +550,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
{
u8 *gb_cur, version, version_length;
u8 lto_length, wks_length, miux_length;
- u8 *version_tlv = NULL, *lto_tlv = NULL,
+ const u8 *version_tlv = NULL, *lto_tlv = NULL,
*wks_tlv = NULL, *miux_tlv = NULL;
__be16 wks = cpu_to_be16(local->local_wks);
u8 gb_len = 0;
@@ -624,7 +640,7 @@ u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len)
return local->gb;
}
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len)
{
struct nfc_llcp_local *local;
@@ -651,27 +667,27 @@ int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
local->remote_gb_len - 3);
}
-static u8 nfc_llcp_dsap(struct sk_buff *pdu)
+static u8 nfc_llcp_dsap(const struct sk_buff *pdu)
{
return (pdu->data[0] & 0xfc) >> 2;
}
-static u8 nfc_llcp_ptype(struct sk_buff *pdu)
+static u8 nfc_llcp_ptype(const struct sk_buff *pdu)
{
return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6);
}
-static u8 nfc_llcp_ssap(struct sk_buff *pdu)
+static u8 nfc_llcp_ssap(const struct sk_buff *pdu)
{
return pdu->data[1] & 0x3f;
}
-static u8 nfc_llcp_ns(struct sk_buff *pdu)
+static u8 nfc_llcp_ns(const struct sk_buff *pdu)
{
return pdu->data[2] >> 4;
}
-static u8 nfc_llcp_nr(struct sk_buff *pdu)
+static u8 nfc_llcp_nr(const struct sk_buff *pdu)
{
return pdu->data[2] & 0xf;
}
@@ -813,23 +829,15 @@ out:
}
static struct nfc_llcp_sock *nfc_llcp_sock_get_sn(struct nfc_llcp_local *local,
- u8 *sn, size_t sn_len)
+ const u8 *sn, size_t sn_len)
{
- struct nfc_llcp_sock *llcp_sock;
-
- llcp_sock = nfc_llcp_sock_from_sn(local, sn, sn_len);
-
- if (llcp_sock == NULL)
- return NULL;
-
- sock_hold(&llcp_sock->sk);
-
- return llcp_sock;
+ return nfc_llcp_sock_from_sn(local, sn, sn_len, true);
}
-static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len)
+static const u8 *nfc_llcp_connect_sn(const struct sk_buff *skb, size_t *sn_len)
{
- u8 *tlv = &skb->data[2], type, length;
+ u8 type, length;
+ const u8 *tlv = &skb->data[2];
size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0;
while (offset < tlv_array_len) {
@@ -887,7 +895,7 @@ static void nfc_llcp_recv_ui(struct nfc_llcp_local *local,
}
static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct sock *new_sk, *parent;
struct nfc_llcp_sock *sock, *new_sock;
@@ -905,7 +913,7 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
goto fail;
}
} else {
- u8 *sn;
+ const u8 *sn;
size_t sn_len;
sn = nfc_llcp_connect_sn(skb, &sn_len);
@@ -958,8 +966,17 @@ static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
}
new_sock = nfc_llcp_sock(new_sk);
- new_sock->dev = local->dev;
+
new_sock->local = nfc_llcp_local_get(local);
+ if (!new_sock->local) {
+ reason = LLCP_DM_REJ;
+ sock_put(&new_sock->sk);
+ release_sock(&sock->sk);
+ sock_put(&sock->sk);
+ goto fail;
+ }
+
+ new_sock->dev = local->dev;
new_sock->rw = sock->rw;
new_sock->miux = sock->miux;
new_sock->nfc_protocol = sock->nfc_protocol;
@@ -1124,7 +1141,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local,
}
static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1167,7 +1184,8 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
nfc_llcp_sock_put(llcp_sock);
}
-static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_cc(struct nfc_llcp_local *local,
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1200,7 +1218,8 @@ static void nfc_llcp_recv_cc(struct nfc_llcp_local *local, struct sk_buff *skb)
nfc_llcp_sock_put(llcp_sock);
}
-static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
+static void nfc_llcp_recv_dm(struct nfc_llcp_local *local,
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
struct sock *sk;
@@ -1238,12 +1257,13 @@ static void nfc_llcp_recv_dm(struct nfc_llcp_local *local, struct sk_buff *skb)
}
static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
- struct sk_buff *skb)
+ const struct sk_buff *skb)
{
struct nfc_llcp_sock *llcp_sock;
- u8 dsap, ssap, *tlv, type, length, tid, sap;
+ u8 dsap, ssap, type, length, tid, sap;
+ const u8 *tlv;
u16 tlv_len, offset;
- char *service_name;
+ const char *service_name;
size_t service_name_len;
struct nfc_llcp_sdp_tlv *sdp;
HLIST_HEAD(llc_sdres_list);
@@ -1285,7 +1305,8 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
}
llcp_sock = nfc_llcp_sock_from_sn(local, service_name,
- service_name_len);
+ service_name_len,
+ true);
if (!llcp_sock) {
sap = 0;
goto add_snl;
@@ -1305,6 +1326,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
if (sap == LLCP_SAP_MAX) {
sap = 0;
+ nfc_llcp_sock_put(llcp_sock);
goto add_snl;
}
@@ -1322,6 +1344,7 @@ static void nfc_llcp_recv_snl(struct nfc_llcp_local *local,
pr_debug("%p %d\n", llcp_sock, sap);
+ nfc_llcp_sock_put(llcp_sock);
add_snl:
sdp = nfc_llcp_build_sdres_tlv(tid, sap);
if (sdp == NULL)
@@ -1585,7 +1608,16 @@ int nfc_llcp_register_device(struct nfc_dev *ndev)
if (local == NULL)
return -ENOMEM;
- local->dev = ndev;
+ /* As we are going to initialize local's refcount, we need to get the
+ * nfc_dev to avoid UAF, otherwise there is no point in continuing.
+ * See nfc_llcp_local_get().
+ */
+ local->dev = nfc_get_device(ndev->idx);
+ if (!local->dev) {
+ kfree(local);
+ return -ENODEV;
+ }
+
INIT_LIST_HEAD(&local->list);
kref_init(&local->ref);
mutex_init(&local->sdp_lock);
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 7d766350c08e..605d7448c3de 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -120,13 +120,19 @@ static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
llcp_sock->service_name_len,
GFP_KERNEL);
if (!llcp_sock->service_name) {
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
+ llcp_sock->dev = NULL;
ret = -ENOMEM;
goto put_dev;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
kfree(llcp_sock->service_name);
llcp_sock->service_name = NULL;
+ llcp_sock->dev = NULL;
ret = -EADDRINUSE;
goto put_dev;
}
@@ -683,6 +689,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
ret = -EISCONN;
goto error;
}
+ if (sk->sk_state == LLCP_CONNECTING) {
+ ret = -EINPROGRESS;
+ goto error;
+ }
dev = nfc_get_device(addr->dev_idx);
if (dev == NULL) {
@@ -714,6 +724,8 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
llcp_sock->local = nfc_llcp_local_get(local);
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
ret = -ENOMEM;
goto put_dev;
}
@@ -751,8 +763,12 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
sock_unlink:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
+ nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
nfc_llcp_sock_unlink(&local->connecting_sockets, sk);
+ kfree(llcp_sock->service_name);
+ llcp_sock->service_name = NULL;
put_dev:
nfc_put_device(dev);
@@ -780,6 +796,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg,
lock_sock(sk);
+ if (!llcp_sock->local) {
+ release_sock(sk);
+ return -ENODEV;
+ }
+
if (sk->sk_type == SOCK_DGRAM) {
DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr,
msg->msg_name);
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 074960154993..33723d843e47 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -156,12 +156,15 @@ inline int nci_request(struct nci_dev *ndev,
{
int rc;
- if (!test_bit(NCI_UP, &ndev->flags))
- return -ENETDOWN;
-
/* Serialize all requests */
mutex_lock(&ndev->req_lock);
- rc = __nci_request(ndev, req, opt, timeout);
+ /* check the state after obtaing the lock against any races
+ * from nci_close_device when the device gets removed.
+ */
+ if (test_bit(NCI_UP, &ndev->flags))
+ rc = __nci_request(ndev, req, opt, timeout);
+ else
+ rc = -ENETDOWN;
mutex_unlock(&ndev->req_lock);
return rc;
@@ -482,6 +485,11 @@ static int nci_open_device(struct nci_dev *ndev)
mutex_lock(&ndev->req_lock);
+ if (test_bit(NCI_UNREG, &ndev->flags)) {
+ rc = -ENODEV;
+ goto done;
+ }
+
if (test_bit(NCI_UP, &ndev->flags)) {
rc = -EALREADY;
goto done;
@@ -534,7 +542,7 @@ static int nci_open_device(struct nci_dev *ndev)
skb_queue_purge(&ndev->tx_q);
ndev->ops->close(ndev);
- ndev->flags = 0;
+ ndev->flags &= BIT(NCI_UNREG);
}
done:
@@ -545,9 +553,17 @@ done:
static int nci_close_device(struct nci_dev *ndev)
{
nci_req_cancel(ndev, ENODEV);
+
+ /* This mutex needs to be held as a barrier for
+ * caller nci_unregister_device
+ */
mutex_lock(&ndev->req_lock);
if (!test_and_clear_bit(NCI_UP, &ndev->flags)) {
+ /* Need to flush the cmd wq in case
+ * there is a queued/running cmd_work
+ */
+ flush_workqueue(ndev->cmd_wq);
del_timer_sync(&ndev->cmd_timer);
del_timer_sync(&ndev->data_timer);
mutex_unlock(&ndev->req_lock);
@@ -582,8 +598,8 @@ static int nci_close_device(struct nci_dev *ndev)
/* Flush cmd wq */
flush_workqueue(ndev->cmd_wq);
- /* Clear flags */
- ndev->flags = 0;
+ /* Clear flags except NCI_UNREG */
+ ndev->flags &= BIT(NCI_UNREG);
mutex_unlock(&ndev->req_lock);
@@ -890,6 +906,11 @@ static int nci_activate_target(struct nfc_dev *nfc_dev,
return -EINVAL;
}
+ if (protocol >= NFC_PROTO_MAX) {
+ pr_err("the requested nfc protocol is invalid\n");
+ return -EINVAL;
+ }
+
if (!(nci_target->supported_protocols & (1 << protocol))) {
pr_err("target does not support the requested protocol 0x%x\n",
protocol);
@@ -1187,6 +1208,7 @@ EXPORT_SYMBOL(nci_allocate_device);
void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
+ nci_hci_deallocate(ndev);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
@@ -1264,6 +1286,12 @@ void nci_unregister_device(struct nci_dev *ndev)
{
struct nci_conn_info *conn_info, *n;
+ /* This set_bit is not protected with specialized barrier,
+ * However, it is fine because the mutex_lock(&ndev->req_lock);
+ * in nci_close_device() will help to emit one.
+ */
+ set_bit(NCI_UNREG, &ndev->flags);
+
nci_close_device(ndev);
destroy_workqueue(ndev->cmd_wq);
diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c
index 5405d073804c..5d46d8dfad72 100644
--- a/net/nfc/nci/data.c
+++ b/net/nfc/nci/data.c
@@ -130,7 +130,7 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev,
skb_frag = nci_skb_alloc(ndev,
(NCI_DATA_HDR_SIZE + frag_len),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (skb_frag == NULL) {
rc = -ENOMEM;
goto free_exit;
@@ -291,8 +291,10 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb)
nci_plen(skb->data));
conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data));
- if (!conn_info)
+ if (!conn_info) {
+ kfree_skb(skb);
return;
+ }
/* strip the nci data header */
skb_pull(skb, NCI_DATA_HDR_SIZE);
diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index c0d323b58e73..e5c5cff33236 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -165,7 +165,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
i = 0;
skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
@@ -198,7 +198,7 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
if (i < data_len) {
skb = nci_skb_alloc(ndev,
conn_info->max_pkt_payload_len +
- NCI_DATA_HDR_SIZE, GFP_KERNEL);
+ NCI_DATA_HDR_SIZE, GFP_ATOMIC);
if (!skb)
return -ENOMEM;
@@ -807,3 +807,8 @@ struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev)
return hdev;
}
+
+void nci_hci_deallocate(struct nci_dev *ndev)
+{
+ kfree(ndev->hci_dev);
+}
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 1e8c1a12aaec..4f75453c07aa 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -230,6 +230,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
target->sens_res = nfca_poll->sens_res;
target->sel_res = nfca_poll->sel_res;
target->nfcid1_len = nfca_poll->nfcid1_len;
+ if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1))
+ return -EPROTO;
if (target->nfcid1_len > 0) {
memcpy(target->nfcid1, nfca_poll->nfcid1,
target->nfcid1_len);
@@ -238,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params;
target->sensb_res_len = nfcb_poll->sensb_res_len;
+ if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res))
+ return -EPROTO;
if (target->sensb_res_len > 0) {
memcpy(target->sensb_res, nfcb_poll->sensb_res,
target->sensb_res_len);
@@ -246,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev,
nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params;
target->sensf_res_len = nfcf_poll->sensf_res_len;
+ if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res))
+ return -EPROTO;
if (target->sensf_res_len > 0) {
memcpy(target->sensf_res, nfcf_poll->sensf_res,
target->sensf_res_len);
diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c
index e3bbf1937d0e..7681f89dc312 100644
--- a/net/nfc/nci/rsp.c
+++ b/net/nfc/nci/rsp.c
@@ -289,6 +289,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev,
ndev->cur_conn_id);
if (conn_info) {
list_del(&conn_info->list);
+ if (conn_info == ndev->rf_conn_info)
+ ndev->rf_conn_info = NULL;
devm_kfree(&ndev->nfc_dev->dev, conn_info);
}
}
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index 452f4c16b7a9..d2de7fc226f0 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -163,6 +163,8 @@ static int send_acknowledge(struct nci_spi *nspi, u8 acknowledge)
int ret;
skb = nci_skb_alloc(nspi->ndev, 0, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
/* add the NCI SPI header to the start of the buffer */
hdr = skb_push(skb, NCI_SPI_HDR_LEN);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index a65a5a5f434b..f705800b2248 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -655,8 +655,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb)
{
struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
- nfc_device_iter_exit(iter);
- kfree(iter);
+ if (iter) {
+ nfc_device_iter_exit(iter);
+ kfree(iter);
+ }
return 0;
}
@@ -871,6 +873,7 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info)
if (!dev->polling) {
device_unlock(&dev->dev);
+ nfc_put_device(dev);
return -EINVAL;
}
@@ -1235,7 +1238,7 @@ static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info)
u32 idx;
char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1];
- if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+ if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME])
return -EINVAL;
idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
@@ -1259,7 +1262,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
if (!msg)
return -ENOMEM;
@@ -1275,7 +1278,7 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
genlmsg_end(msg, hdr);
- genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL);
+ genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
return 0;
@@ -1409,8 +1412,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb)
{
struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0];
- nfc_device_iter_exit(iter);
- kfree(iter);
+ if (iter) {
+ nfc_device_iter_exit(iter);
+ kfree(iter);
+ }
return 0;
}
@@ -1455,8 +1460,12 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx,
rc = dev->ops->se_io(dev, se_idx, apdu,
apdu_length, cb, cb_context);
+ device_unlock(&dev->dev);
+ return rc;
+
error:
device_unlock(&dev->dev);
+ kfree(cb_context);
return rc;
}
@@ -1510,6 +1519,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
u32 dev_idx, se_idx;
u8 *apdu;
size_t apdu_len;
+ int rc;
if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
!info->attrs[NFC_ATTR_SE_INDEX] ||
@@ -1523,25 +1533,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
if (!dev)
return -ENODEV;
- if (!dev->ops || !dev->ops->se_io)
- return -ENOTSUPP;
+ if (!dev->ops || !dev->ops->se_io) {
+ rc = -EOPNOTSUPP;
+ goto put_dev;
+ }
apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
- if (apdu_len == 0)
- return -EINVAL;
+ if (apdu_len == 0) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
- if (!apdu)
- return -EINVAL;
+ if (!apdu) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto put_dev;
+ }
ctx->dev_idx = dev_idx;
ctx->se_idx = se_idx;
- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+
+put_dev:
+ nfc_put_device(dev);
+ return rc;
}
static int nfc_genl_vendor_cmd(struct sk_buff *skb,
@@ -1564,14 +1586,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
dev = nfc_get_device(dev_idx);
- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
+ if (!dev)
return -ENODEV;
+ if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
+ err = -ENODEV;
+ goto put_dev;
+ }
+
if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
- if (data_len == 0)
- return -EINVAL;
+ if (data_len == 0) {
+ err = -EINVAL;
+ goto put_dev;
+ }
} else {
data = NULL;
data_len = 0;
@@ -1586,10 +1615,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
dev->cur_cmd_info = info;
err = cmd->doit(dev, data, data_len);
dev->cur_cmd_info = NULL;
- return err;
+ goto put_dev;
}
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+
+put_dev:
+ nfc_put_device(dev);
+ return err;
}
/* message building helper */
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 6c6f76b370b1..c792165f523f 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -60,7 +60,7 @@ void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
u8 comm_mode, u8 rf_mode);
int nfc_llcp_register_device(struct nfc_dev *dev);
void nfc_llcp_unregister_device(struct nfc_dev *dev);
-int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len);
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, const u8 *gb, u8 gb_len);
u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len);
int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb);
struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index b927730d9ab0..bdc72737fe24 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -117,7 +117,7 @@ static int rawsock_connect(struct socket *sock, struct sockaddr *_addr,
if (addr->target_idx > dev->target_next_idx - 1 ||
addr->target_idx < dev->target_next_idx - dev->n_targets) {
rc = -EINVAL;
- goto error;
+ goto put_dev;
}
rc = nfc_activate_target(dev, addr->target_idx, addr->nfc_protocol);
@@ -345,7 +345,7 @@ static int rawsock_create(struct net *net, struct socket *sock,
return -ESOCKTNOSUPPORT;
if (sock->type == SOCK_RAW) {
- if (!capable(CAP_NET_RAW))
+ if (!ns_capable(net->user_ns, CAP_NET_RAW))
return -EPERM;
sock->ops = &rawsock_raw_ops;
} else {
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 1a30e165eeb4..a5fa25555d7e 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -80,13 +80,12 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ u16 mac_offset = skb->mac_header;
unsigned int nsh_len, mac_len;
__be16 proto;
- int nhoff;
skb_reset_network_header(skb);
- nhoff = skb->network_header - skb->mac_header;
mac_len = skb->mac_len;
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
@@ -111,15 +110,14 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
segs = skb_mac_gso_segment(skb, features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len,
- skb->network_header - nhoff,
- mac_len);
+ mac_offset, mac_len);
goto out;
}
for (skb = segs; skb; skb = skb->next) {
skb->protocol = htons(ETH_P_NSH);
__skb_push(skb, nsh_len);
- skb_set_mac_header(skb, -nhoff);
+ skb->mac_header = mac_offset;
skb->network_header = skb->mac_header + mac_len;
skb->mac_len = mac_len;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 8211e8e97c96..091202b84b6e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -443,6 +443,7 @@ static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
update_ip_l4_checksum(skb, nh, *addr, new_addr);
csum_replace4(&nh->check, *addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
*addr = new_addr;
}
@@ -490,15 +491,47 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
update_ipv6_checksum(skb, l4_proto, addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
memcpy(addr, new_addr, sizeof(__be32[4]));
}
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask)
{
+ u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+ ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12),
+ (__force __wsum)(ipv6_tclass << 12));
+
+ ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask)
+{
+ u32 ofl;
+
+ ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2];
+ fl = OVS_MASKED(ofl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
- OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
- OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
- OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+ nh->flow_lbl[0] = (u8)(fl >> 16);
+ nh->flow_lbl[1] = (u8)(fl >> 8);
+ nh->flow_lbl[2] = (u8)fl;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl));
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask)
+{
+ new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8),
+ (__force __wsum)(new_ttl << 8));
+ nh->hop_limit = new_ttl;
}
static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -616,18 +649,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
}
}
if (mask->ipv6_tclass) {
- ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+ set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
flow_key->ip.tos = ipv6_get_dsfield(nh);
}
if (mask->ipv6_label) {
- set_ipv6_fl(nh, ntohl(key->ipv6_label),
+ set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
ntohl(mask->ipv6_label));
flow_key->ipv6.label =
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
- OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
- mask->ipv6_hlimit);
+ set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
@@ -700,6 +732,7 @@ static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
+ ovs_ct_clear(skb, NULL);
inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
@@ -739,6 +772,7 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
uh->dest = dst;
flow_key->tp.src = src;
flow_key->tp.dst = dst;
+ ovs_ct_clear(skb, NULL);
}
skb_clear_hash(skb);
@@ -801,6 +835,8 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
+
flow_key->tp.src = sh->source;
flow_key->tp.dst = sh->dest;
@@ -892,17 +928,17 @@ static void ovs_fragment(struct net *net, struct vport *vport,
}
if (key->eth.type == htons(ETH_P_IP)) {
- struct dst_entry ovs_dst;
+ struct rtable ovs_rt = { 0 };
unsigned long orig_dst;
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
- dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
- ovs_dst.dev = vport->dev;
+ ovs_rt.dst.dev = vport->dev;
orig_dst = skb->_skb_refdst;
- skb_dst_set_noref(skb, &ovs_dst);
+ skb_dst_set_noref(skb, &ovs_rt.dst);
IPCB(skb)->frag_max_size = mru;
ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
@@ -1068,7 +1104,7 @@ static int clone(struct datapath *dp, struct sk_buff *skb,
int rem = nla_len(attr);
bool dont_clone_flow_key;
- /* The first action is always 'OVS_CLONE_ATTR_ARG'. */
+ /* The first action is always 'OVS_CLONE_ATTR_EXEC'. */
clone_arg = nla_data(attr);
dont_clone_flow_key = nla_get_u32(clone_arg);
actions = nla_next(clone_arg, &rem);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index fb13fcfedaf4..0777c8d416f1 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1303,7 +1303,8 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
if (skb_nfct(skb)) {
nf_conntrack_put(skb_nfct(skb));
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key);
+ if (key)
+ ovs_ct_fill_key(skb, key);
}
return 0;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f350faef044d..0551915519d9 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -249,10 +249,17 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
upcall.portid = ovs_vport_find_upcall_portid(p, skb);
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
- if (unlikely(error))
- kfree_skb(skb);
- else
+ switch (error) {
+ case 0:
+ case -EAGAIN:
+ case -ERESTARTSYS:
+ case -EINTR:
consume_skb(skb);
+ break;
+ default:
+ kfree_skb(skb);
+ break;
+ }
stats_counter = &stats->n_missed;
goto out;
}
@@ -519,8 +526,9 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
out:
if (err)
skb_tx_error(skb);
- kfree_skb(user_skb);
- kfree_skb(nskb);
+ consume_skb(user_skb);
+ consume_skb(nskb);
+
return err;
}
@@ -895,6 +903,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
struct sw_flow_mask mask;
struct sk_buff *reply;
struct datapath *dp;
+ struct sw_flow_key *key;
struct sw_flow_actions *acts;
struct sw_flow_match match;
u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -922,30 +931,32 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
/* Extract key. */
- ovs_match_init(&match, &new_flow->key, false, &mask);
+ key = kzalloc(sizeof(*key), GFP_KERNEL);
+ if (!key) {
+ error = -ENOMEM;
+ goto err_kfree_flow;
+ }
+
+ ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
+
+ ovs_flow_mask_key(&new_flow->key, key, true, &mask);
/* Extract flow identifier. */
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
- &new_flow->key, log);
+ key, log);
if (error)
- goto err_kfree_flow;
-
- /* unmasked key is needed to match when ufid is not used. */
- if (ovs_identifier_is_key(&new_flow->id))
- match.key = new_flow->id.unmasked_key;
-
- ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -966,7 +977,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (ovs_identifier_is_ufid(&new_flow->id))
flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
if (!flow)
- flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
+ flow = ovs_flow_tbl_lookup(&dp->table, key);
if (likely(!flow)) {
rcu_assign_pointer(new_flow->sf_acts, acts);
@@ -1036,6 +1047,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (reply)
ovs_notify(&dp_flow_genl_family, reply, info);
+
+ kfree(key);
return 0;
err_unlock_ovs:
@@ -1043,6 +1056,8 @@ err_unlock_ovs:
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
+err_kfree_key:
+ kfree(key);
err_kfree_flow:
ovs_flow_free(new_flow, false);
error:
@@ -1543,7 +1558,8 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in
if (IS_ERR(dp))
return;
- WARN(dp->user_features, "Dropping previously announced user features\n");
+ pr_warn("%s: Dropping previously announced user features\n",
+ ovs_dp_name(dp));
dp->user_features = 0;
}
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index eab5e8eaddaa..eba94cf3d2d0 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2166,8 +2166,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
icmpv6_key->icmpv6_type = ntohs(output->tp.src);
icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
- if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
- icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+ if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+ swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
struct ovs_key_nd *nd_key;
nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
@@ -2253,6 +2253,36 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size)
return sfa;
}
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len);
+
+static void ovs_nla_free_clone_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_CLONE_ATTR_EXEC:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
+static void ovs_nla_free_sample_action(const struct nlattr *action)
+{
+ const struct nlattr *a = nla_data(action);
+ int rem = nla_len(action);
+
+ switch (nla_type(a)) {
+ case OVS_SAMPLE_ATTR_ARG:
+ /* The real list of actions follows this attribute. */
+ a = nla_next(a, &rem);
+ ovs_nla_free_nested_actions(a, rem);
+ break;
+ }
+}
+
static void ovs_nla_free_set_action(const struct nlattr *a)
{
const struct nlattr *ovs_key = nla_data(a);
@@ -2266,25 +2296,46 @@ static void ovs_nla_free_set_action(const struct nlattr *a)
}
}
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len)
{
const struct nlattr *a;
int rem;
- if (!sf_acts)
+ /* Whenever new actions are added, the need to update this
+ * function should be considered.
+ */
+ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 20);
+
+ if (!actions)
return;
- nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+ nla_for_each_attr(a, actions, len, rem) {
switch (nla_type(a)) {
- case OVS_ACTION_ATTR_SET:
- ovs_nla_free_set_action(a);
+ case OVS_ACTION_ATTR_CLONE:
+ ovs_nla_free_clone_action(a);
break;
+
case OVS_ACTION_ATTR_CT:
ovs_ct_free_action(a);
break;
+
+ case OVS_ACTION_ATTR_SAMPLE:
+ ovs_nla_free_sample_action(a);
+ break;
+
+ case OVS_ACTION_ATTR_SET:
+ ovs_nla_free_set_action(a);
+ break;
}
}
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+ if (!sf_acts)
+ return;
+ ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len);
kfree(sf_acts);
}
@@ -2316,7 +2367,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);
if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
- if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
+ if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
OVS_NLERR(log, "Flow action size exceeds max %u",
MAX_ACTIONS_BUFSIZE);
return ERR_PTR(-EMSGSIZE);
@@ -3173,7 +3224,9 @@ static int clone_action_to_attr(const struct nlattr *attr,
if (!start)
return -EMSGSIZE;
- err = ovs_nla_put_actions(nla_data(attr), rem, skb);
+ /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. */
+ attr = nla_next(nla_data(attr), &rem);
+ err = ovs_nla_put_actions(attr, rem, skb);
if (err)
nla_nest_cancel(skb, start);
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 5ea2471ffc03..9b0c54f0702c 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -464,6 +464,14 @@ bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
spin_lock(&meter->lock);
long_delta_ms = (now_ms - meter->used); /* ms */
+ if (long_delta_ms < 0) {
+ /* This condition means that we have several threads fighting
+ * for a meter lock, and the one who received the packets a
+ * bit later wins. Assuming that all racing threads received
+ * packets at the same time to avoid overflow.
+ */
+ long_delta_ms = 0;
+ }
/* Make sure delta_ms will not be too large, so that bucket will not
* wrap around below.
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 16b745d254fe..377832981178 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -370,18 +370,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
union tpacket_uhdr h;
+ /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
- h.h1->tp_status = status;
+ WRITE_ONCE(h.h1->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
- h.h2->tp_status = status;
+ WRITE_ONCE(h.h2->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
case TPACKET_V3:
- h.h3->tp_status = status;
+ WRITE_ONCE(h.h3->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default:
@@ -398,17 +400,19 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
smp_rmb();
+ /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
- return h.h1->tp_status;
+ return READ_ONCE(h.h1->tp_status);
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
- return h.h2->tp_status;
+ return READ_ONCE(h.h2->tp_status);
case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
- return h.h3->tp_status;
+ return READ_ONCE(h.h3->tp_status);
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
@@ -1716,6 +1720,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.af_packet_net = read_pnet(&match->net);
match->prot_hook.id_match = match_fanout_group;
list_add(&match->list, &fanout_list);
}
@@ -1729,7 +1734,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
err = -ENOSPC;
if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
__dev_remove_pack(&po->prot_hook);
- po->fanout = match;
+
+ /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
+ WRITE_ONCE(po->fanout, match);
+
po->rollover = rollover;
rollover = NULL;
refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
@@ -1951,7 +1959,7 @@ retry:
goto retry;
}
- if (!dev_validate_header(dev, skb->data, len)) {
+ if (!dev_validate_header(dev, skb->data, len) || !skb->len) {
err = -EINVAL;
goto out_unlock;
}
@@ -1974,7 +1982,7 @@ retry:
skb->mark = sk->sk_mark;
skb->tstamp = sockc.transmit_time;
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -2101,7 +2109,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
sll = &PACKET_SKB_CB(skb)->sa.ll;
sll->sll_hatype = dev->type;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2205,8 +2213,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb->ip_summed == CHECKSUM_PARTIAL)
status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
status |= TP_STATUS_CSUM_VALID;
if (snaplen > res)
@@ -2242,8 +2249,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
copy_skb = skb_get(skb);
skb_head = skb->data;
}
- if (copy_skb)
+ if (copy_skb) {
+ memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
+ sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
skb_set_owner_r(copy_skb, sk);
+ }
}
snaplen = po->rx_ring.frame_size - macoff;
if ((int)snaplen < 0) {
@@ -2365,7 +2375,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
sll->sll_hatype = dev->type;
sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
- if (unlikely(po->origdev))
+ if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
sll->sll_ifindex = orig_dev->ifindex;
else
sll->sll_ifindex = dev->ifindex;
@@ -2494,7 +2504,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->priority = po->sk.sk_priority;
skb->mark = po->sk.sk_mark;
skb->tstamp = sockc->transmit_time;
- sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
skb_reserve(skb, hlen);
@@ -2656,7 +2666,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
- proto = po->num;
+ proto = READ_ONCE(po->num);
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2784,8 +2794,9 @@ tpacket_error:
status = TP_STATUS_SEND_REQUEST;
err = po->xmit(skb);
- if (unlikely(err > 0)) {
- err = net_xmit_errno(err);
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
if (err && __packet_get_status(po, ph) ==
TP_STATUS_AVAILABLE) {
/* skb was destructed already */
@@ -2869,7 +2880,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (likely(saddr == NULL)) {
dev = packet_cached_dev_get(po);
- proto = po->num;
+ proto = READ_ONCE(po->num);
} else {
err = -EINVAL;
if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@ -2958,7 +2969,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_free;
}
- sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
+ skb_setup_tx_timestamp(skb, sockc.tsflags);
if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
@@ -2986,8 +2997,12 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->no_fcs = 1;
err = po->xmit(skb);
- if (err > 0 && (err = net_xmit_errno(err)) != 0)
- goto out_unlock;
+ if (unlikely(err != 0)) {
+ if (err > 0)
+ err = net_xmit_errno(err);
+ if (err)
+ goto out_unlock;
+ }
dev_put(dev);
@@ -3106,6 +3121,9 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
lock_sock(sk);
spin_lock(&po->bind_lock);
+ if (!proto)
+ proto = po->num;
+
rcu_read_lock();
if (po->fanout) {
@@ -3141,7 +3159,7 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
/* prevents packet_notifier() from calling
* register_prot_hook()
*/
- po->num = 0;
+ WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, true);
rcu_read_lock();
dev_curr = po->prot_hook.dev;
@@ -3151,17 +3169,17 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
}
BUG_ON(po->running);
- po->num = proto;
+ WRITE_ONCE(po->num, proto);
po->prot_hook.type = proto;
if (unlikely(unlisted)) {
dev_put(dev);
po->prot_hook.dev = NULL;
- po->ifindex = -1;
+ WRITE_ONCE(po->ifindex, -1);
packet_cached_dev_reset(po);
} else {
po->prot_hook.dev = dev;
- po->ifindex = dev ? dev->ifindex : 0;
+ WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
packet_cached_dev_assign(po, dev);
}
}
@@ -3208,7 +3226,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
name[sizeof(uaddr->sa_data)] = 0;
- return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
+ return packet_do_bind(sk, name, 0, 0);
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -3225,8 +3243,7 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
if (sll->sll_family != AF_PACKET)
return -EINVAL;
- return packet_do_bind(sk, NULL, sll->sll_ifindex,
- sll->sll_protocol ? : pkt_sk(sk)->num);
+ return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol);
}
static struct proto packet_proto = {
@@ -3294,6 +3311,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
po->prot_hook.func = packet_rcv_spkt;
po->prot_hook.af_packet_priv = sk;
+ po->prot_hook.af_packet_net = sock_net(sk);
if (proto) {
po->prot_hook.type = proto;
@@ -3401,6 +3419,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
+ const size_t max_len = min(sizeof(skb->cb),
+ sizeof(struct sockaddr_storage));
int copy_len;
/* If the address length field is there to be filled
@@ -3423,18 +3443,21 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
msg->msg_namelen = sizeof(struct sockaddr_ll);
}
}
+ if (WARN_ON_ONCE(copy_len > max_len)) {
+ copy_len = max_len;
+ msg->msg_namelen = copy_len;
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
}
- if (pkt_sk(sk)->auxdata) {
+ if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
struct tpacket_auxdata aux;
aux.tp_status = TP_STATUS_USER;
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
else if (skb->pkt_type != PACKET_OUTGOING &&
- (skb->ip_summed == CHECKSUM_COMPLETE ||
- skb_csum_unnecessary(skb)))
+ skb_csum_unnecessary(skb))
aux.tp_status |= TP_STATUS_CSUM_VALID;
aux.tp_len = origlen;
@@ -3476,7 +3499,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
uaddr->sa_family = AF_PACKET;
memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
+ dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
if (dev)
strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
@@ -3491,16 +3514,18 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
+ int ifindex;
if (peer)
return -EOPNOTSUPP;
+ ifindex = READ_ONCE(po->ifindex);
sll->sll_family = AF_PACKET;
- sll->sll_ifindex = po->ifindex;
- sll->sll_protocol = po->num;
+ sll->sll_ifindex = ifindex;
+ sll->sll_protocol = READ_ONCE(po->num);
sll->sll_pkttype = 0;
rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
+ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
@@ -3808,9 +3833,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->auxdata = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val);
return 0;
}
case PACKET_ORIGDEV:
@@ -3822,9 +3845,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
- lock_sock(sk);
- po->origdev = !!val;
- release_sock(sk);
+ packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val);
return 0;
}
case PACKET_VNET_HDR:
@@ -3873,7 +3894,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
}
case PACKET_FANOUT_DATA:
{
- if (!po->fanout)
+ /* Paired with the WRITE_ONCE() in fanout_add() */
+ if (!READ_ONCE(po->fanout))
return -EINVAL;
return fanout_set_data(po, optval, optlen);
@@ -3953,10 +3975,10 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
break;
case PACKET_AUXDATA:
- val = po->auxdata;
+ val = packet_sock_flag(po, PACKET_SOCK_AUXDATA);
break;
case PACKET_ORIGDEV:
- val = po->origdev;
+ val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
break;
case PACKET_VNET_HDR:
val = po->has_vnet_hdr;
@@ -4079,7 +4101,7 @@ static int packet_notifier(struct notifier_block *this,
}
if (msg == NETDEV_UNREGISTER) {
packet_cached_dev_reset(po);
- po->ifindex = -1;
+ WRITE_ONCE(po->ifindex, -1);
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
@@ -4192,7 +4214,7 @@ static void packet_mm_open(struct vm_area_struct *vma)
struct sock *sk = sock->sk;
if (sk)
- atomic_inc(&pkt_sk(sk)->mapped);
+ atomic_long_inc(&pkt_sk(sk)->mapped);
}
static void packet_mm_close(struct vm_area_struct *vma)
@@ -4202,7 +4224,7 @@ static void packet_mm_close(struct vm_area_struct *vma)
struct sock *sk = sock->sk;
if (sk)
- atomic_dec(&pkt_sk(sk)->mapped);
+ atomic_long_dec(&pkt_sk(sk)->mapped);
}
static const struct vm_operations_struct packet_mmap_ops = {
@@ -4297,7 +4319,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = -EBUSY;
if (!closing) {
- if (atomic_read(&po->mapped))
+ if (atomic_long_read(&po->mapped))
goto out;
if (packet_read_pending(rb))
goto out;
@@ -4391,7 +4413,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
was_running = po->running;
num = po->num;
if (was_running) {
- po->num = 0;
+ WRITE_ONCE(po->num, 0);
__unregister_prot_hook(sk, false);
}
spin_unlock(&po->bind_lock);
@@ -4400,7 +4422,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
err = -EBUSY;
mutex_lock(&po->pg_vec_lock);
- if (closing || atomic_read(&po->mapped) == 0) {
+ if (closing || atomic_long_read(&po->mapped) == 0) {
err = 0;
spin_lock_bh(&rb_queue->lock);
swap(rb->pg_vec, pg_vec);
@@ -4418,15 +4440,15 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
po->prot_hook.func = (po->rx_ring.pg_vec) ?
tpacket_rcv : packet_rcv;
skb_queue_purge(rb_queue);
- if (atomic_read(&po->mapped))
- pr_err("packet_mmap: vma is busy: %d\n",
- atomic_read(&po->mapped));
+ if (atomic_long_read(&po->mapped))
+ pr_err("packet_mmap: vma is busy: %ld\n",
+ atomic_long_read(&po->mapped));
}
mutex_unlock(&po->pg_vec_lock);
spin_lock(&po->bind_lock);
if (was_running) {
- po->num = num;
+ WRITE_ONCE(po->num, num);
register_prot_hook(sk);
}
spin_unlock(&po->bind_lock);
@@ -4437,9 +4459,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
}
out_free_pg_vec:
- bitmap_free(rx_owner_map);
- if (pg_vec)
+ if (pg_vec) {
+ bitmap_free(rx_owner_map);
free_pg_vec(pg_vec, order, req->tp_block_nr);
+ }
out:
return err;
}
@@ -4497,7 +4520,7 @@ static int packet_mmap(struct file *file, struct socket *sock,
}
}
- atomic_inc(&po->mapped);
+ atomic_long_inc(&po->mapped);
vma->vm_ops = &packet_mmap_ops;
err = 0;
@@ -4597,8 +4620,8 @@ static int packet_seq_show(struct seq_file *seq, void *v)
s,
refcount_read(&s->sk_refcnt),
s->sk_type,
- ntohs(po->num),
- po->ifindex,
+ ntohs(READ_ONCE(po->num)),
+ READ_ONCE(po->ifindex),
po->running,
atomic_read(&s->sk_rmem_alloc),
from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
diff --git a/net/packet/diag.c b/net/packet/diag.c
index 7ef1c881ae74..ecabf78d29b8 100644
--- a/net/packet/diag.c
+++ b/net/packet/diag.c
@@ -22,9 +22,9 @@ static int pdiag_put_info(const struct packet_sock *po, struct sk_buff *nlskb)
pinfo.pdi_flags = 0;
if (po->running)
pinfo.pdi_flags |= PDI_RUNNING;
- if (po->auxdata)
+ if (packet_sock_flag(po, PACKET_SOCK_AUXDATA))
pinfo.pdi_flags |= PDI_AUXDATA;
- if (po->origdev)
+ if (packet_sock_flag(po, PACKET_SOCK_ORIGDEV))
pinfo.pdi_flags |= PDI_ORIGDEV;
if (po->has_vnet_hdr)
pinfo.pdi_flags |= PDI_VNETHDR;
@@ -142,7 +142,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
rp = nlmsg_data(nlh);
rp->pdiag_family = AF_PACKET;
rp->pdiag_type = sk->sk_type;
- rp->pdiag_num = ntohs(po->num);
+ rp->pdiag_num = ntohs(READ_ONCE(po->num));
rp->pdiag_ino = sk_ino;
sock_diag_save_cookie(sk, rp->pdiag_cookie);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index f10294800aaf..7f2d5eed5e00 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -115,10 +115,9 @@ struct packet_sock {
int copy_thresh;
spinlock_t bind_lock;
struct mutex pg_vec_lock;
+ unsigned long flags;
unsigned int running; /* bind_lock must be held */
- unsigned int auxdata:1, /* writer must hold sock lock */
- origdev:1,
- has_vnet_hdr:1,
+ unsigned int has_vnet_hdr:1, /* writer must hold sock lock */
tp_loss:1,
tp_tx_has_off:1;
int pressure;
@@ -126,7 +125,7 @@ struct packet_sock {
__be16 num;
struct packet_rollover *rollover;
struct packet_mclist *mclist;
- atomic_t mapped;
+ atomic_long_t mapped;
enum tpacket_versions tp_version;
unsigned int tp_hdrlen;
unsigned int tp_reserve;
@@ -142,4 +141,25 @@ static struct packet_sock *pkt_sk(struct sock *sk)
return (struct packet_sock *)sk;
}
+enum packet_sock_flags {
+ PACKET_SOCK_ORIGDEV,
+ PACKET_SOCK_AUXDATA,
+};
+
+static inline void packet_sock_flag_set(struct packet_sock *po,
+ enum packet_sock_flags flag,
+ bool val)
+{
+ if (val)
+ set_bit(flag, &po->flags);
+ else
+ clear_bit(flag, &po->flags);
+}
+
+static inline bool packet_sock_flag(const struct packet_sock *po,
+ enum packet_sock_flags flag)
+{
+ return test_bit(flag, &po->flags);
+}
+
#endif
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index db3473540303..fe01cc788448 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -881,6 +881,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
err = pep_accept_conn(newsk, skb);
if (err) {
+ __sock_put(sk);
sock_put(newsk);
newsk = NULL;
goto drop;
@@ -959,6 +960,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
ret = -EBUSY;
else if (sk->sk_state == TCP_ESTABLISHED)
ret = -EISCONN;
+ else if (!pn->pn_sk.sobject)
+ ret = -EADDRNOTAVAIL;
else
ret = pep_sock_enable(sk, NULL, 0);
release_sock(sk);
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 30e8239bd774..196fbf674dc1 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -31,7 +31,8 @@ enum psample_nl_multicast_groups {
static const struct genl_multicast_group psample_nl_mcgrps[] = {
[PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
- [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME },
+ [PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+ .flags = GENL_UNS_ADMIN_PERM },
};
static struct genl_family psample_nl_family __ro_after_init;
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index a05c5cb3429c..128d0a48478d 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -194,7 +194,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
hdr->src_port_id = cpu_to_le32(from->sq_port);
if (to->sq_port == QRTR_PORT_CTRL) {
hdr->dst_node_id = cpu_to_le32(node->nid);
- hdr->dst_port_id = cpu_to_le32(QRTR_NODE_BCAST);
+ hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL);
} else {
hdr->dst_node_id = cpu_to_le32(to->sq_node);
hdr->dst_port_id = cpu_to_le32(to->sq_port);
@@ -264,14 +264,14 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
const struct qrtr_hdr_v2 *v2;
struct sk_buff *skb;
struct qrtr_cb *cb;
- unsigned int size;
+ size_t size;
unsigned int ver;
size_t hdrlen;
if (len == 0 || len & 3)
return -EINVAL;
- skb = netdev_alloc_skb(NULL, len);
+ skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
if (!skb)
return -ENOMEM;
@@ -321,7 +321,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
- if (len != ALIGN(size, 4) + hdrlen)
+ if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA)
@@ -797,8 +797,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
plen = (len + 3) & ~3;
skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
msg->msg_flags & MSG_DONTWAIT, &rc);
- if (!skb)
+ if (!skb) {
+ rc = -ENOMEM;
goto out_node;
+ }
skb_reserve(skb, QRTR_HDR_MAX_SIZE);
@@ -866,6 +868,11 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
rc = copied;
if (addr) {
+ /* There is an anonymous 2-byte hole after sq_family,
+ * make sure to clear it.
+ */
+ memset(addr, 0, sizeof(*addr));
+
cb = (struct qrtr_cb *)skb->cb;
addr->sq_family = AF_QIPCRTR;
addr->sq_node = cb->src_node;
diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c
index e35869e81766..cb425e216d46 100644
--- a/net/qrtr/tun.c
+++ b/net/qrtr/tun.c
@@ -31,6 +31,7 @@ static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
static int qrtr_tun_open(struct inode *inode, struct file *filp)
{
struct qrtr_tun *tun;
+ int ret;
tun = kzalloc(sizeof(*tun), GFP_KERNEL);
if (!tun)
@@ -43,7 +44,16 @@ static int qrtr_tun_open(struct inode *inode, struct file *filp)
filp->private_data = tun;
- return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+ ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+ if (ret)
+ goto out;
+
+ return 0;
+
+out:
+ filp->private_data = NULL;
+ kfree(tun);
+ return ret;
}
static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -80,6 +90,12 @@ static ssize_t qrtr_tun_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t ret;
void *kbuf;
+ if (!len)
+ return -EINVAL;
+
+ if (len > KMALLOC_MAX_SIZE)
+ return -ENOMEM;
+
kbuf = kzalloc(len, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 3bd2f4a5a30d..af9f7d184003 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -237,12 +237,24 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (loop_trans) {
rds_trans_put(loop_trans);
conn->c_loopback = 1;
- if (is_outgoing && trans->t_prefer_loopback) {
- /* "outgoing" connection - and the transport
- * says it wants the connection handled by the
- * loopback transport. This is what TCP does.
- */
- trans = &rds_loop_transport;
+ if (trans->t_prefer_loopback) {
+ if (likely(is_outgoing)) {
+ /* "outgoing" connection to local address.
+ * Protocol says it wants the connection
+ * handled by the loopback transport.
+ * This is what TCP does.
+ */
+ trans = &rds_loop_transport;
+ } else {
+ /* No transport currently in use
+ * should end up here, but if it
+ * does, reset/destroy the connection.
+ */
+ kfree(conn->c_path);
+ kmem_cache_free(rds_conn_slab, conn);
+ conn = ERR_PTR(-EOPNOTSUPP);
+ goto out;
+ }
}
}
diff --git a/net/rds/ib_frmr.c b/net/rds/ib_frmr.c
index 6431a023ac89..46988c009a3e 100644
--- a/net/rds/ib_frmr.c
+++ b/net/rds/ib_frmr.c
@@ -111,9 +111,9 @@ static int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
cpu_relax();
}
- ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_len,
+ ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
&off, PAGE_SIZE);
- if (unlikely(ret != ibmr->sg_len))
+ if (unlikely(ret != ibmr->sg_dma_len))
return ret < 0 ? ret : -EINVAL;
/* Perform a WR for the fast_reg_mr. Each individual page
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 2f16146e4ec9..18e0e3cba1ac 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -362,6 +362,7 @@ static int acquire_refill(struct rds_connection *conn)
static void release_refill(struct rds_connection *conn)
{
clear_bit(RDS_RECV_REFILL, &conn->c_flags);
+ smp_mb__after_atomic();
/* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
* hot path and finding waiters is very rare. We don't want to walk
diff --git a/net/rds/message.c b/net/rds/message.c
index 4b00b1152a5f..29f67ad483ea 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
spin_lock_irqsave(&q->lock, flags);
head = &q->zcookie_head;
if (!list_empty(head)) {
- info = list_entry(head, struct rds_msg_zcopy_info,
- rs_zcookie_next);
- if (info && rds_zcookie_add(info, cookie)) {
+ info = list_first_entry(head, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (rds_zcookie_add(info, cookie)) {
spin_unlock_irqrestore(&q->lock, flags);
kfree(rds_info_from_znotifier(znotif));
/* caller invokes rds_wake_sk_sleep() */
@@ -118,7 +118,7 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
ck = &info->zcookies;
memset(ck, 0, sizeof(*ck));
WARN_ON(!rds_zcookie_add(info, cookie));
- list_add_tail(&q->zcookie_head, &info->rs_zcookie_next);
+ list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
spin_unlock_irqrestore(&q->lock, flags);
/* caller invokes rds_wake_sk_sleep() */
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index e1965d9cbcf8..9882cebfcad6 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -531,6 +531,9 @@ int rds_rdma_extra_size(struct rds_rdma_args *args,
if (args->nr_local == 0)
return -EINVAL;
+ if (args->nr_local > UIO_MAXIOV)
+ return -EMSGSIZE;
+
iov->iov = kcalloc(args->nr_local,
sizeof(struct rds_iovec),
GFP_KERNEL);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 3ca278988b52..ccf0bf283002 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -705,7 +705,7 @@ int rds_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
if (rds_cmsg_recv(inc, msg, rs)) {
ret = -EFAULT;
- goto out;
+ break;
}
rds_recvmsg_zcookie(rs, msg);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 18bb522df282..645cbb17c13a 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -176,10 +176,10 @@ void rds_tcp_reset_callbacks(struct socket *sock,
*/
atomic_set(&cp->cp_state, RDS_CONN_RESETTING);
wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));
- lock_sock(osock->sk);
/* reset receive side state for rds_tcp_data_recv() for osock */
cancel_delayed_work_sync(&cp->cp_send_w);
cancel_delayed_work_sync(&cp->cp_recv_w);
+ lock_sock(osock->sk);
if (tc->t_tinc) {
rds_inc_put(&tc->t_tinc->ti_inc);
tc->t_tinc = NULL;
@@ -322,8 +322,8 @@ out:
}
#endif
-static int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
- __u32 scope_id)
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id)
{
struct net_device *dev = NULL;
#if IS_ENABLED(CONFIG_IPV6)
@@ -502,7 +502,7 @@ void rds_tcp_tune(struct socket *sock)
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
}
if (rtn->rcvbuf_size > 0) {
- sk->sk_sndbuf = rtn->rcvbuf_size;
+ sk->sk_rcvbuf = rtn->rcvbuf_size;
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
release_sock(sk);
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 3c69361d21c7..4620549ecbeb 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -60,7 +60,8 @@ u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
void rds_tcp_accept_work(struct sock *sk);
-
+int rds_tcp_laddr_check(struct net *net, const struct in6_addr *addr,
+ __u32 scope_id);
/* tcp_connect.c */
int rds_tcp_conn_path_connect(struct rds_conn_path *cp);
void rds_tcp_conn_path_shutdown(struct rds_conn_path *conn);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 008f50fb25dd..63efe60fda1f 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -141,7 +141,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
addrlen = sizeof(sin);
}
- ret = sock->ops->bind(sock, addr, addrlen);
+ ret = kernel_bind(sock, addr, addrlen);
if (ret) {
rdsdebug("bind failed with %d at address %pI6c\n",
ret, &conn->c_laddr);
@@ -169,7 +169,7 @@ int rds_tcp_conn_path_connect(struct rds_conn_path *cp)
* own the socket
*/
rds_tcp_set_callbacks(sock, cp);
- ret = sock->ops->connect(sock, addr, addrlen, O_NONBLOCK);
+ ret = kernel_connect(sock, addr, addrlen, O_NONBLOCK);
rdsdebug("connect to address %pI6c returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index c12203f646da..37f4a8ca3ac8 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -198,6 +198,12 @@ int rds_tcp_accept_one(struct socket *sock)
}
#endif
+ if (!rds_tcp_laddr_check(sock_net(sock->sk), peer_addr, dev_if)) {
+ /* local address connection is only allowed via loopback */
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
conn = rds_conn_create(sock_net(sock->sk),
my_addr, peer_addr,
&rds_tcp_transport, GFP_KERNEL, dev_if);
@@ -326,7 +332,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
addr_len = sizeof(*sin);
}
- ret = sock->ops->bind(sock, (struct sockaddr *)&ss, addr_len);
+ ret = kernel_bind(sock, (struct sockaddr *)&ss, addr_len);
if (ret < 0) {
rdsdebug("could not bind %s listener socket: %d\n",
isv6 ? "IPv6" : "IPv4", ret);
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 0f8465852254..7524544a965f 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -112,13 +112,13 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
rfkill->clk = devm_clk_get(&pdev->dev, NULL);
- gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_OUT_LOW);
+ gpio = devm_gpiod_get_optional(&pdev->dev, "reset", GPIOD_ASIS);
if (IS_ERR(gpio))
return PTR_ERR(gpio);
rfkill->reset_gpio = gpio;
- gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_OUT_LOW);
+ gpio = devm_gpiod_get_optional(&pdev->dev, "shutdown", GPIOD_ASIS);
if (IS_ERR(gpio))
return PTR_ERR(gpio);
@@ -130,6 +130,14 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
return -EINVAL;
}
+ ret = gpiod_direction_output(rfkill->reset_gpio, true);
+ if (ret)
+ return ret;
+
+ ret = gpiod_direction_output(rfkill->shutdown_gpio, true);
+ if (ret)
+ return ret;
+
rfkill->rfkill_dev = rfkill_alloc(rfkill->name, &pdev->dev,
rfkill->type, &rfkill_gpio_ops,
rfkill);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index d00a0ef39a56..015e475f5554 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -184,20 +184,47 @@ void rose_kill_by_neigh(struct rose_neigh *neigh)
*/
static void rose_kill_by_device(struct net_device *dev)
{
- struct sock *s;
+ struct sock *sk, *array[16];
+ struct rose_sock *rose;
+ bool rescan;
+ int i, cnt;
+start:
+ rescan = false;
+ cnt = 0;
spin_lock_bh(&rose_list_lock);
- sk_for_each(s, &rose_list) {
- struct rose_sock *rose = rose_sk(s);
+ sk_for_each(sk, &rose_list) {
+ rose = rose_sk(sk);
+ if (rose->device == dev) {
+ if (cnt == ARRAY_SIZE(array)) {
+ rescan = true;
+ break;
+ }
+ sock_hold(sk);
+ array[cnt++] = sk;
+ }
+ }
+ spin_unlock_bh(&rose_list_lock);
+ for (i = 0; i < cnt; i++) {
+ sk = array[cnt];
+ rose = rose_sk(sk);
+ lock_sock(sk);
+ spin_lock_bh(&rose_list_lock);
if (rose->device == dev) {
- rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
+ rose_disconnect(sk, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
if (rose->neighbour)
rose->neighbour->use--;
+ dev_put(rose->device);
rose->device = NULL;
}
+ spin_unlock_bh(&rose_list_lock);
+ release_sock(sk);
+ sock_put(sk);
+ cond_resched();
}
- spin_unlock_bh(&rose_list_lock);
+ if (rescan)
+ goto start;
}
/*
@@ -489,6 +516,12 @@ static int rose_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
struct rose_sock *rose = rose_sk(sk);
@@ -498,8 +531,10 @@ static int rose_listen(struct socket *sock, int backlog)
memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
+ release_sock(sk);
return 0;
}
+ release_sock(sk);
return -EOPNOTSUPP;
}
@@ -594,6 +629,8 @@ static struct sock *rose_make_new(struct sock *osk)
rose->idle = orose->idle;
rose->defer = orose->defer;
rose->device = orose->device;
+ if (rose->device)
+ dev_hold(rose->device);
rose->qbitincl = orose->qbitincl;
return sk;
@@ -647,6 +684,10 @@ static int rose_release(struct socket *sock)
break;
}
+ spin_lock_bh(&rose_list_lock);
+ dev_put(rose->device);
+ rose->device = NULL;
+ spin_unlock_bh(&rose_list_lock);
sock->sk = NULL;
release_sock(sk);
sock_put(sk);
@@ -721,7 +762,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
struct rose_sock *rose = rose_sk(sk);
struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr;
unsigned char cause, diagnostic;
- struct net_device *dev;
ax25_uid_assoc *user;
int n, err = 0;
@@ -778,9 +818,12 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
}
if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */
+ struct net_device *dev;
+
sock_reset_flag(sk, SOCK_ZAPPED);
- if ((dev = rose_dev_first()) == NULL) {
+ dev = rose_dev_first();
+ if (!dev) {
err = -ENETUNREACH;
goto out_release;
}
@@ -788,6 +831,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le
user = ax25_findbyuid(current_euid());
if (!user) {
err = -EINVAL;
+ dev_put(dev);
goto out_release;
}
@@ -1293,9 +1337,11 @@ static int rose_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
case TIOCINQ: {
struct sk_buff *skb;
long amount = 0L;
- /* These two are safe on a single CPU system as only user tasks fiddle here */
+
+ spin_lock_irq(&sk->sk_receive_queue.lock);
if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL)
amount = skb->len;
+ spin_unlock_irq(&sk->sk_receive_queue.lock);
return put_user(amount, (unsigned int __user *) argp);
}
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index 62055d3069d2..f3d7105c6cdd 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -239,6 +239,9 @@ void rose_transmit_clear_request(struct rose_neigh *neigh, unsigned int lci, uns
unsigned char *dptr;
int len;
+ if (!neigh->dev)
+ return;
+
len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 3;
if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL)
diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c
index 094a6621f8e8..56eea298b8ef 100644
--- a/net/rose/rose_loopback.c
+++ b/net/rose/rose_loopback.c
@@ -99,10 +99,20 @@ static void rose_loopback_timer(struct timer_list *unused)
}
if (frametype == ROSE_CALL_REQUEST) {
- if ((dev = rose_dev_get(dest)) != NULL) {
- if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0)
- kfree_skb(skb);
- } else {
+ if (!rose_loopback_neigh->dev &&
+ !rose_loopback_neigh->loopback) {
+ kfree_skb(skb);
+ continue;
+ }
+
+ dev = rose_dev_get(dest);
+ if (!dev) {
+ kfree_skb(skb);
+ continue;
+ }
+
+ if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) {
+ dev_put(dev);
kfree_skb(skb);
}
} else {
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index f2ff21d7df08..5671853bef83 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -230,8 +230,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh)
{
struct rose_neigh *s;
- rose_stop_ftimer(rose_neigh);
- rose_stop_t0timer(rose_neigh);
+ del_timer_sync(&rose_neigh->ftimer);
+ del_timer_sync(&rose_neigh->t0timer);
skb_queue_purge(&rose_neigh->queue);
@@ -616,6 +616,8 @@ struct net_device *rose_dev_first(void)
if (first == NULL || strncmp(dev->name, first->name, 3) < 0)
first = dev;
}
+ if (first)
+ dev_hold(first);
rcu_read_unlock();
return first;
diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c
index 74555fb95615..cede9d0ceff8 100644
--- a/net/rose/rose_timer.c
+++ b/net/rose/rose_timer.c
@@ -34,89 +34,89 @@ static void rose_idletimer_expiry(struct timer_list *);
void rose_start_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
sk->sk_timer.function = rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
- add_timer(&sk->sk_timer);
+ sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
}
void rose_start_t1timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t2timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t3timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_hbtimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_idletimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->idletimer);
+ sk_stop_timer(sk, &rose->idletimer);
if (rose->idle > 0) {
rose->idletimer.function = rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
- add_timer(&rose->idletimer);
+ sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
}
}
void rose_stop_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
}
void rose_stop_timer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->timer);
+ sk_stop_timer(sk, &rose_sk(sk)->timer);
}
void rose_stop_idletimer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->idletimer);
+ sk_stop_timer(sk, &rose_sk(sk)->idletimer);
}
static void rose_heartbeat_expiry(struct timer_list *t)
@@ -133,6 +133,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
bh_unlock_sock(sk);
rose_destroy_socket(sk);
+ sock_put(sk);
return;
}
break;
@@ -155,6 +156,7 @@ static void rose_heartbeat_expiry(struct timer_list *t)
rose_start_heartbeat(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_timer_expiry(struct timer_list *t)
@@ -184,6 +186,7 @@ static void rose_timer_expiry(struct timer_list *t)
break;
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_idletimer_expiry(struct timer_list *t)
@@ -208,4 +211,5 @@ static void rose_idletimer_expiry(struct timer_list *t)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 57f835d2442e..fb7e3fffcb5e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -1010,7 +1010,7 @@ static int __init af_rxrpc_init(void)
goto error_security;
}
- ret = register_pernet_subsys(&rxrpc_net_ops);
+ ret = register_pernet_device(&rxrpc_net_ops);
if (ret)
goto error_pernet;
@@ -1055,7 +1055,7 @@ error_key_type:
error_sock:
proto_unregister(&rxrpc_proto);
error_proto:
- unregister_pernet_subsys(&rxrpc_net_ops);
+ unregister_pernet_device(&rxrpc_net_ops);
error_pernet:
rxrpc_exit_security();
error_security:
@@ -1077,7 +1077,7 @@ static void __exit af_rxrpc_exit(void)
unregister_key_type(&key_type_rxrpc);
sock_unregister(PF_RXRPC);
proto_unregister(&rxrpc_proto);
- unregister_pernet_subsys(&rxrpc_net_ops);
+ unregister_pernet_device(&rxrpc_net_ops);
ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0);
ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 7778f0fb48cf..7ecfbff1f0d0 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -211,6 +211,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
+ rxrpc_put_local(peer->local);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 468efc3660c0..12f5c1870103 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -429,7 +429,8 @@ recheck_state:
goto recheck_state;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
+ if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) &&
+ call->state != RXRPC_CALL_CLIENT_RECV_REPLY) {
rxrpc_resend(call, now);
goto recheck_state;
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 58bd558a277a..40711f410828 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -446,7 +446,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
if (state >= RXRPC_CALL_COMPLETE)
return;
- if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
unsigned long timo = READ_ONCE(call->next_req_timo);
unsigned long now, expect_req_by;
diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c
index 2fe2add62a8e..9be6b35fd9b2 100644
--- a/net/rxrpc/key.c
+++ b/net/rxrpc/key.c
@@ -1112,7 +1112,7 @@ static long rxrpc_read(const struct key *key,
default: /* we have a ticket we can't encode */
pr_err("Unsupported key token type (%u)\n",
token->security_index);
- continue;
+ return -ENOPKG;
}
_debug("token[%u]: toksize=%u", ntoks, toksize);
@@ -1227,7 +1227,9 @@ static long rxrpc_read(const struct key *key,
break;
default:
- break;
+ pr_err("Unsupported key token type (%u)\n",
+ token->security_index);
+ return -ENOPKG;
}
ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==,
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index fe190a691872..5a01479aae3f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -452,6 +452,9 @@ static void rxrpc_local_processor(struct work_struct *work)
container_of(work, struct rxrpc_local, processor);
bool again;
+ if (local->dead)
+ return;
+
trace_rxrpc_local(local->debug_id, rxrpc_local_processing,
atomic_read(&local->usage), NULL);
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 417d80867c4f..39579cfcf9b8 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -119,6 +119,8 @@ static __net_exit void rxrpc_exit_net(struct net *net)
rxnet->live = false;
del_timer_sync(&rxnet->peer_keepalive_timer);
cancel_work_sync(&rxnet->peer_keepalive_work);
+ /* Remove the timer again as the worker may have restarted it. */
+ del_timer_sync(&rxnet->peer_keepalive_timer);
rxrpc_destroy_all_calls(rxnet);
rxrpc_destroy_all_connections(rxnet);
rxrpc_destroy_all_peers(rxnet);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index b91b090217cd..d0a1ebecc6fb 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -299,6 +299,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
return peer;
}
+static void rxrpc_free_peer(struct rxrpc_peer *peer)
+{
+ rxrpc_put_local(peer->local);
+ kfree_rcu(peer, rcu);
+}
+
/*
* Set up a new incoming peer. There shouldn't be any other matching peers
* since we've already done a search in the list from the non-reentrant context
@@ -365,7 +371,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
spin_unlock_bh(&rxnet->peer_hash_lock);
if (peer)
- kfree(candidate);
+ rxrpc_free_peer(candidate);
else
peer = candidate;
}
@@ -420,8 +426,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer)
list_del_init(&peer->keepalive_link);
spin_unlock_bh(&rxnet->peer_hash_lock);
- rxrpc_put_local(peer->local);
- kfree_rcu(peer, rcu);
+ rxrpc_free_peer(peer);
}
/*
@@ -457,8 +462,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer)
if (n == 0) {
hash_del_rcu(&peer->hash_link);
list_del_init(&peer->keepalive_link);
- rxrpc_put_local(peer->local);
- kfree_rcu(peer, rcu);
+ rxrpc_free_peer(peer);
}
}
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index edd76c41765f..eaa032c498c9 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -440,6 +440,12 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
success:
ret = copied;
+ if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
+ read_lock_bh(&call->state_lock);
+ if (call->error < 0)
+ ret = call->error;
+ read_unlock_bh(&call->state_lock);
+ }
out:
call->tx_pending = skb;
_leave(" = %d", ret);
@@ -683,7 +689,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
if (call->tx_total_len != -1 ||
call->tx_pending ||
call->tx_top != 0)
- goto error_put;
+ goto out_put_unlock;
call->tx_total_len = p.call.tx_total_len;
}
}
@@ -703,7 +709,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
/* Fall through */
case 1:
if (p.call.timeouts.hard > 0) {
- j = msecs_to_jiffies(p.call.timeouts.hard);
+ j = p.call.timeouts.hard * HZ;
now = jiffies;
j += now;
WRITE_ONCE(call->expect_term_by, j);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index d75bd15151e6..50f825f55c21 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -17,7 +17,7 @@
static struct ctl_table_header *rxrpc_sysctl_reg_table;
static const unsigned int one = 1;
static const unsigned int four = 4;
-static const unsigned int thirtytwo = 32;
+static const unsigned int max_backlog = RXRPC_BACKLOG_MAX - 1;
static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
static const unsigned long one_jiffy = 1;
@@ -111,7 +111,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&four,
- .extra2 = (void *)&thirtytwo,
+ .extra2 = (void *)&max_backlog,
},
{
.procname = "rx_window_size",
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index e95741388311..7698a8974a47 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -458,17 +458,6 @@ config NET_CLS_BASIC
To compile this code as a module, choose M here: the
module will be called cls_basic.
-config NET_CLS_TCINDEX
- tristate "Traffic-Control Index (TCINDEX)"
- select NET_CLS
- ---help---
- Say Y here if you want to be able to classify packets based on
- traffic control indices. You will want this feature if you want
- to implement Differentiated Services together with DSMARK.
-
- To compile this code as a module, choose M here: the
- module will be called cls_tcindex.
-
config NET_CLS_ROUTE4
tristate "Routing decision (ROUTE)"
depends on INET
@@ -514,34 +503,6 @@ config CLS_U32_MARK
---help---
Say Y here to be able to use netfilter marks as u32 key.
-config NET_CLS_RSVP
- tristate "IPv4 Resource Reservation Protocol (RSVP)"
- select NET_CLS
- ---help---
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp.
-
-config NET_CLS_RSVP6
- tristate "IPv6 Resource Reservation Protocol (RSVP6)"
- select NET_CLS
- ---help---
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests and you are using the IPv6 protocol.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp6.
-
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f0403f49edcb..3139c32e1947 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -61,9 +61,6 @@ obj-$(CONFIG_NET_SCH_ETF) += sch_etf.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
-obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
-obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
-obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f2c4bfc79663..ad0773b20d83 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -609,15 +609,24 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
restart_act_graph:
for (i = 0; i < nr_actions; i++) {
const struct tc_action *a = actions[i];
+ int repeat_ttl;
if (jmp_prgcnt > 0) {
jmp_prgcnt -= 1;
continue;
}
+
+ repeat_ttl = 32;
repeat:
ret = a->ops->act(skb, a, res);
- if (ret == TC_ACT_REPEAT)
- goto repeat; /* we need a ttl - JHS */
+
+ if (unlikely(ret == TC_ACT_REPEAT)) {
+ if (--repeat_ttl != 0)
+ goto repeat;
+ /* suspicious opcode, stop pipeline */
+ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n");
+ return TC_ACT_OK;
+ }
if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) {
jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK;
@@ -900,6 +909,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
return ERR_PTR(-EINVAL);
}
+ if (!bind && ovr && err == ACT_P_CREATED)
+ refcount_set(&a->tcfa_refcnt, 2);
+
return a;
err_mod:
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index a30c17a28281..9aad86e4a0fb 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -220,7 +220,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- if (unlikely(!(dev->flags & IFF_UP))) {
+ if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
goto out;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index ce14fafb36a1..255d4ecf6252 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -29,6 +29,7 @@ static struct tc_action_ops act_pedit_ops;
static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = {
[TCA_PEDIT_PARMS] = { .len = sizeof(struct tc_pedit) },
+ [TCA_PEDIT_PARMS_EX] = { .len = sizeof(struct tc_pedit) },
[TCA_PEDIT_KEYS_EX] = { .type = NLA_NESTED },
};
@@ -148,7 +149,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *pattr;
struct tcf_pedit *p;
int ret = 0, err;
- int ksize;
+ int i, ksize;
u32 index;
if (!nla) {
@@ -221,6 +222,22 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
p->tcfp_nkeys = parm->nkeys;
}
memcpy(p->tcfp_keys, parm->keys, ksize);
+ p->tcfp_off_max_hint = 0;
+ for (i = 0; i < p->tcfp_nkeys; ++i) {
+ u32 cur = p->tcfp_keys[i].off;
+
+ /* sanitize the shift value for any later use */
+ p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
+ p->tcfp_keys[i].shift);
+
+ /* The AT option can read a single byte, we can bound the actual
+ * value with uchar max.
+ */
+ cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+
+ /* Each key touches 4 bytes starting from the computed offset */
+ p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ }
p->tcfp_flags = parm->flags;
p->tcf_action = parm->action;
@@ -298,13 +315,18 @@ static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_pedit *p = to_pedit(a);
+ u32 max_offset;
int i;
- if (skb_unclone(skb, GFP_ATOMIC))
- return p->tcf_action;
-
spin_lock(&p->tcf_lock);
+ max_offset = (skb_transport_header_was_set(skb) ?
+ skb_transport_offset(skb) :
+ skb_network_offset(skb)) +
+ p->tcfp_off_max_hint;
+ if (skb_ensure_writable(skb, min(skb->len, max_offset)))
+ goto unlock;
+
tcf_lastuse_update(&p->tcf_tm);
if (p->tcfp_nkeys > 0) {
@@ -393,6 +415,7 @@ bad:
p->tcf_qstats.overlimits++;
done:
bstats_update(&p->tcf_bstats, skb);
+unlock:
spin_unlock(&p->tcf_lock);
return p->tcf_action;
}
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 21d195296121..03a272af664a 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -10,6 +10,7 @@
*/
#include <linux/module.h>
+#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
@@ -36,6 +37,13 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&d->tcf_tm);
bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb);
+ action = READ_ONCE(d->tcf_action);
+ if (unlikely(action == TC_ACT_SHOT))
+ goto drop;
+
+ if (!skb->dev || skb->dev->type != ARPHRD_ETHER)
+ return action;
+
/* XXX: if you are going to edit more fields beyond ethernet header
* (example when you add IP header replacement or vlan swap)
* then MAX_EDIT_LEN needs to change appropriately
@@ -44,10 +52,6 @@ static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a,
if (unlikely(err)) /* best policy is to drop on the floor */
goto drop;
- action = READ_ONCE(d->tcf_action);
- if (unlikely(action == TC_ACT_SHOT))
- goto drop;
-
p = rcu_dereference_bh(d->skbmod_p);
flags = p->flags;
if (flags & SKBMOD_F_DMAC)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index e4fc6b2bc29d..f43234be5695 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -314,7 +314,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port,
0, flags,
- key_id, 0);
+ key_id, opts_len);
} else {
NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst");
ret = -EINVAL;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 184c20b86393..6166bbad9753 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,8 +31,6 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
-extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
-
/* The list of all installed classifier types */
static LIST_HEAD(tcf_proto_base);
@@ -539,6 +537,7 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
struct netlink_ext_ack *extack)
{
struct tcf_block *block;
+ int err = 0;
if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
block = tcf_block_lookup(net, block_index);
@@ -550,55 +549,95 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
const struct Qdisc_class_ops *cops;
struct net_device *dev;
+ rcu_read_lock();
+
/* Find link */
- dev = __dev_get_by_index(net, ifindex);
- if (!dev)
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
return ERR_PTR(-ENODEV);
+ }
/* Find qdisc */
if (!*parent) {
*q = dev->qdisc;
*parent = (*q)->handle;
} else {
- *q = qdisc_lookup(dev, TC_H_MAJ(*parent));
+ *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
if (!*q) {
NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto errout_rcu;
}
}
+ *q = qdisc_refcount_inc_nz(*q);
+ if (!*q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+ err = -EINVAL;
+ goto errout_rcu;
+ }
+
/* Is it classful? */
cops = (*q)->ops->cl_ops;
if (!cops) {
NL_SET_ERR_MSG(extack, "Qdisc not classful");
- return ERR_PTR(-EINVAL);
+ err = -EINVAL;
+ goto errout_rcu;
}
if (!cops->tcf_block) {
NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
- return ERR_PTR(-EOPNOTSUPP);
+ err = -EOPNOTSUPP;
+ goto errout_rcu;
}
+ /* At this point we know that qdisc is not noop_qdisc,
+ * which means that qdisc holds a reference to net_device
+ * and we hold a reference to qdisc, so it is safe to release
+ * rcu read lock.
+ */
+ rcu_read_unlock();
+
/* Do we search for filter, attached to class? */
if (TC_H_MIN(*parent)) {
*cl = cops->find(*q, *parent);
if (*cl == 0) {
NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
- return ERR_PTR(-ENOENT);
+ err = -ENOENT;
+ goto errout_qdisc;
}
}
/* And the last stroke */
block = cops->tcf_block(*q, *cl, extack);
- if (!block)
- return ERR_PTR(-EINVAL);
+ if (!block) {
+ err = -EINVAL;
+ goto errout_qdisc;
+ }
if (tcf_block_shared(block)) {
NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
- return ERR_PTR(-EOPNOTSUPP);
+ err = -EOPNOTSUPP;
+ goto errout_qdisc;
}
}
return block;
+
+errout_rcu:
+ rcu_read_unlock();
+errout_qdisc:
+ if (*q) {
+ qdisc_put(*q);
+ *q = NULL;
+ }
+ return ERR_PTR(err);
+}
+
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+{
+ if (q)
+ qdisc_put(q);
}
struct tcf_block_owner_item {
@@ -1336,6 +1375,7 @@ replay:
errout:
if (chain)
tcf_chain_put(chain);
+ tcf_block_release(q, block);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
@@ -1457,6 +1497,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
errout:
if (chain)
tcf_chain_put(chain);
+ tcf_block_release(q, block);
return err;
}
@@ -1542,6 +1583,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
errout:
if (chain)
tcf_chain_put(chain);
+ tcf_block_release(q, block);
return err;
}
@@ -1796,6 +1838,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
+ module_put(ops->owner);
return -EOPNOTSUPP;
}
@@ -1858,7 +1901,8 @@ replay:
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
if (chain_index > TC_ACT_EXT_VAL_MASK) {
NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout_block;
}
chain = tcf_chain_lookup(block, chain_index);
if (n->nlmsg_type == RTM_NEWCHAIN) {
@@ -1870,23 +1914,27 @@ replay:
tcf_chain_hold(chain);
} else {
NL_SET_ERR_MSG(extack, "Filter chain already exists");
- return -EEXIST;
+ err = -EEXIST;
+ goto errout_block;
}
} else {
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
- return -ENOENT;
+ err = -ENOENT;
+ goto errout_block;
}
chain = tcf_chain_create(block, chain_index);
if (!chain) {
NL_SET_ERR_MSG(extack, "Failed to create filter chain");
- return -ENOMEM;
+ err = -ENOMEM;
+ goto errout_block;
}
}
} else {
if (!chain || tcf_chain_held_by_acts_only(chain)) {
NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
- return -EINVAL;
+ err = -EINVAL;
+ goto errout_block;
}
tcf_chain_hold(chain);
}
@@ -1918,7 +1966,7 @@ replay:
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
- n->nlmsg_seq, n->nlmsg_type, true);
+ n->nlmsg_flags, n->nlmsg_type, true);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
break;
@@ -1930,6 +1978,8 @@ replay:
errout:
tcf_chain_put(chain);
+errout_block:
+ tcf_block_release(q, block);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 55bf75cb1f16..164049d20f4d 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -229,7 +229,7 @@ static u32 flow_get_skgid(const struct sk_buff *skb)
static u32 flow_get_vlan_tag(const struct sk_buff *skb)
{
- u16 uninitialized_var(tag);
+ u16 tag;
if (vlan_get_tag(skb, &tag) < 0)
return 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 208436eb107c..7ffa28a98d74 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -554,6 +554,7 @@ static int fl_set_key_mpls(struct nlattr **tb,
static void fl_set_key_vlan(struct nlattr **tb,
__be16 ethertype,
int vlan_id_key, int vlan_prio_key,
+ int vlan_next_eth_type_key,
struct flow_dissector_key_vlan *key_val,
struct flow_dissector_key_vlan *key_mask)
{
@@ -572,6 +573,11 @@ static void fl_set_key_vlan(struct nlattr **tb,
}
key_val->vlan_tpid = ethertype;
key_mask->vlan_tpid = cpu_to_be16(~0);
+ if (tb[vlan_next_eth_type_key]) {
+ key_val->vlan_eth_type =
+ nla_get_be16(tb[vlan_next_eth_type_key]);
+ key_mask->vlan_eth_type = cpu_to_be16(~0);
+ }
}
static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
@@ -634,6 +640,9 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
if (option_len > sizeof(struct geneve_opt))
data_len = option_len - sizeof(struct geneve_opt);
+ if (key->enc_opts.len > FLOW_DIS_TUN_OPTS_MAX - 4)
+ return -ERANGE;
+
opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
memset(opt, 0xff, option_len);
opt->length = data_len / 4;
@@ -801,8 +810,9 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
if (eth_type_vlan(ethertype)) {
fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
- TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
- &mask->vlan);
+ TCA_FLOWER_KEY_VLAN_PRIO,
+ TCA_FLOWER_KEY_VLAN_ETH_TYPE,
+ &key->vlan, &mask->vlan);
if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
@@ -810,6 +820,7 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
fl_set_key_vlan(tb, ethertype,
TCA_FLOWER_KEY_CVLAN_ID,
TCA_FLOWER_KEY_CVLAN_PRIO,
+ TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
&key->cvlan, &mask->cvlan);
fl_set_key_val(tb, &key->basic.n_proto,
TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
@@ -1717,13 +1728,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
if (mask->basic.n_proto) {
- if (mask->cvlan.vlan_tpid) {
+ if (mask->cvlan.vlan_eth_type) {
if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
key->basic.n_proto))
goto nla_put_failure;
- } else if (mask->vlan.vlan_tpid) {
+ } else if (mask->vlan.vlan_eth_type) {
if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
- key->basic.n_proto))
+ key->vlan.vlan_eth_type))
goto nla_put_failure;
}
}
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index cb2c62605fc7..f15089c24a32 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -221,11 +221,6 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
if (err < 0)
return err;
- if (tb[TCA_FW_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
- tcf_bind_filter(tp, &f->res, base);
- }
-
#ifdef CONFIG_NET_CLS_IND
if (tb[TCA_FW_INDEV]) {
int ret;
@@ -244,6 +239,11 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
} else if (head->mask != 0xFFFFFFFF)
return err;
+ if (tb[TCA_FW_CLASSID]) {
+ f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]);
+ tcf_bind_filter(tp, &f->res, base);
+ }
+
return 0;
}
@@ -277,7 +277,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
fnew->id = f->id;
- fnew->res = f->res;
#ifdef CONFIG_NET_CLS_IND
fnew->ifindex = f->ifindex;
#endif /* CONFIG_NET_CLS_IND */
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0256777b838e..a924292623ec 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -427,6 +427,11 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
return -EINVAL;
}
+ if (!nhandle) {
+ NL_SET_ERR_MSG(extack, "Replacing with handle of 0 is invalid");
+ return -EINVAL;
+ }
+
h1 = to_hash(nhandle);
b = rtnl_dereference(head->table[h1]);
if (!b) {
@@ -480,6 +485,11 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
int err;
bool new = true;
+ if (!handle) {
+ NL_SET_ERR_MSG(extack, "Creating with handle of 0 is invalid");
+ return -EINVAL;
+ }
+
if (opt == NULL)
return handle ? -EINVAL : 0;
@@ -503,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
f->id = fold->id;
f->iif = fold->iif;
- f->res = fold->res;
f->handle = fold->handle;
f->tp = fold->tp;
@@ -528,7 +537,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
rcu_assign_pointer(f->next, f1);
rcu_assign_pointer(*fp, f);
- if (fold && fold->handle && f->handle != fold->handle) {
+ if (fold) {
th = to_hash(fold->handle);
h = from_hash(fold->handle >> 16);
b = rtnl_dereference(head->table[th]);
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
deleted file mode 100644
index cbb5e0d600f3..000000000000
--- a/net/sched/cls_rsvp.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-
-#define RSVP_DST_LEN 1
-#define RSVP_ID "rsvp"
-#define RSVP_OPS cls_rsvp_ops
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
deleted file mode 100644
index eb1dd2afc5a1..000000000000
--- a/net/sched/cls_rsvp.h
+++ /dev/null
@@ -1,775 +0,0 @@
-/*
- * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-/*
- Comparing to general packet classification problem,
- RSVP needs only sevaral relatively simple rules:
-
- * (dst, protocol) are always specified,
- so that we are able to hash them.
- * src may be exact, or may be wildcard, so that
- we can keep a hash table plus one wildcard entry.
- * source port (or flow label) is important only if src is given.
-
- IMPLEMENTATION.
-
- We use a two level hash table: The top level is keyed by
- destination address and protocol ID, every bucket contains a list
- of "rsvp sessions", identified by destination address, protocol and
- DPI(="Destination Port ID"): triple (key, mask, offset).
-
- Every bucket has a smaller hash table keyed by source address
- (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
- Every bucket is again a list of "RSVP flows", selected by
- source address and SPI(="Source Port ID" here rather than
- "security parameter index"): triple (key, mask, offset).
-
-
- NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
- and all fragmented packets go to the best-effort traffic class.
-
-
- NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
- only one "Generalized Port Identifier". So that for classic
- ah, esp (and udp,tcp) both *pi should coincide or one of them
- should be wildcard.
-
- At first sight, this redundancy is just a waste of CPU
- resources. But DPI and SPI add the possibility to assign different
- priorities to GPIs. Look also at note 4 about tunnels below.
-
-
- NOTE 3. One complication is the case of tunneled packets.
- We implement it as following: if the first lookup
- matches a special session with "tunnelhdr" value not zero,
- flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
- In this case, we pull tunnelhdr bytes and restart lookup
- with tunnel ID added to the list of keys. Simple and stupid 8)8)
- It's enough for PIMREG and IPIP.
-
-
- NOTE 4. Two GPIs make it possible to parse even GRE packets.
- F.e. DPI can select ETH_P_IP (and necessary flags to make
- tunnelhdr correct) in GRE protocol field and SPI matches
- GRE key. Is it not nice? 8)8)
-
-
- Well, as result, despite its simplicity, we get a pretty
- powerful classification engine. */
-
-
-struct rsvp_head {
- u32 tmap[256/32];
- u32 hgenerator;
- u8 tgenerator;
- struct rsvp_session __rcu *ht[256];
- struct rcu_head rcu;
-};
-
-struct rsvp_session {
- struct rsvp_session __rcu *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
- /* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter __rcu *ht[16 + 1];
- struct rcu_head rcu;
-};
-
-
-struct rsvp_filter {
- struct rsvp_filter __rcu *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
-
- struct tcf_result res;
- struct tcf_exts exts;
-
- u32 handle;
- struct rsvp_session *sess;
- struct rcu_work rwork;
-};
-
-static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
-{
- unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
-
- h ^= h>>16;
- h ^= h>>8;
- return (h ^ protocol ^ tunnelid) & 0xFF;
-}
-
-static inline unsigned int hash_src(__be32 *src)
-{
- unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
-
- h ^= h>>16;
- h ^= h>>8;
- h ^= h>>4;
- return h & 0xF;
-}
-
-#define RSVP_APPLY_RESULT() \
-{ \
- int r = tcf_exts_exec(skb, &f->exts, res); \
- if (r < 0) \
- continue; \
- else if (r > 0) \
- return r; \
-}
-
-static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct rsvp_head *head = rcu_dereference_bh(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1, h2;
- __be32 *dst, *src;
- u8 protocol;
- u8 tunnelid = 0;
- u8 *xprt;
-#if RSVP_DST_LEN == 4
- struct ipv6hdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ipv6_hdr(skb);
-#else
- struct iphdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ip_hdr(skb);
-#endif
-restart:
-
-#if RSVP_DST_LEN == 4
- src = &nhptr->saddr.s6_addr32[0];
- dst = &nhptr->daddr.s6_addr32[0];
- protocol = nhptr->nexthdr;
- xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
-#else
- src = &nhptr->saddr;
- dst = &nhptr->daddr;
- protocol = nhptr->protocol;
- xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
- if (ip_is_fragment(nhptr))
- return -1;
-#endif
-
- h1 = hash_dst(dst, protocol, tunnelid);
- h2 = hash_src(src);
-
- for (s = rcu_dereference_bh(head->ht[h1]); s;
- s = rcu_dereference_bh(s->next)) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
- protocol == s->protocol &&
- !(s->dpi.mask &
- (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- tunnelid == s->tunnelid) {
-
- for (f = rcu_dereference_bh(s->ht[h2]); f;
- f = rcu_dereference_bh(f->next)) {
- if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
- !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
-#if RSVP_DST_LEN == 4
- &&
- src[0] == f->src[0] &&
- src[1] == f->src[1] &&
- src[2] == f->src[2]
-#endif
- ) {
- *res = f->res;
- RSVP_APPLY_RESULT();
-
-matched:
- if (f->tunnelhdr == 0)
- return 0;
-
- tunnelid = f->res.classid;
- nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
- goto restart;
- }
- }
-
- /* And wildcard bucket... */
- for (f = rcu_dereference_bh(s->ht[16]); f;
- f = rcu_dereference_bh(f->next)) {
- *res = f->res;
- RSVP_APPLY_RESULT();
- goto matched;
- }
- return -1;
- }
- }
- return -1;
-}
-
-static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter __rcu **ins;
- struct rsvp_filter *pins;
- unsigned int h1 = h & 0xFF;
- unsigned int h2 = (h >> 8) & 0xFF;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
- ins = &pins->next, pins = rtnl_dereference(*ins)) {
- if (pins->handle == h) {
- RCU_INIT_POINTER(n->next, pins->next);
- rcu_assign_pointer(*ins, n);
- return;
- }
- }
- }
-
- /* Something went wrong if we are trying to replace a non-existant
- * node. Mind as well halt instead of silently failing.
- */
- BUG_ON(1);
-}
-
-static void *rsvp_get(struct tcf_proto *tp, u32 handle)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1 = handle & 0xFF;
- unsigned int h2 = (handle >> 8) & 0xFF;
-
- if (h2 > 16)
- return NULL;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->handle == handle)
- return f;
- }
- }
- return NULL;
-}
-
-static int rsvp_init(struct tcf_proto *tp)
-{
- struct rsvp_head *data;
-
- data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
- if (data) {
- rcu_assign_pointer(tp->root, data);
- return 0;
- }
- return -ENOBUFS;
-}
-
-static void __rsvp_delete_filter(struct rsvp_filter *f)
-{
- tcf_exts_destroy(&f->exts);
- tcf_exts_put_net(&f->exts);
- kfree(f);
-}
-
-static void rsvp_delete_filter_work(struct work_struct *work)
-{
- struct rsvp_filter *f = container_of(to_rcu_work(work),
- struct rsvp_filter,
- rwork);
- rtnl_lock();
- __rsvp_delete_filter(f);
- rtnl_unlock();
-}
-
-static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
-{
- tcf_unbind_filter(tp, &f->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (tcf_exts_get_net(&f->exts))
- tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
- else
- __rsvp_delete_filter(f);
-}
-
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int h1, h2;
-
- if (data == NULL)
- return;
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
-
- while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
- RCU_INIT_POINTER(data->ht[h1], s->next);
-
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
- rcu_assign_pointer(s->ht[h2], f->next);
- rsvp_delete_filter(tp, f);
- }
- }
- kfree_rcu(s, rcu);
- }
- }
- kfree_rcu(data, rcu);
-}
-
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_filter *nfp, *f = arg;
- struct rsvp_filter __rcu **fp;
- unsigned int h = f->handle;
- struct rsvp_session __rcu **sp;
- struct rsvp_session *nsp, *s = f->sess;
- int i, h1;
-
- fp = &s->ht[(h >> 8) & 0xFF];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- if (nfp == f) {
- RCU_INIT_POINTER(*fp, f->next);
- rsvp_delete_filter(tp, f);
-
- /* Strip tree */
-
- for (i = 0; i <= 16; i++)
- if (s->ht[i])
- goto out;
-
- /* OK, session has no flows */
- sp = &head->ht[h & 0xFF];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if (nsp == s) {
- RCU_INIT_POINTER(*sp, s->next);
- kfree_rcu(s, rcu);
- goto out;
- }
- }
-
- break;
- }
- }
-
-out:
- *last = true;
- for (h1 = 0; h1 < 256; h1++) {
- if (rcu_access_pointer(head->ht[h1])) {
- *last = false;
- break;
- }
- }
-
- return 0;
-}
-
-static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int i = 0xFFFF;
-
- while (i-- > 0) {
- u32 h;
-
- if ((data->hgenerator += 0x10000) == 0)
- data->hgenerator = 0x10000;
- h = data->hgenerator|salt;
- if (!rsvp_get(tp, h))
- return h;
- }
- return 0;
-}
-
-static int tunnel_bts(struct rsvp_head *data)
-{
- int n = data->tgenerator >> 5;
- u32 b = 1 << (data->tgenerator & 0x1F);
-
- if (data->tmap[n] & b)
- return 0;
- data->tmap[n] |= b;
- return 1;
-}
-
-static void tunnel_recycle(struct rsvp_head *data)
-{
- struct rsvp_session __rcu **sht = data->ht;
- u32 tmap[256/32];
- int h1, h2;
-
- memset(tmap, 0, sizeof(tmap));
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
- for (s = rtnl_dereference(sht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->tunnelhdr == 0)
- continue;
- data->tgenerator = f->res.classid;
- tunnel_bts(data);
- }
- }
- }
- }
-
- memcpy(data->tmap, tmap, sizeof(tmap));
-}
-
-static u32 gen_tunnel(struct rsvp_head *data)
-{
- int i, k;
-
- for (k = 0; k < 2; k++) {
- for (i = 255; i > 0; i--) {
- if (++data->tgenerator == 0)
- data->tgenerator = 1;
- if (tunnel_bts(data))
- return data->tgenerator;
- }
- tunnel_recycle(data);
- }
- return 0;
-}
-
-static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
- [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
-};
-
-static int rsvp_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base,
- u32 handle,
- struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- struct rsvp_filter *f, *nfp;
- struct rsvp_filter __rcu **fp;
- struct rsvp_session *nsp, *s;
- struct rsvp_session __rcu **sp;
- struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_RSVP_MAX + 1];
- struct tcf_exts e;
- unsigned int h1, h2;
- __be32 *dst;
- int err;
-
- if (opt == NULL)
- return handle ? -EINVAL : 0;
-
- err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
- if (err < 0)
- return err;
-
- err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
- if (err < 0)
- goto errout2;
-
- f = *arg;
- if (f) {
- /* Node exists: adjust only classid */
- struct rsvp_filter *n;
-
- if (f->handle != handle && handle)
- goto errout2;
-
- n = kmemdup(f, sizeof(*f), GFP_KERNEL);
- if (!n) {
- err = -ENOMEM;
- goto errout2;
- }
-
- err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0) {
- kfree(n);
- goto errout2;
- }
-
- if (tb[TCA_RSVP_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
-
- tcf_exts_change(&n->exts, &e);
- rsvp_replace(tp, n, handle);
- return 0;
- }
-
- /* Now more serious part... */
- err = -EINVAL;
- if (handle)
- goto errout2;
- if (tb[TCA_RSVP_DST] == NULL)
- goto errout2;
-
- err = -ENOBUFS;
- f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
- if (f == NULL)
- goto errout2;
-
- err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- goto errout;
- h2 = 16;
- if (tb[TCA_RSVP_SRC]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
- h2 = hash_src(f->src);
- }
- if (tb[TCA_RSVP_PINFO]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO]);
- f->spi = pinfo->spi;
- f->tunnelhdr = pinfo->tunnelhdr;
- }
- if (tb[TCA_RSVP_CLASSID])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
-
- dst = nla_data(tb[TCA_RSVP_DST]);
- h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
-
- err = -ENOMEM;
- if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
- goto errout;
-
- if (f->tunnelhdr) {
- err = -EINVAL;
- if (f->res.classid > 255)
- goto errout;
-
- err = -ENOMEM;
- if (f->res.classid == 0 &&
- (f->res.classid = gen_tunnel(data)) == 0)
- goto errout;
- }
-
- for (sp = &data->ht[h1];
- (s = rtnl_dereference(*sp)) != NULL;
- sp = &s->next) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
- pinfo && pinfo->protocol == s->protocol &&
- memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- pinfo->tunnelid == s->tunnelid) {
-
-insert:
- /* OK, we found appropriate session */
-
- fp = &s->ht[h2];
-
- f->sess = s;
- if (f->tunnelhdr == 0)
- tcf_bind_filter(tp, &f->res, base);
-
- tcf_exts_change(&f->exts, &e);
-
- fp = &s->ht[h2];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- __u32 mask = nfp->spi.mask & f->spi.mask;
-
- if (mask != f->spi.mask)
- break;
- }
- RCU_INIT_POINTER(f->next, nfp);
- rcu_assign_pointer(*fp, f);
-
- *arg = f;
- return 0;
- }
- }
-
- /* No session found. Create new one. */
-
- err = -ENOBUFS;
- s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
- if (s == NULL)
- goto errout;
- memcpy(s->dst, dst, sizeof(s->dst));
-
- if (pinfo) {
- s->dpi = pinfo->dpi;
- s->protocol = pinfo->protocol;
- s->tunnelid = pinfo->tunnelid;
- }
- sp = &data->ht[h1];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
- break;
- }
- RCU_INIT_POINTER(s->next, nsp);
- rcu_assign_pointer(*sp, s);
-
- goto insert;
-
-errout:
- tcf_exts_destroy(&f->exts);
- kfree(f);
-errout2:
- tcf_exts_destroy(&e);
- return err;
-}
-
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- unsigned int h, h1;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < 256; h++) {
- struct rsvp_session *s;
-
- for (s = rtnl_dereference(head->ht[h]); s;
- s = rtnl_dereference(s->next)) {
- for (h1 = 0; h1 <= 16; h1++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h1]); f;
- f = rtnl_dereference(f->next)) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(tp, f, arg) < 0) {
- arg->stop = 1;
- return;
- }
- arg->count++;
- }
- }
- }
- }
-}
-
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
-{
- struct rsvp_filter *f = fh;
- struct rsvp_session *s;
- struct nlattr *nest;
- struct tc_rsvp_pinfo pinfo;
-
- if (f == NULL)
- return skb->len;
- s = f->sess;
-
- t->tcm_handle = f->handle;
-
- nest = nla_nest_start(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
- goto nla_put_failure;
- pinfo.dpi = s->dpi;
- pinfo.spi = f->spi;
- pinfo.protocol = s->protocol;
- pinfo.tunnelid = s->tunnelid;
- pinfo.tunnelhdr = f->tunnelhdr;
- pinfo.pad = 0;
- if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
- goto nla_put_failure;
- if (f->res.classid &&
- nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
- goto nla_put_failure;
- if (((f->handle >> 8) & 0xFF) != 16 &&
- nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &f->exts) < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &f->exts) < 0)
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
- unsigned long base)
-{
- struct rsvp_filter *f = fh;
-
- if (f && f->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &f->res, base);
- else
- __tcf_unbind_filter(q, &f->res);
- }
-}
-
-static struct tcf_proto_ops RSVP_OPS __read_mostly = {
- .kind = RSVP_ID,
- .classify = rsvp_classify,
- .init = rsvp_init,
- .destroy = rsvp_destroy,
- .get = rsvp_get,
- .change = rsvp_change,
- .delete = rsvp_delete,
- .walk = rsvp_walk,
- .dump = rsvp_dump,
- .bind_class = rsvp_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_rsvp(void)
-{
- return register_tcf_proto_ops(&RSVP_OPS);
-}
-
-static void __exit exit_rsvp(void)
-{
- unregister_tcf_proto_ops(&RSVP_OPS);
-}
-
-module_init(init_rsvp)
-module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
deleted file mode 100644
index dd08aea2aee5..000000000000
--- a/net/sched/cls_rsvp6.c
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ipv6.h>
-#include <linux/skbuff.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/netlink.h>
-
-#define RSVP_DST_LEN 4
-#define RSVP_ID "rsvp6"
-#define RSVP_OPS cls_rsvp6_ops
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
deleted file mode 100644
index 0d7a0aac8dbb..000000000000
--- a/net/sched/cls_tcindex.c
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- * net/sched/cls_tcindex.c Packet classifier for skb->tc_index
- *
- * Written 1998,1999 by Werner Almesberger, EPFL ICA
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <net/act_api.h>
-#include <net/netlink.h>
-#include <net/pkt_cls.h>
-#include <net/sch_generic.h>
-
-/*
- * Passing parameters to the root seems to be done more awkwardly than really
- * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
- * verified. FIXME.
- */
-
-#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
-#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
-
-
-struct tcindex_filter_result {
- struct tcf_exts exts;
- struct tcf_result res;
- struct rcu_work rwork;
-};
-
-struct tcindex_filter {
- u16 key;
- struct tcindex_filter_result result;
- struct tcindex_filter __rcu *next;
- struct rcu_work rwork;
-};
-
-
-struct tcindex_data {
- struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
- struct tcindex_filter __rcu **h; /* imperfect hash; */
- struct tcf_proto *tp;
- u16 mask; /* AND key with mask */
- u32 shift; /* shift ANDed key to the right */
- u32 hash; /* hash table size; 0 if undefined */
- u32 alloc_hash; /* allocated size */
- u32 fall_through; /* 0: only classify if explicit match */
- struct rcu_work rwork;
-};
-
-static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
-{
- return tcf_exts_has_actions(&r->exts) || r->res.classid;
-}
-
-static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
- u16 key)
-{
- if (p->perfect) {
- struct tcindex_filter_result *f = p->perfect + key;
-
- return tcindex_filter_is_set(f) ? f : NULL;
- } else if (p->h) {
- struct tcindex_filter __rcu **fp;
- struct tcindex_filter *f;
-
- fp = &p->h[key % p->hash];
- for (f = rcu_dereference_bh_rtnl(*fp);
- f;
- fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
- if (f->key == key)
- return &f->result;
- }
-
- return NULL;
-}
-
-
-static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct tcindex_data *p = rcu_dereference_bh(tp->root);
- struct tcindex_filter_result *f;
- int key = (skb->tc_index & p->mask) >> p->shift;
-
- pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
- skb, tp, res, p);
-
- f = tcindex_lookup(p, key);
- if (!f) {
- struct Qdisc *q = tcf_block_q(tp->chain->block);
-
- if (!p->fall_through)
- return -1;
- res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
- res->class = 0;
- pr_debug("alg 0x%x\n", res->classid);
- return 0;
- }
- *res = f->res;
- pr_debug("map 0x%x\n", res->classid);
-
- return tcf_exts_exec(skb, &f->exts, res);
-}
-
-
-static void *tcindex_get(struct tcf_proto *tp, u32 handle)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r;
-
- pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
- if (p->perfect && handle >= p->alloc_hash)
- return NULL;
- r = tcindex_lookup(p, handle);
- return r && tcindex_filter_is_set(r) ? r : NULL;
-}
-
-static int tcindex_init(struct tcf_proto *tp)
-{
- struct tcindex_data *p;
-
- pr_debug("tcindex_init(tp %p)\n", tp);
- p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- p->mask = 0xffff;
- p->hash = DEFAULT_HASH_SIZE;
- p->fall_through = 1;
-
- rcu_assign_pointer(tp->root, p);
- return 0;
-}
-
-static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
-{
- tcf_exts_destroy(&r->exts);
- tcf_exts_put_net(&r->exts);
-}
-
-static void tcindex_destroy_rexts_work(struct work_struct *work)
-{
- struct tcindex_filter_result *r;
-
- r = container_of(to_rcu_work(work),
- struct tcindex_filter_result,
- rwork);
- rtnl_lock();
- __tcindex_destroy_rexts(r);
- rtnl_unlock();
-}
-
-static void __tcindex_destroy_fexts(struct tcindex_filter *f)
-{
- tcf_exts_destroy(&f->result.exts);
- tcf_exts_put_net(&f->result.exts);
- kfree(f);
-}
-
-static void tcindex_destroy_fexts_work(struct work_struct *work)
-{
- struct tcindex_filter *f = container_of(to_rcu_work(work),
- struct tcindex_filter,
- rwork);
-
- rtnl_lock();
- __tcindex_destroy_fexts(f);
- rtnl_unlock();
-}
-
-static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = arg;
- struct tcindex_filter __rcu **walk;
- struct tcindex_filter *f = NULL;
-
- pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
- if (p->perfect) {
- if (!r->res.class)
- return -ENOENT;
- } else {
- int i;
-
- for (i = 0; i < p->hash; i++) {
- walk = p->h + i;
- for (f = rtnl_dereference(*walk); f;
- walk = &f->next, f = rtnl_dereference(*walk)) {
- if (&f->result == r)
- goto found;
- }
- }
- return -ENOENT;
-
-found:
- rcu_assign_pointer(*walk, rtnl_dereference(f->next));
- }
- tcf_unbind_filter(tp, &r->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (f) {
- if (tcf_exts_get_net(&f->result.exts))
- tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
- else
- __tcindex_destroy_fexts(f);
- } else {
- if (tcf_exts_get_net(&r->exts))
- tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
- else
- __tcindex_destroy_rexts(r);
- }
-
- *last = false;
- return 0;
-}
-
-static void tcindex_destroy_work(struct work_struct *work)
-{
- struct tcindex_data *p = container_of(to_rcu_work(work),
- struct tcindex_data,
- rwork);
-
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
-}
-
-static inline int
-valid_perfect_hash(struct tcindex_data *p)
-{
- return p->hash > (p->mask >> p->shift);
-}
-
-static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
- [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
- [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
- [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
- [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
- [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
-};
-
-static int tcindex_filter_result_init(struct tcindex_filter_result *r)
-{
- memset(r, 0, sizeof(*r));
- return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
-}
-
-static void tcindex_partial_destroy_work(struct work_struct *work)
-{
- struct tcindex_data *p = container_of(to_rcu_work(work),
- struct tcindex_data,
- rwork);
-
- kfree(p->perfect);
- kfree(p);
-}
-
-static void tcindex_free_perfect_hash(struct tcindex_data *cp)
-{
- int i;
-
- for (i = 0; i < cp->hash; i++)
- tcf_exts_destroy(&cp->perfect[i].exts);
- kfree(cp->perfect);
-}
-
-static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
-{
- int i, err = 0;
-
- cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
- GFP_KERNEL);
- if (!cp->perfect)
- return -ENOMEM;
-
- for (i = 0; i < cp->hash; i++) {
- err = tcf_exts_init(&cp->perfect[i].exts,
- TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- if (err < 0)
- goto errout;
-#ifdef CONFIG_NET_CLS_ACT
- cp->perfect[i].exts.net = net;
-#endif
- }
-
- return 0;
-
-errout:
- tcindex_free_perfect_hash(cp);
- return err;
-}
-
-static int
-tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
- u32 handle, struct tcindex_data *p,
- struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
-{
- struct tcindex_filter_result new_filter_result, *old_r = r;
- struct tcindex_data *cp = NULL, *oldp;
- struct tcindex_filter *f = NULL; /* make gcc behave */
- struct tcf_result cr = {};
- int err, balloc = 0;
- struct tcf_exts e;
-
- err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
- if (err < 0)
- goto errout;
-
- err = -ENOMEM;
- /* tcindex_data attributes must look atomic to classifier/lookup so
- * allocate new tcindex data and RCU assign it onto root. Keeping
- * perfect hash and hash pointers from old data.
- */
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp)
- goto errout;
-
- cp->mask = p->mask;
- cp->shift = p->shift;
- cp->hash = p->hash;
- cp->alloc_hash = p->alloc_hash;
- cp->fall_through = p->fall_through;
- cp->tp = tp;
-
- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT])
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
-
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
- if (p->perfect) {
- int i;
-
- if (tcindex_alloc_perfect_hash(net, cp) < 0)
- goto errout;
- cp->alloc_hash = cp->hash;
- for (i = 0; i < min(cp->hash, p->hash); i++)
- cp->perfect[i].res = p->perfect[i].res;
- balloc = 1;
- }
- cp->h = p->h;
-
- err = tcindex_filter_result_init(&new_filter_result);
- if (err < 0)
- goto errout_alloc;
- if (old_r)
- cr = r->res;
-
- err = -EBUSY;
-
- /* Hash already allocated, make sure that we still meet the
- * requirements for the allocated hash.
- */
- if (cp->perfect) {
- if (!valid_perfect_hash(cp) ||
- cp->hash > cp->alloc_hash)
- goto errout_alloc;
- } else if (cp->h && cp->hash != cp->alloc_hash) {
- goto errout_alloc;
- }
-
- err = -EINVAL;
- if (tb[TCA_TCINDEX_FALL_THROUGH])
- cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
-
- if (!cp->perfect && !cp->h)
- cp->alloc_hash = cp->hash;
-
- /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
- * but then, we'd fail handles that may become valid after some future
- * mask change. While this is extremely unlikely to ever matter,
- * the check below is safer (and also more backwards-compatible).
- */
- if (cp->perfect || valid_perfect_hash(cp))
- if (handle >= cp->alloc_hash)
- goto errout_alloc;
-
-
- err = -ENOMEM;
- if (!cp->perfect && !cp->h) {
- if (valid_perfect_hash(cp)) {
- if (tcindex_alloc_perfect_hash(net, cp) < 0)
- goto errout_alloc;
- balloc = 1;
- } else {
- struct tcindex_filter __rcu **hash;
-
- hash = kcalloc(cp->hash,
- sizeof(struct tcindex_filter *),
- GFP_KERNEL);
-
- if (!hash)
- goto errout_alloc;
-
- cp->h = hash;
- balloc = 2;
- }
- }
-
- if (cp->perfect)
- r = cp->perfect + handle;
- else
- r = tcindex_lookup(cp, handle) ? : &new_filter_result;
-
- if (r == &new_filter_result) {
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (!f)
- goto errout_alloc;
- f->key = handle;
- f->next = NULL;
- err = tcindex_filter_result_init(&f->result);
- if (err < 0) {
- kfree(f);
- goto errout_alloc;
- }
- }
-
- if (tb[TCA_TCINDEX_CLASSID]) {
- cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
- tcf_bind_filter(tp, &cr, base);
- }
-
- if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r);
- if (err < 0) {
- kfree(f);
- goto errout_alloc;
- }
- }
-
- oldp = p;
- r->res = cr;
- tcf_exts_change(&r->exts, &e);
-
- rcu_assign_pointer(tp->root, cp);
-
- if (r == &new_filter_result) {
- struct tcindex_filter *nfp;
- struct tcindex_filter __rcu **fp;
-
- f->result.res = r->res;
- tcf_exts_change(&f->result.exts, &r->exts);
-
- fp = cp->h + (handle % cp->hash);
- for (nfp = rtnl_dereference(*fp);
- nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp))
- ; /* nothing */
-
- rcu_assign_pointer(*fp, f);
- } else {
- tcf_exts_destroy(&new_filter_result.exts);
- }
-
- if (oldp)
- tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
- return 0;
-
-errout_alloc:
- if (balloc == 1)
- tcindex_free_perfect_hash(cp);
- else if (balloc == 2)
- kfree(cp->h);
- tcf_exts_destroy(&new_filter_result.exts);
-errout:
- kfree(cp);
- tcf_exts_destroy(&e);
- return err;
-}
-
-static int
-tcindex_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_TCINDEX_MAX + 1];
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = *arg;
- int err;
-
- pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
- "p %p,r %p,*arg %p\n",
- tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL);
-
- if (!opt)
- return 0;
-
- err = nla_parse_nested(tb, TCA_TCINDEX_MAX, opt, tcindex_policy, NULL);
- if (err < 0)
- return err;
-
- return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], ovr, extack);
-}
-
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter *f, *next;
- int i;
-
- pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
- if (p->perfect) {
- for (i = 0; i < p->hash; i++) {
- if (!p->perfect[i].res.class)
- continue;
- if (walker->count >= walker->skip) {
- if (walker->fn(tp, p->perfect + i, walker) < 0) {
- walker->stop = 1;
- return;
- }
- }
- walker->count++;
- }
- }
- if (!p->h)
- return;
- for (i = 0; i < p->hash; i++) {
- for (f = rtnl_dereference(p->h[i]); f; f = next) {
- next = rtnl_dereference(f->next);
- if (walker->count >= walker->skip) {
- if (walker->fn(tp, &f->result, walker) < 0) {
- walker->stop = 1;
- return;
- }
- }
- walker->count++;
- }
- }
-}
-
-static void tcindex_destroy(struct tcf_proto *tp,
- struct netlink_ext_ack *extack)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- int i;
-
- pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
-
- if (p->perfect) {
- for (i = 0; i < p->hash; i++) {
- struct tcindex_filter_result *r = p->perfect + i;
-
- tcf_unbind_filter(tp, &r->res);
- if (tcf_exts_get_net(&r->exts))
- tcf_queue_work(&r->rwork,
- tcindex_destroy_rexts_work);
- else
- __tcindex_destroy_rexts(r);
- }
- }
-
- for (i = 0; p->h && i < p->hash; i++) {
- struct tcindex_filter *f, *next;
- bool last;
-
- for (f = rtnl_dereference(p->h[i]); f; f = next) {
- next = rtnl_dereference(f->next);
- tcindex_delete(tp, &f->result, &last, NULL);
- }
- }
-
- tcf_queue_work(&p->rwork, tcindex_destroy_work);
-}
-
-
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = fh;
- struct nlattr *nest;
-
- pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
- tp, fh, skb, t, p, r);
- pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
-
- nest = nla_nest_start(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (!fh) {
- t->tcm_handle = ~0; /* whatever ... */
- if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
- nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
- nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
- nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
- goto nla_put_failure;
- nla_nest_end(skb, nest);
- } else {
- if (p->perfect) {
- t->tcm_handle = r - p->perfect;
- } else {
- struct tcindex_filter *f;
- struct tcindex_filter __rcu **fp;
- int i;
-
- t->tcm_handle = 0;
- for (i = 0; !t->tcm_handle && i < p->hash; i++) {
- fp = &p->h[i];
- for (f = rtnl_dereference(*fp);
- !t->tcm_handle && f;
- fp = &f->next, f = rtnl_dereference(*fp)) {
- if (&f->result == r)
- t->tcm_handle = f->key;
- }
- }
- }
- pr_debug("handle = %d\n", t->tcm_handle);
- if (r->res.class &&
- nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &r->exts) < 0)
- goto nla_put_failure;
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &r->exts) < 0)
- goto nla_put_failure;
- }
-
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
- void *q, unsigned long base)
-{
- struct tcindex_filter_result *r = fh;
-
- if (r && r->res.classid == classid) {
- if (cl)
- __tcf_bind_filter(q, &r->res, base);
- else
- __tcf_unbind_filter(q, &r->res);
- }
-}
-
-static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
- .kind = "tcindex",
- .classify = tcindex_classify,
- .init = tcindex_init,
- .destroy = tcindex_destroy,
- .get = tcindex_get,
- .change = tcindex_change,
- .delete = tcindex_delete,
- .walk = tcindex_walk,
- .dump = tcindex_dump,
- .bind_class = tcindex_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_tcindex(void)
-{
- return register_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-static void __exit exit_tcindex(void)
-{
- unregister_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-module_init(init_tcindex)
-module_exit(exit_tcindex)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index fe246e03fcd9..1e71ff093c91 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -404,15 +404,20 @@ static int u32_init(struct tcf_proto *tp)
return 0;
}
-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
- bool free_pf)
+static void __u32_destroy_key(struct tc_u_knode *n)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- tcf_exts_put_net(&n->exts);
if (ht && --ht->refcnt == 0)
kfree(ht);
+ kfree(n);
+}
+
+static void u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
+ bool free_pf)
+{
+ tcf_exts_put_net(&n->exts);
#ifdef CONFIG_CLS_U32_PERF
if (free_pf)
free_percpu(n->pf);
@@ -421,8 +426,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
if (free_pf)
free_percpu(n->pcpu_success);
#endif
- kfree(n);
- return 0;
+ __u32_destroy_key(n);
}
/* u32_delete_key_rcu should be called when free'ing a copied
@@ -774,11 +778,22 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
int err;
+#ifdef CONFIG_NET_CLS_IND
+ int ifindex = -1;
+#endif
err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
if (err < 0)
return err;
+#ifdef CONFIG_NET_CLS_IND
+ if (tb[TCA_U32_INDEV]) {
+ ifindex = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
+ if (ifindex < 0)
+ return -EINVAL;
+ }
+#endif
+
if (tb[TCA_U32_LINK]) {
u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
struct tc_u_hnode *ht_down = NULL, *ht_old;
@@ -810,13 +825,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
}
#ifdef CONFIG_NET_CLS_IND
- if (tb[TCA_U32_INDEV]) {
- int ret;
- ret = tcf_change_indev(net, tb[TCA_U32_INDEV], extack);
- if (ret < 0)
- return -EINVAL;
- n->ifindex = ret;
- }
+ if (ifindex >= 0)
+ n->ifindex = ifindex;
#endif
return 0;
}
@@ -869,14 +879,9 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
new->ifindex = n->ifindex;
#endif
new->fshift = n->fshift;
- new->res = n->res;
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);
- /* bump reference count as long as we hold pointer to structure */
- if (ht)
- ht->refcnt++;
-
#ifdef CONFIG_CLS_U32_PERF
/* Statistics may be incremented by readers during update
* so we must keep them in tact. When the node is later destroyed
@@ -899,6 +904,10 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
return NULL;
}
+ /* bump reference count as long as we hold pointer to structure */
+ if (ht)
+ ht->refcnt++;
+
return new;
}
@@ -965,13 +974,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
tca[TCA_RATE], ovr, extack);
if (err) {
- u32_destroy_key(tp, new, false);
+ __u32_destroy_key(new);
return err;
}
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
- u32_destroy_key(tp, new, false);
+ __u32_destroy_key(new);
return err;
}
@@ -1057,18 +1066,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
+ /* At this point, we need to derive the new handle that will be used to
+ * uniquely map the identity of this table match entry. The
+ * identity of the entry that we need to construct is 32 bits made of:
+ * htid(12b):bucketid(8b):node/entryid(12b)
+ *
+ * At this point _we have the table(ht)_ in which we will insert this
+ * entry. We carry the table's id in variable "htid".
+ * Note that earlier code picked the ht selection either by a) the user
+ * providing the htid specified via TCA_U32_HASH attribute or b) when
+ * no such attribute is passed then the root ht, is default to at ID
+ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
+ * If OTOH the user passed us the htid, they may also pass a bucketid of
+ * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is
+ * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be
+ * passed via the htid, so even if it was non-zero it will be ignored.
+ *
+ * We may also have a handle, if the user passed one. The handle also
+ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
+ * Rule: the bucketid on the handle is ignored even if one was passed;
+ * rather the value on "htid" is always assumed to be the bucketid.
+ */
if (handle) {
+ /* Rule: The htid from handle and tableid from htid must match */
if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
return -EINVAL;
}
- handle = htid | TC_U32_NODE(handle);
- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
- GFP_KERNEL);
- if (err)
- return err;
- } else
+ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
+ * need to finalize the table entry identification with the last
+ * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for
+ * entries. Rule: nodeid of 0 is reserved only for tables(see
+ * earlier code which processes TC_U32_DIVISOR attribute).
+ * Rule: The nodeid can only be derived from the handle (and not
+ * htid).
+ * Rule: if the handle specified zero for the node id example
+ * 0x60000000, then pick a new nodeid from the pool of IDs
+ * this hash table has been allocating from.
+ * If OTOH it is specified (i.e for example the user passed a
+ * handle such as 0x60000123), then we use it generate our final
+ * handle which is used to uniquely identify the match entry.
+ */
+ if (!TC_U32_NODE(handle)) {
+ handle = gen_new_kid(ht, htid);
+ } else {
+ handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
+ handle, GFP_KERNEL);
+ if (err)
+ return err;
+ }
+ } else {
+ /* The user did not give us a handle; lets just generate one
+ * from the table's pool of nodeids.
+ */
handle = gen_new_kid(ht, htid);
+ }
if (tb[TCA_U32_SEL] == NULL) {
NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 73e2ed576ceb..cbf44783024f 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -101,8 +101,10 @@ retry:
static void em_text_destroy(struct tcf_ematch *m)
{
- if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
+ if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) {
textsearch_destroy(EM_TEXT_PRIV(m)->config);
+ kfree(EM_TEXT_PRIV(m));
+ }
}
static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 113a133ee544..5ba3548d2eb7 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -259,6 +259,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
* the value carried.
*/
if (em_hdr->flags & TCF_EM_SIMPLE) {
+ if (em->ops->datalen > 0)
+ goto errout;
if (data_len < sizeof(u32))
goto errout;
em->data = *(u32 *) data;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 39e319d04bb8..ab57c0ee9923 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -315,6 +315,24 @@ out:
return q;
}
+struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
+{
+ struct netdev_queue *nq;
+ struct Qdisc *q;
+
+ if (!handle)
+ return NULL;
+ q = qdisc_match_from_root(dev->qdisc, handle);
+ if (q)
+ goto out;
+
+ nq = dev_ingress_queue_rcu(dev);
+ if (nq)
+ q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
+out:
+ return q;
+}
+
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
unsigned long cl;
@@ -398,7 +416,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
{
struct qdisc_rate_table *rtab;
- if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
+ if (tab == NULL || r->rate == 0 ||
+ r->cell_log == 0 || r->cell_log >= 32 ||
nla_len(tab) != TC_RTAB_SIZE) {
NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
return NULL;
@@ -497,6 +516,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
return stab;
}
+ if (s->size_log > STAB_SIZE_LOG_MAX ||
+ s->cell_log > STAB_SIZE_LOG_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
+ return ERR_PTR(-EINVAL);
+ }
+
stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
if (!stab)
return ERR_PTR(-ENOMEM);
@@ -921,7 +946,7 @@ static void notify_and_destroy(struct net *net, struct sk_buff *skb,
qdisc_notify(net, skb, n, clid, old, new);
if (old)
- qdisc_destroy(old);
+ qdisc_put(old);
}
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
@@ -974,17 +999,18 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
qdisc_refcount_inc(new);
if (!ingress)
- qdisc_destroy(old);
+ qdisc_put(old);
}
skip:
if (!ingress) {
- notify_and_destroy(net, skb, n, classid,
- dev->qdisc, new);
+ old = dev->qdisc;
if (new && !new->ops->attach)
qdisc_refcount_inc(new);
dev->qdisc = new ? : &noop_qdisc;
+ notify_and_destroy(net, skb, n, classid, old, new);
+
if (new && new->ops->attach)
new->ops->attach(new);
} else {
@@ -1006,8 +1032,12 @@ skip:
unsigned long cl = cops->find(parent, classid);
if (cl) {
- err = cops->graft(parent, cl, new, &old,
- extack);
+ if (new && new->ops == &noqueue_qdisc_ops) {
+ NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
+ err = -EINVAL;
+ } else {
+ err = cops->graft(parent, cl, new, &old, extack);
+ }
} else {
NL_SET_ERR_MSG(extack, "Specified class not found");
err = -ENOENT;
@@ -1106,7 +1136,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
err = -ENOENT;
if (!ops) {
- NL_SET_ERR_MSG(extack, "Specified qdisc not found");
+ NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
goto err_out;
}
@@ -1119,7 +1149,12 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
sch->parent = parent;
if (handle == TC_H_INGRESS) {
- sch->flags |= TCQ_F_INGRESS;
+ if (!(sch->flags & TCQ_F_INGRESS)) {
+ NL_SET_ERR_MSG(extack,
+ "Specified parent ID is reserved for ingress and clsact Qdiscs");
+ err = -EINVAL;
+ goto err_out3;
+ }
handle = TC_H_MAKE(TC_H_INGRESS, 0);
lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
} else {
@@ -1404,10 +1439,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
return 0;
}
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+ return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+ !(n->nlmsg_flags & NLM_F_REPLACE) &&
+ !(n->nlmsg_flags & NLM_F_EXCL));
+}
+
/*
* Create/change qdisc.
*/
-
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -1480,11 +1533,20 @@ replay:
NL_SET_ERR_MSG(extack, "Invalid qdisc name");
return -EINVAL;
}
+ if (q->flags & TCQ_F_INGRESS) {
+ NL_SET_ERR_MSG(extack,
+ "Cannot regraft ingress or clsact Qdiscs");
+ return -EINVAL;
+ }
if (q == p ||
(p && check_loop(q, p, 0))) {
NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
return -ELOOP;
}
+ if (clid == TC_H_INGRESS) {
+ NL_SET_ERR_MSG(extack, "Ingress cannot graft directly");
+ return -EINVAL;
+ }
qdisc_refcount_inc(q);
goto graft;
} else {
@@ -1495,27 +1557,35 @@ replay:
*
* We know, that some child q is already
* attached to this parent and have choice:
- * either to change it or to create/graft new one.
+ * 1) change it or 2) create/graft new one.
+ * If the requested qdisc kind is different
+ * than the existing one, then we choose graft.
+ * If they are the same then this is "change"
+ * operation - just let it fallthrough..
*
* 1. We are allowed to create/graft only
- * if CREATE and REPLACE flags are set.
+ * if the request is explicitly stating
+ * "please create if it doesn't exist".
*
- * 2. If EXCL is set, requestor wanted to say,
- * that qdisc tcm_handle is not expected
+ * 2. If the request is to exclusive create
+ * then the qdisc tcm_handle is not expected
* to exist, so that we choose create/graft too.
*
* 3. The last case is when no flags are set.
+ * This will happen when for example tc
+ * utility issues a "change" command.
* Alas, it is sort of hole in API, we
* cannot decide what to do unambiguously.
- * For now we select create/graft, if
- * user gave KIND, which does not match existing.
+ * For now we select create/graft.
*/
- if ((n->nlmsg_flags & NLM_F_CREATE) &&
- (n->nlmsg_flags & NLM_F_REPLACE) &&
- ((n->nlmsg_flags & NLM_F_EXCL) ||
- (tca[TCA_KIND] &&
- nla_strcmp(tca[TCA_KIND], q->ops->id))))
- goto create_n_graft;
+ if (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ if (req_create_or_replace(n) ||
+ req_create_exclusive(n))
+ goto create_n_graft;
+ else if (req_change(n))
+ goto create_n_graft2;
+ }
}
}
} else {
@@ -1549,6 +1619,7 @@ create_n_graft:
NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
return -ENOENT;
}
+create_n_graft2:
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev)) {
q = qdisc_create(dev, dev_ingress_queue(dev), p,
@@ -1582,7 +1653,7 @@ graft:
err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
if (err) {
if (q)
- qdisc_destroy(q);
+ qdisc_put(q);
return err;
}
@@ -2047,7 +2118,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
struct tcmsg *tcm, struct netlink_callback *cb,
- int *t_p, int s_t)
+ int *t_p, int s_t, bool recur)
{
struct Qdisc *q;
int b;
@@ -2058,7 +2129,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
return -1;
- if (!qdisc_dev(root))
+ if (!qdisc_dev(root) || !recur)
return 0;
if (tcm->tcm_parent) {
@@ -2093,13 +2164,13 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;
- if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
+ if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
goto done;
dev_queue = dev_ingress_queue(dev);
if (dev_queue &&
tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
- &t, s_t) < 0)
+ &t, s_t, false) < 0)
goto done;
done:
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 91bd5c839303..ff825f40ea04 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -150,7 +150,7 @@ static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
pr_debug("atm_tc_put: destroying\n");
list_del_init(&flow->list);
pr_debug("atm_tc_put: qdisc %p\n", flow->q);
- qdisc_destroy(flow->q);
+ qdisc_put(flow->q);
tcf_block_put(flow->block);
if (flow->sock) {
pr_debug("atm_tc_put: f_count %ld\n",
@@ -394,10 +394,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
result = tcf_classify(skb, fl, &res, true);
if (result < 0)
continue;
+ if (result == TC_ACT_SHOT)
+ goto done;
+
flow = (struct atm_flow_data *)res.class;
if (!flow)
flow = lookup_flow(sch, res.classid);
- goto done;
+ goto drop;
}
}
flow = NULL;
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 32712e7dcbdc..d91665ea7b14 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -900,7 +900,7 @@ static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
}
tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
- if (!tcph)
+ if (!tcph || tcph->doff < 5)
return NULL;
return skb_header_pointer(skb, offset,
@@ -924,6 +924,8 @@ static const void *cake_get_tcpopt(const struct tcphdr *tcph,
length--;
continue;
}
+ if (length < 2)
+ break;
opsize = *ptr++;
if (opsize < 2 || opsize > length)
break;
@@ -1061,6 +1063,8 @@ static bool cake_tcph_may_drop(const struct tcphdr *tcph,
length--;
continue;
}
+ if (length < 2)
+ break;
opsize = *ptr++;
if (opsize < 2 || opsize > length)
break;
@@ -1645,7 +1649,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct cake_sched_data *q = qdisc_priv(sch);
int len = qdisc_pkt_len(skb);
- int uninitialized_var(ret);
+ int ret;
struct sk_buff *ack = NULL;
ktime_t now = ktime_get();
struct cake_tin_data *b;
@@ -2145,8 +2149,12 @@ retry:
static void cake_reset(struct Qdisc *sch)
{
+ struct cake_sched_data *q = qdisc_priv(sch);
u32 c;
+ if (!q->tins)
+ return;
+
for (c = 0; c < CAKE_MAX_TINS; c++)
cake_clear_tin(sch, c);
}
@@ -2671,7 +2679,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
GFP_KERNEL);
if (!q->tins)
- goto nomem;
+ return -ENOMEM;
for (i = 0; i < CAKE_MAX_TINS; i++) {
struct cake_tin_data *b = q->tins + i;
@@ -2701,10 +2709,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt,
q->min_netlen = ~0;
q->min_adjlen = ~0;
return 0;
-
-nomem:
- cake_destroy(sch);
- return -ENOMEM;
}
static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index ebc3c8c7e666..7f0a5d22deaf 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -236,6 +236,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
result = tcf_classify(skb, fl, &res, true);
if (!fl || result < 0)
goto fallback;
+ if (result == TC_ACT_SHOT)
+ return NULL;
cl = (void *)res.class;
if (!cl) {
@@ -256,8 +258,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
case TC_ACT_TRAP:
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
/* fall through */
- case TC_ACT_SHOT:
- return NULL;
case TC_ACT_RECLASSIFY:
return cbq_reclassify(skb, cl);
}
@@ -365,7 +365,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
- int uninitialized_var(ret);
+ int ret;
struct cbq_class *cl = cbq_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_ACT
@@ -1439,7 +1439,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
WARN_ON(cl->filters);
tcf_block_put(cl->block);
- qdisc_destroy(cl->q);
+ qdisc_put(cl->q);
qdisc_put_rtab(cl->R_tab);
gen_kill_estimator(&cl->rate_est);
if (cl != &q->link)
@@ -1616,7 +1616,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
if (err) {
kfree(cl);
- return err;
+ goto failure;
}
if (tca[TCA_RATE]) {
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index d5e22452d597..f95dc899e989 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -452,7 +452,7 @@ static void cbs_destroy(struct Qdisc *sch)
cbs_disable_offload(dev, q);
if (q->qdisc)
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 63bfceeb8e3c..8b72aed77ed4 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -355,6 +355,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
struct sk_buff **old = NULL;
unsigned int mask;
u32 max_P;
+ u8 *stab;
if (opt == NULL)
return -EINVAL;
@@ -370,8 +371,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
ctl = nla_data(tb[TCA_CHOKE_PARMS]);
-
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ stab = nla_data(tb[TCA_CHOKE_STAB]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
return -EINVAL;
if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -421,7 +422,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_CHOKE_STAB]),
+ stab,
max_P);
red_set_vars(&q->vars);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e0b0cf8a9939..cdebaed0f8cf 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -134,7 +134,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
tca[TCA_RATE]);
if (err) {
NL_SET_ERR_MSG(extack, "Failed to replace estimator");
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
return err;
}
@@ -153,7 +153,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
{
gen_kill_estimator(&cl->rate_est);
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 1c2fce8c45b2..47a61689dadb 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -402,7 +402,8 @@ static void dsmark_reset(struct Qdisc *sch)
struct dsmark_qdisc_data *p = qdisc_priv(sch);
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- qdisc_reset(p->q);
+ if (p->q)
+ qdisc_reset(p->q);
sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
@@ -414,7 +415,7 @@ static void dsmark_destroy(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
tcf_block_put(p->block);
- qdisc_destroy(p->q);
+ qdisc_put(p->q);
if (p->mv != p->embedded)
kfree(p->mv);
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 24893d3b5d22..3697cd799767 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -152,6 +152,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit)
if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
return 0;
+ if (!q->ops->change)
+ return 0;
+
nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
if (nla) {
nla->nla_type = RTM_NEWQDISC;
@@ -177,7 +180,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
if (q) {
err = fifo_set_limit(q, limit);
if (err < 0) {
- qdisc_destroy(q);
+ qdisc_put(q);
q = NULL;
}
}
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a862d9990be7..7a4777ee0536 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -192,7 +192,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct fq_codel_sched_data *q = qdisc_priv(sch);
unsigned int idx, prev_backlog, prev_qlen;
struct fq_codel_flow *flow;
- int uninitialized_var(ret);
+ int ret;
unsigned int pkt_len;
bool memory_limited;
@@ -382,6 +382,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
+ u32 quantum = 0;
int err;
if (!opt)
@@ -399,6 +400,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->flows_cnt > 65536)
return -EINVAL;
}
+ if (tb[TCA_FQ_CODEL_QUANTUM]) {
+ quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum > FQ_CODEL_QUANTUM_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid quantum");
+ return -EINVAL;
+ }
+ }
sch_tree_lock(sch);
if (tb[TCA_FQ_CODEL_TARGET]) {
@@ -425,8 +433,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_CODEL_ECN])
q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
- if (tb[TCA_FQ_CODEL_QUANTUM])
- q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum)
+ q->quantum = quantum;
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index bd96fd261dba..c966dacf1130 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -397,7 +397,7 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = dev_tx_weight;
+ int quota = READ_ONCE(dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
@@ -918,7 +918,7 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
if (!ops->init || ops->init(sch, NULL, extack) == 0)
return sch;
- qdisc_destroy(sch);
+ qdisc_put(sch);
return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);
@@ -958,19 +958,20 @@ void qdisc_free(struct Qdisc *qdisc)
kfree((char *) qdisc - qdisc->padded);
}
-void qdisc_destroy(struct Qdisc *qdisc)
+static void qdisc_free_cb(struct rcu_head *head)
+{
+ struct Qdisc *q = container_of(head, struct Qdisc, rcu);
+
+ qdisc_free(q);
+}
+
+static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops;
struct sk_buff *skb, *tmp;
- if (!qdisc)
- return;
ops = qdisc->ops;
- if (qdisc->flags & TCQ_F_BUILTIN ||
- !refcount_dec_and_test(&qdisc->refcnt))
- return;
-
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -995,9 +996,37 @@ void qdisc_destroy(struct Qdisc *qdisc)
kfree_skb_list(skb);
}
- qdisc_free(qdisc);
+ call_rcu(&qdisc->rcu, qdisc_free_cb);
}
-EXPORT_SYMBOL(qdisc_destroy);
+
+void qdisc_put(struct Qdisc *qdisc)
+{
+ if (!qdisc)
+ return;
+
+ if (qdisc->flags & TCQ_F_BUILTIN ||
+ !refcount_dec_and_test(&qdisc->refcnt))
+ return;
+
+ qdisc_destroy(qdisc);
+}
+EXPORT_SYMBOL(qdisc_put);
+
+/* Version of qdisc_put() that is called with rtnl mutex unlocked.
+ * Intended to be used as optimization, this function only takes rtnl lock if
+ * qdisc reference counter reached zero.
+ */
+
+void qdisc_put_unlocked(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN ||
+ !refcount_dec_and_rtnl_lock(&qdisc->refcnt))
+ return;
+
+ qdisc_destroy(qdisc);
+ rtnl_unlock();
+}
+EXPORT_SYMBOL(qdisc_put_unlocked);
/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
@@ -1116,10 +1145,13 @@ static void dev_deactivate_queue(struct net_device *dev,
void *_qdisc_default)
{
struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc);
+ struct Qdisc *qdisc_default = _qdisc_default;
if (qdisc) {
if (!(qdisc->flags & TCQ_F_BUILTIN))
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
}
}
@@ -1253,6 +1285,15 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
return 0;
}
+void dev_qdisc_change_real_num_tx(struct net_device *dev,
+ unsigned int new_real_tx)
+{
+ struct Qdisc *qdisc = dev->qdisc;
+
+ if (qdisc->ops->change_real_num_tx)
+ qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
+}
+
int dev_qdisc_change_tx_queue_len(struct net_device *dev)
{
bool up = dev->flags & IFF_UP;
@@ -1306,7 +1347,7 @@ static void shutdown_scheduler_queue(struct net_device *dev,
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
dev_queue->qdisc_sleeping = qdisc_default;
- qdisc_destroy(qdisc);
+ qdisc_put(qdisc);
}
}
@@ -1315,7 +1356,7 @@ void dev_shutdown(struct net_device *dev)
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- qdisc_destroy(dev->qdisc);
+ qdisc_put(dev->qdisc);
dev->qdisc = &noop_qdisc;
WARN_ON(timer_pending(&dev->watchdog_timer));
@@ -1327,6 +1368,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
+ r->mpu = conf->mpu;
r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
r->mult = 1;
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 4a042abf844c..db7a91a29b59 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -357,7 +357,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
struct gred_sched *table = qdisc_priv(sch);
struct gred_sched_data *q = table->tab[dp];
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
return -EINVAL;
if (!q) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 3278a76f6861..e71443623d67 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -913,6 +913,14 @@ hfsc_change_usc(struct hfsc_class *cl, struct tc_service_curve *usc,
cl->cl_flags |= HFSC_USC;
}
+static void
+hfsc_upgrade_rt(struct hfsc_class *cl)
+{
+ cl->cl_fsc = cl->cl_rsc;
+ rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total);
+ cl->cl_flags |= HFSC_FSC;
+}
+
static const struct nla_policy hfsc_policy[TCA_HFSC_MAX + 1] = {
[TCA_HFSC_RSC] = { .len = sizeof(struct tc_service_curve) },
[TCA_HFSC_FSC] = { .len = sizeof(struct tc_service_curve) },
@@ -1073,6 +1081,12 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
cl->cf_tree = RB_ROOT;
sch_tree_lock(sch);
+ /* Check if the inner class is a misconfigured 'rt' */
+ if (!(parent->cl_flags & HFSC_FSC) && parent != &q->root) {
+ NL_SET_ERR_MSG(extack,
+ "Forced curve change on parent 'rt' to 'sc'");
+ hfsc_upgrade_rt(parent);
+ }
qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
list_add_tail(&cl->siblings, &parent->children);
if (parent->level == 0)
@@ -1092,7 +1106,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl)
struct hfsc_sched *q = qdisc_priv(sch);
tcf_block_put(cl->block);
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
gen_kill_estimator(&cl->rate_est);
if (cl != &q->root)
kfree(cl);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 43c4bfe625a9..862a33b9e2e0 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1224,7 +1224,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
if (!cl->level) {
WARN_ON(!cl->un.leaf.q);
- qdisc_destroy(cl->un.leaf.q);
+ qdisc_put(cl->un.leaf.q);
}
gen_kill_estimator(&cl->rate_est);
tcf_block_put(cl->block);
@@ -1425,7 +1425,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
/* turn parent into inner node */
qdisc_reset(parent->un.leaf.q);
qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
- qdisc_destroy(parent->un.leaf.q);
+ qdisc_put(parent->un.leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index ce3f55259d0d..834960cc755e 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -83,6 +83,9 @@ static int ingress_init(struct Qdisc *sch, struct nlattr *opt,
struct ingress_sched_data *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ if (sch->parent != TC_H_INGRESS)
+ return -EOPNOTSUPP;
+
net_inc_ingress_queue();
mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
@@ -98,6 +101,9 @@ static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_sched_data *q = qdisc_priv(sch);
+ if (sch->parent != TC_H_INGRESS)
+ return;
+
tcf_block_put_ext(q->block, sch, &q->block_info);
net_dec_ingress_queue();
}
@@ -130,7 +136,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.cl_ops = &ingress_class_ops,
.id = "ingress",
.priv_size = sizeof(struct ingress_sched_data),
- .static_flags = TCQ_F_CPUSTATS,
+ .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS,
.init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
@@ -215,6 +221,9 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt,
struct net_device *dev = qdisc_dev(sch);
int err;
+ if (sch->parent != TC_H_CLSACT)
+ return -EOPNOTSUPP;
+
net_inc_ingress_queue();
net_inc_egress_queue();
@@ -242,6 +251,9 @@ static void clsact_destroy(struct Qdisc *sch)
{
struct clsact_sched_data *q = qdisc_priv(sch);
+ if (sch->parent != TC_H_CLSACT)
+ return;
+
tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info);
tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info);
@@ -262,7 +274,7 @@ static struct Qdisc_ops clsact_qdisc_ops __read_mostly = {
.cl_ops = &clsact_class_ops,
.id = "clsact",
.priv_size = sizeof(struct clsact_sched_data),
- .static_flags = TCQ_F_CPUSTATS,
+ .static_flags = TCQ_F_INGRESS | TCQ_F_CPUSTATS,
.init = clsact_init,
.destroy = clsact_destroy,
.dump = ingress_dump,
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index c008a316e943..0ab13a495af9 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -65,7 +65,7 @@ static void mq_destroy(struct Qdisc *sch)
if (!priv->qdiscs)
return;
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
- qdisc_destroy(priv->qdiscs[ntx]);
+ qdisc_put(priv->qdiscs[ntx]);
kfree(priv->qdiscs);
}
@@ -119,7 +119,7 @@ static void mq_attach(struct Qdisc *sch)
qdisc = priv->qdiscs[ntx];
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
if (old)
- qdisc_destroy(old);
+ qdisc_put(old);
#ifdef CONFIG_NET_SCHED
if (ntx < dev->real_num_tx_queues)
qdisc_hash_add(qdisc, false);
@@ -130,6 +130,29 @@ static void mq_attach(struct Qdisc *sch)
priv->qdiscs = NULL;
}
+static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
+{
+#ifdef CONFIG_NET_SCHED
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *qdisc;
+ unsigned int i;
+
+ for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ /* Only update the default qdiscs we created,
+ * qdiscs with handles are always hashed.
+ */
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_del(qdisc);
+ }
+ for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_add(qdisc, false);
+ }
+#endif
+}
+
static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct net_device *dev = qdisc_dev(sch);
@@ -285,6 +308,7 @@ struct Qdisc_ops mq_qdisc_ops __read_mostly = {
.init = mq_init,
.destroy = mq_destroy,
.attach = mq_attach,
+ .change_real_num_tx = mq_change_real_num_tx,
.dump = mq_dump,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 008db8d59ace..c0ab1e38e80c 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -40,7 +40,7 @@ static void mqprio_destroy(struct Qdisc *sch)
for (ntx = 0;
ntx < dev->num_tx_queues && priv->qdiscs[ntx];
ntx++)
- qdisc_destroy(priv->qdiscs[ntx]);
+ qdisc_put(priv->qdiscs[ntx]);
kfree(priv->qdiscs);
}
@@ -132,6 +132,97 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
+static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
+ struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *attr;
+ int i, rem, err;
+
+ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+ sizeof(*qopt));
+ if (err < 0)
+ return err;
+
+ if (!qopt->hw) {
+ NL_SET_ERR_MSG(extack,
+ "mqprio TCA_OPTIONS can only contain netlink attributes in hardware mode");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_MQPRIO_MODE]) {
+ priv->flags |= TC_MQPRIO_F_MODE;
+ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+ }
+
+ if (tb[TCA_MQPRIO_SHAPER]) {
+ priv->flags |= TC_MQPRIO_F_SHAPER;
+ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+ }
+
+ if (tb[TCA_MQPRIO_MIN_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MIN_RATE64],
+ "min_rate accepted only when shaper is in bw_rlimit mode");
+ return -EINVAL;
+ }
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
+ return -EINVAL;
+ }
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
+ if (i >= qopt->num_tc)
+ break;
+ priv->min_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MIN_RATE;
+ }
+
+ if (tb[TCA_MQPRIO_MAX_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[TCA_MQPRIO_MAX_RATE64],
+ "max_rate accepted only when shaper is in bw_rlimit mode");
+ return -EINVAL;
+ }
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
+ return -EINVAL;
+ }
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
+ if (i >= qopt->num_tc)
+ break;
+ priv->max_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MAX_RATE;
+ }
+
+ return 0;
+}
+
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -141,9 +232,6 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
- struct nlattr *tb[TCA_MQPRIO_MAX + 1];
- struct nlattr *attr;
- int rem;
int len;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
@@ -168,55 +256,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
if (len > 0) {
- err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
- sizeof(*qopt));
- if (err < 0)
+ err = mqprio_parse_nlattr(sch, qopt, opt, extack);
+ if (err)
return err;
-
- if (!qopt->hw)
- return -EINVAL;
-
- if (tb[TCA_MQPRIO_MODE]) {
- priv->flags |= TC_MQPRIO_F_MODE;
- priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
- }
-
- if (tb[TCA_MQPRIO_SHAPER]) {
- priv->flags |= TC_MQPRIO_F_SHAPER;
- priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
- }
-
- if (tb[TCA_MQPRIO_MIN_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->min_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MIN_RATE;
- }
-
- if (tb[TCA_MQPRIO_MAX_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->max_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MAX_RATE;
- }
}
/* pre-allocate qdisc, attachment can't fail */
@@ -300,7 +342,7 @@ static void mqprio_attach(struct Qdisc *sch)
qdisc = priv->qdiscs[ntx];
old = dev_graft_qdisc(qdisc->dev_queue, qdisc);
if (old)
- qdisc_destroy(old);
+ qdisc_put(old);
if (ntx < dev->real_num_tx_queues)
qdisc_hash_add(qdisc, false);
}
@@ -308,6 +350,28 @@ static void mqprio_attach(struct Qdisc *sch)
priv->qdiscs = NULL;
}
+static void mqprio_change_real_num_tx(struct Qdisc *sch,
+ unsigned int new_real_tx)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *qdisc;
+ unsigned int i;
+
+ for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ /* Only update the default qdiscs we created,
+ * qdiscs with handles are always hashed.
+ */
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_del(qdisc);
+ }
+ for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_add(qdisc, false);
+ }
+}
+
static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
unsigned long cl)
{
@@ -531,22 +595,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
for (i = tc.offset; i < tc.offset + tc.count; i++) {
struct netdev_queue *q = netdev_get_tx_queue(dev, i);
struct Qdisc *qdisc = rtnl_dereference(q->qdisc);
- struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
- struct gnet_stats_queue __percpu *cpu_qstats = NULL;
spin_lock_bh(qdisc_lock(qdisc));
+
if (qdisc_is_percpu_stats(qdisc)) {
- cpu_bstats = qdisc->cpu_bstats;
- cpu_qstats = qdisc->cpu_qstats;
+ qlen = qdisc_qlen_sum(qdisc);
+
+ __gnet_stats_copy_basic(NULL, &bstats,
+ qdisc->cpu_bstats,
+ &qdisc->bstats);
+ __gnet_stats_copy_queue(&qstats,
+ qdisc->cpu_qstats,
+ &qdisc->qstats,
+ qlen);
+ } else {
+ qlen += qdisc->q.qlen;
+ bstats.bytes += qdisc->bstats.bytes;
+ bstats.packets += qdisc->bstats.packets;
+ qstats.backlog += qdisc->qstats.backlog;
+ qstats.drops += qdisc->qstats.drops;
+ qstats.requeues += qdisc->qstats.requeues;
+ qstats.overlimits += qdisc->qstats.overlimits;
}
-
- qlen = qdisc_qlen_sum(qdisc);
- __gnet_stats_copy_basic(NULL, &sch->bstats,
- cpu_bstats, &qdisc->bstats);
- __gnet_stats_copy_queue(&sch->qstats,
- cpu_qstats,
- &qdisc->qstats,
- qlen);
spin_unlock_bh(qdisc_lock(qdisc));
}
@@ -626,6 +696,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
.init = mqprio_init,
.destroy = mqprio_destroy,
.attach = mqprio_attach,
+ .change_real_num_tx = mqprio_change_real_num_tx,
.dump = mqprio_dump,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 1df78e361ef9..1c2f9a3ab1ca 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -175,7 +175,7 @@ multiq_destroy(struct Qdisc *sch)
tcf_block_put(q->block);
for (band = 0; band < q->bands; band++)
- qdisc_destroy(q->queues[band]);
+ qdisc_put(q->queues[band]);
kfree(q->queues);
}
@@ -204,7 +204,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
q->queues[i] = &noop_qdisc;
qdisc_tree_reduce_backlog(child, child->q.qlen,
child->qstats.backlog);
- qdisc_destroy(child);
+ qdisc_put(child);
}
}
@@ -228,7 +228,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(old,
old->q.qlen,
old->qstats.backlog);
- qdisc_destroy(old);
+ qdisc_put(old);
}
sch_tree_unlock(sch);
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 014a28d8dd4f..cf93dbe3d040 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -330,7 +330,7 @@ static s64 tabledist(s64 mu, s32 sigma,
/* default uniform distribution */
if (dist == NULL)
- return ((rnd % (2 * sigma)) + mu) - sigma;
+ return ((rnd % (2 * (u32)sigma)) + mu) - sigma;
t = dist->table[rnd % dist->size];
x = (sigma % NETEM_DIST_SCALE) * t;
@@ -748,12 +748,10 @@ static void dist_free(struct disttable *d)
* signed 16 bit values.
*/
-static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
- const struct nlattr *attr)
+static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
{
size_t n = nla_len(attr)/sizeof(__s16);
const __s16 *data = nla_data(attr);
- spinlock_t *root_lock;
struct disttable *d;
int i;
@@ -768,13 +766,7 @@ static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
for (i = 0; i < n; i++)
d->table[i] = data[i];
- root_lock = qdisc_root_sleeping_lock(sch);
-
- spin_lock_bh(root_lock);
- swap(*tbl, d);
- spin_unlock_bh(root_lock);
-
- dist_free(d);
+ *tbl = d;
return 0;
}
@@ -787,6 +779,10 @@ static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
q->slot_config.max_packets = INT_MAX;
if (q->slot_config.max_bytes == 0)
q->slot_config.max_bytes = INT_MAX;
+
+ /* capping dist_jitter to the range acceptable by tabledist() */
+ q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));
+
q->slot.packets_left = q->slot_config.max_packets;
q->slot.bytes_left = q->slot_config.max_bytes;
if (q->slot_config.min_delay | q->slot_config.max_delay |
@@ -926,6 +922,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
{
struct netem_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_NETEM_MAX + 1];
+ struct disttable *delay_dist = NULL;
+ struct disttable *slot_dist = NULL;
struct tc_netem_qopt *qopt;
struct clgstate old_clg;
int old_loss_model = CLG_RANDOM;
@@ -939,6 +937,19 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
if (ret < 0)
return ret;
+ if (tb[TCA_NETEM_DELAY_DIST]) {
+ ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
+ if (ret)
+ goto table_free;
+ }
+
+ if (tb[TCA_NETEM_SLOT_DIST]) {
+ ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
+ if (ret)
+ goto table_free;
+ }
+
+ sch_tree_lock(sch);
/* backup q->clg and q->loss_model */
old_clg = q->clg;
old_loss_model = q->loss_model;
@@ -947,26 +958,17 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
if (ret) {
q->loss_model = old_loss_model;
- return ret;
+ q->clg = old_clg;
+ goto unlock;
}
} else {
q->loss_model = CLG_RANDOM;
}
- if (tb[TCA_NETEM_DELAY_DIST]) {
- ret = get_dist_table(sch, &q->delay_dist,
- tb[TCA_NETEM_DELAY_DIST]);
- if (ret)
- goto get_table_failure;
- }
-
- if (tb[TCA_NETEM_SLOT_DIST]) {
- ret = get_dist_table(sch, &q->slot_dist,
- tb[TCA_NETEM_SLOT_DIST]);
- if (ret)
- goto get_table_failure;
- }
-
+ if (delay_dist)
+ swap(q->delay_dist, delay_dist);
+ if (slot_dist)
+ swap(q->slot_dist, slot_dist);
sch->limit = qopt->limit;
q->latency = PSCHED_TICKS2NS(qopt->latency);
@@ -1011,15 +1013,15 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_NETEM_SLOT])
get_slot(q, tb[TCA_NETEM_SLOT]);
- return ret;
+ /* capping jitter to the range acceptable by tabledist() */
+ q->jitter = min_t(s64, abs(q->jitter), INT_MAX);
-get_table_failure:
- /* recover clg and loss_model, in case of
- * q->clg and q->loss_model were modified
- * in get_loss_clg()
- */
- q->clg = old_clg;
- q->loss_model = old_loss_model;
+unlock:
+ sch_tree_unlock(sch);
+
+table_free:
+ dist_free(delay_dist);
+ dist_free(slot_dist);
return ret;
}
@@ -1047,7 +1049,7 @@ static void netem_destroy(struct Qdisc *sch)
qdisc_watchdog_cancel(&q->watchdog);
if (q->qdisc)
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
dist_free(q->delay_dist);
dist_free(q->slot_dist);
}
@@ -1113,9 +1115,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct tc_netem_rate rate;
struct tc_netem_slot slot;
- qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
UINT_MAX);
- qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 5619d2eb17b6..4ddb4af61d10 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -214,7 +214,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.priv_size = sizeof(struct plug_sched_data),
.enqueue = plug_enqueue,
.dequeue = plug_dequeue,
- .peek = qdisc_peek_head,
+ .peek = qdisc_peek_dequeued,
.init = plug_init,
.change = plug_change,
.reset = qdisc_reset_queue,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 1cbbd8c31405..0e6f34bd9a68 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -175,7 +175,7 @@ prio_destroy(struct Qdisc *sch)
tcf_block_put(q->block);
prio_offload(sch, NULL);
for (prio = 0; prio < q->bands; prio++)
- qdisc_destroy(q->queues[prio]);
+ qdisc_put(q->queues[prio]);
}
static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
@@ -205,7 +205,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
extack);
if (!queues[i]) {
while (i > oldbands)
- qdisc_destroy(queues[--i]);
+ qdisc_put(queues[--i]);
return -ENOMEM;
}
}
@@ -220,7 +220,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(child, child->q.qlen,
child->qstats.backlog);
- qdisc_destroy(child);
+ qdisc_put(child);
}
for (i = oldbands; i < q->bands; i++) {
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index bb1a9c11fc54..4f246599734e 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -387,8 +387,13 @@ static int qfq_change_agg(struct Qdisc *sch, struct qfq_class *cl, u32 weight,
u32 lmax)
{
struct qfq_sched *q = qdisc_priv(sch);
- struct qfq_aggregate *new_agg = qfq_find_agg(q, lmax, weight);
+ struct qfq_aggregate *new_agg;
+ /* 'lmax' can range from [QFQ_MIN_LMAX, pktlen + stab overhead] */
+ if (lmax > (1UL << QFQ_MTU_SHIFT))
+ return -EINVAL;
+
+ new_agg = qfq_find_agg(q, lmax, weight);
if (new_agg == NULL) { /* create new aggregate */
new_agg = kzalloc(sizeof(*new_agg), GFP_ATOMIC);
if (new_agg == NULL)
@@ -433,15 +438,16 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
} else
weight = 1;
- if (tb[TCA_QFQ_LMAX]) {
+ if (tb[TCA_QFQ_LMAX])
lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
- if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
- pr_notice("qfq: invalid max length %u\n", lmax);
- return -EINVAL;
- }
- } else
+ else
lmax = psched_mtu(qdisc_dev(sch));
+ if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
+ pr_notice("qfq: invalid max length %u\n", lmax);
+ return -EINVAL;
+ }
+
inv_w = ONE_FP / weight;
weight = ONE_FP / inv_w;
@@ -497,11 +503,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
if (cl->qdisc != &noop_qdisc)
qdisc_hash_add(cl->qdisc, true);
- sch_tree_lock(sch);
- qdisc_class_hash_insert(&q->clhash, &cl->common);
- sch_tree_unlock(sch);
-
- qdisc_class_hash_grow(sch, &q->clhash);
set_change_agg:
sch_tree_lock(sch);
@@ -519,14 +520,17 @@ set_change_agg:
}
if (existing)
qfq_deact_rm_from_agg(q, cl);
+ else
+ qdisc_class_hash_insert(&q->clhash, &cl->common);
qfq_add_to_agg(q, new_agg, cl);
sch_tree_unlock(sch);
+ qdisc_class_hash_grow(sch, &q->clhash);
*arg = (unsigned long)cl;
return 0;
destroy_class:
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
return err;
}
@@ -537,7 +541,7 @@ static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
qfq_rm_from_agg(q, cl);
gen_kill_estimator(&cl->rate_est);
- qdisc_destroy(cl->qdisc);
+ qdisc_put(cl->qdisc);
kfree(cl);
}
@@ -984,10 +988,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
}
/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
-static void agg_dequeue(struct qfq_aggregate *agg,
- struct qfq_class *cl, unsigned int len)
+static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
+ struct qfq_class *cl, unsigned int len)
{
- qdisc_dequeue_peeked(cl->qdisc);
+ struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
+
+ if (!skb)
+ return NULL;
cl->deficit -= (int) len;
@@ -997,6 +1004,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
+
+ return skb;
}
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
@@ -1142,11 +1151,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
if (!skb)
return NULL;
- qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
+
+ skb = agg_dequeue(in_serv_agg, cl, len);
+
+ if (!skb) {
+ sch->q.qlen++;
+ return NULL;
+ }
+
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
- agg_dequeue(in_serv_agg, cl, len);
/* If lmax is lowered, through qfq_change_class, for a class
* owning pending packets with larger size than the new value
* of lmax, then the following condition may hold.
@@ -1432,10 +1448,8 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt,
if (err < 0)
return err;
- if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES)
- max_classes = QFQ_MAX_AGG_CLASSES;
- else
- max_classes = qdisc_dev(sch)->tx_queue_len + 1;
+ max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1,
+ QFQ_MAX_AGG_CLASSES);
/* max_cl_shift = floor(log_2(max_classes)) */
max_cl_shift = __fls(max_classes);
q->max_agg_classes = 1<<max_cl_shift;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 56c181c3feeb..afe0c2d689b1 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -63,6 +63,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct red_sched_data *q = qdisc_priv(sch);
struct Qdisc *child = q->qdisc;
+ unsigned int len;
int ret;
q->vars.qavg = red_calc_qavg(&q->parms,
@@ -98,9 +99,10 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
break;
}
+ len = qdisc_pkt_len(skb);
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
- qdisc_qstats_backlog_inc(sch, skb);
+ sch->qstats.backlog += len;
sch->q.qlen++;
} else if (net_xmit_drop_count(ret)) {
q->stats.pdrop++;
@@ -181,7 +183,7 @@ static void red_destroy(struct Qdisc *sch)
del_timer_sync(&q->adapt_timer);
red_offload(sch, false);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
@@ -199,6 +201,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *child = NULL;
int err;
u32 max_P;
+ u8 *stab;
if (opt == NULL)
return -EINVAL;
@@ -214,7 +217,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
ctl = nla_data(tb[TCA_RED_PARMS]);
- if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
+ stab = nla_data(tb[TCA_RED_STAB]);
+ if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
+ ctl->Scell_log, stab))
return -EINVAL;
if (ctl->limit > 0) {
@@ -233,14 +238,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
if (child) {
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
q->qdisc->qstats.backlog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
q->qdisc = child;
}
red_set_parms(&q->parms,
ctl->qth_min, ctl->qth_max, ctl->Wlog,
ctl->Plog, ctl->Scell_log,
- nla_data(tb[TCA_RED_STAB]),
+ stab,
max_P);
red_set_vars(&q->vars);
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 1aa95e761671..a8ef8efa62fc 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -139,15 +139,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
}
}
-static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
+static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q)
{
u32 sfbhash;
- sfbhash = sfb_hash(skb, 0);
+ sfbhash = cb->hashes[0];
if (sfbhash)
increment_one_qlen(sfbhash, 0, q);
- sfbhash = sfb_hash(skb, 1);
+ sfbhash = cb->hashes[1];
if (sfbhash)
increment_one_qlen(sfbhash, 1, q);
}
@@ -285,8 +285,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
{
struct sfb_sched_data *q = qdisc_priv(sch);
+ unsigned int len = qdisc_pkt_len(skb);
struct Qdisc *child = q->qdisc;
struct tcf_proto *fl;
+ struct sfb_skb_cb cb;
int i;
u32 p_min = ~0;
u32 minqlen = ~0;
@@ -403,11 +405,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
enqueue:
+ memcpy(&cb, sfb_skb_cb(skb), sizeof(cb));
ret = qdisc_enqueue(skb, child, to_free);
if (likely(ret == NET_XMIT_SUCCESS)) {
- qdisc_qstats_backlog_inc(sch, skb);
+ sch->qstats.backlog += len;
sch->q.qlen++;
- increment_qlen(skb, q);
+ increment_qlen(&cb, q);
} else if (net_xmit_drop_count(ret)) {
q->stats.childdrop++;
qdisc_qstats_drop(sch);
@@ -470,7 +473,7 @@ static void sfb_destroy(struct Qdisc *sch)
struct sfb_sched_data *q = qdisc_priv(sch);
tcf_block_put(q->block);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
@@ -524,7 +527,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
q->qdisc->qstats.backlog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
q->qdisc = child;
q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b89cf0971d3d..07721a1e98d8 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -353,7 +353,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
unsigned int hash, dropped;
sfq_index x, qlen;
struct sfq_slot *slot;
- int uninitialized_var(ret);
+ int ret;
struct sk_buff *head;
int delta;
@@ -651,7 +651,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
}
if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
- ctl_v1->Wlog))
+ ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
return -EINVAL;
if (ctl_v1 && ctl_v1->qth_min) {
p = kmalloc(sizeof(*p), GFP_KERNEL);
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 6f74a426f159..dd29de1418b7 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -392,7 +392,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
if (child) {
qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
q->qdisc->qstats.backlog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
q->qdisc = child;
}
q->limit = qopt->limit;
@@ -438,7 +438,7 @@ static void tbf_destroy(struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
qdisc_watchdog_cancel(&q->watchdog);
- qdisc_destroy(q->qdisc);
+ qdisc_put(q->qdisc);
}
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 7ed53124069c..163364124691 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -138,6 +138,9 @@ teql_destroy(struct Qdisc *sch)
struct teql_sched_data *dat = qdisc_priv(sch);
struct teql_master *master = dat->m;
+ if (!master)
+ return;
+
prev = master->slaves;
if (prev) {
do {
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index d17708800652..78c1429d1301 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1181,8 +1181,7 @@ int sctp_assoc_update(struct sctp_association *asoc,
/* Add any peer addresses from the new association. */
list_for_each_entry(trans, &new->peer.transport_addr_list,
transports)
- if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr) &&
- !sctp_assoc_add_peer(asoc, &trans->ipaddr,
+ if (!sctp_assoc_add_peer(asoc, &trans->ipaddr,
GFP_ATOMIC, trans->state))
return -ENOMEM;
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 2bd8c80bd85f..9cf61a18098a 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -880,12 +880,23 @@ int sctp_auth_set_key(struct sctp_endpoint *ep,
memcpy(key->data, &auth_key->sca_key[0], auth_key->sca_keylength);
cur_key->key = key;
- if (replace) {
- list_del_init(&shkey->key_list);
- sctp_auth_shkey_release(shkey);
+ if (!replace) {
+ list_add(&cur_key->key_list, sh_keys);
+ return 0;
}
+
+ list_del_init(&shkey->key_list);
list_add(&cur_key->key_list, sh_keys);
+ if (asoc && asoc->active_key_id == auth_key->sca_keynumber &&
+ sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
+ list_del_init(&cur_key->key_list);
+ sctp_auth_shkey_release(cur_key);
+ list_add(&shkey->key_list, sh_keys);
+ return -ENOMEM;
+ }
+
+ sctp_auth_shkey_release(shkey);
return 0;
}
@@ -914,8 +925,13 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep,
return -EINVAL;
if (asoc) {
+ __u16 active_key_id = asoc->active_key_id;
+
asoc->active_key_id = key_id;
- sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL);
+ if (sctp_auth_asoc_init_active_key(asoc, GFP_KERNEL)) {
+ asoc->active_key_id = active_key_id;
+ return -ENOMEM;
+ }
} else
ep->active_key_id = key_id;
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 38d01cfb313e..d723942e5e65 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -88,6 +88,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
}
}
+ /* If somehow no addresses were found that can be used with this
+ * scope, it's an error.
+ */
+ if (list_empty(&dest->address_list))
+ error = -ENETUNREACH;
+
out:
if (error)
sctp_bind_addr_clean(dest);
@@ -285,22 +291,19 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
rawaddr = (union sctp_addr_param *)raw_addr_list;
af = sctp_get_af_specific(param_type2af(param->type));
- if (unlikely(!af)) {
+ if (unlikely(!af) ||
+ !af->from_addr_param(&addr, rawaddr, htons(port), 0)) {
retval = -EINVAL;
- sctp_bind_addr_clean(bp);
- break;
+ goto out_err;
}
- af->from_addr_param(&addr, rawaddr, htons(port), 0);
if (sctp_bind_addr_state(bp, &addr) != -1)
goto next;
retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
SCTP_ADDR_SRC, gfp);
- if (retval) {
+ if (retval)
/* Can't finish building the list, clean up. */
- sctp_bind_addr_clean(bp);
- break;
- }
+ goto out_err;
next:
len = ntohs(param->length);
@@ -309,6 +312,12 @@ next:
}
return retval;
+
+out_err:
+ if (retval)
+ sctp_bind_addr_clean(bp);
+
+ return retval;
}
/********************************************************************
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index 8767405de9fa..5f10984bf0f5 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -76,10 +76,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r,
r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX;
r->idiag_retrans = asoc->rtx_data_chunks;
r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies);
- } else {
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
- r->idiag_expires = 0;
}
}
@@ -159,13 +155,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc,
r = nlmsg_data(nlh);
BUG_ON(!sk_fullsock(sk));
+ r->idiag_timer = 0;
+ r->idiag_retrans = 0;
+ r->idiag_expires = 0;
if (asoc) {
inet_diag_msg_sctpasoc_fill(r, sk, asoc);
} else {
inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
- r->idiag_timer = 0;
- r->idiag_retrans = 0;
}
if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin))
@@ -307,9 +304,8 @@ out:
return err;
}
-static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
+static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
struct sk_buff *skb = commp->skb;
@@ -319,6 +315,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p)
int err = 0;
lock_sock(sk);
+ if (ep != tsp->asoc->ep)
+ goto release;
list_for_each_entry(assoc, &ep->asocs, asocs) {
if (cb->args[4] < cb->args[1])
goto next;
@@ -361,9 +359,8 @@ release:
return err;
}
-static int sctp_sock_filter(struct sctp_transport *tsp, void *p)
+static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p)
{
- struct sctp_endpoint *ep = tsp->asoc->ep;
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
const struct inet_diag_req_v2 *r = commp->r;
@@ -521,8 +518,8 @@ skip:
if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE)))
goto done;
- sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump,
- net, &pos, &commp);
+ sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump,
+ net, &pos, &commp);
cb->args[2] = pos;
done:
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 8640dedcf64f..c4068451b9c7 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -242,6 +242,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep)
}
/* Final destructor for endpoint. */
+static void sctp_endpoint_destroy_rcu(struct rcu_head *head)
+{
+ struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu);
+ struct sock *sk = ep->base.sk;
+
+ sctp_sk(sk)->ep = NULL;
+ sock_put(sk);
+
+ kfree(ep);
+ SCTP_DBG_OBJCNT_DEC(ep);
+}
+
static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
{
struct sock *sk;
@@ -275,18 +287,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep)
if (sctp_sk(sk)->bind_hash)
sctp_put_port(sk);
- sctp_sk(sk)->ep = NULL;
- /* Give up our hold on the sock */
- sock_put(sk);
-
- kfree(ep);
- SCTP_DBG_OBJCNT_DEC(ep);
+ call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu);
}
/* Hold a reference to an endpoint. */
-void sctp_endpoint_hold(struct sctp_endpoint *ep)
+int sctp_endpoint_hold(struct sctp_endpoint *ep)
{
- refcount_inc(&ep->base.refcnt);
+ return refcount_inc_not_zero(&ep->base.refcnt);
}
/* Release a reference to an endpoint and clean up if there are
diff --git a/net/sctp/input.c b/net/sctp/input.c
index f64d882c8698..0e2503e536ed 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -104,6 +104,7 @@ int sctp_rcv(struct sk_buff *skb)
struct sctp_chunk *chunk;
union sctp_addr src;
union sctp_addr dest;
+ int bound_dev_if;
int family;
struct sctp_af *af;
struct net *net = dev_net(skb->dev);
@@ -181,7 +182,8 @@ int sctp_rcv(struct sk_buff *skb)
* If a frame arrives on an interface and the receiving socket is
* bound to another interface, via SO_BINDTODEVICE, treat it as OOTB
*/
- if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) {
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if && (bound_dev_if != af->skb_iif(skb))) {
if (transport) {
sctp_transport_put(transport);
asoc = NULL;
@@ -461,7 +463,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
else {
if (!mod_timer(&t->proto_unreach_timer,
jiffies + (HZ/20)))
- sctp_association_hold(asoc);
+ sctp_transport_hold(t);
}
} else {
struct net *net = sock_net(sk);
@@ -470,7 +472,7 @@ void sctp_icmp_proto_unreachable(struct sock *sk,
"encountered!\n", __func__);
if (del_timer(&t->proto_unreach_timer))
- sctp_association_put(asoc);
+ sctp_transport_put(t);
sctp_do_sm(net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_ICMP_PROTO_UNREACH),
@@ -687,7 +689,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb)
ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch);
/* Break out if chunk length is less then minimal. */
- if (ntohs(ch->length) < sizeof(_ch))
+ if (!ch || ntohs(ch->length) < sizeof(_ch))
break;
ch_end = offset + SCTP_PAD4(ntohs(ch->length));
@@ -1088,7 +1090,8 @@ static struct sctp_association *__sctp_rcv_init_lookup(struct net *net,
if (!af)
continue;
- af->from_addr_param(paddr, params.addr, sh->source, 0);
+ if (!af->from_addr_param(paddr, params.addr, sh->source, 0))
+ continue;
asoc = __sctp_lookup_association(net, laddr, paddr, transportp);
if (asoc)
@@ -1124,6 +1127,9 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
union sctp_addr_param *param;
union sctp_addr paddr;
+ if (ntohs(ch->length) < sizeof(*asconf) + sizeof(struct sctp_paramhdr))
+ return NULL;
+
/* Skip over the ADDIP header and find the Address parameter */
param = (union sctp_addr_param *)(asconf + 1);
@@ -1131,7 +1137,8 @@ static struct sctp_association *__sctp_rcv_asconf_lookup(
if (unlikely(!af))
return NULL;
- af->from_addr_param(&paddr, param, peer_port, 0);
+ if (!af->from_addr_param(&paddr, param, peer_port, 0))
+ return NULL;
return __sctp_lookup_association(net, laddr, &paddr, transportp);
}
@@ -1202,7 +1209,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net,
ch = (struct sctp_chunkhdr *)ch_end;
chunk_num++;
- } while (ch_end < skb_tail_pointer(skb));
+ } while (ch_end + sizeof(*ch) < skb_tail_pointer(skb));
return asoc;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 736d8ca9821b..fc82617b6076 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -542,15 +542,20 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
}
/* Initialize a sctp_addr from an address parameter. */
-static void sctp_v6_from_addr_param(union sctp_addr *addr,
+static bool sctp_v6_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
__be16 port, int iif)
{
+ if (ntohs(param->v6.param_hdr.length) < sizeof(struct sctp_ipv6addr_param))
+ return false;
+
addr->v6.sin6_family = AF_INET6;
addr->v6.sin6_port = port;
addr->v6.sin6_flowinfo = 0; /* BUG */
addr->v6.sin6_addr = param->v6.addr;
addr->v6.sin6_scope_id = iif;
+
+ return true;
}
/* Initialize an address parameter from a sctp_addr and return the length
@@ -655,8 +660,8 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
if (!(type & IPV6_ADDR_UNICAST))
return 0;
- return sp->inet.freebind || net->ipv6.sysctl.ip_nonlocal_bind ||
- ipv6_chk_addr(net, in6, NULL, 0);
+ return ipv6_can_nonlocal_bind(net, &sp->inet) ||
+ ipv6_chk_addr(net, in6, NULL, 0);
}
/* This function checks if the address is a valid address to be used for
@@ -945,8 +950,7 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
net = sock_net(&opt->inet.sk);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
- if (!dev || !(opt->inet.freebind ||
- net->ipv6.sysctl.ip_nonlocal_bind ||
+ if (!dev || !(ipv6_can_nonlocal_bind(net, &opt->inet) ||
ipv6_chk_addr(net, &addr->v6.sin6_addr,
dev, 0))) {
rcu_read_unlock();
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index a644292f9faf..84f79ac4b984 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -230,6 +230,12 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v)
{
struct sctp_ht_iter *iter = seq->private;
+ if (v && v != SEQ_START_TOKEN) {
+ struct sctp_transport *transport = v;
+
+ sctp_transport_put(transport);
+ }
+
sctp_transport_walk_stop(&iter->hti);
}
@@ -237,6 +243,12 @@ static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sctp_ht_iter *iter = seq->private;
+ if (v && v != SEQ_START_TOKEN) {
+ struct sctp_transport *transport = v;
+
+ sctp_transport_put(transport);
+ }
+
++*pos;
return sctp_transport_get_next(seq_file_net(seq), &iter->hti);
@@ -292,8 +304,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
sk->sk_rcvbuf);
seq_printf(seq, "\n");
- sctp_transport_put(transport);
-
return 0;
}
@@ -369,8 +379,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\n");
}
- sctp_transport_put(transport);
-
return 0;
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index af054f38341b..8db8209c5b61 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -268,14 +268,19 @@ static void sctp_v4_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
}
/* Initialize a sctp_addr from an address parameter. */
-static void sctp_v4_from_addr_param(union sctp_addr *addr,
+static bool sctp_v4_from_addr_param(union sctp_addr *addr,
union sctp_addr_param *param,
__be16 port, int iif)
{
+ if (ntohs(param->v4.param_hdr.length) < sizeof(struct sctp_ipv4addr_param))
+ return false;
+
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = port;
addr->v4.sin_addr.s_addr = param->v4.addr.s_addr;
memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
+
+ return true;
}
/* Initialize an address parameter from a sctp_addr and return the length
@@ -368,7 +373,7 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !net->ipv4.sysctl_ip_nonlocal_bind)
+ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
@@ -407,7 +412,8 @@ static enum sctp_scope sctp_v4_scope(union sctp_addr *addr)
retval = SCTP_SCOPE_LINK;
} else if (ipv4_is_private_10(addr->v4.sin_addr.s_addr) ||
ipv4_is_private_172(addr->v4.sin_addr.s_addr) ||
- ipv4_is_private_192(addr->v4.sin_addr.s_addr)) {
+ ipv4_is_private_192(addr->v4.sin_addr.s_addr) ||
+ ipv4_is_test_198(addr->v4.sin_addr.s_addr)) {
retval = SCTP_SCOPE_PRIVATE;
} else {
retval = SCTP_SCOPE_GLOBAL;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index ce6053be60bc..35e1fb708f4a 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2172,9 +2172,16 @@ static enum sctp_ierror sctp_verify_param(struct net *net,
break;
case SCTP_PARAM_SET_PRIMARY:
- if (net->sctp.addip_enable)
- break;
- goto fallthrough;
+ if (!net->sctp.addip_enable)
+ goto fallthrough;
+
+ if (ntohs(param.p->length) < sizeof(struct sctp_addip_param) +
+ sizeof(struct sctp_paramhdr)) {
+ sctp_process_inv_paramlength(asoc, param.p,
+ chunk, err_chunk);
+ retval = SCTP_IERROR_ABORT;
+ }
+ break;
case SCTP_PARAM_HOST_NAME_ADDRESS:
/* Tell the peer, we won't support this param. */
@@ -2352,11 +2359,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
/* Process the initialization parameters. */
sctp_walk_params(param, peer_init, init_hdr.params) {
- if (!src_match && (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
- param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
+ if (!src_match &&
+ (param.p->type == SCTP_PARAM_IPV4_ADDRESS ||
+ param.p->type == SCTP_PARAM_IPV6_ADDRESS)) {
af = sctp_get_af_specific(param_type2af(param.p->type));
- af->from_addr_param(&addr, param.addr,
- chunk->sctp_hdr->source, 0);
+ if (!af->from_addr_param(&addr, param.addr,
+ chunk->sctp_hdr->source, 0))
+ continue;
if (sctp_cmp_addr_exact(sctp_source(chunk), &addr))
src_match = 1;
}
@@ -2537,7 +2546,8 @@ static int sctp_process_param(struct sctp_association *asoc,
break;
do_addr_param:
af = sctp_get_af_specific(param_type2af(param.p->type));
- af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, param.addr, htons(asoc->peer.port), 0))
+ break;
scope = sctp_scope(peer_addr);
if (sctp_in_scope(net, &addr, scope))
if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED))
@@ -2634,15 +2644,13 @@ do_addr_param:
addr_param = param.v + sizeof(struct sctp_addip_param);
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
- if (af == NULL)
+ if (!af)
break;
- af->from_addr_param(&addr, addr_param,
- htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, addr_param,
+ htons(asoc->peer.port), 0))
+ break;
- /* if the address is invalid, we can't process it.
- * XXX: see spec for what to do.
- */
if (!af->addr_valid(&addr, NULL, NULL))
break;
@@ -3059,7 +3067,8 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
if (unlikely(!af))
return SCTP_ERROR_DNS_FAILED;
- af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0);
+ if (!af->from_addr_param(&addr, addr_param, htons(asoc->peer.port), 0))
+ return SCTP_ERROR_DNS_FAILED;
/* ADDIP 4.2.1 This parameter MUST NOT contain a broadcast
* or multicast address.
@@ -3148,7 +3157,7 @@ static __be16 sctp_process_asconf_param(struct sctp_association *asoc,
* primary.
*/
if (af->is_any(&addr))
- memcpy(&addr.v4, sctp_source(asconf), sizeof(addr));
+ memcpy(&addr, sctp_source(asconf), sizeof(addr));
if (security_sctp_bind_connect(asoc->ep->base.sk,
SCTP_PARAM_SET_PRIMARY,
@@ -3336,7 +3345,8 @@ static void sctp_asconf_param_success(struct sctp_association *asoc,
/* We have checked the packet before, so we do not check again. */
af = sctp_get_af_specific(param_type2af(addr_param->p.type));
- af->from_addr_param(&addr, addr_param, htons(bp->port), 0);
+ if (!af->from_addr_param(&addr, addr_param, htons(bp->port), 0))
+ return;
switch (asconf_param->param_hdr.type) {
case SCTP_PARAM_ADD_IP:
@@ -3663,7 +3673,7 @@ struct sctp_chunk *sctp_make_strreset_req(
outlen = (sizeof(outreq) + stream_len) * out;
inlen = (sizeof(inreq) + stream_len) * in;
- retval = sctp_make_reconf(asoc, outlen + inlen);
+ retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen));
if (!retval)
return NULL;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 567517e44811..c4a2d647e6cc 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -434,7 +434,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t)
/* Try again later. */
if (!mod_timer(&transport->proto_unreach_timer,
jiffies + (HZ/20)))
- sctp_association_hold(asoc);
+ sctp_transport_hold(transport);
goto out_unlock;
}
@@ -450,7 +450,7 @@ void sctp_generate_proto_unreach_event(struct timer_list *t)
out_unlock:
bh_unlock_sock(sk);
- sctp_association_put(asoc);
+ sctp_transport_put(transport);
}
/* Handle the timeout of the RE-CONFIG timer. */
@@ -473,6 +473,10 @@ void sctp_generate_reconf_event(struct timer_list *t)
goto out_unlock;
}
+ /* This happens when the response arrives after the timer is triggered. */
+ if (!asoc->strreset_chunk)
+ goto out_unlock;
+
error = sctp_do_sm(net, SCTP_EVENT_T_TIMEOUT,
SCTP_ST_TIMEOUT(SCTP_EVENT_TIMEOUT_RECONF),
asoc->state, asoc->ep, asoc,
@@ -1251,7 +1255,10 @@ static int sctp_side_effects(enum sctp_event event_type,
default:
pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
status, state, event_type, subtype.chunk);
- BUG();
+ error = status;
+ if (error >= 0)
+ error = -EINVAL;
+ WARN_ON_ONCE(1);
break;
}
@@ -1615,12 +1622,12 @@ static int sctp_cmd_interpreter(enum sctp_event event_type,
break;
case SCTP_CMD_INIT_FAILED:
- sctp_cmd_init_failed(commands, asoc, cmd->obj.u32);
+ sctp_cmd_init_failed(commands, asoc, cmd->obj.u16);
break;
case SCTP_CMD_ASSOC_FAILED:
sctp_cmd_assoc_failed(commands, asoc, event_type,
- subtype, chunk, cmd->obj.u32);
+ subtype, chunk, cmd->obj.u16);
break;
case SCTP_CMD_INIT_COUNTER_INC:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index a3033b74df54..8298f27e8de0 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -164,6 +164,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort(
void *arg,
struct sctp_cmd_seq *commands);
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands);
+
/* Small helper function that checks if the chunk length
* is of the appropriate length. The 'required_length' argument
* is set to be the size of a specific chunk we are testing.
@@ -345,6 +351,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
if (!chunk->singleton)
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Make sure that the INIT chunk has a valid length.
+ * Normally, this would cause an ABORT with a Protocol Violation
+ * error, but since we don't have an association, we'll
+ * just discard the packet.
+ */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
@@ -359,14 +373,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net,
if (chunk->sctp_hdr->vtag != 0)
return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
- /* Make sure that the INIT chunk has a valid length.
- * Normally, this would cause an ABORT with a Protocol Violation
- * error, but since we don't have an association, we'll
- * just discard the packet.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* If the INIT is coming toward a closing socket, we'll send back
* and ABORT. Essentially, this catches the race of INIT being
* backloged to the socket at the same time as the user isses close().
@@ -712,6 +718,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
struct sock *sk;
int error = 0;
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* If the packet is an OOTB packet which is temporarily on the
* control endpoint, respond with an ABORT.
*/
@@ -726,7 +735,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net,
* in sctp_unpack_cookie().
*/
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
/* If the endpoint is not listening or if the number of associations
* on the TCP-style socket exceed the max backlog, respond with an
@@ -1495,19 +1505,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init(
if (!chunk->singleton)
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ /* Make sure that the INIT chunk has a valid length. */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* 3.1 A packet containing an INIT chunk MUST have a zero Verification
* Tag.
*/
if (chunk->sctp_hdr->vtag != 0)
return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
- /* Make sure that the INIT chunk has a valid length.
- * In this case, we generate a protocol violation since we have
- * an association established.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
/* Grab the INIT header. */
chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data;
@@ -1825,9 +1832,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
* its peer.
*/
if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) {
- disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc,
- SCTP_ST_CHUNK(chunk->chunk_hdr->type),
- chunk, commands);
+ disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc,
+ SCTP_ST_CHUNK(chunk->chunk_hdr->type),
+ chunk, commands);
if (SCTP_DISPOSITION_NOMEM == disposition)
goto nomem;
@@ -1856,20 +1863,35 @@ static enum sctp_disposition sctp_sf_do_dupcook_a(
SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL());
- repl = sctp_make_cookie_ack(new_asoc, chunk);
+ /* Update the content of current association. */
+ if (sctp_assoc_update((struct sctp_association *)asoc, new_asoc)) {
+ struct sctp_chunk *abort;
+
+ abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
+ if (abort) {
+ sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ }
+ sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED));
+ sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED,
+ SCTP_PERR(SCTP_ERROR_RSRC_LOW));
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
+ goto nomem;
+ }
+
+ repl = sctp_make_cookie_ack(asoc, chunk);
if (!repl)
goto nomem;
/* Report association restart to upper layer. */
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
- new_asoc->c.sinit_num_ostreams,
- new_asoc->c.sinit_max_instreams,
+ asoc->c.sinit_num_ostreams,
+ asoc->c.sinit_max_instreams,
NULL, GFP_ATOMIC);
if (!ev)
goto nomem_ev;
- /* Update the content of current association. */
- sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
if ((sctp_state(asoc, SHUTDOWN_PENDING) ||
sctp_state(asoc, SHUTDOWN_SENT)) &&
@@ -1933,7 +1955,8 @@ static enum sctp_disposition sctp_sf_do_dupcook_b(
sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ if (asoc->state < SCTP_STATE_ESTABLISHED)
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
repl = sctp_make_cookie_ack(new_asoc, chunk);
@@ -2140,9 +2163,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook(
* enough for the chunk header. Cookie length verification is
* done later.
*/
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) {
+ if (!sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands);
+ }
/* "Decode" the chunk. We have no optional parameters so we
* are in good shape.
@@ -2279,7 +2304,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2325,7 +2350,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2595,7 +2620,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort(
*/
if (SCTP_ADDR_DEL ==
sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest))
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
if (!sctp_err_chunk_valid(chunk))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -2908,13 +2933,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn(
* that belong to this association, it should discard the INIT chunk and
* retransmit the SHUTDOWN ACK chunk.
*/
-enum sctp_disposition sctp_sf_do_9_2_reshutack(
- struct net *net,
- const struct sctp_endpoint *ep,
- const struct sctp_association *asoc,
- const union sctp_subtype type,
- void *arg,
- struct sctp_cmd_seq *commands)
+static enum sctp_disposition
+__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands)
{
struct sctp_chunk *chunk = arg;
struct sctp_chunk *reply;
@@ -2948,6 +2971,26 @@ nomem:
return SCTP_DISPOSITION_NOMEM;
}
+enum sctp_disposition
+sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep,
+ const struct sctp_association *asoc,
+ const union sctp_subtype type, void *arg,
+ struct sctp_cmd_seq *commands)
+{
+ struct sctp_chunk *chunk = arg;
+
+ if (!chunk->singleton)
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk)))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
+ if (chunk->sctp_hdr->vtag != 0)
+ return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands);
+
+ return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands);
+}
+
/*
* sctp_sf_do_ecn_cwr
*
@@ -3561,6 +3604,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net,
SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES);
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+
ch = (struct sctp_chunkhdr *)chunk->chunk_hdr;
do {
/* Report violation if the chunk is less then minimal */
@@ -3676,12 +3722,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5(
SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS);
- /* If the chunk length is invalid, we don't want to process
- * the reset of the packet.
- */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
-
/* We need to discard the rest of the packet to prevent
* potential bomming attacks from additional bundled chunks.
* This is documented in SCTP Threats ID.
@@ -3709,6 +3749,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (!sctp_vtag_verify(chunk, asoc))
+ asoc = NULL;
+
/* Make sure that the SHUTDOWN_ACK chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -3744,6 +3787,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
+ /* Make sure that the ASCONF ADDIP chunk has a valid length. */
+ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
+
/* ADD-IP: Section 4.1.1
* This chunk MUST be sent in an authenticated way by using
* the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3751,13 +3799,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net,
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
if (!net->sctp.addip_noauth && !chunk->auth)
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
- commands);
-
- /* Make sure that the ASCONF ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
hdr = (struct sctp_addiphdr *)chunk->skb->data;
serial = ntohl(hdr->serial);
@@ -3886,6 +3928,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
}
+ /* Make sure that the ADDIP chunk has a valid length. */
+ if (!sctp_chunk_length_valid(asconf_ack,
+ sizeof(struct sctp_addip_chunk)))
+ return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
+ commands);
+
/* ADD-IP, Section 4.1.2:
* This chunk MUST be sent in an authenticated way by using
* the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk
@@ -3893,14 +3941,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net,
* described in [I-D.ietf-tsvwg-sctp-auth].
*/
if (!net->sctp.addip_noauth && !asconf_ack->auth)
- return sctp_sf_discard_chunk(net, ep, asoc, type, arg,
- commands);
-
- /* Make sure that the ADDIP chunk has a valid length. */
- if (!sctp_chunk_length_valid(asconf_ack,
- sizeof(struct sctp_addip_chunk)))
- return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
- commands);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data;
rcvd_serial = ntohl(addip_hdr->serial);
@@ -4351,7 +4392,7 @@ enum sctp_disposition sctp_sf_eat_auth(struct net *net,
SCTP_AUTH_NEW_KEY, GFP_ATOMIC);
if (!ev)
- return -ENOMEM;
+ return SCTP_DISPOSITION_NOMEM;
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP,
SCTP_ULPEVENT(ev));
@@ -4472,6 +4513,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (asoc && !sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Make sure that the chunk has a valid length.
* Since we don't know the chunk type, we use a general
* chunkhdr structure to make a comparison.
@@ -4539,6 +4583,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net,
{
struct sctp_chunk *chunk = arg;
+ if (!sctp_vtag_verify(chunk, asoc))
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+
/* Make sure that the chunk has a valid length. */
if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr)))
return sctp_sf_violation_chunklen(net, ep, asoc, type, arg,
@@ -6245,6 +6292,7 @@ static struct sctp_packet *sctp_ootb_pkt_new(
* yet.
*/
switch (chunk->chunk_hdr->type) {
+ case SCTP_CID_INIT:
case SCTP_CID_INIT_ACK:
{
struct sctp_initack_chunk *initack;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 4a2873f70b37..f954d3c8876d 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -112,7 +112,7 @@ struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
{
- sctp_memory_pressure = 1;
+ WRITE_ONCE(sctp_memory_pressure, 1);
}
@@ -375,6 +375,18 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
return af;
}
+static void sctp_auto_asconf_init(struct sctp_sock *sp)
+{
+ struct net *net = sock_net(&sp->inet.sk);
+
+ if (net->sctp.default_auto_asconf) {
+ spin_lock_bh(&net->sctp.addr_wq_lock);
+ list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist);
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
+ sp->do_auto_asconf = 1;
+ }
+}
+
/* Bind a local address either to an endpoint or to an association. */
static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
{
@@ -437,8 +449,10 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
}
/* Refresh ephemeral port. */
- if (!bp->port)
+ if (!bp->port) {
bp->port = inet_sk(sk)->inet_num;
+ sctp_auto_asconf_init(sp);
+ }
/* Add the address to the bind address list.
* Use GFP_ATOMIC since BHs will be disabled.
@@ -1939,6 +1953,10 @@ static int sctp_sendmsg_to_asoc(struct sctp_association *asoc,
err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len);
if (err)
goto err;
+ if (unlikely(sinfo->sinfo_stream >= asoc->stream.outcnt)) {
+ err = -EINVAL;
+ goto err;
+ }
}
if (sctp_state(asoc, CLOSED)) {
@@ -2560,6 +2578,7 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
if (trans) {
trans->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
+ sctp_transport_reset_hb_timer(trans);
} else if (asoc) {
asoc->hbinterval =
msecs_to_jiffies(params->spp_hbinterval);
@@ -4776,19 +4795,6 @@ static int sctp_init_sock(struct sock *sk)
sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
- /* Nothing can fail after this block, otherwise
- * sctp_destroy_sock() will be called without addr_wq_lock held
- */
- if (net->sctp.default_auto_asconf) {
- spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
- list_add_tail(&sp->auto_asconf_list,
- &net->sctp.auto_asconf_splist);
- sp->do_auto_asconf = 1;
- spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
- } else {
- sp->do_auto_asconf = 0;
- }
-
local_bh_enable();
return 0;
@@ -4823,13 +4829,17 @@ static void sctp_destroy_sock(struct sock *sk)
}
/* Triggered when there are no references on the socket anymore */
-static void sctp_destruct_sock(struct sock *sk)
+static void sctp_destruct_common(struct sock *sk)
{
struct sctp_sock *sp = sctp_sk(sk);
/* Free up the HMAC transform. */
crypto_free_shash(sp->hmac);
+}
+static void sctp_destruct_sock(struct sock *sk)
+{
+ sctp_destruct_common(sk);
inet_sock_destruct(sk);
}
@@ -5055,11 +5065,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *),
}
EXPORT_SYMBOL_GPL(sctp_transport_lookup_process);
-int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
- int (*cb_done)(struct sctp_transport *, void *),
- struct net *net, int *pos, void *p) {
+int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
+ struct net *net, int *pos, void *p)
+{
struct rhashtable_iter hti;
struct sctp_transport *tsp;
+ struct sctp_endpoint *ep;
int ret;
again:
@@ -5068,26 +5079,32 @@ again:
tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
- ret = cb(tsp, p);
- if (ret)
- break;
+ ep = tsp->asoc->ep;
+ if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */
+ ret = cb(ep, tsp, p);
+ if (ret)
+ break;
+ sctp_endpoint_put(ep);
+ }
(*pos)++;
sctp_transport_put(tsp);
}
sctp_transport_walk_stop(&hti);
if (ret) {
- if (cb_done && !cb_done(tsp, p)) {
+ if (cb_done && !cb_done(ep, tsp, p)) {
(*pos)++;
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
goto again;
}
+ sctp_endpoint_put(ep);
sctp_transport_put(tsp);
}
return ret;
}
-EXPORT_SYMBOL_GPL(sctp_for_each_transport);
+EXPORT_SYMBOL_GPL(sctp_transport_traverse_process);
/* 7.2.1 Association Status (SCTP_STATUS)
@@ -5325,7 +5342,7 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
* Set the daddr and initialize id to something more random and also
* copy over any ip options.
*/
- sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sk);
+ sp->pf->to_sk_daddr(&asoc->peer.primary_addr, sock->sk);
sp->pf->copy_ip_options(sk, sock->sk);
/* Populate the fields of the newsk from the oldsk and migrate the
@@ -8752,7 +8769,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
sctp_sk(newsk)->reuse = sp->reuse;
newsk->sk_shutdown = sk->sk_shutdown;
- newsk->sk_destruct = sctp_destruct_sock;
+ newsk->sk_destruct = sk->sk_destruct;
newsk->sk_family = sk->sk_family;
newsk->sk_protocol = IPPROTO_SCTP;
newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
@@ -8848,6 +8865,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
sctp_bind_addr_dup(&newsp->ep->base.bind_addr,
&oldsp->ep->base.bind_addr, GFP_KERNEL);
+ sctp_auto_asconf_init(newsp);
+
/* Move any messages in the old socket's receive queue that are for the
* peeled off association to the new socket's receive queue.
*/
@@ -8970,11 +8989,20 @@ struct proto sctp_prot = {
#if IS_ENABLED(CONFIG_IPV6)
-#include <net/transp_v6.h>
-static void sctp_v6_destroy_sock(struct sock *sk)
+static void sctp_v6_destruct_sock(struct sock *sk)
+{
+ sctp_destruct_common(sk);
+ inet6_sock_destruct(sk);
+}
+
+static int sctp_v6_init_sock(struct sock *sk)
{
- sctp_destroy_sock(sk);
- inet6_destroy_sock(sk);
+ int ret = sctp_init_sock(sk);
+
+ if (!ret)
+ sk->sk_destruct = sctp_v6_destruct_sock;
+
+ return ret;
}
struct proto sctpv6_prot = {
@@ -8984,8 +9012,8 @@ struct proto sctpv6_prot = {
.disconnect = sctp_disconnect,
.accept = sctp_accept,
.ioctl = sctp_ioctl,
- .init = sctp_init_sock,
- .destroy = sctp_v6_destroy_sock,
+ .init = sctp_v6_init_sock,
+ .destroy = sctp_destroy_sock,
.shutdown = sctp_shutdown,
.setsockopt = sctp_setsockopt,
.getsockopt = sctp_getsockopt,
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 0a78cdf86463..3290e6f5b6c6 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -1151,7 +1151,8 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn)
#define _sctp_walk_ifwdtsn(pos, chunk, end) \
for (pos = chunk->subh.ifwdtsn_hdr->skip; \
- (void *)pos < (void *)chunk->subh.ifwdtsn_hdr->skip + (end); pos++)
+ (void *)pos <= (void *)chunk->subh.ifwdtsn_hdr->skip + (end) - \
+ sizeof(struct sctp_ifwdtsn_skip); pos++)
#define sctp_walk_ifwdtsn(pos, ch) \
_sctp_walk_ifwdtsn((pos), (ch), ntohs((ch)->chunk_hdr->length) - \
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index a6c04a94b08f..3a5c0d00e96c 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -178,7 +178,7 @@ int sctp_sched_set_sched(struct sctp_association *asoc,
if (!SCTP_SO(&asoc->stream, i)->ext)
continue;
- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
if (ret)
goto err;
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index c0d55ed62d2e..78302e547b2b 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -148,7 +148,7 @@ void sctp_transport_free(struct sctp_transport *transport)
/* Delete the ICMP proto unreachable timer if it's active. */
if (del_timer(&transport->proto_unreach_timer))
- sctp_association_put(transport->asoc);
+ sctp_transport_put(transport);
sctp_transport_put(transport);
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 26dcd02b2d0c..ad0ac657fe12 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -141,34 +141,34 @@ static int smc_release(struct socket *sock)
if (!smc->use_fallback) {
rc = smc_close_active(smc);
- sock_set_flag(sk, SOCK_DEAD);
+ smc_sock_set_flag(sk, SOCK_DEAD);
sk->sk_shutdown |= SHUTDOWN_MASK;
- }
-
- sk->sk_prot->unhash(sk);
-
- if (smc->clcsock) {
- if (smc->use_fallback && sk->sk_state == SMC_LISTEN) {
+ } else {
+ if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+ sock_put(sk); /* passive closing */
+ if (sk->sk_state == SMC_LISTEN) {
/* wake up clcsock accept */
rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
}
- mutex_lock(&smc->clcsock_release_lock);
- sock_release(smc->clcsock);
- smc->clcsock = NULL;
- mutex_unlock(&smc->clcsock_release_lock);
- }
- if (smc->use_fallback) {
- if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
- sock_put(sk); /* passive closing */
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
}
+ sk->sk_prot->unhash(sk);
+
+ if (sk->sk_state == SMC_CLOSED) {
+ if (smc->clcsock) {
+ release_sock(sk);
+ smc_clcsock_release(smc);
+ lock_sock(sk);
+ }
+ if (!smc->use_fallback)
+ smc_conn_free(&smc->conn);
+ }
+
/* detach socket */
sock_orphan(sk);
sock->sk = NULL;
- if (!smc->use_fallback && sk->sk_state == SMC_CLOSED)
- smc_conn_free(&smc->conn);
release_sock(sk);
sock_put(sk); /* final sock_put */
@@ -852,7 +852,7 @@ static int smc_clcsock_accept(struct smc_sock *lsmc, struct smc_sock **new_smc)
if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
- sock_set_flag(new_sk, SOCK_DEAD);
+ smc_sock_set_flag(new_sk, SOCK_DEAD);
sock_put(new_sk); /* final */
*new_smc = NULL;
goto out;
@@ -1013,13 +1013,13 @@ static void smc_listen_out(struct smc_sock *new_smc)
struct smc_sock *lsmc = new_smc->listen_smc;
struct sock *newsmcsk = &new_smc->sk;
- lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
if (lsmc->sk.sk_state == SMC_LISTEN) {
+ lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING);
smc_accept_enqueue(&lsmc->sk, newsmcsk);
+ release_sock(&lsmc->sk);
} else { /* no longer listening */
smc_close_non_accepted(newsmcsk);
}
- release_sock(&lsmc->sk);
/* Wake up accept */
lsmc->sk.sk_data_ready(&lsmc->sk);
@@ -1031,7 +1031,6 @@ static void smc_listen_out_connected(struct smc_sock *new_smc)
{
struct sock *newsmcsk = &new_smc->sk;
- sk_refcnt_debug_inc(newsmcsk);
if (newsmcsk->sk_state == SMC_INIT)
newsmcsk->sk_state = SMC_ACTIVE;
@@ -1216,6 +1215,9 @@ static void smc_listen_work(struct work_struct *work)
int rc = 0;
u8 ibport;
+ if (new_smc->listen_smc->sk.sk_state != SMC_LISTEN)
+ return smc_listen_out_err(new_smc);
+
if (new_smc->use_fallback) {
smc_listen_out_connected(new_smc);
return;
@@ -1589,8 +1591,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
static int smc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
+ bool do_shutdown = true;
struct smc_sock *smc;
int rc = -EINVAL;
+ int old_state;
int rc1 = 0;
smc = smc_sk(sk);
@@ -1617,7 +1621,11 @@ static int smc_shutdown(struct socket *sock, int how)
}
switch (how) {
case SHUT_RDWR: /* shutdown in both directions */
+ old_state = sk->sk_state;
rc = smc_close_active(smc);
+ if (old_state == SMC_ACTIVE &&
+ sk->sk_state == SMC_PEERCLOSEWAIT1)
+ do_shutdown = false;
break;
case SHUT_WR:
rc = smc_close_shutdown_write(smc);
@@ -1627,7 +1635,7 @@ static int smc_shutdown(struct socket *sock, int how)
/* nothing more to do because peer is not involved */
break;
}
- if (smc->clcsock)
+ if (do_shutdown && smc->clcsock)
rc1 = kernel_sock_shutdown(smc->clcsock, how);
/* map sock_shutdown_cmd constants to sk_shutdown value range */
sk->sk_shutdown |= how + 1;
@@ -1644,6 +1652,9 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
struct smc_sock *smc;
int val, rc;
+ if (level == SOL_TCP && optname == TCP_ULP)
+ return -EOPNOTSUPP;
+
smc = smc_sk(sk);
/* generic setsockopts reaching us here always apply to the
@@ -1665,7 +1676,6 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
lock_sock(sk);
switch (optname) {
- case TCP_ULP:
case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
case TCP_FASTOPEN_KEY:
diff --git a/net/smc/smc.h b/net/smc/smc.h
index adbdf195eb08..c3b0e1e3f505 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -268,4 +268,9 @@ static inline bool using_ipsec(struct smc_sock *smc)
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock);
void smc_close_non_accepted(struct sock *sk);
+static inline void smc_sock_set_flag(struct sock *sk, enum sock_flags flag)
+{
+ set_bit(flag, &sk->sk_flags);
+}
+
#endif /* __SMC_H */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 333e4353498f..c657fd29ff5d 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -304,7 +304,7 @@ static void smc_cdc_msg_recv_action(struct smc_sock *smc,
smc->sk.sk_shutdown |= RCV_SHUTDOWN;
if (smc->clcsock && smc->clcsock->sk)
smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
- sock_set_flag(&smc->sk, SOCK_DONE);
+ smc_sock_set_flag(&smc->sk, SOCK_DONE);
sock_hold(&smc->sk); /* sock_put in close_work */
if (!schedule_work(&conn->close_work))
sock_put(&smc->sk);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index aa9a17ac1f7b..063acfbdcd89 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -162,7 +162,8 @@ static int smc_clc_prfx_set(struct socket *clcsock,
goto out_rel;
}
/* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
+ goto out_rel;
/* analyze IP specific data of net_device belonging to TCP socket */
addr6 = (struct sockaddr_in6 *)&addrs;
rcu_read_lock();
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index ea2b87f29469..4ea28ec7ad13 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -21,6 +21,22 @@
#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME (5 * HZ)
+/* release the clcsock that is assigned to the smc_sock */
+void smc_clcsock_release(struct smc_sock *smc)
+{
+ struct socket *tcp;
+
+ if (smc->listen_smc && current_work() != &smc->smc_listen_work)
+ cancel_work_sync(&smc->smc_listen_work);
+ mutex_lock(&smc->clcsock_release_lock);
+ if (smc->clcsock) {
+ tcp = smc->clcsock;
+ smc->clcsock = NULL;
+ sock_release(tcp);
+ }
+ mutex_unlock(&smc->clcsock_release_lock);
+}
+
static void smc_close_cleanup_listen(struct sock *parent)
{
struct sock *sk;
@@ -148,7 +164,7 @@ static void smc_close_active_abort(struct smc_sock *smc)
break;
}
- sock_set_flag(sk, SOCK_DEAD);
+ smc_sock_set_flag(sk, SOCK_DEAD);
sk->sk_state_change(sk);
}
@@ -167,6 +183,7 @@ int smc_close_active(struct smc_sock *smc)
int old_state;
long timeout;
int rc = 0;
+ int rc1 = 0;
timeout = current->flags & PF_EXITING ?
0 : sock_flag(sk, SOCK_LINGER) ?
@@ -202,6 +219,15 @@ again:
if (rc)
break;
sk->sk_state = SMC_PEERCLOSEWAIT1;
+
+ /* actively shutdown clcsock before peer close it,
+ * prevent peer from entering TIME_WAIT state.
+ */
+ if (smc->clcsock && smc->clcsock->sk) {
+ rc1 = kernel_sock_shutdown(smc->clcsock,
+ SHUT_RDWR);
+ rc = rc ? rc : rc1;
+ }
} else {
/* peer event has changed the state */
goto again;
@@ -321,6 +347,7 @@ static void smc_close_passive_work(struct work_struct *work)
close_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
struct smc_cdc_conn_state_flags *rxflags;
+ bool release_clcsock = false;
struct sock *sk = &smc->sk;
int old_state;
@@ -405,10 +432,15 @@ wakeup:
if (old_state != sk->sk_state) {
sk->sk_state_change(sk);
if ((sk->sk_state == SMC_CLOSED) &&
- (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
+ (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
smc_conn_free(conn);
+ if (smc->clcsock)
+ release_clcsock = true;
+ }
}
release_sock(sk);
+ if (release_clcsock)
+ smc_clcsock_release(smc);
sock_put(sk); /* sock_hold done by schedulers of close_work */
}
diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h
index 19eb6a211c23..e0e3b5df25d2 100644
--- a/net/smc/smc_close.h
+++ b/net/smc/smc_close.h
@@ -23,5 +23,6 @@ void smc_close_wake_tx_prepared(struct smc_sock *smc);
int smc_close_active(struct smc_sock *smc);
int smc_close_shutdown_write(struct smc_sock *smc);
void smc_close_init(struct smc_sock *smc);
+void smc_clcsock_release(struct smc_sock *smc);
#endif /* SMC_CLOSE_H */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2c9baf8bf118..4d421407d6fc 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -321,8 +321,8 @@ void smc_conn_free(struct smc_connection *conn)
} else {
smc_cdc_tx_dismiss_slots(conn);
}
- smc_lgr_unregister_conn(conn); /* unsets conn->lgr */
smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+ smc_lgr_unregister_conn(conn); /* unsets conn->lgr */
if (!lgr->conns_num)
smc_lgr_schedule_free_work(lgr);
@@ -609,7 +609,8 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
!lgr->sync_err &&
lgr->vlan_id == vlan_id &&
(role == SMC_CLNT ||
- lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) {
+ (lgr->conns_num < SMC_RMBS_PER_LGR_MAX &&
+ !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) {
/* link group found */
local_contact = SMC_REUSE_CONTACT;
conn->lgr = lgr;
@@ -708,7 +709,7 @@ static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
*/
static inline int smc_rmb_wnd_update_limit(int rmbe_size)
{
- return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
+ return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
}
static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
@@ -770,7 +771,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
return buf_desc;
}
-#define SMCD_DMBE_SIZES 7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
+#define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr,
bool is_dmb, int bufsize)
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 36340912df48..a7a4e3ce211a 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -349,12 +349,12 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg,
}
break;
}
+ if (!timeo)
+ return -EAGAIN;
if (signal_pending(current)) {
read_done = sock_intr_errno(timeo);
break;
}
- if (!timeo)
- return -EAGAIN;
}
if (!smc_rx_data_available(conn)) {
diff --git a/net/socket.c b/net/socket.c
index 29169045dcfe..49ac98cfda42 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -90,6 +90,7 @@
#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/nospec.h>
+#include <linux/indirect_call_wrapper.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -108,6 +109,13 @@
#include <net/busy_poll.h>
#include <linux/errqueue.h>
+/* proto_ops for ipv4 and ipv6 use the same {recv,send}msg function */
+#if IS_ENABLED(CONFIG_INET)
+#define INDIRECT_CALL_INET4(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_INET4(f, f1, ...) f(__VA_ARGS__)
+#endif
+
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
unsigned int sysctl_net_busy_poll __read_mostly;
@@ -384,6 +392,18 @@ static struct file_system_type sock_fs_type = {
* but we take care of internal coherence yet.
*/
+/**
+ * sock_alloc_file - Bind a &socket to a &file
+ * @sock: socket
+ * @flags: file status flags
+ * @dname: protocol name
+ *
+ * Returns the &file bound with @sock, implicitly storing it
+ * in sock->file. If dname is %NULL, sets to "".
+ * On failure the return is a ERR pointer (see linux/err.h).
+ * This function uses GFP_KERNEL internally.
+ */
+
struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
{
struct file *file;
@@ -424,6 +444,14 @@ static int sock_map_fd(struct socket *sock, int flags)
return PTR_ERR(newfile);
}
+/**
+ * sock_from_file - Return the &socket bounded to @file.
+ * @file: file
+ * @err: pointer to an error code return
+ *
+ * On failure returns %NULL and assigns -ENOTSOCK to @err.
+ */
+
struct socket *sock_from_file(struct file *file, int *err)
{
if (file->f_op == &socket_file_ops)
@@ -532,11 +560,11 @@ static const struct inode_operations sockfs_inode_ops = {
};
/**
- * sock_alloc - allocate a socket
+ * sock_alloc - allocate a socket
*
* Allocate a new inode and socket object. The two are bound together
* and initialised. The socket is then returned. If we are out of inodes
- * NULL is returned.
+ * NULL is returned. This functions uses GFP_KERNEL internally.
*/
struct socket *sock_alloc(void)
@@ -561,7 +589,7 @@ struct socket *sock_alloc(void)
EXPORT_SYMBOL(sock_alloc);
/**
- * sock_release - close a socket
+ * sock_release - close a socket
* @sock: socket to close
*
* The socket is released from the protocol stack if it has a release
@@ -617,22 +645,64 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
}
EXPORT_SYMBOL(__sock_tx_timestamp);
+INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
+ size_t));
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
+ int ret = INDIRECT_CALL_INET4(sock->ops->sendmsg, inet_sendmsg, sock,
+ msg, msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
return ret;
}
-int sock_sendmsg(struct socket *sock, struct msghdr *msg)
+static int __sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
int err = security_socket_sendmsg(sock, msg,
msg_data_left(msg));
return err ?: sock_sendmsg_nosec(sock, msg);
}
+
+/**
+ * sock_sendmsg - send a message through @sock
+ * @sock: socket
+ * @msg: message to send
+ *
+ * Sends @msg through @sock, passing through LSM.
+ * Returns the number of bytes sent, or an error code.
+ */
+int sock_sendmsg(struct socket *sock, struct msghdr *msg)
+{
+ struct sockaddr_storage *save_addr = (struct sockaddr_storage *)msg->msg_name;
+ struct sockaddr_storage address;
+ int save_len = msg->msg_namelen;
+ int ret;
+
+ if (msg->msg_name) {
+ memcpy(&address, msg->msg_name, msg->msg_namelen);
+ msg->msg_name = &address;
+ }
+
+ ret = __sock_sendmsg(sock, msg);
+ msg->msg_name = save_addr;
+ msg->msg_namelen = save_len;
+
+ return ret;
+}
EXPORT_SYMBOL(sock_sendmsg);
+/**
+ * kernel_sendmsg - send a message through @sock (kernel-space)
+ * @sock: socket
+ * @msg: message header
+ * @vec: kernel vec
+ * @num: vec array length
+ * @size: total message data size
+ *
+ * Builds the message data with @vec and sends it through @sock.
+ * Returns the number of bytes sent, or an error code.
+ */
+
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
@@ -641,6 +711,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_sendmsg);
+/**
+ * kernel_sendmsg_locked - send a message through @sock (kernel-space)
+ * @sk: sock
+ * @msg: message header
+ * @vec: output s/g array
+ * @num: output s/g array length
+ * @size: total message data size
+ *
+ * Builds the message data with @vec and sends it through @sock.
+ * Returns the number of bytes sent, or an error code.
+ * Caller must hold @sk.
+ */
+
int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
@@ -789,12 +872,24 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
+ size_t , int ));
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- return sock->ops->recvmsg(sock, msg, msg_data_left(msg), flags);
+ return INDIRECT_CALL_INET4(sock->ops->recvmsg, inet_recvmsg, sock, msg,
+ msg_data_left(msg), flags);
}
+/**
+ * sock_recvmsg - receive a message from @sock
+ * @sock: socket
+ * @msg: message to receive
+ * @flags: message flags
+ *
+ * Receives @msg from @sock, passing through LSM. Returns the total number
+ * of bytes received, or an error.
+ */
int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
{
int err = security_socket_recvmsg(sock, msg, msg_data_left(msg), flags);
@@ -804,20 +899,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
EXPORT_SYMBOL(sock_recvmsg);
/**
- * kernel_recvmsg - Receive a message from a socket (kernel space)
- * @sock: The socket to receive the message from
- * @msg: Received message
- * @vec: Input s/g array for message data
- * @num: Size of input s/g array
- * @size: Number of bytes to read
- * @flags: Message flags (MSG_DONTWAIT, etc...)
+ * kernel_recvmsg - Receive a message from a socket (kernel space)
+ * @sock: The socket to receive the message from
+ * @msg: Received message
+ * @vec: Input s/g array for message data
+ * @num: Size of input s/g array
+ * @size: Number of bytes to read
+ * @flags: Message flags (MSG_DONTWAIT, etc...)
*
- * On return the msg structure contains the scatter/gather array passed in the
- * vec argument. The array is modified so that it consists of the unfilled
- * portion of the original array.
+ * On return the msg structure contains the scatter/gather array passed in the
+ * vec argument. The array is modified so that it consists of the unfilled
+ * portion of the original array.
*
- * The returned value is the total number of bytes received, or an error.
+ * The returned value is the total number of bytes received, or an error.
*/
+
int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size, int flags)
{
@@ -898,7 +994,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
- res = sock_sendmsg(sock, &msg);
+ res = __sock_sendmsg(sock, &msg);
*from = msg.msg_iter;
return res;
}
@@ -965,7 +1061,7 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
rtnl_unlock();
if (!err && copy_to_user(argp, &ifc, sizeof(struct ifconf)))
err = -EFAULT;
- } else {
+ } else if (is_socket_ioctl_cmd(cmd)) {
struct ifreq ifr;
bool need_copyout;
if (copy_from_user(&ifr, argp, sizeof(struct ifreq)))
@@ -974,6 +1070,8 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
if (!err && need_copyout)
if (copy_to_user(argp, &ifr, sizeof(struct ifreq)))
return -EFAULT;
+ } else {
+ err = -ENOTTY;
}
return err;
}
@@ -983,12 +1081,6 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
* what to do with it - that's up to the protocol still.
*/
-struct ns_common *get_net_ns(struct ns_common *ns)
-{
- return &get_net(container_of(ns, struct net, ns))->ns;
-}
-EXPORT_SYMBOL_GPL(get_net_ns);
-
static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct socket *sock;
@@ -1077,6 +1169,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
return err;
}
+/**
+ * sock_create_lite - creates a socket
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * Creates a new socket and assigns it to @res, passing through LSM.
+ * The new socket initialization is not complete, see kernel_accept().
+ * Returns 0 or an error. On failure @res is set to %NULL.
+ * This function internally uses GFP_KERNEL.
+ */
+
int sock_create_lite(int family, int type, int protocol, struct socket **res)
{
int err;
@@ -1202,6 +1307,21 @@ call_kill:
}
EXPORT_SYMBOL(sock_wake_async);
+/**
+ * __sock_create - creates a socket
+ * @net: net namespace
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ * @kern: boolean for kernel space sockets
+ *
+ * Creates a new socket and assigns it to @res, passing through LSM.
+ * Returns 0 or an error. On failure @res is set to %NULL. @kern must
+ * be set to true if the socket resides in kernel space.
+ * This function internally uses GFP_KERNEL.
+ */
+
int __sock_create(struct net *net, int family, int type, int protocol,
struct socket **res, int kern)
{
@@ -1311,12 +1431,35 @@ out_release:
}
EXPORT_SYMBOL(__sock_create);
+/**
+ * sock_create - creates a socket
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * A wrapper around __sock_create().
+ * Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
int sock_create(int family, int type, int protocol, struct socket **res)
{
return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
}
EXPORT_SYMBOL(sock_create);
+/**
+ * sock_create_kern - creates a socket (kernel space)
+ * @net: net namespace
+ * @family: protocol family (AF_INET, ...)
+ * @type: communication type (SOCK_STREAM, ...)
+ * @protocol: protocol (0, ...)
+ * @res: new socket
+ *
+ * A wrapper around __sock_create().
+ * Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
{
return __sock_create(net, family, type, protocol, res, 1);
@@ -1507,7 +1650,7 @@ int __sys_listen(int fd, int backlog)
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock) {
- somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
+ somaxconn = READ_ONCE(sock_net(sock->sk)->core.sysctl_somaxconn);
if ((unsigned int)backlog > somaxconn)
backlog = somaxconn;
@@ -1784,7 +1927,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = sock_sendmsg(sock, &msg);
+ err = __sock_sendmsg(sock, &msg);
out_put:
fput_light(sock->file, fput_needed);
@@ -2112,7 +2255,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
err = sock_sendmsg_nosec(sock, msg_sys);
goto out_freectl;
}
- err = sock_sendmsg(sock, msg_sys);
+ err = __sock_sendmsg(sock, msg_sys);
/*
* If this is sendmmsg() and sending to current destination address was
* successful, remember it.
@@ -2443,7 +2586,7 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
* error to return on the next call or if the
* app asks about it using getsockopt(SO_ERROR).
*/
- sock->sk->sk_err = -err;
+ WRITE_ONCE(sock->sk->sk_err, -err);
}
out_put:
fput_light(sock->file, fput_needed);
@@ -2954,6 +3097,8 @@ static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd,
struct ifreq ifreq;
u32 data32;
+ if (!is_socket_ioctl_cmd(cmd))
+ return -ENOTTY;
if (copy_from_user(ifreq.ifr_name, u_ifreq32->ifr_name, IFNAMSIZ))
return -EFAULT;
if (get_user(data32, &u_ifreq32->ifr_data))
@@ -3273,18 +3418,50 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
}
#endif
+/**
+ * kernel_bind - bind an address to a socket (kernel space)
+ * @sock: socket
+ * @addr: address
+ * @addrlen: length of address
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
{
- return sock->ops->bind(sock, addr, addrlen);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return sock->ops->bind(sock, (struct sockaddr *)&address, addrlen);
}
EXPORT_SYMBOL(kernel_bind);
+/**
+ * kernel_listen - move socket to listening state (kernel space)
+ * @sock: socket
+ * @backlog: pending connections queue size
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_listen(struct socket *sock, int backlog)
{
return sock->ops->listen(sock, backlog);
}
EXPORT_SYMBOL(kernel_listen);
+/**
+ * kernel_accept - accept a connection (kernel space)
+ * @sock: listening socket
+ * @newsock: new connected socket
+ * @flags: flags
+ *
+ * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
+ * If it fails, @newsock is guaranteed to be %NULL.
+ * Returns 0 or an error.
+ */
+
int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
{
struct sock *sk = sock->sk;
@@ -3310,25 +3487,72 @@ done:
}
EXPORT_SYMBOL(kernel_accept);
+/**
+ * kernel_connect - connect a socket (kernel space)
+ * @sock: socket
+ * @addr: address
+ * @addrlen: address length
+ * @flags: flags (O_NONBLOCK, ...)
+ *
+ * For datagram sockets, @addr is the addres to which datagrams are sent
+ * by default, and the only address from which datagrams are received.
+ * For stream sockets, attempts to connect to @addr.
+ * Returns 0 or an error code.
+ */
+
int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
int flags)
{
- return sock->ops->connect(sock, addr, addrlen, flags);
+ struct sockaddr_storage address;
+
+ memcpy(&address, addr, addrlen);
+
+ return sock->ops->connect(sock, (struct sockaddr *)&address, addrlen, flags);
}
EXPORT_SYMBOL(kernel_connect);
+/**
+ * kernel_getsockname - get the address which the socket is bound (kernel space)
+ * @sock: socket
+ * @addr: address holder
+ *
+ * Fills the @addr pointer with the address which the socket is bound.
+ * Returns 0 or an error code.
+ */
+
int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
return sock->ops->getname(sock, addr, 0);
}
EXPORT_SYMBOL(kernel_getsockname);
+/**
+ * kernel_peername - get the address which the socket is connected (kernel space)
+ * @sock: socket
+ * @addr: address holder
+ *
+ * Fills the @addr pointer with the address which the socket is connected.
+ * Returns 0 or an error code.
+ */
+
int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
return sock->ops->getname(sock, addr, 1);
}
EXPORT_SYMBOL(kernel_getpeername);
+/**
+ * kernel_getsockopt - get a socket option (kernel space)
+ * @sock: socket
+ * @level: API level (SOL_SOCKET, ...)
+ * @optname: option tag
+ * @optval: option value
+ * @optlen: option length
+ *
+ * Assigns the option length to @optlen.
+ * Returns 0 or an error.
+ */
+
int kernel_getsockopt(struct socket *sock, int level, int optname,
char *optval, int *optlen)
{
@@ -3351,6 +3575,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(kernel_getsockopt);
+/**
+ * kernel_setsockopt - set a socket option (kernel space)
+ * @sock: socket
+ * @level: API level (SOL_SOCKET, ...)
+ * @optname: option tag
+ * @optval: option value
+ * @optlen: option length
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_setsockopt(struct socket *sock, int level, int optname,
char *optval, unsigned int optlen)
{
@@ -3371,6 +3606,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
}
EXPORT_SYMBOL(kernel_setsockopt);
+/**
+ * kernel_sendpage - send a &page through a socket (kernel space)
+ * @sock: socket
+ * @page: page
+ * @offset: page offset
+ * @size: total size in bytes
+ * @flags: flags (MSG_DONTWAIT, ...)
+ *
+ * Returns the total amount sent in bytes or an error.
+ */
+
int kernel_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
@@ -3381,6 +3627,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage);
+/**
+ * kernel_sendpage_locked - send a &page through the locked sock (kernel space)
+ * @sk: sock
+ * @page: page
+ * @offset: page offset
+ * @size: total size in bytes
+ * @flags: flags (MSG_DONTWAIT, ...)
+ *
+ * Returns the total amount sent in bytes or an error.
+ * Caller must hold @sk.
+ */
+
int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
@@ -3394,17 +3652,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(kernel_sendpage_locked);
+/**
+ * kernel_shutdown - shut down part of a full-duplex connection (kernel space)
+ * @sock: socket
+ * @how: connection part
+ *
+ * Returns 0 or an error.
+ */
+
int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
{
return sock->ops->shutdown(sock, how);
}
EXPORT_SYMBOL(kernel_sock_shutdown);
-/* This routine returns the IP overhead imposed by a socket i.e.
- * the length of the underlying IP header, depending on whether
- * this is an IPv4 or IPv6 socket and the length from IP options turned
- * on at the socket. Assumes that the caller has a lock on the socket.
+/**
+ * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
+ * @sk: socket
+ *
+ * This routine returns the IP overhead imposed by a socket i.e.
+ * the length of the underlying IP header, depending on whether
+ * this is an IPv4 or IPv6 socket and the length from IP options turned
+ * on at the socket. Assumes that the caller has a lock on the socket.
*/
+
u32 kernel_sock_ip_overhead(struct sock *sk)
{
struct inet_sock *inet;
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 8391c2785550..7404f02702a1 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -184,7 +184,7 @@ static int rpc_parse_scope_id(struct net *net, const char *buf,
scope_id = dev->ifindex;
dev_put(dev);
} else {
- if (kstrtou32(p, 10, &scope_id) == 0) {
+ if (kstrtou32(p, 10, &scope_id) != 0) {
kfree(p);
return 0;
}
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 8cb7d812ccb8..c11e68539602 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -53,6 +53,7 @@
#include <linux/uaccess.h>
#include <linux/hashtable.h>
+#include "auth_gss_internal.h"
#include "../netns.h"
static const struct rpc_authops authgss_ops;
@@ -147,35 +148,6 @@ gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx)
clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags);
}
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, size_t len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static inline const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- dest->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(dest->data == NULL))
- return ERR_PTR(-ENOMEM);
- dest->len = len;
- return q;
-}
-
static struct gss_cl_ctx *
gss_cred_get_ctx(struct rpc_cred *cred)
{
@@ -351,7 +323,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
list_for_each_entry(pos, &pipe->in_downcall, list) {
if (!uid_eq(pos->uid, uid))
continue;
- if (auth && pos->auth->service != auth->service)
+ if (pos->auth->service != auth->service)
continue;
refcount_inc(&pos->count);
dprintk("RPC: %s found msg %p\n", __func__, pos);
@@ -705,6 +677,21 @@ out:
return err;
}
+static struct gss_upcall_msg *
+gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
+{
+ struct gss_upcall_msg *pos;
+ list_for_each_entry(pos, &pipe->in_downcall, list) {
+ if (!uid_eq(pos->uid, uid))
+ continue;
+ if (!rpc_msg_is_inflight(&pos->msg))
+ continue;
+ refcount_inc(&pos->count);
+ return pos;
+ }
+ return NULL;
+}
+
#define MSG_BUF_MAXSIZE 1024
static ssize_t
@@ -751,7 +738,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
err = -ENOENT;
/* Find a matching upcall */
spin_lock(&pipe->lock);
- gss_msg = __gss_find_upcall(pipe, uid, NULL);
+ gss_msg = gss_find_downcall(pipe, uid);
if (gss_msg == NULL) {
spin_unlock(&pipe->lock);
goto err_put_ctx;
diff --git a/net/sunrpc/auth_gss/auth_gss_internal.h b/net/sunrpc/auth_gss/auth_gss_internal.h
new file mode 100644
index 000000000000..f6d9631bd9d0
--- /dev/null
+++ b/net/sunrpc/auth_gss/auth_gss_internal.h
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/*
+ * linux/net/sunrpc/auth_gss/auth_gss_internal.h
+ *
+ * Internal definitions for RPCSEC_GSS client authentication
+ *
+ * Copyright (c) 2000 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ */
+#include <linux/err.h>
+#include <linux/string.h>
+#include <linux/sunrpc/xdr.h>
+
+static inline const void *
+simple_get_bytes(const void *p, const void *end, void *res, size_t len)
+{
+ const void *q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ memcpy(res, p, len);
+ return q;
+}
+
+static inline const void *
+simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest)
+{
+ const void *q;
+ unsigned int len;
+
+ p = simple_get_bytes(p, end, &len, sizeof(len));
+ if (IS_ERR(p))
+ return p;
+ q = (const void *)((const char *)p + len);
+ if (unlikely(q > end || q < p))
+ return ERR_PTR(-EFAULT);
+ if (len) {
+ dest->data = kmemdup(p, len, GFP_NOFS);
+ if (unlikely(dest->data == NULL))
+ return ERR_PTR(-ENOMEM);
+ } else
+ dest->data = NULL;
+ dest->len = len;
+ return q;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7bb2514aadd9..14f2823ad6c2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -46,6 +46,8 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/gss_krb5_enctypes.h>
+#include "auth_gss_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
@@ -187,35 +189,6 @@ get_gss_krb5_enctype(int etype)
return NULL;
}
-static const void *
-simple_get_bytes(const void *p, const void *end, void *res, int len)
-{
- const void *q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- memcpy(res, p, len);
- return q;
-}
-
-static const void *
-simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
-{
- const void *q;
- unsigned int len;
-
- p = simple_get_bytes(p, end, &len, sizeof(len));
- if (IS_ERR(p))
- return p;
- q = (const void *)((const char *)p + len);
- if (unlikely(q > end || q < p))
- return ERR_PTR(-EFAULT);
- res->data = kmemdup(p, len, GFP_NOFS);
- if (unlikely(res->data == NULL))
- return ERR_PTR(-ENOMEM);
- res->len = len;
- return q;
-}
-
static inline const void *
get_key(const void *p, const void *end,
struct krb5_ctx *ctx, struct crypto_skcipher **res)
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 68259eec6afd..ed6b2a155f44 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1079,27 +1079,32 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
struct gssp_in_token *in_token)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
- unsigned int page_base, length;
- int pages, i, res;
- size_t inlen;
+ unsigned int length, pgto_offs, pgfrom_offs;
+ int pages, i, res, pgto, pgfrom;
+ size_t inlen, to_offs, from_offs;
res = gss_read_common_verf(gc, argv, authp, in_handle);
if (res)
return res;
inlen = svc_getnl(argv);
- if (inlen > (argv->iov_len + rqstp->rq_arg.page_len))
+ if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) {
+ kfree(in_handle->data);
return SVC_DENIED;
+ }
pages = DIV_ROUND_UP(inlen, PAGE_SIZE);
in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL);
- if (!in_token->pages)
+ if (!in_token->pages) {
+ kfree(in_handle->data);
return SVC_DENIED;
+ }
in_token->page_base = 0;
in_token->page_len = inlen;
for (i = 0; i < pages; i++) {
in_token->pages[i] = alloc_page(GFP_KERNEL);
if (!in_token->pages[i]) {
+ kfree(in_handle->data);
gss_free_in_token_pages(in_token);
return SVC_DENIED;
}
@@ -1109,17 +1114,24 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
memcpy(page_address(in_token->pages[0]), argv->iov_base, length);
inlen -= length;
- i = 1;
- page_base = rqstp->rq_arg.page_base;
+ to_offs = length;
+ from_offs = rqstp->rq_arg.page_base;
while (inlen) {
- length = min_t(unsigned int, inlen, PAGE_SIZE);
- memcpy(page_address(in_token->pages[i]),
- page_address(rqstp->rq_arg.pages[i]) + page_base,
+ pgto = to_offs >> PAGE_SHIFT;
+ pgfrom = from_offs >> PAGE_SHIFT;
+ pgto_offs = to_offs & ~PAGE_MASK;
+ pgfrom_offs = from_offs & ~PAGE_MASK;
+
+ length = min_t(unsigned int, inlen,
+ min_t(unsigned int, PAGE_SIZE - pgto_offs,
+ PAGE_SIZE - pgfrom_offs));
+ memcpy(page_address(in_token->pages[pgto]) + pgto_offs,
+ page_address(rqstp->rq_arg.pages[pgfrom]) + pgfrom_offs,
length);
+ to_offs += length;
+ from_offs += length;
inlen -= length;
- page_base = 0;
- i++;
}
return 0;
}
@@ -1759,11 +1771,14 @@ static int
svcauth_gss_release(struct svc_rqst *rqstp)
{
struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
- struct rpc_gss_wire_cred *gc = &gsd->clcred;
+ struct rpc_gss_wire_cred *gc;
struct xdr_buf *resbuf = &rqstp->rq_res;
int stat = -EINVAL;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
+ if (!gsd)
+ goto out;
+ gc = &gsd->clcred;
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
/* Release can be called twice, but we only wrap once. */
@@ -1804,10 +1819,10 @@ out_err:
if (rqstp->rq_cred.cr_group_info)
put_group_info(rqstp->rq_cred.cr_group_info);
rqstp->rq_cred.cr_group_info = NULL;
- if (gsd->rsci)
+ if (gsd && gsd->rsci) {
cache_put(&gsd->rsci->h, sn->rsc_cache);
- gsd->rsci = NULL;
-
+ gsd->rsci = NULL;
+ }
return stat;
}
@@ -1904,7 +1919,7 @@ gss_svc_init_net(struct net *net)
goto out2;
return 0;
out2:
- destroy_use_gss_proxy_proc_entry(net);
+ rsi_cache_destroy_net(net);
out1:
rsc_cache_destroy_net(net);
return rv;
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 3c15a99b9700..e41427e1740d 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -69,6 +69,17 @@ static void xprt_free_allocation(struct rpc_rqst *req)
kfree(req);
}
+static void xprt_bc_reinit_xdr_buf(struct xdr_buf *buf)
+{
+ buf->head[0].iov_len = PAGE_SIZE;
+ buf->tail[0].iov_len = 0;
+ buf->pages = NULL;
+ buf->page_len = 0;
+ buf->flags = 0;
+ buf->len = 0;
+ buf->buflen = PAGE_SIZE;
+}
+
static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
{
struct page *page;
@@ -291,6 +302,9 @@ void xprt_free_bc_rqst(struct rpc_rqst *req)
*/
spin_lock_bh(&xprt->bc_pa_lock);
if (xprt_need_to_requeue(xprt)) {
+ xprt_bc_reinit_xdr_buf(&req->rq_snd_buf);
+ xprt_bc_reinit_xdr_buf(&req->rq_rcv_buf);
+ req->rq_rcv_buf.len = PAGE_SIZE;
list_add_tail(&req->rq_bc_pa_list, &xprt->bc_pa_list);
xprt->bc_alloc_count++;
req = NULL;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0d7d149b1b1b..e5498253ad93 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1267,7 +1267,7 @@ static int rpc_sockname(struct net *net, struct sockaddr *sap, size_t salen,
break;
default:
err = -EAFNOSUPPORT;
- goto out;
+ goto out_release;
}
if (err < 0) {
dprintk("RPC: can't bind UDP socket (%d)\n", err);
@@ -1814,9 +1814,6 @@ call_bind_status(struct rpc_task *task)
status = -EOPNOTSUPP;
break;
}
- if (task->tk_rebind_retry == 0)
- break;
- task->tk_rebind_retry--;
rpc_delay(task, 3*HZ);
goto retry_timeout;
case -ETIMEDOUT:
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index e2808586c9e6..9af919364a00 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -499,10 +499,20 @@ static struct rpc_task *__rpc_find_next_queued_priority(struct rpc_wait_queue *q
struct rpc_task *task;
/*
+ * Service the privileged queue.
+ */
+ q = &queue->tasks[RPC_NR_PRIORITY - 1];
+ if (queue->maxpriority > RPC_PRIORITY_PRIVILEGED && !list_empty(q)) {
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+ goto out;
+ }
+
+ /*
* Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
- if (!list_empty(q) && --queue->nr) {
+ if (!list_empty(q) && queue->nr) {
+ queue->nr--;
task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto out;
}
@@ -699,7 +709,6 @@ rpc_init_task_statistics(struct rpc_task *task)
/* Initialize retry counters */
task->tk_garb_retry = 2;
task->tk_cred_retry = 2;
- task->tk_rebind_retry = 2;
/* starting timestamp */
task->tk_start = ktime_get();
@@ -883,8 +892,10 @@ int rpc_malloc(struct rpc_task *task)
struct rpc_buffer *buf;
gfp_t gfp = GFP_NOIO | __GFP_NOWARN;
+ if (RPC_IS_ASYNC(task))
+ gfp = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
- gfp = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ gfp |= __GFP_MEMALLOC;
size += sizeof(struct rpc_buffer);
if (size <= RPC_BUFFER_MAXSIZE)
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d65f8d35de87..6a606af8de81 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1146,6 +1146,22 @@ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ..
extern void svc_tcp_prep_reply_hdr(struct svc_rqst *);
+__be32
+svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err)
+{
+ set_bit(RQ_AUTHERR, &rqstp->rq_flags);
+ return auth_err;
+}
+EXPORT_SYMBOL_GPL(svc_return_autherr);
+
+static __be32
+svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp)
+{
+ if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags))
+ return *statp;
+ return rpc_auth_ok;
+}
+
/*
* Common routine for processing the RPC request.
*/
@@ -1296,11 +1312,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
procp->pc_release(rqstp);
goto dropit;
}
- if (*statp == rpc_autherr_badcred) {
- if (procp->pc_release)
- procp->pc_release(rqstp);
- goto err_bad_auth;
- }
+ auth_stat = svc_get_autherr(rqstp, statp);
+ if (auth_stat != rpc_auth_ok)
+ goto err_release_bad_auth;
if (*statp == rpc_success && procp->pc_encode &&
!procp->pc_encode(rqstp, resv->iov_base + resv->iov_len)) {
dprintk("svc: failed to encode reply\n");
@@ -1330,7 +1344,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
sendit:
if (svc_authorise(rqstp))
- goto close;
+ goto close_xprt;
return 1; /* Caller can now send it */
dropit:
@@ -1339,6 +1353,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
return 0;
close:
+ svc_authorise(rqstp);
+close_xprt:
if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
svc_close_xprt(rqstp->rq_xprt);
dprintk("svc: svc_process close\n");
@@ -1347,7 +1363,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
err_short_len:
svc_printk(rqstp, "short len %zd, dropping request\n",
argv->iov_len);
- goto close;
+ goto close_xprt;
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
@@ -1357,6 +1373,9 @@ err_bad_rpc:
svc_putnl(resv, 2);
goto sendit;
+err_release_bad_auth:
+ if (procp->pc_release)
+ procp->pc_release(rqstp);
err_bad_auth:
dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
serv->sv_stats->rpcbadauth++;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 709c082dc905..4b56b949a463 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1052,7 +1052,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
struct svc_xprt *xprt;
int ret = 0;
- spin_lock(&serv->sv_lock);
+ spin_lock_bh(&serv->sv_lock);
list_for_each_entry(xprt, xprt_list, xpt_list) {
if (xprt->xpt_net != net)
continue;
@@ -1060,7 +1060,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
set_bit(XPT_CLOSE, &xprt->xpt_flags);
svc_xprt_enqueue(xprt);
}
- spin_unlock(&serv->sv_lock);
+ spin_unlock_bh(&serv->sv_lock);
return ret;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d0b5a1c47a32..b5ee21d5d1f3 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -757,12 +757,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
- if (svsk) {
- /* Refer to svc_setup_socket() for details. */
- rmb();
- svsk->sk_odata(sk);
- }
-
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@@ -771,15 +765,20 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
* when one of child sockets become ESTABLISHED.
* 2) data_ready method of the child socket may be called
* when it receives data before the socket is accepted.
- * In case of 2, we should ignore it silently.
+ * In case of 2, we should ignore it silently and DO NOT
+ * dereference svsk.
*/
- if (sk->sk_state == TCP_LISTEN) {
- if (svsk) {
- set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
- } else
- printk("svc: socket %p: no user data\n", sk);
- }
+ if (sk->sk_state != TCP_LISTEN)
+ return;
+
+ if (svsk) {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
+ svsk->sk_odata(sk);
+ set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_enqueue(&svsk->sk_xprt);
+ } else
+ printk("svc: socket %p: no user data\n", sk);
}
/*
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 540e340e2565..7459180e992b 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -544,7 +544,11 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
*/
xdr->p = (void *)p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - frag1bytes >= PAGE_SIZE)
+ xdr->end = (void *)p + PAGE_SIZE;
+ else
+ xdr->end = (void *)p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5e7c13aa66d0..b1abf4848bbc 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -143,31 +143,64 @@ out:
}
EXPORT_SYMBOL_GPL(xprt_unregister_transport);
+static void
+xprt_class_release(const struct xprt_class *t)
+{
+ module_put(t->owner);
+}
+
+static const struct xprt_class *
+xprt_class_find_by_netid_locked(const char *netid)
+{
+ const struct xprt_class *t;
+ unsigned int i;
+
+ list_for_each_entry(t, &xprt_list, list) {
+ for (i = 0; t->netid[i][0] != '\0'; i++) {
+ if (strcmp(t->netid[i], netid) != 0)
+ continue;
+ if (!try_module_get(t->owner))
+ continue;
+ return t;
+ }
+ }
+ return NULL;
+}
+
+static const struct xprt_class *
+xprt_class_find_by_netid(const char *netid)
+{
+ const struct xprt_class *t;
+
+ spin_lock(&xprt_list_lock);
+ t = xprt_class_find_by_netid_locked(netid);
+ if (!t) {
+ spin_unlock(&xprt_list_lock);
+ request_module("rpc%s", netid);
+ spin_lock(&xprt_list_lock);
+ t = xprt_class_find_by_netid_locked(netid);
+ }
+ spin_unlock(&xprt_list_lock);
+ return t;
+}
+
/**
* xprt_load_transport - load a transport implementation
- * @transport_name: transport to load
+ * @netid: transport to load
*
* Returns:
* 0: transport successfully loaded
* -ENOENT: transport module not available
*/
-int xprt_load_transport(const char *transport_name)
+int xprt_load_transport(const char *netid)
{
- struct xprt_class *t;
- int result;
+ const struct xprt_class *t;
- result = 0;
- spin_lock(&xprt_list_lock);
- list_for_each_entry(t, &xprt_list, list) {
- if (strcmp(t->name, transport_name) == 0) {
- spin_unlock(&xprt_list_lock);
- goto out;
- }
- }
- spin_unlock(&xprt_list_lock);
- result = request_module("xprt%s", transport_name);
-out:
- return result;
+ t = xprt_class_find_by_netid(netid);
+ if (!t)
+ return -ENOENT;
+ xprt_class_release(t);
+ return 0;
}
EXPORT_SYMBOL_GPL(xprt_load_transport);
@@ -1512,7 +1545,14 @@ static void xprt_destroy(struct rpc_xprt *xprt)
*/
wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE);
+ /*
+ * xprt_schedule_autodisconnect() can run after XPRT_LOCKED
+ * is cleared. We use ->transport_lock to ensure the mod_timer()
+ * can only run *before* del_time_sync(), never after.
+ */
+ spin_lock_bh(&xprt->transport_lock);
del_timer_sync(&xprt->timer);
+ spin_unlock_bh(&xprt->transport_lock);
/*
* Destroy sockets etc from the system workqueue so they can
diff --git a/net/sunrpc/xprtrdma/module.c b/net/sunrpc/xprtrdma/module.c
index 620327c01302..45c5b41ac8dc 100644
--- a/net/sunrpc/xprtrdma/module.c
+++ b/net/sunrpc/xprtrdma/module.c
@@ -24,6 +24,7 @@ MODULE_DESCRIPTION("RPC/RDMA Transport");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_ALIAS("svcrdma");
MODULE_ALIAS("xprtrdma");
+MODULE_ALIAS("rpcrdma6");
static void __exit rpc_rdma_cleanup(void)
{
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index f2eaf264726b..7f9d8365c932 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -72,7 +72,7 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs)
/* Maximum Read list size */
maxsegs += 2; /* segment for head and tail buffers */
- size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
+ size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */
size += sizeof(__be32); /* segment count */
@@ -98,7 +98,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs)
/* Maximum Write list size */
maxsegs += 2; /* segment for head and tail buffers */
- size = sizeof(__be32); /* segment count */
+ size += sizeof(__be32); /* segment count */
size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
@@ -980,6 +980,7 @@ static bool
rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
{
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct xdr_stream *xdr = &rep->rr_stream;
__be32 *p;
@@ -1003,6 +1004,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
if (*p != cpu_to_be32(RPC_CALL))
return false;
+ /* No bc service. */
+ if (xprt->bc_serv == NULL)
+ return false;
+
/* Now that we are sure this is a backchannel call,
* advance to the RPC header.
*/
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index d183d4aee822..cf2272a90f13 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -308,9 +308,9 @@ xprt_setup_rdma_bc(struct xprt_create *args)
xprt->timeout = &xprt_rdma_bc_timeout;
xprt_set_bound(xprt);
xprt_set_connected(xprt);
- xprt->bind_timeout = RPCRDMA_BIND_TO;
- xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
- xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+ xprt->bind_timeout = 0;
+ xprt->reestablish_timeout = 0;
+ xprt->idle_timeout = 0;
xprt->prot = XPRT_TRANSPORT_BC_RDMA;
xprt->tsh_size = 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index aa4d19a780d7..4062cd624b26 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -639,10 +639,11 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
while (remaining) {
len = min_t(u32, PAGE_SIZE - pageoff, remaining);
- memcpy(dst, page_address(*ppages), len);
+ memcpy(dst, page_address(*ppages) + pageoff, len);
remaining -= len;
dst += len;
pageoff = 0;
+ ppages++;
}
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index f56f36b4d742..e87a79be7ef0 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -665,8 +665,10 @@ xprt_rdma_allocate(struct rpc_task *task)
gfp_t flags;
flags = RPCRDMA_DEF_GFP;
+ if (RPC_IS_ASYNC(task))
+ flags = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
- flags = __GFP_MEMALLOC | GFP_NOWAIT | __GFP_NOWARN;
+ flags |= __GFP_MEMALLOC;
if (!rpcrdma_get_sendbuf(r_xprt, req, rqst->rq_callsize, flags))
goto out_fail;
@@ -854,6 +856,7 @@ static struct xprt_class xprt_rdma = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_RDMA,
.setup = xprt_setup_rdma,
+ .netid = { "rdma", "rdma6", "" },
};
void xprt_rdma_cleanup(void)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9dc059dea689..a0a82d9a5900 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2974,9 +2974,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
}
xprt_set_bound(xprt);
xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
- ret = ERR_PTR(xs_local_setup_socket(transport));
- if (ret)
- goto out_err;
break;
default:
ret = ERR_PTR(-EAFNOSUPPORT);
@@ -3241,6 +3238,7 @@ static struct xprt_class xs_local_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_LOCAL,
.setup = xs_setup_local,
+ .netid = { "" },
};
static struct xprt_class xs_udp_transport = {
@@ -3249,6 +3247,7 @@ static struct xprt_class xs_udp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_UDP,
.setup = xs_setup_udp,
+ .netid = { "udp", "udp6", "" },
};
static struct xprt_class xs_tcp_transport = {
@@ -3257,6 +3256,7 @@ static struct xprt_class xs_tcp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_TCP,
.setup = xs_setup_tcp,
+ .netid = { "tcp", "tcp6", "" },
};
static struct xprt_class xs_bc_tcp_transport = {
@@ -3265,6 +3265,7 @@ static struct xprt_class xs_bc_tcp_transport = {
.owner = THIS_MODULE,
.ident = XPRT_TRANSPORT_BC_TCP,
.setup = xs_setup_bc_tcp,
+ .netid = { "" },
};
/**
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2649a0a0d45e..4353968bc5a5 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -231,7 +231,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
*/
static int tipc_enable_bearer(struct net *net, const char *name,
u32 disc_domain, u32 prio,
- struct nlattr *attr[])
+ struct nlattr *attr[],
+ struct netlink_ext_ack *extack)
{
struct tipc_net *tn = tipc_net(net);
struct tipc_bearer_names b_names;
@@ -242,20 +243,23 @@ static int tipc_enable_bearer(struct net *net, const char *name,
int bearer_id = 0;
int res = -EINVAL;
char *errstr = "";
+ u32 i;
if (!bearer_name_validate(name, &b_names)) {
- errstr = "illegal name";
- goto rejected;
+ NL_SET_ERR_MSG(extack, "Illegal name");
+ return res;
}
if (prio > TIPC_MAX_LINK_PRI && prio != TIPC_MEDIA_LINK_PRI) {
errstr = "illegal priority";
+ NL_SET_ERR_MSG(extack, "Illegal priority");
goto rejected;
}
m = tipc_media_find(b_names.media_name);
if (!m) {
errstr = "media not registered";
+ NL_SET_ERR_MSG(extack, "Media not registered");
goto rejected;
}
@@ -263,33 +267,43 @@ static int tipc_enable_bearer(struct net *net, const char *name,
prio = m->priority;
/* Check new bearer vs existing ones and find free bearer id if any */
- while (bearer_id < MAX_BEARERS) {
- b = rtnl_dereference(tn->bearer_list[bearer_id]);
- if (!b)
- break;
+ bearer_id = MAX_BEARERS;
+ i = MAX_BEARERS;
+ while (i-- != 0) {
+ b = rtnl_dereference(tn->bearer_list[i]);
+ if (!b) {
+ bearer_id = i;
+ continue;
+ }
if (!strcmp(name, b->name)) {
errstr = "already enabled";
+ NL_SET_ERR_MSG(extack, "Already enabled");
goto rejected;
}
- bearer_id++;
- if (b->priority != prio)
- continue;
- if (++with_this_prio <= 2)
- continue;
- pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
- name, prio);
- if (prio == TIPC_MIN_LINK_PRI) {
- errstr = "cannot adjust to lower";
- goto rejected;
+
+ if (b->priority == prio &&
+ (++with_this_prio > 2)) {
+ pr_warn("Bearer <%s>: already 2 bearers with priority %u\n",
+ name, prio);
+
+ if (prio == TIPC_MIN_LINK_PRI) {
+ errstr = "cannot adjust to lower";
+ NL_SET_ERR_MSG(extack, "Cannot adjust to lower");
+ goto rejected;
+ }
+
+ pr_warn("Bearer <%s>: trying with adjusted priority\n",
+ name);
+ prio--;
+ bearer_id = MAX_BEARERS;
+ i = MAX_BEARERS;
+ with_this_prio = 1;
}
- pr_warn("Bearer <%s>: trying with adjusted priority\n", name);
- prio--;
- bearer_id = 0;
- with_this_prio = 1;
}
if (bearer_id >= MAX_BEARERS) {
errstr = "max 3 bearers permitted";
+ NL_SET_ERR_MSG(extack, "Max 3 bearers permitted");
goto rejected;
}
@@ -303,6 +317,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
if (res) {
kfree(b);
errstr = "failed to enable media";
+ NL_SET_ERR_MSG(extack, "Failed to enable media");
goto rejected;
}
@@ -318,6 +333,7 @@ static int tipc_enable_bearer(struct net *net, const char *name,
if (res) {
bearer_disable(net, b);
errstr = "failed to create discoverer";
+ NL_SET_ERR_MSG(extack, "Failed to create discoverer");
goto rejected;
}
@@ -795,6 +811,7 @@ int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info)
bearer = tipc_bearer_find(net, name);
if (!bearer) {
err = -EINVAL;
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
goto err_out;
}
@@ -834,8 +851,10 @@ int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
bearer = tipc_bearer_find(net, name);
- if (!bearer)
+ if (!bearer) {
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
+ }
bearer_disable(net, bearer);
@@ -893,7 +912,8 @@ int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
}
- return tipc_enable_bearer(net, bearer, domain, prio, attrs);
+ return tipc_enable_bearer(net, bearer, domain, prio, attrs,
+ info->extack);
}
int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
@@ -932,6 +952,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
b = tipc_bearer_find(net, name);
if (!b) {
rtnl_unlock();
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
}
@@ -972,8 +993,10 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
b = tipc_bearer_find(net, name);
- if (!b)
+ if (!b) {
+ NL_SET_ERR_MSG(info->extack, "Bearer not found");
return -EINVAL;
+ }
if (attrs[TIPC_NLA_BEARER_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
@@ -992,12 +1015,18 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_WIN])
b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
- if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU property is unsupported");
return -EINVAL;
+ }
#ifdef CONFIG_TIPC_MEDIA_UDP
if (tipc_udp_mtu_bad(nla_get_u32
- (props[TIPC_NLA_PROP_MTU])))
+ (props[TIPC_NLA_PROP_MTU]))) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU value is out-of-range");
return -EINVAL;
+ }
b->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
tipc_node_apply_property(net, b, TIPC_NLA_PROP_MTU);
#endif
@@ -1099,7 +1128,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
struct tipc_nl_msg msg;
struct tipc_media *media;
struct sk_buff *rep;
- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
@@ -1125,6 +1154,7 @@ int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info)
rtnl_lock();
media = tipc_media_find(name);
if (!media) {
+ NL_SET_ERR_MSG(info->extack, "Media not found");
err = -EINVAL;
goto err_out;
}
@@ -1147,7 +1177,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
int err;
char *name;
struct tipc_media *m;
- struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
+ struct nlattr *attrs[TIPC_NLA_MEDIA_MAX + 1];
if (!info->attrs[TIPC_NLA_MEDIA])
return -EINVAL;
@@ -1161,9 +1191,10 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]);
m = tipc_media_find(name);
- if (!m)
+ if (!m) {
+ NL_SET_ERR_MSG(info->extack, "Media not found");
return -EINVAL;
-
+ }
if (attrs[TIPC_NLA_MEDIA_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
@@ -1179,12 +1210,18 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_WIN])
m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
- if (m->type_id != TIPC_MEDIA_TYPE_UDP)
+ if (m->type_id != TIPC_MEDIA_TYPE_UDP) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU property is unsupported");
return -EINVAL;
+ }
#ifdef CONFIG_TIPC_MEDIA_UDP
if (tipc_udp_mtu_bad(nla_get_u32
- (props[TIPC_NLA_PROP_MTU])))
+ (props[TIPC_NLA_PROP_MTU]))) {
+ NL_SET_ERR_MSG(info->extack,
+ "MTU value is out-of-range");
return -EINVAL;
+ }
m->mtu = nla_get_u32(props[TIPC_NLA_PROP_MTU]);
#endif
}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 49bb9fc0aa06..ce0f067d0faf 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -93,6 +93,11 @@ out_sk_rht:
static void __net_exit tipc_exit_net(struct net *net)
{
tipc_net_stop(net);
+
+ /* Make sure the tipc_net_finalize_work stopped
+ * before releasing the resources.
+ */
+ flush_scheduled_work();
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index c138d68e8a69..0436c8f2967d 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -146,8 +146,8 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
{
struct net *net = d->net;
struct tipc_net *tn = tipc_net(net);
- bool trial = time_before(jiffies, tn->addr_trial_end);
u32 self = tipc_own_addr(net);
+ bool trial = time_before(jiffies, tn->addr_trial_end) && !self;
if (mtyp == DSC_TRIAL_FAIL_MSG) {
if (!trial)
@@ -208,7 +208,10 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
u32 self;
int err;
- skb_linearize(skb);
+ if (skb_linearize(skb)) {
+ kfree_skb(skb);
+ return;
+ }
hdr = buf_msg(skb);
if (caps & TIPC_NODE_ID128)
diff --git a/net/tipc/link.c b/net/tipc/link.c
index f756b721f93e..ee4aca974622 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -914,9 +914,7 @@ void tipc_link_reset(struct tipc_link *l)
int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *xmitq)
{
- struct tipc_msg *hdr = buf_msg(skb_peek(list));
unsigned int maxwin = l->window;
- int imp = msg_importance(hdr);
unsigned int mtu = l->mtu;
u16 ack = l->rcv_nxt - 1;
u16 seqno = l->snd_nxt;
@@ -925,13 +923,20 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
struct sk_buff_head *backlogq = &l->backlogq;
struct sk_buff *skb, *_skb, **tskb;
int pkt_cnt = skb_queue_len(list);
+ struct tipc_msg *hdr;
int rc = 0;
+ int imp;
+
+ if (pkt_cnt <= 0)
+ return 0;
+ hdr = buf_msg(skb_peek(list));
if (unlikely(msg_size(hdr) > mtu)) {
__skb_queue_purge(list);
return -EMSGSIZE;
}
+ imp = msg_importance(hdr);
/* Allow oversubscription of one data msg per source at congestion */
if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) {
if (imp == TIPC_SYSTEM_IMPORTANCE) {
@@ -1574,20 +1579,25 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
u16 rcv_nxt = l->rcv_nxt;
- u16 dlen = msg_data_sz(hdr);
+ u32 dlen = msg_data_sz(hdr);
int mtyp = msg_type(hdr);
bool reply = msg_probe(hdr);
void *data;
char *if_name;
int rc = 0;
+ if (dlen > U16_MAX)
+ goto exit;
+
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;
if (tipc_own_addr(l->net) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
- skb_linearize(skb);
+ if (skb_linearize(skb))
+ goto exit;
+
hdr = buf_msg(skb);
data = msg_data(hdr);
diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c
index 23706ee16607..0268857a3cfe 100644
--- a/net/tipc/monitor.c
+++ b/net/tipc/monitor.c
@@ -130,7 +130,7 @@ static void map_set(u64 *up_map, int i, unsigned int v)
static int map_get(u64 up_map, int i)
{
- return (up_map & (1 << i)) >> i;
+ return (up_map & (1ULL << i)) >> i;
}
static struct tipc_peer *peer_prev(struct tipc_peer *peer)
@@ -457,6 +457,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr,
state->probing = false;
/* Sanity check received domain record */
+ if (new_member_cnt > MAX_MON_DOMAIN)
+ return;
if (dlen < dom_rec_len(arrv_dom, 0))
return;
if (dlen != dom_rec_len(arrv_dom, new_member_cnt))
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index b078b77620f1..0ac270444974 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -140,19 +140,14 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
+ *buf = NULL;
+ if (skb_has_frag_list(frag) && __skb_linearize(frag))
+ goto err;
frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
head = *headbuf = frag;
- *buf = NULL;
TIPC_SKB_CB(head)->tail = NULL;
- if (skb_is_nonlinear(head)) {
- skb_walk_frags(head, tail) {
- TIPC_SKB_CB(head)->tail = tail;
- }
- } else {
- skb_frag_list_init(head);
- }
return 0;
}
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 836e629e8f4a..661bc2551a0a 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -290,7 +290,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i,
pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n",
type, lower, node);
} else {
- pr_warn("Unrecognized name table message received\n");
+ pr_warn_ratelimited("Unknown name table message received\n");
}
return false;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 89993afe0fbd..059ffb8b466a 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -812,7 +812,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg,
list_for_each_entry(p, &sr->all_publ, all_publ)
if (p->key == *last_key)
break;
- if (p->key != *last_key)
+ if (list_entry_is_head(p, &sr->all_publ, all_publ))
return -EPIPE;
} else {
p = list_first_entry(&sr->all_publ,
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 9b36163d951e..bf11d57ef3ae 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -87,7 +87,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
[TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_LINK_NAME] = { .type = NLA_STRING,
+ [TIPC_NLA_LINK_NAME] = { .type = NLA_NUL_STRING,
.len = TIPC_MAX_LINK_NAME },
[TIPC_NLA_LINK_MTU] = { .type = NLA_U32 },
[TIPC_NLA_LINK_BROADCAST] = { .type = NLA_FLAG },
@@ -116,7 +116,7 @@ const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
const struct nla_policy tipc_nl_bearer_policy[TIPC_NLA_BEARER_MAX + 1] = {
[TIPC_NLA_BEARER_UNSPEC] = { .type = NLA_UNSPEC },
- [TIPC_NLA_BEARER_NAME] = { .type = NLA_STRING,
+ [TIPC_NLA_BEARER_NAME] = { .type = NLA_NUL_STRING,
.len = TIPC_MAX_BEARER_NAME },
[TIPC_NLA_BEARER_PROP] = { .type = NLA_NESTED },
[TIPC_NLA_BEARER_DOMAIN] = { .type = NLA_U32 }
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index f8e111218a0e..2276a0704a63 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -101,6 +101,7 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len)
return -EMSGSIZE;
skb_put(skb, TLV_SPACE(len));
+ memset(tlv, 0, TLV_SPACE(len));
tlv->tlv_type = htons(type);
tlv->tlv_len = htons(TLV_LENGTH(len));
if (len && data)
@@ -671,7 +672,7 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg,
if (err)
return err;
- link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]);
+ link_info.dest = htonl(nla_get_flag(link[TIPC_NLA_LINK_DEST]));
link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP]));
nla_strlcpy(link_info.str, link[TIPC_NLA_LINK_NAME],
TIPC_MAX_LINK_NAME);
@@ -865,7 +866,7 @@ static int tipc_nl_compat_name_table_dump_header(struct tipc_nl_compat_msg *msg)
};
ntq = (struct tipc_name_table_query *)TLV_DATA(msg->req);
- if (TLV_GET_DATA_LEN(msg->req) < sizeof(struct tipc_name_table_query))
+ if (TLV_GET_DATA_LEN(msg->req) < (int)sizeof(struct tipc_name_table_query))
return -EINVAL;
depth = ntohl(ntq->depth);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 16e2af3a00cc..c83eaa718369 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -388,7 +388,7 @@ static int tipc_sk_sock_err(struct socket *sock, long *timeout)
rc_ = tipc_sk_sock_err((sock_), timeo_); \
if (rc_) \
break; \
- prepare_to_wait(sk_sleep(sk_), &wait_, TASK_INTERRUPTIBLE); \
+ add_wait_queue(sk_sleep(sk_), &wait_); \
release_sock(sk_); \
*(timeo_) = wait_woken(&wait_, TASK_INTERRUPTIBLE, *(timeo_)); \
sched_annotate_sleep(); \
@@ -453,6 +453,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
sock_init_data(sock, sk);
tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
+ sk_free(sk);
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
@@ -1187,6 +1188,9 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
spin_lock_bh(&inputq->lock);
if (skb_peek(arrvq) == skb) {
skb_queue_splice_tail_init(&tmpq, inputq);
+ /* Decrease the skb's refcnt as increasing in the
+ * function tipc_skb_peek
+ */
kfree_skb(__skb_dequeue(arrvq));
}
spin_unlock_bh(&inputq->lock);
@@ -1713,6 +1717,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
bool connected = !tipc_sk_type_connectionless(sk);
struct tipc_sock *tsk = tipc_sk(sk);
int rc, err, hlen, dlen, copy;
+ struct tipc_skb_cb *skb_cb;
struct sk_buff_head xmitq;
struct tipc_msg *hdr;
struct sk_buff *skb;
@@ -1736,6 +1741,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
if (unlikely(rc))
goto exit;
skb = skb_peek(&sk->sk_receive_queue);
+ skb_cb = TIPC_SKB_CB(skb);
hdr = buf_msg(skb);
dlen = msg_data_sz(hdr);
hlen = msg_hdr_sz(hdr);
@@ -1755,18 +1761,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
/* Capture data if non-error msg, otherwise just set return value */
if (likely(!err)) {
- copy = min_t(int, dlen, buflen);
- if (unlikely(copy != dlen))
- m->msg_flags |= MSG_TRUNC;
- rc = skb_copy_datagram_msg(skb, hlen, m, copy);
+ int offset = skb_cb->bytes_read;
+
+ copy = min_t(int, dlen - offset, buflen);
+ rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy);
+ if (unlikely(rc))
+ goto exit;
+ if (unlikely(offset + copy < dlen)) {
+ if (flags & MSG_EOR) {
+ if (!(flags & MSG_PEEK))
+ skb_cb->bytes_read = offset + copy;
+ } else {
+ m->msg_flags |= MSG_TRUNC;
+ skb_cb->bytes_read = 0;
+ }
+ } else {
+ if (flags & MSG_EOR)
+ m->msg_flags |= MSG_EOR;
+ skb_cb->bytes_read = 0;
+ }
} else {
copy = 0;
rc = 0;
- if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control)
+ if (err != TIPC_CONN_SHUTDOWN && connected && !m->msg_control) {
rc = -ECONNRESET;
+ goto exit;
+ }
}
- if (unlikely(rc))
- goto exit;
/* Mark message as group event if applicable */
if (unlikely(grp_evt)) {
@@ -1789,6 +1810,9 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m,
tipc_node_distr_xmit(sock_net(sk), &xmitq);
}
+ if (skb_cb->bytes_read)
+ goto exit;
+
tsk_advance_rx_queue(sk);
if (likely(!connected))
@@ -2200,7 +2224,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
{
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies + usecs_to_jiffies(20000);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
@@ -2437,7 +2461,7 @@ static int tipc_listen(struct socket *sock, int len)
static int tipc_wait_for_accept(struct socket *sock, long timeo)
{
struct sock *sk = sock->sk;
- DEFINE_WAIT(wait);
+ DEFINE_WAIT_FUNC(wait, woken_wake_function);
int err;
/* True wake-one mechanism for incoming connections: only
@@ -2446,12 +2470,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
* anymore, the common case will execute the loop only once.
*/
for (;;) {
- prepare_to_wait_exclusive(sk_sleep(sk), &wait,
- TASK_INTERRUPTIBLE);
if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
+ add_wait_queue(sk_sleep(sk), &wait);
release_sock(sk);
- timeo = schedule_timeout(timeo);
+ timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo);
lock_sock(sk);
+ remove_wait_queue(sk_sleep(sk), &wait);
}
err = 0;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -2463,7 +2487,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
if (signal_pending(current))
break;
}
- finish_wait(sk_sleep(sk), &wait);
return err;
}
@@ -3465,7 +3488,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb,
if (p->key == *last_publ)
break;
}
- if (p->key != *last_publ) {
+ if (list_entry_is_head(p, &tsk->publications, binding_sock)) {
/* We never set seq or call nl_dump_check_consistent()
* this means that setting prev_seq here will cause the
* consistence check to fail in the netlink callback
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index ec9a7137d267..d3bb19cd0ec0 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -184,7 +184,7 @@ static void tipc_conn_close(struct tipc_conn *con)
conn_put(con);
}
-static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
+static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s, struct socket *sock)
{
struct tipc_conn *con;
int ret;
@@ -210,10 +210,12 @@ static struct tipc_conn *tipc_conn_alloc(struct tipc_topsrv *s)
}
con->conid = ret;
s->idr_in_use++;
- spin_unlock_bh(&s->idr_lock);
set_bit(CF_CONNECTED, &con->flags);
con->server = s;
+ con->sock = sock;
+ conn_get(con);
+ spin_unlock_bh(&s->idr_lock);
return con;
}
@@ -457,17 +459,24 @@ static void tipc_conn_data_ready(struct sock *sk)
static void tipc_topsrv_accept(struct work_struct *work)
{
struct tipc_topsrv *srv = container_of(work, struct tipc_topsrv, awork);
- struct socket *lsock = srv->listener;
- struct socket *newsock;
+ struct socket *newsock, *lsock;
struct tipc_conn *con;
struct sock *newsk;
int ret;
+ spin_lock_bh(&srv->idr_lock);
+ if (!srv->listener) {
+ spin_unlock_bh(&srv->idr_lock);
+ return;
+ }
+ lsock = srv->listener;
+ spin_unlock_bh(&srv->idr_lock);
+
while (1) {
ret = kernel_accept(lsock, &newsock, O_NONBLOCK);
if (ret < 0)
return;
- con = tipc_conn_alloc(srv);
+ con = tipc_conn_alloc(srv, newsock);
if (IS_ERR(con)) {
ret = PTR_ERR(con);
sock_release(newsock);
@@ -479,11 +488,11 @@ static void tipc_topsrv_accept(struct work_struct *work)
newsk->sk_data_ready = tipc_conn_data_ready;
newsk->sk_write_space = tipc_conn_write_space;
newsk->sk_user_data = con;
- con->sock = newsock;
write_unlock_bh(&newsk->sk_callback_lock);
/* Wake up receive process in case of 'SYN+' message */
newsk->sk_data_ready(newsk);
+ conn_put(con);
}
}
@@ -496,7 +505,7 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
read_lock_bh(&sk->sk_callback_lock);
srv = sk->sk_user_data;
- if (srv->listener)
+ if (srv)
queue_work(srv->rcv_wq, &srv->awork);
read_unlock_bh(&sk->sk_callback_lock);
}
@@ -575,19 +584,19 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
sub.seq.upper = upper;
sub.timeout = TIPC_WAIT_FOREVER;
sub.filter = filter;
- *(u32 *)&sub.usr_handle = port;
+ *(u64 *)&sub.usr_handle = (u64)port;
- con = tipc_conn_alloc(tipc_topsrv(net));
+ con = tipc_conn_alloc(tipc_topsrv(net), NULL);
if (IS_ERR(con))
return false;
*conid = con->conid;
- con->sock = NULL;
rc = tipc_conn_rcv_sub(tipc_topsrv(net), con, &sub);
- if (rc >= 0)
- return true;
+ if (rc)
+ conn_put(con);
+
conn_put(con);
- return false;
+ return !rc;
}
void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
@@ -671,12 +680,18 @@ static int tipc_topsrv_start(struct net *net)
ret = tipc_topsrv_work_start(srv);
if (ret < 0)
- return ret;
+ goto err_start;
ret = tipc_topsrv_create_listener(srv);
if (ret < 0)
- tipc_topsrv_work_stop(srv);
+ goto err_create;
+
+ return 0;
+err_create:
+ tipc_topsrv_work_stop(srv);
+err_start:
+ kfree(srv);
return ret;
}
@@ -700,8 +715,9 @@ static void tipc_topsrv_stop(struct net *net)
__module_get(lsock->sk->sk_prot_creator->owner);
srv->listener = NULL;
spin_unlock_bh(&srv->idr_lock);
- sock_release(lsock);
+
tipc_topsrv_work_stop(srv);
+ sock_release(lsock);
idr_destroy(&srv->conn_idr);
kfree(srv);
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 575d62130578..b290eb3ae155 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -110,13 +110,16 @@ static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
unsigned long flags;
spin_lock_irqsave(&tls_device_lock, flags);
+ if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
+ goto unlock;
+
list_move_tail(&ctx->list, &tls_device_gc_list);
/* schedule_work inside the spinlock
* to make sure tls_device_down waits for that work.
*/
schedule_work(&tls_device_gc_work);
-
+unlock:
spin_unlock_irqrestore(&tls_device_lock, flags);
}
@@ -214,8 +217,7 @@ void tls_device_sk_destruct(struct sock *sk)
clean_acked_data_disable(inet_csk(sk));
}
- if (refcount_dec_and_test(&tls_ctx->refcount))
- tls_device_queue_ctx_destruction(tls_ctx);
+ tls_device_queue_ctx_destruction(tls_ctx);
}
EXPORT_SYMBOL(tls_device_sk_destruct);
@@ -351,13 +353,13 @@ static int tls_push_data(struct sock *sk,
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST;
- int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE);
struct tls_record_info *record = ctx->open_record;
struct page_frag *pfrag;
size_t orig_size = size;
u32 max_open_record_len;
- int copy, rc = 0;
+ bool more = false;
bool done = false;
+ int copy, rc = 0;
long timeo;
if (flags &
@@ -422,9 +424,8 @@ handle_error:
if (!size) {
last_record:
tls_push_record_flags = flags;
- if (more) {
- tls_ctx->pending_open_record_frags =
- record->num_frags;
+ if (flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE)) {
+ more = true;
break;
}
@@ -445,6 +446,8 @@ last_record:
}
} while (!done);
+ tls_ctx->pending_open_record_frags = more;
+
if (orig_size - size > 0)
rc = orig_size - size;
@@ -954,6 +957,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk)
if (tls_ctx->tx_conf != TLS_HW) {
dev_put(netdev);
tls_ctx->netdev = NULL;
+ } else {
+ set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags);
}
out:
up_read(&device_offload_lock);
@@ -983,7 +988,8 @@ static int tls_device_down(struct net_device *netdev)
if (ctx->tx_conf == TLS_HW)
netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
TLS_OFFLOAD_CTX_DIR_TX);
- if (ctx->rx_conf == TLS_HW)
+ if (ctx->rx_conf == TLS_HW &&
+ !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags))
netdev->tlsdev_ops->tls_dev_del(netdev, ctx,
TLS_OFFLOAD_CTX_DIR_RX);
WRITE_ONCE(ctx->netdev, NULL);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index bbb2da70e870..7d761244a360 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -630,6 +630,12 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
return NULL;
}
+ if (!skb_queue_empty(&sk->sk_receive_queue)) {
+ __strp_unpause(&ctx->strp);
+ if (ctx->recv_pkt)
+ return ctx->recv_pkt;
+ }
+
if (sk->sk_shutdown & RCV_SHUTDOWN)
return NULL;
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index 8b31ab85d050..3b9e450656a4 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -19,6 +19,11 @@ config UNIX
Say Y unless you know what you are doing.
+config UNIX_SCM
+ bool
+ depends on UNIX
+ default y
+
config UNIX_DIAG
tristate "UNIX: socket monitoring interface"
depends on UNIX
diff --git a/net/unix/Makefile b/net/unix/Makefile
index ffd0a275c3a7..54e58cc4f945 100644
--- a/net/unix/Makefile
+++ b/net/unix/Makefile
@@ -10,3 +10,5 @@ unix-$(CONFIG_SYSCTL) += sysctl_net_unix.o
obj-$(CONFIG_UNIX_DIAG) += unix_diag.o
unix_diag-y := diag.o
+
+obj-$(CONFIG_UNIX_SCM) += scm.o
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 2020306468af..0632b494d329 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -119,6 +119,8 @@
#include <linux/freezer.h>
#include <linux/file.h>
+#include "scm.h"
+
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
DEFINE_SPINLOCK(unix_table_lock);
@@ -443,7 +445,7 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
* -ECONNREFUSED. Otherwise, if we haven't queued any skbs
* to other and its full, we will hang waiting for POLLOUT.
*/
- if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
+ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
return 1;
if (connected)
@@ -534,23 +536,25 @@ static void unix_release_sock(struct sock *sk, int embrion)
/* Clear state */
unix_state_lock(sk);
sock_orphan(sk);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
path = u->path;
u->path.dentry = NULL;
u->path.mnt = NULL;
state = sk->sk_state;
sk->sk_state = TCP_CLOSE;
+
+ skpair = unix_peer(sk);
+ unix_peer(sk) = NULL;
+
unix_state_unlock(sk);
wake_up_interruptible_all(&u->peer_wait);
- skpair = unix_peer(sk);
-
if (skpair != NULL) {
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
unix_state_lock(skpair);
/* No more writes */
- skpair->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
skpair->sk_err = ECONNRESET;
unix_state_unlock(skpair);
@@ -560,7 +564,6 @@ static void unix_release_sock(struct sock *sk, int embrion)
unix_dgram_peer_wake_disconnect(sk, skpair);
sock_put(skpair); /* It may now die */
- unix_peer(sk) = NULL;
}
/* Try to flush out this socket. Throw out buffers at least */
@@ -591,26 +594,48 @@ static void unix_release_sock(struct sock *sk, int embrion)
* What the above comment does talk about? --ANK(980817)
*/
- if (unix_tot_inflight)
+ if (READ_ONCE(unix_tot_inflight))
unix_gc(); /* Garbage collect fds */
}
static void init_peercred(struct sock *sk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ spin_lock(&sk->sk_peer_lock);
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(task_tgid(current));
sk->sk_peer_cred = get_current_cred();
+ spin_unlock(&sk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
- put_pid(sk->sk_peer_pid);
- if (sk->sk_peer_cred)
- put_cred(sk->sk_peer_cred);
+ const struct cred *old_cred;
+ struct pid *old_pid;
+
+ if (sk < peersk) {
+ spin_lock(&sk->sk_peer_lock);
+ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ } else {
+ spin_lock(&peersk->sk_peer_lock);
+ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING);
+ }
+ old_pid = sk->sk_peer_pid;
+ old_cred = sk->sk_peer_cred;
sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
+
+ spin_unlock(&sk->sk_peer_lock);
+ spin_unlock(&peersk->sk_peer_lock);
+
+ put_pid(old_pid);
+ put_cred(old_cred);
}
static int unix_listen(struct socket *sock, int backlog)
@@ -681,7 +706,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
if (mutex_lock_interruptible(&u->iolock))
return -EINTR;
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
mutex_unlock(&u->iolock);
return 0;
@@ -1207,7 +1232,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
sched = !sock_flag(other, SOCK_DEAD) &&
!(other->sk_shutdown & RCV_SHUTDOWN) &&
- unix_recvq_full(other);
+ unix_recvq_full_lockless(other);
unix_state_unlock(other);
@@ -1514,65 +1539,51 @@ out:
return err;
}
-static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- scm->fp = UNIXCB(skb).fp;
- UNIXCB(skb).fp = NULL;
-
- for (i = scm->fp->count-1; i >= 0; i--)
- unix_notinflight(scm->fp->user, scm->fp->fp[i]);
-}
-
-static void unix_destruct_scm(struct sk_buff *skb)
-{
- struct scm_cookie scm;
- memset(&scm, 0, sizeof(scm));
- scm.pid = UNIXCB(skb).pid;
- if (UNIXCB(skb).fp)
- unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
- scm_destroy(&scm);
- sock_wfree(skb);
-}
-
-/*
- * The "user->unix_inflight" variable is protected by the garbage
- * collection lock, and we just read it locklessly here. If you go
- * over the limit, there might be a tiny race in actually noticing
- * it across threads. Tough.
- */
-static inline bool too_many_unix_fds(struct task_struct *p)
+static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb)
{
- struct user_struct *user = current_user();
-
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
- return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
- return false;
-}
-
-static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
-{
- int i;
-
- if (too_many_unix_fds(current))
- return -ETOOMANYREFS;
+ scm->fp = scm_fp_dup(UNIXCB(skb).fp);
/*
- * Need to duplicate file references for the sake of garbage
- * collection. Otherwise a socket in the fps might become a
- * candidate for GC while the skb is not yet queued.
+ * Garbage collection of unix sockets starts by selecting a set of
+ * candidate sockets which have reference only from being in flight
+ * (total_refs == inflight_refs). This condition is checked once during
+ * the candidate collection phase, and candidates are marked as such, so
+ * that non-candidates can later be ignored. While inflight_refs is
+ * protected by unix_gc_lock, total_refs (file count) is not, hence this
+ * is an instantaneous decision.
+ *
+ * Once a candidate, however, the socket must not be reinstalled into a
+ * file descriptor while the garbage collection is in progress.
+ *
+ * If the above conditions are met, then the directed graph of
+ * candidates (*) does not change while unix_gc_lock is held.
+ *
+ * Any operations that changes the file count through file descriptors
+ * (dup, close, sendmsg) does not change the graph since candidates are
+ * not installed in fds.
+ *
+ * Dequeing a candidate via recvmsg would install it into an fd, but
+ * that takes unix_gc_lock to decrement the inflight count, so it's
+ * serialized with garbage collection.
+ *
+ * MSG_PEEK is special in that it does not change the inflight count,
+ * yet does install the socket into an fd. The following lock/unlock
+ * pair is to ensure serialization with garbage collection. It must be
+ * done between incrementing the file count and installing the file into
+ * an fd.
+ *
+ * If garbage collection starts after the barrier provided by the
+ * lock/unlock, then it will see the elevated refcount and not mark this
+ * as a candidate. If a garbage collection is already in progress
+ * before the file count was incremented, then the lock/unlock pair will
+ * ensure that garbage collection is finished before progressing to
+ * installing the fd.
+ *
+ * (*) A -> B where B is on the queue of A or B is on the queue of C
+ * which is on the queue of listening socket A.
*/
- UNIXCB(skb).fp = scm_fp_dup(scm->fp);
- if (!UNIXCB(skb).fp)
- return -ENOMEM;
-
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->user, scm->fp->fp[i]);
- return 0;
+ spin_lock(&unix_gc_lock);
+ spin_unlock(&unix_gc_lock);
}
static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
@@ -1973,6 +1984,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
if (false) {
alloc_skb:
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->iolock);
newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
@@ -2012,6 +2024,7 @@ alloc_skb:
init_scm = false;
}
+ spin_lock(&other->sk_receive_queue.lock);
skb = skb_peek_tail(&other->sk_receive_queue);
if (tail && tail == skb) {
skb = newskb;
@@ -2042,14 +2055,11 @@ alloc_skb:
refcount_add(size, &sk->sk_wmem_alloc);
if (newskb) {
- err = unix_scm_to_skb(&scm, skb, false);
- if (err)
- goto err_state_unlock;
- spin_lock(&other->sk_receive_queue.lock);
+ unix_scm_to_skb(&scm, skb, false);
__skb_queue_tail(&other->sk_receive_queue, newskb);
- spin_unlock(&other->sk_receive_queue.lock);
}
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
mutex_unlock(&unix_sk(other)->iolock);
@@ -2200,7 +2210,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
sk_peek_offset_fwd(sk, size);
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
}
err = (flags & MSG_TRUNC) ? skb->len - skip : size;
@@ -2441,7 +2451,7 @@ unlock:
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
- scm.fp = scm_fp_dup(UNIXCB(skb).fp);
+ unix_peek_fds(&scm, skb);
sk_peek_offset_fwd(sk, chunk);
@@ -2540,7 +2550,7 @@ static int unix_shutdown(struct socket *sock, int mode)
++mode;
unix_state_lock(sk);
- sk->sk_shutdown |= mode;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
other = unix_peer(sk);
if (other)
sock_hold(other);
@@ -2557,7 +2567,7 @@ static int unix_shutdown(struct socket *sock, int mode)
if (mode&SEND_SHUTDOWN)
peer_mode |= RCV_SHUTDOWN;
unix_state_lock(other);
- other->sk_shutdown |= peer_mode;
+ WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
unix_state_unlock(other);
other->sk_state_change(other);
if (peer_mode == SHUTDOWN_MASK)
@@ -2676,16 +2686,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
{
struct sock *sk = sock->sk;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (sk->sk_err)
mask |= EPOLLERR;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
@@ -2713,18 +2725,20 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk, *other;
unsigned int writable;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
/* readable? */
@@ -2750,7 +2764,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
other = unix_peer(sk);
if (other && unix_peer(other) != sk &&
- unix_recvq_full(other) &&
+ unix_recvq_full_lockless(other) &&
unix_dgram_peer_wake_me(sk, other))
writable = 0;
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index c36757e72844..4d283e26d816 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -86,77 +86,13 @@
#include <net/scm.h>
#include <net/tcp_states.h>
+#include "scm.h"
+
/* Internal data structures and random procedures: */
-static LIST_HEAD(gc_inflight_list);
static LIST_HEAD(gc_candidates);
-static DEFINE_SPINLOCK(unix_gc_lock);
static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait);
-unsigned int unix_tot_inflight;
-
-struct sock *unix_get_socket(struct file *filp)
-{
- struct sock *u_sock = NULL;
- struct inode *inode = file_inode(filp);
-
- /* Socket ? */
- if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
- struct socket *sock = SOCKET_I(inode);
- struct sock *s = sock->sk;
-
- /* PF_UNIX ? */
- if (s && sock->ops && sock->ops->family == PF_UNIX)
- u_sock = s;
- }
- return u_sock;
-}
-
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-
-void unix_inflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- if (atomic_long_inc_return(&u->inflight) == 1) {
- BUG_ON(!list_empty(&u->link));
- list_add_tail(&u->link, &gc_inflight_list);
- } else {
- BUG_ON(list_empty(&u->link));
- }
- unix_tot_inflight++;
- }
- user->unix_inflight++;
- spin_unlock(&unix_gc_lock);
-}
-
-void unix_notinflight(struct user_struct *user, struct file *fp)
-{
- struct sock *s = unix_get_socket(fp);
-
- spin_lock(&unix_gc_lock);
-
- if (s) {
- struct unix_sock *u = unix_sk(s);
-
- BUG_ON(!atomic_long_read(&u->inflight));
- BUG_ON(list_empty(&u->link));
-
- if (atomic_long_dec_and_test(&u->inflight))
- list_del_init(&u->link);
- unix_tot_inflight--;
- }
- user->unix_inflight--;
- spin_unlock(&unix_gc_lock);
-}
-
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
struct sk_buff_head *hitlist)
{
@@ -261,8 +197,11 @@ void wait_for_unix_gc(void)
{
/* If number of inflight sockets is insane,
* force a garbage collect right now.
+ * Paired with the WRITE_ONCE() in unix_inflight(),
+ * unix_notinflight() and gc_in_progress().
*/
- if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress)
+ if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
+ !READ_ONCE(gc_in_progress))
unix_gc();
wait_event(unix_gc_wait, gc_in_progress == false);
}
@@ -282,7 +221,9 @@ void unix_gc(void)
if (gc_in_progress)
goto out;
- gc_in_progress = true;
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, true);
+
/* First, select candidates for garbage collection. Only
* in-flight sockets are considered, and from those only ones
* which don't have any external reference.
@@ -368,7 +309,10 @@ void unix_gc(void)
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
- gc_in_progress = false;
+
+ /* Paired with READ_ONCE() in wait_for_unix_gc(). */
+ WRITE_ONCE(gc_in_progress, false);
+
wake_up(&unix_gc_wait);
out:
diff --git a/net/unix/scm.c b/net/unix/scm.c
new file mode 100644
index 000000000000..ac206bfdbbe3
--- /dev/null
+++ b/net/unix/scm.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/socket.h>
+#include <linux/net.h>
+#include <linux/fs.h>
+#include <net/af_unix.h>
+#include <net/scm.h>
+#include <linux/init.h>
+
+#include "scm.h"
+
+unsigned int unix_tot_inflight;
+EXPORT_SYMBOL(unix_tot_inflight);
+
+LIST_HEAD(gc_inflight_list);
+EXPORT_SYMBOL(gc_inflight_list);
+
+DEFINE_SPINLOCK(unix_gc_lock);
+EXPORT_SYMBOL(unix_gc_lock);
+
+struct sock *unix_get_socket(struct file *filp)
+{
+ struct sock *u_sock = NULL;
+ struct inode *inode = file_inode(filp);
+
+ /* Socket ? */
+ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) {
+ struct socket *sock = SOCKET_I(inode);
+ struct sock *s = sock->sk;
+
+ /* PF_UNIX ? */
+ if (s && sock->ops && sock->ops->family == PF_UNIX)
+ u_sock = s;
+ }
+ return u_sock;
+}
+EXPORT_SYMBOL(unix_get_socket);
+
+/* Keep the number of times in flight count for the file
+ * descriptor if it is for an AF_UNIX socket.
+ */
+void unix_inflight(struct user_struct *user, struct file *fp)
+{
+ struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+ if (atomic_long_inc_return(&u->inflight) == 1) {
+ BUG_ON(!list_empty(&u->link));
+ list_add_tail(&u->link, &gc_inflight_list);
+ } else {
+ BUG_ON(list_empty(&u->link));
+ }
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
+ }
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+ spin_unlock(&unix_gc_lock);
+}
+
+void unix_notinflight(struct user_struct *user, struct file *fp)
+{
+ struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
+ if (s) {
+ struct unix_sock *u = unix_sk(s);
+
+ BUG_ON(!atomic_long_read(&u->inflight));
+ BUG_ON(list_empty(&u->link));
+
+ if (atomic_long_dec_and_test(&u->inflight))
+ list_del_init(&u->link);
+ /* Paired with READ_ONCE() in wait_for_unix_gc() */
+ WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
+ }
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+ spin_unlock(&unix_gc_lock);
+}
+
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
+ /*
+ * Need to duplicate file references for the sake of garbage
+ * collection. Otherwise a socket in the fps might become a
+ * candidate for GC while the skb is not yet queued.
+ */
+ UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+ if (!UNIXCB(skb).fp)
+ return -ENOMEM;
+
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->user, scm->fp->fp[i]);
+ return 0;
+}
+EXPORT_SYMBOL(unix_attach_fds);
+
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+{
+ int i;
+
+ scm->fp = UNIXCB(skb).fp;
+ UNIXCB(skb).fp = NULL;
+
+ for (i = scm->fp->count-1; i >= 0; i--)
+ unix_notinflight(scm->fp->user, scm->fp->fp[i]);
+}
+EXPORT_SYMBOL(unix_detach_fds);
+
+void unix_destruct_scm(struct sk_buff *skb)
+{
+ struct scm_cookie scm;
+
+ memset(&scm, 0, sizeof(scm));
+ scm.pid = UNIXCB(skb).pid;
+ if (UNIXCB(skb).fp)
+ unix_detach_fds(&scm, skb);
+
+ /* Alas, it calls VFS */
+ /* So fscking what? fput() had been SMP-safe since the last Summer */
+ scm_destroy(&scm);
+ sock_wfree(skb);
+}
+EXPORT_SYMBOL(unix_destruct_scm);
diff --git a/net/unix/scm.h b/net/unix/scm.h
new file mode 100644
index 000000000000..5a255a477f16
--- /dev/null
+++ b/net/unix/scm.h
@@ -0,0 +1,10 @@
+#ifndef NET_UNIX_SCM_H
+#define NET_UNIX_SCM_H
+
+extern struct list_head gc_inflight_list;
+extern spinlock_t unix_gc_lock;
+
+int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb);
+
+#endif
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c88dc8ee3144..0dfa2dfcb4bc 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -628,8 +628,9 @@ struct sock *__vsock_create(struct net *net,
vsk->trusted = psk->trusted;
vsk->owner = get_cred(psk->owner);
vsk->connect_timeout = psk->connect_timeout;
+ security_sk_clone(parent, sk);
} else {
- vsk->trusted = capable(CAP_NET_ADMIN);
+ vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
vsk->owner = get_current_cred();
vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
}
@@ -816,10 +817,12 @@ static int vsock_shutdown(struct socket *sock, int mode)
*/
sk = sock->sk;
+
+ lock_sock(sk);
if (sock->state == SS_UNCONNECTED) {
err = -ENOTCONN;
if (sk->sk_type == SOCK_STREAM)
- return err;
+ goto out;
} else {
sock->state = SS_DISCONNECTING;
err = 0;
@@ -828,10 +831,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
/* Receive and send shutdowns are treated alike. */
mode = mode & (RCV_SHUTDOWN | SEND_SHUTDOWN);
if (mode) {
- lock_sock(sk);
sk->sk_shutdown |= mode;
sk->sk_state_change(sk);
- release_sock(sk);
if (sk->sk_type == SOCK_STREAM) {
sock_reset_flag(sk, SOCK_DONE);
@@ -839,6 +840,8 @@ static int vsock_shutdown(struct socket *sock, int mode)
}
}
+out:
+ release_sock(sk);
return err;
}
@@ -1107,7 +1110,6 @@ static void vsock_connect_timeout(struct work_struct *work)
{
struct sock *sk;
struct vsock_sock *vsk;
- int cancel = 0;
vsk = container_of(work, struct vsock_sock, connect_work.work);
sk = sk_vsock(vsk);
@@ -1116,13 +1118,12 @@ static void vsock_connect_timeout(struct work_struct *work)
if (sk->sk_state == TCP_SYN_SENT &&
(sk->sk_shutdown != SHUTDOWN_MASK)) {
sk->sk_state = TCP_CLOSE;
+ sk->sk_socket->state = SS_UNCONNECTED;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
- cancel = 1;
+ vsock_transport_cancel_pkt(vsk);
}
release_sock(sk);
- if (cancel)
- vsock_transport_cancel_pkt(vsk);
sock_put(sk);
}
@@ -1159,6 +1160,8 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
* non-blocking call.
*/
err = -EALREADY;
+ if (flags & O_NONBLOCK)
+ goto out;
break;
default:
if ((sk->sk_state == TCP_LISTEN) ||
@@ -1213,7 +1216,14 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
* timeout fires.
*/
sock_hold(sk);
- schedule_delayed_work(&vsk->connect_work, timeout);
+
+ /* If the timeout function is already scheduled,
+ * reschedule it, then ungrab the socket refcount to
+ * keep it balanced.
+ */
+ if (mod_delayed_work(system_wq, &vsk->connect_work,
+ timeout))
+ sock_put(sk);
/* Skip ahead to preserve error code set above. */
goto out_wait;
@@ -1225,11 +1235,12 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- sk->sk_state = TCP_CLOSE;
+ sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
+ vsock_remove_connected(vsk);
goto out_wait;
- } else if (timeout == 0) {
+ } else if ((sk->sk_state != TCP_ESTABLISHED) && (timeout == 0)) {
err = -ETIMEDOUT;
sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index db6ca51228d2..2bdf36845a5f 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -443,14 +443,10 @@ static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
static int hvs_shutdown(struct vsock_sock *vsk, int mode)
{
- struct sock *sk = sk_vsock(vsk);
-
if (!(mode & SEND_SHUTDOWN))
return 0;
- lock_sock(sk);
hvs_shutdown_lock_held(vsk->trans, mode);
- release_sock(sk);
return 0;
}
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index cc70d651d13e..e34979fcefd2 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -373,11 +373,14 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
- lock_sock(sk);
+ /* vmci_transport.c doesn't take sk_lock here either. At least we're
+ * under vsock_table_lock so the sock cannot disappear while we're
+ * executing.
+ */
+
sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
- release_sock(sk);
}
static void virtio_vsock_update_guest_cid(struct virtio_vsock *vsock)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 5f8a72d34d31..187d0f7253a6 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -348,7 +348,7 @@ static s64 virtio_transport_has_space(struct vsock_sock *vsk)
struct virtio_vsock_sock *vvs = vsk->trans;
s64 bytes;
- bytes = vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
+ bytes = (s64)vvs->peer_buf_alloc - (vvs->tx_cnt - vvs->peer_fwd_cnt);
if (bytes < 0)
bytes = 0;
@@ -1033,10 +1033,10 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
vsk = vsock_sk(sk);
- space_available = virtio_transport_space_update(sk, pkt);
-
lock_sock(sk);
+ space_available = virtio_transport_space_update(sk, pkt);
+
/* Update CID in case it has changed after a transport reset event */
vsk->local_addr.svm_cid = dst.svm_cid;
@@ -1079,7 +1079,7 @@ EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
{
- kfree(pkt->buf);
+ kvfree(pkt->buf);
kfree(pkt);
}
EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index c3d5ab01fba7..2a8127f245e8 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -584,8 +584,7 @@ vmci_transport_queue_pair_alloc(struct vmci_qp **qpair,
peer, flags, VMCI_NO_PRIVILEGE_FLAGS);
out:
if (err < 0) {
- pr_err("Could not attach to queue pair with %d\n",
- err);
+ pr_err_once("Could not attach to queue pair with %d\n", err);
err = vmci_transport_error_to_vsock_error(err);
}
@@ -1734,7 +1733,11 @@ static int vmci_transport_dgram_enqueue(
if (!dg)
return -ENOMEM;
- memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len);
+ err = memcpy_from_msg(VMCI_DG_PAYLOAD(dg), msg, len);
+ if (err) {
+ kfree(dg);
+ return err;
+ }
dg->dst = vmci_make_handle(remote_addr->svm_cid,
remote_addr->svm_port);
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 8158b375d170..4500ad5f2a61 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -27,7 +27,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
@$(kecho) " GEN $@"
@(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
- cat $^ ; \
+ echo | cat - $^ ; \
echo '};'; \
echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);'; \
) > $@
diff --git a/net/wireless/certs/wens.hex b/net/wireless/certs/wens.hex
new file mode 100644
index 000000000000..0d50369bede9
--- /dev/null
+++ b/net/wireless/certs/wens.hex
@@ -0,0 +1,87 @@
+/* Chen-Yu Tsai's regdb certificate */
+0x30, 0x82, 0x02, 0xa7, 0x30, 0x82, 0x01, 0x8f,
+0x02, 0x14, 0x61, 0xc0, 0x38, 0x65, 0x1a, 0xab,
+0xdc, 0xf9, 0x4b, 0xd0, 0xac, 0x7f, 0xf0, 0x6c,
+0x72, 0x48, 0xdb, 0x18, 0xc6, 0x00, 0x30, 0x0d,
+0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d,
+0x01, 0x01, 0x0b, 0x05, 0x00, 0x30, 0x0f, 0x31,
+0x0d, 0x30, 0x0b, 0x06, 0x03, 0x55, 0x04, 0x03,
+0x0c, 0x04, 0x77, 0x65, 0x6e, 0x73, 0x30, 0x20,
+0x17, 0x0d, 0x32, 0x33, 0x31, 0x32, 0x30, 0x31,
+0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a, 0x18,
+0x0f, 0x32, 0x31, 0x32, 0x33, 0x31, 0x31, 0x30,
+0x37, 0x30, 0x37, 0x34, 0x31, 0x31, 0x34, 0x5a,
+0x30, 0x0f, 0x31, 0x0d, 0x30, 0x0b, 0x06, 0x03,
+0x55, 0x04, 0x03, 0x0c, 0x04, 0x77, 0x65, 0x6e,
+0x73, 0x30, 0x82, 0x01, 0x22, 0x30, 0x0d, 0x06,
+0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x0d, 0x01,
+0x01, 0x01, 0x05, 0x00, 0x03, 0x82, 0x01, 0x0f,
+0x00, 0x30, 0x82, 0x01, 0x0a, 0x02, 0x82, 0x01,
+0x01, 0x00, 0xa9, 0x7a, 0x2c, 0x78, 0x4d, 0xa7,
+0x19, 0x2d, 0x32, 0x52, 0xa0, 0x2e, 0x6c, 0xef,
+0x88, 0x7f, 0x15, 0xc5, 0xb6, 0x69, 0x54, 0x16,
+0x43, 0x14, 0x79, 0x53, 0xb7, 0xae, 0x88, 0xfe,
+0xc0, 0xb7, 0x5d, 0x47, 0x8e, 0x1a, 0xe1, 0xef,
+0xb3, 0x90, 0x86, 0xda, 0xd3, 0x64, 0x81, 0x1f,
+0xce, 0x5d, 0x9e, 0x4b, 0x6e, 0x58, 0x02, 0x3e,
+0xb2, 0x6f, 0x5e, 0x42, 0x47, 0x41, 0xf4, 0x2c,
+0xb8, 0xa8, 0xd4, 0xaa, 0xc0, 0x0e, 0xe6, 0x48,
+0xf0, 0xa8, 0xce, 0xcb, 0x08, 0xae, 0x37, 0xaf,
+0xf6, 0x40, 0x39, 0xcb, 0x55, 0x6f, 0x5b, 0x4f,
+0x85, 0x34, 0xe6, 0x69, 0x10, 0x50, 0x72, 0x5e,
+0x4e, 0x9d, 0x4c, 0xba, 0x38, 0x36, 0x0d, 0xce,
+0x73, 0x38, 0xd7, 0x27, 0x02, 0x2a, 0x79, 0x03,
+0xe1, 0xac, 0xcf, 0xb0, 0x27, 0x85, 0x86, 0x93,
+0x17, 0xab, 0xec, 0x42, 0x77, 0x37, 0x65, 0x8a,
+0x44, 0xcb, 0xd6, 0x42, 0x93, 0x92, 0x13, 0xe3,
+0x39, 0x45, 0xc5, 0x6e, 0x00, 0x4a, 0x7f, 0xcb,
+0x42, 0x17, 0x2b, 0x25, 0x8c, 0xb8, 0x17, 0x3b,
+0x15, 0x36, 0x59, 0xde, 0x42, 0xce, 0x21, 0xe6,
+0xb6, 0xc7, 0x6e, 0x5e, 0x26, 0x1f, 0xf7, 0x8a,
+0x57, 0x9e, 0xa5, 0x96, 0x72, 0xb7, 0x02, 0x32,
+0xeb, 0x07, 0x2b, 0x73, 0xe2, 0x4f, 0x66, 0x58,
+0x9a, 0xeb, 0x0f, 0x07, 0xb6, 0xab, 0x50, 0x8b,
+0xc3, 0x8f, 0x17, 0xfa, 0x0a, 0x99, 0xc2, 0x16,
+0x25, 0xbf, 0x2d, 0x6b, 0x1a, 0xaa, 0xe6, 0x3e,
+0x5f, 0xeb, 0x6d, 0x9b, 0x5d, 0x4d, 0x42, 0x83,
+0x2d, 0x39, 0xb8, 0xc9, 0xac, 0xdb, 0x3a, 0x91,
+0x50, 0xdf, 0xbb, 0xb1, 0x76, 0x6d, 0x15, 0x73,
+0xfd, 0xc6, 0xe6, 0x6b, 0x71, 0x9e, 0x67, 0x36,
+0x22, 0x83, 0x79, 0xb1, 0xd6, 0xb8, 0x84, 0x52,
+0xaf, 0x96, 0x5b, 0xc3, 0x63, 0x02, 0x4e, 0x78,
+0x70, 0x57, 0x02, 0x03, 0x01, 0x00, 0x01, 0x30,
+0x0d, 0x06, 0x09, 0x2a, 0x86, 0x48, 0x86, 0xf7,
+0x0d, 0x01, 0x01, 0x0b, 0x05, 0x00, 0x03, 0x82,
+0x01, 0x01, 0x00, 0x24, 0x28, 0xee, 0x22, 0x74,
+0x7f, 0x7c, 0xfa, 0x6c, 0x1f, 0xb3, 0x18, 0xd1,
+0xc2, 0x3d, 0x7d, 0x29, 0x42, 0x88, 0xad, 0x82,
+0xa5, 0xb1, 0x8a, 0x05, 0xd0, 0xec, 0x5c, 0x91,
+0x20, 0xf6, 0x82, 0xfd, 0xd5, 0x67, 0x60, 0x5f,
+0x31, 0xf5, 0xbd, 0x88, 0x91, 0x70, 0xbd, 0xb8,
+0xb9, 0x8c, 0x88, 0xfe, 0x53, 0xc9, 0x54, 0x9b,
+0x43, 0xc4, 0x7a, 0x43, 0x74, 0x6b, 0xdd, 0xb0,
+0xb1, 0x3b, 0x33, 0x45, 0x46, 0x78, 0xa3, 0x1c,
+0xef, 0x54, 0x68, 0xf7, 0x85, 0x9c, 0xe4, 0x51,
+0x6f, 0x06, 0xaf, 0x81, 0xdb, 0x2a, 0x7b, 0x7b,
+0x6f, 0xa8, 0x9c, 0x67, 0xd8, 0xcb, 0xc9, 0x91,
+0x40, 0x00, 0xae, 0xd9, 0xa1, 0x9f, 0xdd, 0xa6,
+0x43, 0x0e, 0x28, 0x7b, 0xaa, 0x1b, 0xe9, 0x84,
+0xdb, 0x76, 0x64, 0x42, 0x70, 0xc9, 0xc0, 0xeb,
+0xae, 0x84, 0x11, 0x16, 0x68, 0x4e, 0x84, 0x9e,
+0x7e, 0x92, 0x36, 0xee, 0x1c, 0x3b, 0x08, 0x63,
+0xeb, 0x79, 0x84, 0x15, 0x08, 0x9d, 0xaf, 0xc8,
+0x9a, 0xc7, 0x34, 0xd3, 0x94, 0x4b, 0xd1, 0x28,
+0x97, 0xbe, 0xd1, 0x45, 0x75, 0xdc, 0x35, 0x62,
+0xac, 0x1d, 0x1f, 0xb7, 0xb7, 0x15, 0x87, 0xc8,
+0x98, 0xc0, 0x24, 0x31, 0x56, 0x8d, 0xed, 0xdb,
+0x06, 0xc6, 0x46, 0xbf, 0x4b, 0x6d, 0xa6, 0xd5,
+0xab, 0xcc, 0x60, 0xfc, 0xe5, 0x37, 0xb6, 0x53,
+0x7d, 0x58, 0x95, 0xa9, 0x56, 0xc7, 0xf7, 0xee,
+0xc3, 0xa0, 0x76, 0xf7, 0x65, 0x4d, 0x53, 0xfa,
+0xff, 0x5f, 0x76, 0x33, 0x5a, 0x08, 0xfa, 0x86,
+0x92, 0x5a, 0x13, 0xfa, 0x1a, 0xfc, 0xf2, 0x1b,
+0x8c, 0x7f, 0x42, 0x6d, 0xb7, 0x7e, 0xb7, 0xb4,
+0xf0, 0xc7, 0x83, 0xbb, 0xa2, 0x81, 0x03, 0x2d,
+0xd4, 0x2a, 0x63, 0x3f, 0xf7, 0x31, 0x2e, 0x40,
+0x33, 0x5c, 0x46, 0xbc, 0x9b, 0xc1, 0x05, 0xa5,
+0x45, 0x4e, 0xc3,
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 68660781aa51..7c66f99046ac 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -4,6 +4,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
+ * Copyright (C) 2018-2021 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -835,9 +836,6 @@ int wiphy_register(struct wiphy *wiphy)
return res;
}
- /* set up regulatory info */
- wiphy_regulatory_register(wiphy);
-
list_add_rcu(&rdev->list, &cfg80211_rdev_list);
cfg80211_rdev_list_generation++;
@@ -851,6 +849,9 @@ int wiphy_register(struct wiphy *wiphy)
cfg80211_debugfs_rdev_add(rdev);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+ /* set up regulatory info */
+ wiphy_regulatory_register(wiphy);
+
if (wiphy->regulatory_flags & REGULATORY_CUSTOM_REG) {
struct regulatory_request request;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f5d58652108d..5f177dad2fa8 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -404,6 +404,8 @@ void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev);
/* internal helpers */
bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher);
+bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev,
+ int key_idx, bool pairwise);
int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
struct key_params *params, int key_idx,
bool pairwise, const u8 *mac_addr);
diff --git a/net/wireless/debugfs.c b/net/wireless/debugfs.c
index 30fc6eb352bc..e6410487e25d 100644
--- a/net/wireless/debugfs.c
+++ b/net/wireless/debugfs.c
@@ -68,9 +68,10 @@ static ssize_t ht40allow_map_read(struct file *file,
{
struct wiphy *wiphy = file->private_data;
char *buf;
- unsigned int offset = 0, buf_size = PAGE_SIZE, i, r;
+ unsigned int offset = 0, buf_size = PAGE_SIZE, i;
enum nl80211_band band;
struct ieee80211_supported_band *sband;
+ ssize_t r;
buf = kzalloc(buf_size, GFP_KERNEL);
if (!buf)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 4e4179209982..e33c1175b158 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1950,7 +1950,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev,
* case we'll continue with more data in the next round,
* but break unconditionally so unsplit data stops here.
*/
- state->split_start++;
+ if (state->split)
+ state->split_start++;
+ else
+ state->split_start = 0;
break;
case 9:
if (rdev->wiphy.extended_capabilities &&
@@ -2882,6 +2885,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
wdev_lock(wdev);
switch (wdev->iftype) {
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
if (wdev->ssid_len &&
nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
goto nla_put_failure_locked;
@@ -3621,9 +3625,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
if (err)
return err;
- if (key.idx < 0)
- return -EINVAL;
-
if (info->attrs[NL80211_ATTR_MAC])
mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
@@ -3639,6 +3640,10 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
key.type != NL80211_KEYTYPE_GROUP)
return -EINVAL;
+ if (!cfg80211_valid_key_idx(rdev, key.idx,
+ key.type == NL80211_KEYTYPE_PAIRWISE))
+ return -EINVAL;
+
if (!rdev->ops->del_key)
return -EOPNOTSUPP;
@@ -6499,7 +6504,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb,
struct cfg80211_registered_device *rdev = info->user_ptr[0];
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
- struct mesh_config cfg;
+ struct mesh_config cfg = {};
u32 mask;
int err;
@@ -11499,7 +11504,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
struct net_device *dev = info->user_ptr[1];
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct nlattr *tb[NUM_NL80211_REKEY_DATA];
- struct cfg80211_gtk_rekey_data rekey_data;
+ struct cfg80211_gtk_rekey_data rekey_data = {};
int err;
if (!info->attrs[NL80211_ATTR_REKEY_DATA])
@@ -11787,6 +11792,9 @@ static int handle_nan_filter(struct nlattr *attr_filter,
i = 0;
nla_for_each_nested(attr, attr_filter, rem) {
filter[i].filter = nla_memdup(attr, GFP_KERNEL);
+ if (!filter[i].filter)
+ goto err;
+
filter[i].len = nla_len(attr);
i++;
}
@@ -11799,6 +11807,15 @@ static int handle_nan_filter(struct nlattr *attr_filter,
}
return 0;
+
+err:
+ i = 0;
+ nla_for_each_nested(attr, attr_filter, rem) {
+ kfree(filter[i].filter);
+ i++;
+ }
+ kfree(filter);
+ return -ENOMEM;
}
static int nl80211_nan_add_func(struct sk_buff *skb,
@@ -15502,7 +15519,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
wdev->chandef = *chandef;
wdev->preset_chandef = *chandef;
- if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ if ((wdev->iftype == NL80211_IFTYPE_STATION ||
+ wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
!WARN_ON(!wdev->current_bss))
wdev->current_bss->pub.channel = chandef->chan;
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 935aebf15010..beba41f8a178 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1050,6 +1050,8 @@ static void regdb_fw_cb(const struct firmware *fw, void *context)
static int query_regdb_file(const char *alpha2)
{
+ int err;
+
ASSERT_RTNL();
if (regdb)
@@ -1059,9 +1061,13 @@ static int query_regdb_file(const char *alpha2)
if (!alpha2)
return -ENOMEM;
- return request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
- &reg_pdev->dev, GFP_KERNEL,
- (void *)alpha2, regdb_fw_cb);
+ err = request_firmware_nowait(THIS_MODULE, true, "regulatory.db",
+ &reg_pdev->dev, GFP_KERNEL,
+ (void *)alpha2, regdb_fw_cb);
+ if (err)
+ kfree(alpha2);
+
+ return err;
}
int reg_reload_regdb(void)
@@ -3374,7 +3380,7 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
power_rule = &reg_rule->power_rule;
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
- snprintf(bw, sizeof(bw), "%d KHz, %d KHz AUTO",
+ snprintf(bw, sizeof(bw), "%d KHz, %u KHz AUTO",
freq_range->max_bandwidth_khz,
reg_get_max_bandwidth(rd, reg_rule));
else
@@ -3756,6 +3762,7 @@ void wiphy_regulatory_register(struct wiphy *wiphy)
wiphy_update_regulatory(wiphy, lr->initiator);
wiphy_all_share_dfs_chan_state(wiphy);
+ reg_process_self_managed_hints();
}
void wiphy_regulatory_deregister(struct wiphy *wiphy)
@@ -3911,8 +3918,10 @@ static int __init regulatory_init_db(void)
return -EINVAL;
err = load_builtin_regdb_keys();
- if (err)
+ if (err) {
+ platform_device_unregister(reg_pdev);
return err;
+ }
/* We always try to get an update for the static regdomain */
err = regulatory_hint_core(cfg80211_world_regdom->alpha2);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index e5d61ba837ad..d871349036a5 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1029,14 +1029,14 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev,
* be grouped with this beacon for updates ...
*/
if (!cfg80211_combine_bsses(rdev, new)) {
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
}
if (rdev->bss_entries >= bss_entries_limit &&
!cfg80211_bss_expire_oldest(rdev)) {
- kfree(new);
+ bss_ref_put(rdev, new);
goto drop;
}
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index f455b9af6815..ebc73faa8fb1 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -269,6 +269,15 @@ void cfg80211_conn_work(struct work_struct *work)
rtnl_unlock();
}
+static void cfg80211_step_auth_next(struct cfg80211_conn *conn,
+ struct cfg80211_bss *bss)
+{
+ memcpy(conn->bssid, bss->bssid, ETH_ALEN);
+ conn->params.bssid = conn->bssid;
+ conn->params.channel = bss->channel;
+ conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+}
+
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
{
@@ -286,10 +295,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
if (!bss)
return NULL;
- memcpy(wdev->conn->bssid, bss->bssid, ETH_ALEN);
- wdev->conn->params.bssid = wdev->conn->bssid;
- wdev->conn->params.channel = bss->channel;
- wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+ cfg80211_step_auth_next(wdev->conn, bss);
schedule_work(&rdev->conn_work);
return bss;
@@ -530,7 +536,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
cfg80211_sme_free(wdev);
}
- if (WARN_ON(wdev->conn))
+ if (wdev->conn)
return -EINPROGRESS;
wdev->conn = kzalloc(sizeof(*wdev->conn), GFP_KERNEL);
@@ -568,7 +574,12 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
wdev->conn->params.ssid_len = wdev->ssid_len;
/* see if we have the bss already */
- bss = cfg80211_get_conn_bss(wdev);
+ bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel,
+ wdev->conn->params.bssid,
+ wdev->conn->params.ssid,
+ wdev->conn->params.ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY(wdev->conn->params.privacy));
if (prev_bssid) {
memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN);
@@ -579,6 +590,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
if (bss) {
enum nl80211_timeout_reason treason;
+ cfg80211_step_auth_next(wdev->conn, bss);
err = cfg80211_conn_do_work(wdev, &treason);
cfg80211_put_bss(wdev->wiphy, bss);
} else {
@@ -1207,6 +1219,13 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
} else {
if (WARN_ON(connkeys))
return -EINVAL;
+
+ /* connect can point to wdev->wext.connect which
+ * can hold key data from a previous connection
+ */
+ connect->key = NULL;
+ connect->key_len = 0;
+ connect->key_idx = 0;
}
wdev->connect_keys = connkeys;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 1a878b84cbd0..82bf1339c28e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -214,11 +214,48 @@ bool cfg80211_supported_cipher_suite(struct wiphy *wiphy, u32 cipher)
return false;
}
+static bool
+cfg80211_igtk_cipher_supported(struct cfg80211_registered_device *rdev)
+{
+ struct wiphy *wiphy = &rdev->wiphy;
+ int i;
+
+ for (i = 0; i < wiphy->n_cipher_suites; i++) {
+ switch (wiphy->cipher_suites[i]) {
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev,
+ int key_idx, bool pairwise)
+{
+ int max_key_idx;
+
+ if (pairwise)
+ max_key_idx = 3;
+ else if (cfg80211_igtk_cipher_supported(rdev))
+ max_key_idx = 5;
+ else
+ max_key_idx = 3;
+
+ if (key_idx < 0 || key_idx > max_key_idx)
+ return false;
+
+ return true;
+}
+
int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
struct key_params *params, int key_idx,
bool pairwise, const u8 *mac_addr)
{
- if (key_idx < 0 || key_idx > 5)
+ if (!cfg80211_valid_key_idx(rdev, key_idx, pairwise))
return -EINVAL;
if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
@@ -422,7 +459,7 @@ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
const u8 *addr, enum nl80211_iftype iftype,
- u8 data_offset)
+ u8 data_offset, bool is_amsdu)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct {
@@ -510,7 +547,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
skb_copy_bits(skb, hdrlen, &payload, sizeof(payload));
tmp.h_proto = payload.proto;
- if (likely((ether_addr_equal(payload.hdr, rfc1042_header) &&
+ if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
tmp.h_proto != htons(ETH_P_AARP) &&
tmp.h_proto != htons(ETH_P_IPX)) ||
ether_addr_equal(payload.hdr, bridge_tunnel_header)))
@@ -652,6 +689,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
remaining = skb->len - offset;
if (subframe_len > remaining)
goto purge;
+ /* mitigate A-MSDU aggregation injection attacks */
+ if (ether_addr_equal(eth.h_dest, rfc1042_header))
+ goto purge;
offset += sizeof(struct ethhdr);
last = remaining <= subframe_len + padding;
@@ -910,6 +950,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
switch (otype) {
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
cfg80211_stop_ap(rdev, dev, true);
break;
case NL80211_IFTYPE_ADHOC:
@@ -925,6 +966,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_MESH_POINT:
/* mesh should be handled? */
break;
+ case NL80211_IFTYPE_OCB:
+ cfg80211_leave_ocb(rdev, dev);
+ break;
default:
break;
}
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 69102fda9ebd..a57f54bc0e1a 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -796,6 +796,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
}
}
+ /* Sanity-check to ensure we never end up _allocating_ zero
+ * bytes of data for extra.
+ */
+ if (extra_size <= 0)
+ return -EFAULT;
+
/* kzalloc() ensures NULL-termination for essid_compat. */
extra = kzalloc(extra_size, GFP_KERNEL);
if (!extra)
@@ -896,8 +902,9 @@ out:
int call_commit_handler(struct net_device *dev)
{
#ifdef CONFIG_WIRELESS_EXT
- if ((netif_running(dev)) &&
- (dev->wireless_handlers->standard[0] != NULL))
+ if (netif_running(dev) &&
+ dev->wireless_handlers &&
+ dev->wireless_handlers->standard[0])
/* Call the commit handler on the driver */
return dev->wireless_handlers->standard[0](dev, NULL,
NULL, NULL);
diff --git a/net/wireless/wext-spy.c b/net/wireless/wext-spy.c
index 33bef22e44e9..b379a0371653 100644
--- a/net/wireless/wext-spy.c
+++ b/net/wireless/wext-spy.c
@@ -120,8 +120,8 @@ int iw_handler_set_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(spydata->spy_thr_low), &(threshold->low),
- 2 * sizeof(struct iw_quality));
+ spydata->spy_thr_low = threshold->low;
+ spydata->spy_thr_high = threshold->high;
/* Clear flag */
memset(spydata->spy_thr_under, '\0', sizeof(spydata->spy_thr_under));
@@ -147,8 +147,8 @@ int iw_handler_get_thrspy(struct net_device * dev,
return -EOPNOTSUPP;
/* Just do it */
- memcpy(&(threshold->low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold->low = spydata->spy_thr_low;
+ threshold->high = spydata->spy_thr_high;
return 0;
}
@@ -173,10 +173,10 @@ static void iw_send_thrspy_event(struct net_device * dev,
memcpy(threshold.addr.sa_data, address, ETH_ALEN);
threshold.addr.sa_family = ARPHRD_ETHER;
/* Copy stats */
- memcpy(&(threshold.qual), wstats, sizeof(struct iw_quality));
+ threshold.qual = *wstats;
/* Copy also thresholds */
- memcpy(&(threshold.low), &(spydata->spy_thr_low),
- 2 * sizeof(struct iw_quality));
+ threshold.low = spydata->spy_thr_low;
+ threshold.high = spydata->spy_thr_high;
/* Send event to user space */
wireless_send_event(dev, SIOCGIWTHRSPY, &wrqu, (char *) &threshold);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index bd1cbbfe5924..9d0328bb30ca 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -497,6 +497,12 @@ static int x25_listen(struct socket *sock, int backlog)
int rc = -EOPNOTSUPP;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ rc = -EINVAL;
+ release_sock(sk);
+ return rc;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
@@ -551,7 +557,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
if (protocol)
goto out;
- rc = -ENOBUFS;
+ rc = -ENOMEM;
if ((sk = x25_alloc_socket(net, kern)) == NULL)
goto out;
@@ -680,7 +686,8 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
int len, i, rc = 0;
if (addr_len != sizeof(struct sockaddr_x25) ||
- addr->sx25_family != AF_X25) {
+ addr->sx25_family != AF_X25 ||
+ strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN) {
rc = -EINVAL;
goto out;
}
@@ -774,7 +781,8 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
rc = -EINVAL;
if (addr_len != sizeof(struct sockaddr_x25) ||
- addr->sx25_family != AF_X25)
+ addr->sx25_family != AF_X25 ||
+ strnlen(addr->sx25_addr.x25_addr, X25_ADDR_LEN) == X25_ADDR_LEN)
goto out;
rc = -ENETUNREACH;
@@ -824,7 +832,7 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTED;
rc = 0;
out_put_neigh:
- if (rc) {
+ if (rc && x25->neighbour) {
read_lock_bh(&x25_list_lock);
x25_neigh_put(x25->neighbour);
x25->neighbour = NULL;
@@ -1049,6 +1057,7 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
makex25->lci = lci;
makex25->dest_addr = dest_addr;
makex25->source_addr = source_addr;
+ x25_neigh_hold(nb);
makex25->neighbour = nb;
makex25->facilities = facilities;
makex25->dte_facilities= dte_facilities;
@@ -1794,10 +1803,15 @@ void x25_kill_by_neigh(struct x25_neigh *nb)
write_lock_bh(&x25_list_lock);
- sk_for_each(s, &x25_list)
- if (x25_sk(s)->neighbour == nb)
+ sk_for_each(s, &x25_list) {
+ if (x25_sk(s)->neighbour == nb) {
+ write_unlock_bh(&x25_list_lock);
+ lock_sock(s);
x25_disconnect(s, ENETUNREACH, 0, 0);
-
+ release_sock(s);
+ write_lock_bh(&x25_list_lock);
+ }
+ }
write_unlock_bh(&x25_list_lock);
/* Remove any related forwards */
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 30f71620d4e3..24f2676e3b66 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -122,7 +122,7 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
if (!pskb_may_pull(skb, 1)) {
x25_neigh_put(nb);
- return 0;
+ goto drop;
}
switch (skb->data[0]) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9ff2ab63e639..6bb0649c028c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -289,17 +289,17 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return (xs->zc) ? xsk_zc_xmit(sk) : xsk_generic_xmit(sk, m, total_len);
}
-static unsigned int xsk_poll(struct file *file, struct socket *sock,
+static __poll_t xsk_poll(struct file *file, struct socket *sock,
struct poll_table_struct *wait)
{
- unsigned int mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
if (xs->rx && !xskq_empty_desc(xs->rx))
- mask |= POLLIN | POLLRDNORM;
+ mask |= EPOLLIN | EPOLLRDNORM;
if (xs->tx && !xskq_full_desc(xs->tx))
- mask |= POLLOUT | POLLWRNORM;
+ mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index fe96c0d039f2..cf7cbb5dd918 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -245,12 +245,15 @@ static inline void xskq_produce_flush_desc(struct xsk_queue *q)
static inline bool xskq_full_desc(struct xsk_queue *q)
{
- return xskq_nb_avail(q, q->nentries) == q->nentries;
+ /* No barriers needed since data is not accessed */
+ return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
+ q->nentries;
}
static inline bool xskq_empty_desc(struct xsk_queue *q)
{
- return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
+ /* No barriers needed since data is not accessed */
+ return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
}
void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props);
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index fbc4552d17b8..6e5e307f985e 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -3,6 +3,8 @@
# Makefile for the XFRM subsystem.
#
+xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o
+
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
xfrm_input.o xfrm_output.o \
xfrm_sysctl.o xfrm_replay.o xfrm_device.o
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index e7a0ce98479f..8a9f02997067 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -153,6 +153,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
if (x->encap || x->tfcpad)
return -EINVAL;
+ if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND))
+ return -EINVAL;
+
dev = dev_get_by_index(net, xuo->ifindex);
if (!dev) {
if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) {
@@ -190,7 +193,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
xso->dev = dev;
xso->num_exthdrs = 1;
- xso->flags = xuo->flags;
+ /* Don't forward bit that is not implemented */
+ xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6;
err = dev->xfrmdev_ops->xdo_dev_state_add(x);
if (err) {
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 0ee13d12782f..fcba8a139f61 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -420,7 +420,7 @@ resume:
/* only the first xfrm gets the encap type */
encap_type = 0;
- if (async && x->repl->recheck(x, skb, seq)) {
+ if (x->repl->recheck(x, skb, seq)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATESEQERROR);
goto drop_unlock;
}
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface_core.c
index 6cc9f6e2dd2b..10fa26103bdf 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -219,8 +219,8 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
skb->dev = dev;
if (err) {
- dev->stats.rx_errors++;
- dev->stats.rx_dropped++;
+ DEV_STATS_INC(dev, rx_errors);
+ DEV_STATS_INC(dev, rx_dropped);
return 0;
}
@@ -260,7 +260,6 @@ static int
xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
{
struct xfrm_if *xi = netdev_priv(dev);
- struct net_device_stats *stats = &xi->dev->stats;
struct dst_entry *dst = skb_dst(skb);
unsigned int length = skb->len;
struct net_device *tdev;
@@ -286,7 +285,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
tdev = dst->dev;
if (tdev == dev) {
- stats->collisions++;
+ DEV_STATS_INC(dev, collisions);
net_warn_ratelimited("%s: Local routing loop detected!\n",
dev->name);
goto tx_err_dst_release;
@@ -300,16 +299,22 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ if (skb->len > 1280)
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ else
+ goto xmit;
} else {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
- htonl(mtu));
+ if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
+ goto xmit;
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ htonl(mtu));
}
dst_release(dst);
return -EMSGSIZE;
}
+xmit:
xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev)));
skb_dst_set(skb, dst);
skb->dev = tdev;
@@ -323,13 +328,13 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
tstats->tx_packets++;
u64_stats_update_end(&tstats->syncp);
} else {
- stats->tx_errors++;
- stats->tx_aborted_errors++;
+ DEV_STATS_INC(dev, tx_errors);
+ DEV_STATS_INC(dev, tx_aborted_errors);
}
return 0;
tx_err_link_failure:
- stats->tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
dst_link_failure(skb);
tx_err_dst_release:
dst_release(dst);
@@ -339,7 +344,6 @@ tx_err_dst_release:
static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xfrm_if *xi = netdev_priv(dev);
- struct net_device_stats *stats = &xi->dev->stats;
struct dst_entry *dst = skb_dst(skb);
struct flowi fl;
int ret;
@@ -348,23 +352,23 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
if (!dst) {
fl.u.ip6.flowi6_oif = dev->ifindex;
fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6);
if (dst->error) {
dst_release(dst);
- stats->tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
goto tx_err;
}
skb_dst_set(skb, dst);
}
break;
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
if (!dst) {
struct rtable *rt;
@@ -372,7 +376,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC;
rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4);
if (IS_ERR(rt)) {
- stats->tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
goto tx_err;
}
skb_dst_set(skb, &rt->dst);
@@ -391,8 +395,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
tx_err:
- stats->tx_errors++;
- stats->tx_dropped++;
+ DEV_STATS_INC(dev, tx_errors);
+ DEV_STATS_INC(dev, tx_dropped);
kfree_skb(skb);
return NETDEV_TX_OK;
}
@@ -659,11 +663,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct net *net = dev_net(dev);
- struct xfrm_if_parms p;
+ struct xfrm_if_parms p = {};
struct xfrm_if *xi;
int err;
xfrmi_netlink_parms(data, &p);
+ if (!p.if_id) {
+ NL_SET_ERR_MSG(extack, "if_id must be non zero");
+ return -EINVAL;
+ }
+
xi = xfrmi_locate(net, &p);
if (xi)
return -EEXIST;
@@ -688,9 +697,14 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct xfrm_if *xi = netdev_priv(dev);
struct net *net = xi->net;
- struct xfrm_if_parms p;
+ struct xfrm_if_parms p = {};
xfrmi_netlink_parms(data, &p);
+ if (!p.if_id) {
+ NL_SET_ERR_MSG(extack, "if_id must be non zero");
+ return -EINVAL;
+ }
+
xi = xfrmi_locate(net, &p);
if (!xi) {
xi = netdev_priv(dev);
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index a00ec715aa46..32aed1d0f6ee 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -216,6 +216,7 @@ static void ipcomp_free_scratches(void)
vfree(*per_cpu_ptr(scratches, i));
free_percpu(scratches);
+ ipcomp_scratches = NULL;
}
static void * __percpu *ipcomp_alloc_scratches(void)
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 939f3adf075a..c8a7a5739425 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -654,8 +654,6 @@ EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
* of an absolute inpredictability of ordering of rules. This will not pass. */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
- static u32 idx_generator;
-
for (;;) {
struct hlist_head *list;
struct xfrm_policy *p;
@@ -663,8 +661,8 @@ static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
int found;
if (!index) {
- idx = (idx_generator | dir);
- idx_generator += 8;
+ idx = (net->xfrm.idx_generator | dir);
+ net->xfrm.idx_generator += 8;
} else {
idx = index;
index = 0;
@@ -727,14 +725,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old,
spin_unlock_bh(&pq->hold_queue.lock);
}
-static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
- struct xfrm_policy *pol)
+static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark,
+ struct xfrm_policy *pol)
{
- if (policy->mark.v == pol->mark.v &&
- policy->priority == pol->priority)
- return true;
-
- return false;
+ return mark->v == pol->mark.v && mark->m == pol->mark.m;
}
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
@@ -753,7 +747,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
if (pol->type == policy->type &&
pol->if_id == policy->if_id &&
!selector_cmp(&pol->selector, &policy->selector) &&
- xfrm_policy_mark_match(policy, pol) &&
+ xfrm_policy_mark_match(&policy->mark, pol) &&
xfrm_sec_ctx_match(pol->security, policy->security) &&
!WARN_ON(delpol)) {
if (excl) {
@@ -803,11 +797,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
}
EXPORT_SYMBOL(xfrm_policy_insert);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir,
- struct xfrm_selector *sel,
- struct xfrm_sec_ctx *ctx, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, struct xfrm_selector *sel,
+ struct xfrm_sec_ctx *ctx, int delete, int *err)
{
struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
@@ -819,7 +812,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
hlist_for_each_entry(pol, chain, bydst) {
if (pol->type == type &&
pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v &&
+ xfrm_policy_mark_match(mark, pol) &&
!selector_cmp(sel, &pol->selector) &&
xfrm_sec_ctx_match(ctx, pol->security)) {
xfrm_pol_hold(pol);
@@ -844,9 +837,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir, u32 id, int delete,
- int *err)
+struct xfrm_policy *
+xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete, int *err)
{
struct xfrm_policy *pol, *ret;
struct hlist_head *chain;
@@ -861,8 +854,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id,
ret = NULL;
hlist_for_each_entry(pol, chain, byidx) {
if (pol->type == type && pol->index == id &&
- pol->if_id == if_id &&
- (mark & pol->mark.m) == pol->mark.v) {
+ pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) {
xfrm_pol_hold(pol);
if (delete) {
*err = security_xfrm_policy_delete(
@@ -1703,8 +1695,10 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
@@ -1719,6 +1713,7 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family,
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
@@ -2101,8 +2096,8 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
xflo.flags = flags;
/* To accelerate a bit... */
- if ((dst_orig->flags & DST_NOXFRM) ||
- !net->xfrm.policy_count[XFRM_POLICY_OUT])
+ if (!if_id && ((dst_orig->flags & DST_NOXFRM) ||
+ !net->xfrm.policy_count[XFRM_POLICY_OUT]))
goto nopol;
xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id);
@@ -2243,7 +2238,7 @@ xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
static inline int
xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
- unsigned short family)
+ unsigned short family, u32 if_id)
{
if (xfrm_state_kern(x))
return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
@@ -2254,7 +2249,8 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
(tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
!(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
!(x->props.mode != XFRM_MODE_TRANSPORT &&
- xfrm_state_addr_cmp(tmpl, x, family));
+ xfrm_state_addr_cmp(tmpl, x, family)) &&
+ (if_id == 0 || if_id == x->if_id);
}
/*
@@ -2266,7 +2262,7 @@ xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
*/
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
- unsigned short family)
+ unsigned short family, u32 if_id)
{
int idx = start;
@@ -2276,7 +2272,7 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
} else
start = -1;
for (; idx < sp->len; idx++) {
- if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
+ if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id))
return ++idx;
if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
if (start == -1)
@@ -2406,6 +2402,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
if (pols[1]) {
if (IS_ERR(pols[1])) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+ xfrm_pol_put(pols[0]);
return 0;
}
pols[1]->curlft.use_time = ktime_get_real_seconds();
@@ -2452,7 +2449,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
* are implied between each two transformations.
*/
for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
- k = xfrm_policy_ok(tpp[i], sp, k, family);
+ k = xfrm_policy_ok(tpp[i], sp, k, family, if_id);
if (k < 0) {
if (k < -1)
/* "-2 - errored_index" returned */
@@ -3050,7 +3047,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
}
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
- u8 dir, u8 type, struct net *net)
+ u8 dir, u8 type, struct net *net, u32 if_id)
{
struct xfrm_policy *pol, *ret = NULL;
struct hlist_head *chain;
@@ -3059,7 +3056,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
spin_lock_bh(&net->xfrm.xfrm_policy_lock);
chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
hlist_for_each_entry(pol, chain, bydst) {
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
priority = ret->priority;
@@ -3071,7 +3069,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *
if ((pol->priority >= priority) && ret)
break;
- if (xfrm_migrate_selector_match(sel, &pol->selector) &&
+ if ((if_id == 0 || pol->if_id == if_id) &&
+ xfrm_migrate_selector_match(sel, &pol->selector) &&
pol->type == type) {
ret = pol;
break;
@@ -3187,7 +3186,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_migrate,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap)
+ struct xfrm_encap_tmpl *encap, u32 if_id)
{
int i, err, nx_cur = 0, nx_new = 0;
struct xfrm_policy *pol = NULL;
@@ -3206,14 +3205,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* Stage 1 - find policy */
- if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
+ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) {
err = -ENOENT;
goto out;
}
/* Stage 2 - find and update state(s) */
for (i = 0, mp = m; i < num_migrate; i++, mp++) {
- if ((x = xfrm_migrate_state_find(mp, net))) {
+ if ((x = xfrm_migrate_state_find(mp, net, if_id))) {
x_cur[nx_cur] = x;
nx_cur++;
xc = xfrm_state_migrate(x, mp, encap);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a649d7c2f48c..cf147e1837a9 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -41,7 +41,6 @@ static void xfrm_state_gc_task(struct work_struct *work);
*/
static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
static struct kmem_cache *xfrm_state_cache __ro_after_init;
static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
@@ -137,7 +136,7 @@ static void xfrm_hash_resize(struct work_struct *work)
}
spin_lock_bh(&net->xfrm.xfrm_state_lock);
- write_seqcount_begin(&xfrm_state_hash_generation);
+ write_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
@@ -153,7 +152,7 @@ static void xfrm_hash_resize(struct work_struct *work)
rcu_assign_pointer(net->xfrm.state_byspi, nspi);
net->xfrm.state_hmask = nhashmask;
- write_seqcount_end(&xfrm_state_hash_generation);
+ write_seqcount_end(&net->xfrm.xfrm_state_hash_generation);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
osize = (ohashmask + 1) * sizeof(struct hlist_head);
@@ -965,7 +964,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
to_put = NULL;
- sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+ sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
@@ -1076,7 +1075,7 @@ out:
if (to_put)
xfrm_state_put(to_put);
- if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+ if (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)) {
*err = -EAGAIN;
if (x) {
xfrm_state_put(x);
@@ -1443,9 +1442,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
memcpy(&x->mark, &orig->mark, sizeof(x->mark));
memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark));
- if (xfrm_init_state(x) < 0)
- goto error;
-
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
@@ -1467,7 +1463,8 @@ out:
return NULL;
}
-struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
+struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
+ u32 if_id)
{
unsigned int h;
struct xfrm_state *x = NULL;
@@ -1483,6 +1480,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
continue;
if (m->reqid && x->props.reqid != m->reqid)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1498,6 +1497,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
if (x->props.mode != m->mode ||
x->id.proto != m->proto)
continue;
+ if (if_id != 0 && x->if_id != if_id)
+ continue;
if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
m->old_family) ||
!xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
@@ -1524,6 +1525,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
if (!xc)
return NULL;
+ xc->props.family = m->new_family;
+
+ if (xfrm_init_state(xc) < 0)
+ goto error;
+
memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
@@ -1825,6 +1831,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
int err = -ENOENT;
__be32 minspi = htonl(low);
__be32 maxspi = htonl(high);
+ __be32 newspi = 0;
u32 mark = x->mark.v & x->mark.m;
spin_lock_bh(&x->lock);
@@ -1843,21 +1850,22 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
xfrm_state_put(x0);
goto unlock;
}
- x->id.spi = minspi;
+ newspi = minspi;
} else {
u32 spi = 0;
for (h = 0; h < high-low+1; h++) {
spi = low + prandom_u32()%(high-low+1);
x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
if (x0 == NULL) {
- x->id.spi = htonl(spi);
+ newspi = htonl(spi);
break;
}
xfrm_state_put(x0);
}
}
- if (x->id.spi) {
+ if (newspi) {
spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ x->id.spi = newspi;
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
@@ -2404,6 +2412,7 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_num = 0;
INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
spin_lock_init(&net->xfrm.xfrm_state_lock);
+ seqcount_init(&net->xfrm.xfrm_state_hash_generation);
return 0;
out_byspi:
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0b80c7907715..699e544b4bfd 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -521,7 +521,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
struct nlattr *et = attrs[XFRMA_ETIMER_THRESH];
struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
- if (re) {
+ if (re && x->replay_esn && x->preplay_esn) {
struct xfrm_replay_state_esn *replay_esn;
replay_esn = nla_data(re);
memcpy(x->replay_esn, replay_esn,
@@ -579,6 +579,20 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
copy_from_user_state(x, p);
+ if (attrs[XFRMA_ENCAP]) {
+ x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
+ sizeof(*x->encap), GFP_KERNEL);
+ if (x->encap == NULL)
+ goto error;
+ }
+
+ if (attrs[XFRMA_COADDR]) {
+ x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
+ sizeof(*x->coaddr), GFP_KERNEL);
+ if (x->coaddr == NULL)
+ goto error;
+ }
+
if (attrs[XFRMA_SA_EXTRA_FLAGS])
x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]);
@@ -599,23 +613,9 @@ static struct xfrm_state *xfrm_state_construct(struct net *net,
attrs[XFRMA_ALG_COMP])))
goto error;
- if (attrs[XFRMA_ENCAP]) {
- x->encap = kmemdup(nla_data(attrs[XFRMA_ENCAP]),
- sizeof(*x->encap), GFP_KERNEL);
- if (x->encap == NULL)
- goto error;
- }
-
if (attrs[XFRMA_TFCPAD])
x->tfcpad = nla_get_u32(attrs[XFRMA_TFCPAD]);
- if (attrs[XFRMA_COADDR]) {
- x->coaddr = kmemdup(nla_data(attrs[XFRMA_COADDR]),
- sizeof(*x->coaddr), GFP_KERNEL);
- if (x->coaddr == NULL)
- goto error;
- }
-
xfrm_mark_get(attrs, &x->mark);
xfrm_smark_init(attrs, &x->props.smark);
@@ -1036,6 +1036,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return -ENOMEM;
+
+ /* see addr_match(), (prefix length >> 5) << 2
+ * will be used to compare xfrm_address_t
+ */
+ if (filter->splen > (sizeof(xfrm_address_t) << 3) ||
+ filter->dplen > (sizeof(xfrm_address_t) << 3)) {
+ kfree(filter);
+ return -EINVAL;
+ }
}
if (attrs[XFRMA_PROTO])
@@ -1862,7 +1871,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
struct km_event c;
int delete;
struct xfrm_mark m;
- u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
p = nlmsg_data(nlh);
@@ -1879,8 +1887,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ xfrm_mark_get(attrs, &m);
+
if (p->index)
- xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err);
+ xp = xfrm_policy_byid(net, &m, if_id, type, p->dir,
+ p->index, delete, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@@ -1897,8 +1908,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
- xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel,
- ctx, delete, &err);
+ xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
+ &p->sel, ctx, delete, &err);
security_xfrm_policy_free(ctx);
}
if (xp == NULL)
@@ -2165,7 +2176,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
u8 type = XFRM_POLICY_TYPE_MAIN;
int err = -ENOENT;
struct xfrm_mark m;
- u32 mark = xfrm_mark_get(attrs, &m);
u32 if_id = 0;
err = copy_from_user_policy_type(&type, attrs);
@@ -2179,8 +2189,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (attrs[XFRMA_IF_ID])
if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+ xfrm_mark_get(attrs, &m);
+
if (p->index)
- xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err);
+ xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index,
+ 0, &err);
else {
struct nlattr *rt = attrs[XFRMA_SEC_CTX];
struct xfrm_sec_ctx *ctx;
@@ -2197,7 +2210,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err)
return err;
}
- xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir,
+ xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir,
&p->sel, ctx, 0, &err);
security_xfrm_policy_free(ctx);
}
@@ -2369,6 +2382,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
int n = 0;
struct net *net = sock_net(skb->sk);
struct xfrm_encap_tmpl *encap = NULL;
+ u32 if_id = 0;
if (attrs[XFRMA_MIGRATE] == NULL)
return -EINVAL;
@@ -2393,7 +2407,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh,
return 0;
}
- err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap);
+ if (attrs[XFRMA_IF_ID])
+ if_id = nla_get_u32(attrs[XFRMA_IF_ID]);
+
+ err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id);
kfree(encap);
@@ -2565,7 +2582,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
[XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
[XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
- [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
+ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) },
[XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
[XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
[XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
@@ -2813,7 +2830,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x)
if (x->props.extra_flags)
l += nla_total_size(sizeof(x->props.extra_flags));
if (x->xso.dev)
- l += nla_total_size(sizeof(x->xso));
+ l += nla_total_size(sizeof(struct xfrm_user_offload));
if (x->props.smark.v | x->props.smark.m) {
l += nla_total_size(sizeof(x->props.smark.v));
l += nla_total_size(sizeof(x->props.smark.m));