aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/core.c1
-rw-r--r--net/8021q/vlan_dev.c15
-rw-r--r--net/8021q/vlan_netlink.c4
-rw-r--r--net/Kconfig2
-rw-r--r--net/Makefile1
-rw-r--r--net/appletalk/ddp.c16
-rw-r--r--net/atm/common.c1
-rw-r--r--net/atm/lec.c1
-rw-r--r--net/atm/mpc.c1
-rw-r--r--net/batman-adv/Makefile1
-rw-r--r--net/batman-adv/bridge_loop_avoidance.c2
-rw-r--r--net/batman-adv/fragmentation.c8
-rw-r--r--net/batman-adv/gateway_client.c2
-rw-r--r--net/batman-adv/main.c5
-rw-r--r--net/batman-adv/main.h2
-rw-r--r--net/batman-adv/multicast.c132
-rw-r--r--net/batman-adv/multicast.h30
-rw-r--r--net/batman-adv/multicast_forw.c1178
-rw-r--r--net/batman-adv/netlink.c2
-rw-r--r--net/batman-adv/originator.c28
-rw-r--r--net/batman-adv/originator.h3
-rw-r--r--net/batman-adv/routing.c70
-rw-r--r--net/batman-adv/routing.h11
-rw-r--r--net/batman-adv/soft-interface.c18
-rw-r--r--net/batman-adv/types.h70
-rw-r--r--net/bluetooth/hci_conn.c51
-rw-r--r--net/bluetooth/hci_core.c7
-rw-r--r--net/bluetooth/hci_debugfs.c12
-rw-r--r--net/bluetooth/hci_event.c24
-rw-r--r--net/bluetooth/hci_sync.c113
-rw-r--r--net/bluetooth/iso.c197
-rw-r--r--net/bluetooth/l2cap_core.c11
-rw-r--r--net/bluetooth/lib.c69
-rw-r--r--net/bluetooth/mgmt.c21
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bpf/bpf_dummy_struct_ops.c38
-rw-r--r--net/bpf/test_run.c17
-rw-r--r--net/bpfilter/.gitignore2
-rw-r--r--net/bpfilter/Kconfig23
-rw-r--r--net/bpfilter/Makefile20
-rw-r--r--net/bpfilter/bpfilter_kern.c136
-rw-r--r--net/bpfilter/bpfilter_umh_blob.S7
-rw-r--r--net/bpfilter/main.c64
-rw-r--r--net/bpfilter/msgfmt.h17
-rw-r--r--net/bridge/br_cfm_netlink.c2
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_mdb.c133
-rw-r--r--net/bridge/br_multicast.c20
-rw-r--r--net/bridge/br_netfilter_hooks.c138
-rw-r--r--net/bridge/br_netfilter_ipv6.c14
-rw-r--r--net/bridge/br_private.h14
-rw-r--r--net/bridge/br_switchdev.c84
-rw-r--r--net/bridge/netfilter/nf_conntrack_bridge.c30
-rw-r--r--net/caif/caif_dev.c1
-rw-r--r--net/caif/caif_socket.c1
-rw-r--r--net/caif/caif_usb.c1
-rw-r--r--net/caif/chnl_net.c1
-rw-r--r--net/can/j1939/j1939-priv.h3
-rw-r--r--net/can/j1939/main.c2
-rw-r--r--net/can/j1939/socket.c46
-rw-r--r--net/ceph/messenger_v1.c33
-rw-r--r--net/ceph/messenger_v2.c7
-rw-r--r--net/ceph/osd_client.c44
-rw-r--r--net/compat.c2
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/bpf_sk_storage.c3
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c155
-rw-r--r--net/core/dev.h6
-rw-r--r--net/core/dev_ioctl.c7
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/fib_rules.c4
-rw-r--r--net/core/filter.c73
-rw-r--r--net/core/link_watch.c8
-rw-r--r--net/core/net-sysfs.c17
-rw-r--r--net/core/net_namespace.c49
-rw-r--r--net/core/netdev-genl-gen.c110
-rw-r--r--net/core/netdev-genl-gen.h16
-rw-r--r--net/core/netdev-genl.c344
-rw-r--r--net/core/page_pool.c117
-rw-r--r--net/core/page_pool_priv.h12
-rw-r--r--net/core/page_pool_user.c411
-rw-r--r--net/core/pktgen.c6
-rw-r--r--net/core/request_sock.c3
-rw-r--r--net/core/rtnetlink.c128
-rw-r--r--net/core/scm.c4
-rw-r--r--net/core/skbuff.c94
-rw-r--r--net/core/skmsg.c7
-rw-r--r--net/core/sock.c42
-rw-r--r--net/core/sysctl_net_core.c15
-rw-r--r--net/core/xdp.c33
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/devlink/core.c28
-rw-r--r--net/devlink/dev.c37
-rw-r--r--net/devlink/devl_internal.h80
-rw-r--r--net/devlink/health.c13
-rw-r--r--net/devlink/linecard.c5
-rw-r--r--net/devlink/netlink.c161
-rw-r--r--net/devlink/netlink_gen.c20
-rw-r--r--net/devlink/netlink_gen.h9
-rw-r--r--net/devlink/param.c5
-rw-r--r--net/devlink/port.c12
-rw-r--r--net/devlink/rate.c5
-rw-r--r--net/devlink/region.c9
-rw-r--r--net/devlink/trap.c18
-rw-r--r--net/dns_resolver/Kconfig2
-rw-r--r--net/dns_resolver/dns_key.c2
-rw-r--r--net/dsa/tag_ar9331.c1
-rw-r--r--net/dsa/tag_brcm.c1
-rw-r--r--net/dsa/tag_dsa.c1
-rw-r--r--net/dsa/tag_gswip.c1
-rw-r--r--net/dsa/tag_hellcreek.c1
-rw-r--r--net/dsa/tag_ksz.c1
-rw-r--r--net/dsa/tag_lan9303.c1
-rw-r--r--net/dsa/tag_mtk.c1
-rw-r--r--net/dsa/tag_none.c1
-rw-r--r--net/dsa/tag_ocelot.c1
-rw-r--r--net/dsa/tag_ocelot_8021q.c1
-rw-r--r--net/dsa/tag_qca.c1
-rw-r--r--net/dsa/tag_rtl4_a.c6
-rw-r--r--net/dsa/tag_rtl8_4.c1
-rw-r--r--net/dsa/tag_rzn1_a5psw.c1
-rw-r--r--net/dsa/tag_sja1105.c1
-rw-r--r--net/dsa/tag_trailer.c1
-rw-r--r--net/dsa/tag_xrs700x.c1
-rw-r--r--net/dsa/user.c36
-rw-r--r--net/ethtool/common.c18
-rw-r--r--net/ethtool/features.c9
-rw-r--r--net/ethtool/ioctl.c198
-rw-r--r--net/ethtool/rings.c12
-rw-r--r--net/ethtool/rss.c24
-rw-r--r--net/handshake/handshake-test.c5
-rw-r--r--net/hsr/hsr_device.c71
-rw-r--r--net/hsr/hsr_forward.c4
-rw-r--r--net/hsr/hsr_main.c1
-rw-r--r--net/ieee802154/Makefile2
-rw-r--r--net/ieee802154/core.c24
-rw-r--r--net/ieee802154/nl802154.c249
-rw-r--r--net/ieee802154/pan.c109
-rw-r--r--net/ieee802154/rdev-ops.h30
-rw-r--r--net/ieee802154/trace.h38
-rw-r--r--net/ipv4/Makefile2
-rw-r--r--net/ipv4/af_inet.c14
-rw-r--r--net/ipv4/ah4.c1
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/bpf_tcp_ca.c69
-rw-r--r--net/ipv4/bpfilter/Makefile2
-rw-r--r--net/ipv4/bpfilter/sockopt.c71
-rw-r--r--net/ipv4/devinet.c21
-rw-r--r--net/ipv4/esp4.c1
-rw-r--r--net/ipv4/fib_rules.c6
-rw-r--r--net/ipv4/fib_trie.c1
-rw-r--r--net/ipv4/inet_connection_sock.c125
-rw-r--r--net/ipv4/inet_diag.c86
-rw-r--r--net/ipv4/inet_hashtables.c150
-rw-r--r--net/ipv4/inet_timewait_sock.c21
-rw-r--r--net/ipv4/ip_gre.c1
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/ip_sockglue.c57
-rw-r--r--net/ipv4/ip_tunnel.c29
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ip_vti.c1
-rw-r--r--net/ipv4/ipip.c1
-rw-r--r--net/ipv4/ipmr.c15
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c9
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/syncookies.c215
-rw-r--r--net/ipv4/sysctl_net_ipv4.c18
-rw-r--r--net/ipv4/tcp.c118
-rw-r--r--net/ipv4/tcp_ao.c16
-rw-r--r--net/ipv4/tcp_input.c29
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/tunnel4.c1
-rw-r--r--net/ipv4/udp.c43
-rw-r--r--net/ipv4/udp_tunnel_core.c1
-rw-r--r--net/ipv4/xfrm4_tunnel.c1
-rw-r--r--net/ipv6/addrconf.c28
-rw-r--r--net/ipv6/addrconf_core.c21
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/ah6.c1
-rw-r--r--net/ipv6/datagram.c6
-rw-r--r--net/ipv6/esp6.c1
-rw-r--r--net/ipv6/exthdrs.c10
-rw-r--r--net/ipv6/exthdrs_offload.c11
-rw-r--r--net/ipv6/fib6_rules.c4
-rw-r--r--net/ipv6/icmp.c8
-rw-r--r--net/ipv6/ip6_offload.c76
-rw-r--r--net/ipv6/ip6_output.c13
-rw-r--r--net/ipv6/ip6_tunnel.c47
-rw-r--r--net/ipv6/ip6_udp_tunnel.c1
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c136
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/mip6.c1
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c11
-rw-r--r--net/ipv6/ping.c8
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/seg6.c20
-rw-r--r--net/ipv6/sit.c1
-rw-r--r--net/ipv6/syncookies.c108
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/tunnel6.c1
-rw-r--r--net/ipv6/udp.c20
-rw-r--r--net/ipv6/xfrm6_tunnel.c1
-rw-r--r--net/iucv/iucv.c6
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/key/af_key.c1
-rw-r--r--net/l2tp/l2tp_ip6.c6
-rw-r--r--net/llc/af_llc.c26
-rw-r--r--net/llc/llc_core.c7
-rw-r--r--net/mac80211/Kconfig1
-rw-r--r--net/mac80211/Makefile2
-rw-r--r--net/mac80211/cfg.c20
-rw-r--r--net/mac80211/chan.c13
-rw-r--r--net/mac80211/debugfs.c1
-rw-r--r--net/mac80211/debugfs_netdev.c4
-rw-r--r--net/mac80211/debugfs_netdev.h5
-rw-r--r--net/mac80211/debugfs_sta.c2
-rw-r--r--net/mac80211/driver-ops.h22
-rw-r--r--net/mac80211/ibss.c2
-rw-r--r--net/mac80211/ieee80211_i.h36
-rw-r--r--net/mac80211/iface.c2
-rw-r--r--net/mac80211/link.c3
-rw-r--r--net/mac80211/main.c2
-rw-r--r--net/mac80211/mesh_hwmp.c2
-rw-r--r--net/mac80211/mesh_pathtbl.c8
-rw-r--r--net/mac80211/mlme.c229
-rw-r--r--net/mac80211/rate.c3
-rw-r--r--net/mac80211/rx.c21
-rw-r--r--net/mac80211/scan.c82
-rw-r--r--net/mac80211/sta_info.c15
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/tdls.c18
-rw-r--r--net/mac80211/tests/Makefile2
-rw-r--r--net/mac80211/tests/mfp.c286
-rw-r--r--net/mac80211/trace.h25
-rw-r--r--net/mac80211/tx.c21
-rw-r--r--net/mac80211/util.c16
-rw-r--r--net/mac80211/wbrf.c93
-rw-r--r--net/mac802154/cfg.c175
-rw-r--r--net/mac802154/ieee802154_i.h27
-rw-r--r--net/mac802154/main.c2
-rw-r--r--net/mac802154/rx.c36
-rw-r--r--net/mac802154/scan.c407
-rw-r--r--net/mctp/route.c12
-rw-r--r--net/mptcp/diag.c11
-rw-r--r--net/mptcp/fastopen.c6
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h8
-rw-r--r--net/mptcp/mptcp_pm_gen.c2
-rw-r--r--net/mptcp/mptcp_pm_gen.h2
-rw-r--r--net/mptcp/options.c17
-rw-r--r--net/mptcp/pm_netlink.c76
-rw-r--r--net/mptcp/pm_userspace.c46
-rw-r--r--net/mptcp/protocol.c252
-rw-r--r--net/mptcp/protocol.h62
-rw-r--r--net/mptcp/sockopt.c29
-rw-r--r--net/mptcp/subflow.c105
-rw-r--r--net/ncsi/internal.h7
-rw-r--r--net/ncsi/ncsi-cmd.c3
-rw-r--r--net/ncsi/ncsi-manage.c29
-rw-r--r--net/ncsi/ncsi-netlink.c4
-rw-r--r--net/ncsi/ncsi-pkt.h17
-rw-r--r--net/ncsi/ncsi-rsp.c67
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h16
-rw-r--r--net/netfilter/ipset/ip_set_core.c39
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h20
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c8
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c13
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c1
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c4
-rw-r--r--net/netfilter/nf_conntrack_netlink.c18
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c10
-rw-r--r--net/netfilter/nf_flow_table_core.c17
-rw-r--r--net/netfilter/nf_log.c7
-rw-r--r--net/netfilter/nf_log_syslog.c13
-rw-r--r--net/netfilter/nf_nat_core.c5
-rw-r--r--net/netfilter/nf_queue.c6
-rw-r--r--net/netfilter/nf_synproxy_core.c4
-rw-r--r--net/netfilter/nf_tables_api.c317
-rw-r--r--net/netfilter/nfnetlink_log.c8
-rw-r--r--net/netfilter/nfnetlink_queue.c13
-rw-r--r--net/netfilter/nft_chain_filter.c11
-rw-r--r--net/netfilter/nft_compat.c49
-rw-r--r--net/netfilter/nft_ct.c26
-rw-r--r--net/netfilter/nft_flow_offload.c6
-rw-r--r--net/netfilter/nft_limit.c42
-rw-r--r--net/netfilter/nft_nat.c5
-rw-r--r--net/netfilter/nft_rt.c5
-rw-r--r--net/netfilter/nft_set_hash.c8
-rw-r--r--net/netfilter/nft_set_pipapo.c130
-rw-r--r--net/netfilter/nft_set_pipapo.h22
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c19
-rw-r--r--net/netfilter/nft_set_rbtree.c17
-rw-r--r--net/netfilter/nft_socket.c5
-rw-r--r--net/netfilter/nft_synproxy.c7
-rw-r--r--net/netfilter/nft_tproxy.c5
-rw-r--r--net/netfilter/nft_tunnel.c1
-rw-r--r--net/netfilter/nft_xfrm.c5
-rw-r--r--net/netfilter/xt_physdev.c2
-rw-r--r--net/netlabel/netlabel_calipso.c49
-rw-r--r--net/netlink/af_netlink.c10
-rw-r--r--net/netlink/genetlink.c148
-rw-r--r--net/netrom/af_netrom.c14
-rw-r--r--net/netrom/nr_dev.c2
-rw-r--r--net/netrom/nr_in.c6
-rw-r--r--net/netrom/nr_out.c2
-rw-r--r--net/netrom/nr_route.c8
-rw-r--r--net/netrom/nr_subr.c5
-rw-r--r--net/nfc/digital_core.c1
-rw-r--r--net/nfc/nci/core.c5
-rw-r--r--net/nfc/nci/spi.c1
-rw-r--r--net/openvswitch/flow_netlink.c49
-rw-r--r--net/packet/af_packet.c21
-rw-r--r--net/phonet/datagram.c4
-rw-r--r--net/phonet/pep.c41
-rw-r--r--net/psample/psample.c2
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rds/rdma.c3
-rw-r--r--net/rds/recv.c13
-rw-r--r--net/rds/send.c6
-rw-r--r--net/rds/tcp_listen.c2
-rw-r--r--net/rfkill/core.c4
-rw-r--r--net/rxrpc/af_rxrpc.c62
-rw-r--r--net/rxrpc/ar-internal.h44
-rw-r--r--net/rxrpc/call_event.c12
-rw-r--r--net/rxrpc/call_object.c22
-rw-r--r--net/rxrpc/conn_client.c10
-rw-r--r--net/rxrpc/conn_event.c10
-rw-r--r--net/rxrpc/conn_service.c3
-rw-r--r--net/rxrpc/input.c115
-rw-r--r--net/rxrpc/local_object.c13
-rw-r--r--net/rxrpc/net_ns.c4
-rw-r--r--net/rxrpc/output.c14
-rw-r--r--net/rxrpc/peer_object.c58
-rw-r--r--net/rxrpc/proc.c78
-rw-r--r--net/rxrpc/rxkad.c6
-rw-r--r--net/rxrpc/sendmsg.c11
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_api.c251
-rw-r--r--net/sched/act_bpf.c2
-rw-r--r--net/sched/act_connmark.c2
-rw-r--r--net/sched/act_csum.c4
-rw-r--r--net/sched/act_ct.c14
-rw-r--r--net/sched/act_ctinfo.c2
-rw-r--r--net/sched/act_gact.c2
-rw-r--r--net/sched/act_gate.c2
-rw-r--r--net/sched/act_ife.c2
-rw-r--r--net/sched/act_ipt.c464
-rw-r--r--net/sched/act_mirred.c282
-rw-r--r--net/sched/act_mpls.c2
-rw-r--r--net/sched/act_nat.c2
-rw-r--r--net/sched/act_pedit.c2
-rw-r--r--net/sched/act_police.c2
-rw-r--r--net/sched/act_sample.c2
-rw-r--r--net/sched/act_simple.c2
-rw-r--r--net/sched/act_skbedit.c2
-rw-r--r--net/sched/act_skbmod.c2
-rw-r--r--net/sched/act_tunnel_key.c2
-rw-r--r--net/sched/act_vlan.c2
-rw-r--r--net/sched/cls_api.c113
-rw-r--r--net/sched/cls_flower.c28
-rw-r--r--net/sched/cls_u32.c36
-rw-r--r--net/sched/em_canid.c1
-rw-r--r--net/sched/em_cmp.c1
-rw-r--r--net/sched/em_meta.c1
-rw-r--r--net/sched/em_nbyte.c1
-rw-r--r--net/sched/em_text.c1
-rw-r--r--net/sched/em_u32.c1
-rw-r--r--net/sched/sch_api.c79
-rw-r--r--net/sched/sch_cbs.c4
-rw-r--r--net/sched/sch_generic.c9
-rw-r--r--net/sctp/inqueue.c14
-rw-r--r--net/sctp/socket.c13
-rw-r--r--net/smc/af_smc.c121
-rw-r--r--net/smc/smc.h11
-rw-r--r--net/smc/smc_clc.c333
-rw-r--r--net/smc/smc_clc.h67
-rw-r--r--net/smc/smc_core.c43
-rw-r--r--net/smc/smc_core.h18
-rw-r--r--net/smc/smc_diag.c11
-rw-r--r--net/smc/smc_ib.c2
-rw-r--r--net/smc/smc_ism.c50
-rw-r--r--net/smc/smc_ism.h30
-rw-r--r--net/smc/smc_pnet.c4
-rw-r--r--net/smc/smc_sysctl.c24
-rw-r--r--net/smc/smc_sysctl.h2
-rw-r--r--net/smc/smc_tx.c30
-rw-r--r--net/socket.c6
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c1
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c1
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c16
-rw-r--r--net/sunrpc/clnt.c61
-rw-r--r--net/sunrpc/sunrpc_syms.c1
-rw-r--r--net/sunrpc/svc.c26
-rw-r--r--net/sunrpc/svc_xprt.c32
-rw-r--r--net/sunrpc/svcauth.c16
-rw-r--r--net/sunrpc/svcsock.c18
-rw-r--r--net/sunrpc/xprt.c31
-rw-r--r--net/sunrpc/xprtmultipath.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c32
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c11
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c211
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c450
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c96
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c36
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
-rw-r--r--net/switchdev/switchdev.c73
-rw-r--r--net/tipc/bearer.c6
-rw-r--r--net/tipc/link.c15
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_sw.c199
-rw-r--r--net/unix/af_unix.c33
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c10
-rw-r--r--net/unix/scm.c4
-rw-r--r--net/unix/unix_bpf.c21
-rw-r--r--net/vmw_vsock/af_vsock.c9
-rw-r--r--net/vmw_vsock/hyperv_transport.c4
-rw-r--r--net/vmw_vsock/virtio_transport.c7
-rw-r--r--net/vmw_vsock/virtio_transport_common.c43
-rw-r--r--net/vmw_vsock/vsock_loopback.c1
-rw-r--r--net/wireless/Kconfig1
-rw-r--r--net/wireless/Makefile4
-rw-r--r--net/wireless/chan.c97
-rw-r--r--net/wireless/core.c3
-rw-r--r--net/wireless/core.h16
-rw-r--r--net/wireless/mlme.c2
-rw-r--r--net/wireless/nl80211.c317
-rw-r--r--net/wireless/nl80211.h2
-rw-r--r--net/wireless/rdev-ops.h26
-rw-r--r--net/wireless/reg.c8
-rw-r--r--net/wireless/reg.h5
-rw-r--r--net/wireless/scan.c302
-rw-r--r--net/wireless/sme.c2
-rw-r--r--net/wireless/tests/Makefile2
-rw-r--r--net/wireless/tests/scan.c625
-rw-r--r--net/wireless/tests/util.c56
-rw-r--r--net/wireless/tests/util.h66
-rw-r--r--net/wireless/trace.h22
-rw-r--r--net/wireless/util.c56
-rw-r--r--net/x25/af_x25.c14
-rw-r--r--net/x25/x25_facilities.c14
-rw-r--r--net/x25/x25_out.c2
-rw-r--r--net/xdp/xdp_umem.c11
-rw-r--r--net/xdp/xsk.c71
-rw-r--r--net/xdp/xsk_buff_pool.c15
-rw-r--r--net/xdp/xsk_queue.h19
-rw-r--r--net/xfrm/Makefile1
-rw-r--r--net/xfrm/xfrm_algo.c1
-rw-r--r--net/xfrm/xfrm_device.c2
-rw-r--r--net/xfrm/xfrm_output.c6
-rw-r--r--net/xfrm/xfrm_policy.c8
-rw-r--r--net/xfrm/xfrm_state_bpf.c134
-rw-r--r--net/xfrm/xfrm_user.c4
463 files changed, 12886 insertions, 4442 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index 7b3341cef926..850d4a185f55 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -179,4 +179,5 @@ static void __exit lowpan_module_exit(void)
module_init(lowpan_module_init);
module_exit(lowpan_module_exit);
+MODULE_DESCRIPTION("IPv6 over Low-Power Wireless Personal Area Network core module");
MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2a7f1b15714a..407b2335f091 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -702,20 +702,7 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
struct ethtool_ts_info *info)
{
const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
- const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
- struct phy_device *phydev = vlan->real_dev->phydev;
-
- if (phy_has_tsinfo(phydev)) {
- return phy_ts_info(phydev, info);
- } else if (ops->get_ts_info) {
- return ops->get_ts_info(vlan->real_dev, info);
- } else {
- info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
- SOF_TIMESTAMPING_SOFTWARE;
- info->phc_index = -1;
- }
-
- return 0;
+ return ethtool_get_ts_info_by_layer(vlan->real_dev, info);
}
static void vlan_dev_get_stats64(struct net_device *dev,
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 214532173536..a3b68243fd4b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
vlan_dev_set_ingress_priority(dev, m->to, m->from);
}
}
if (data[IFLA_VLAN_EGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
err = vlan_dev_set_egress_priority(dev, m->from, m->to);
if (err)
diff --git a/net/Kconfig b/net/Kconfig
index 3ec6bc98fa05..4adc47d0c9c2 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -233,8 +233,6 @@ source "net/bridge/netfilter/Kconfig"
endif
-source "net/bpfilter/Kconfig"
-
source "net/dccp/Kconfig"
source "net/sctp/Kconfig"
source "net/rds/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 4c4dc535453d..b06b5539e7a6 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX_SCM) += unix/
obj-y += ipv6/
-obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index a852ec093fa8..198f5ba2feae 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1581,7 +1581,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
/* Build a packet */
- SOCK_DEBUG(sk, "SK %p: Got address.\n", sk);
+ net_dbg_ratelimited("SK %p: Got address.\n", sk);
/* For headers */
size = sizeof(struct ddpehdr) + len + ddp_dl->header_length;
@@ -1602,7 +1602,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
dev = rt->dev;
- SOCK_DEBUG(sk, "SK %p: Size needed %d, device %s\n",
+ net_dbg_ratelimited("SK %p: Size needed %d, device %s\n",
sk, size, dev->name);
hard_header_len = dev->hard_header_len;
@@ -1631,7 +1631,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
skb_reserve(skb, hard_header_len);
skb->dev = dev;
- SOCK_DEBUG(sk, "SK %p: Begin build.\n", sk);
+ net_dbg_ratelimited("SK %p: Begin build.\n", sk);
ddp = skb_put(skb, sizeof(struct ddpehdr));
ddp->deh_len_hops = htons(len + sizeof(*ddp));
@@ -1642,7 +1642,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
ddp->deh_dport = usat->sat_port;
ddp->deh_sport = at->src_port;
- SOCK_DEBUG(sk, "SK %p: Copy user data (%zd bytes).\n", sk, len);
+ net_dbg_ratelimited("SK %p: Copy user data (%zd bytes).\n", sk, len);
err = memcpy_from_msg(skb_put(skb, len), msg, len);
if (err) {
@@ -1666,7 +1666,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (skb2) {
loopback = 1;
- SOCK_DEBUG(sk, "SK %p: send out(copy).\n", sk);
+ net_dbg_ratelimited("SK %p: send out(copy).\n", sk);
/*
* If it fails it is queued/sent above in the aarp queue
*/
@@ -1675,7 +1675,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
if (dev->flags & IFF_LOOPBACK || loopback) {
- SOCK_DEBUG(sk, "SK %p: Loop back.\n", sk);
+ net_dbg_ratelimited("SK %p: Loop back.\n", sk);
/* loop back */
skb_orphan(skb);
if (ddp->deh_dnode == ATADDR_BCAST) {
@@ -1689,7 +1689,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
}
ddp_dl->request(ddp_dl, skb, dev->dev_addr);
} else {
- SOCK_DEBUG(sk, "SK %p: send out.\n", sk);
+ net_dbg_ratelimited("SK %p: send out.\n", sk);
if (rt->flags & RTF_GATEWAY) {
gsat.sat_addr = rt->gateway;
usat = &gsat;
@@ -1700,7 +1700,7 @@ static int atalk_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
*/
aarp_send_ddp(dev, skb, &usat->sat_addr, NULL);
}
- SOCK_DEBUG(sk, "SK %p: Done write (%zd).\n", sk, len);
+ net_dbg_ratelimited("SK %p: Done write (%zd).\n", sk, len);
out:
release_sock(sk);
diff --git a/net/atm/common.c b/net/atm/common.c
index f7019df41c3e..2a1ec014e901 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -890,6 +890,7 @@ subsys_initcall(atm_init);
module_exit(atm_exit);
+MODULE_DESCRIPTION("Asynchronous Transfer Mode (ATM) networking core");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_ATMPVC);
MODULE_ALIAS_NETPROTO(PF_ATMSVC);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 6257bf12e5a0..ffef658862db 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -2234,4 +2234,5 @@ out:
spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
+MODULE_DESCRIPTION("ATM LAN Emulation (LANE) support");
MODULE_LICENSE("GPL");
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 033871e718a3..324e3ab96bb3 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1532,4 +1532,5 @@ static void __exit atm_mpoa_cleanup(void)
module_init(atm_mpoa_init);
module_exit(atm_mpoa_cleanup);
+MODULE_DESCRIPTION("Multi-Protocol Over ATM (MPOA) driver");
MODULE_LICENSE("GPL");
diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile
index 3bd0760c76a2..b51d8b071b56 100644
--- a/net/batman-adv/Makefile
+++ b/net/batman-adv/Makefile
@@ -20,6 +20,7 @@ batman-adv-y += hash.o
batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o
batman-adv-y += main.o
batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o
+batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast_forw.o
batman-adv-y += netlink.o
batman-adv-$(CONFIG_BATMAN_ADV_NC) += network-coding.o
batman-adv-y += originator.o
diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c
index 37ce6cfb3520..5f46ca3d4bb8 100644
--- a/net/batman-adv/bridge_loop_avoidance.c
+++ b/net/batman-adv/bridge_loop_avoidance.c
@@ -20,7 +20,6 @@
#include <linux/if_vlan.h>
#include <linux/jhash.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -31,6 +30,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index c120c7c6d25f..757c084ac2d1 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -25,7 +25,6 @@
#include "hard-interface.h"
#include "originator.h"
-#include "routing.h"
#include "send.h"
/**
@@ -351,18 +350,14 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
struct batadv_orig_node *orig_node_src)
{
struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
- struct batadv_orig_node *orig_node_dst;
struct batadv_neigh_node *neigh_node = NULL;
struct batadv_frag_packet *packet;
u16 total_size;
bool ret = false;
packet = (struct batadv_frag_packet *)skb->data;
- orig_node_dst = batadv_orig_hash_find(bat_priv, packet->dest);
- if (!orig_node_dst)
- goto out;
- neigh_node = batadv_find_router(bat_priv, orig_node_dst, recv_if);
+ neigh_node = batadv_orig_to_router(bat_priv, packet->dest, recv_if);
if (!neigh_node)
goto out;
@@ -381,7 +376,6 @@ bool batadv_frag_skb_fwd(struct sk_buff *skb,
}
out:
- batadv_orig_node_put(orig_node_dst);
batadv_neigh_node_put(neigh_node);
return ret;
}
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index d26124bc27e1..0ddd8b4b3f4c 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -18,7 +18,6 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
@@ -29,6 +28,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/udp.h>
#include <net/sock.h>
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index e8a449915566..5fc754b0b3f7 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -6,6 +6,7 @@
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/build_bug.h>
#include <linux/byteorder/generic.h>
@@ -20,7 +21,6 @@
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/kref.h>
#include <linux/list.h>
@@ -33,6 +33,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/workqueue.h>
@@ -532,6 +533,8 @@ static void batadv_recv_handler_init(void)
/* broadcast packet */
batadv_rx_handler[BATADV_BCAST] = batadv_recv_bcast_packet;
+ /* multicast packet */
+ batadv_rx_handler[BATADV_MCAST] = batadv_recv_mcast_packet;
/* unicast packets ... */
/* unicast with 4 addresses packet */
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 10007c5894a1..870dcd7f1786 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2023.3"
+#define BATADV_SOURCE_VERSION "2024.0"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 315394f12c55..14088c4ff2f6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -25,7 +25,6 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jiffies.h>
-#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -36,6 +35,7 @@
#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/sprintf.h>
#include <linux/stddef.h>
#include <linux/string.h>
#include <linux/types.h>
@@ -236,6 +236,37 @@ static u8 batadv_mcast_mla_rtr_flags_get(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_mla_forw_flags_get() - get multicast forwarding flags
+ * @bat_priv: the bat priv with all the soft interface information
+ *
+ * Checks if all active hard interfaces have an MTU larger or equal to 1280
+ * bytes (IPv6 minimum MTU).
+ *
+ * Return: BATADV_MCAST_HAVE_MC_PTYPE_CAPA if yes, BATADV_NO_FLAGS otherwise.
+ */
+static u8 batadv_mcast_mla_forw_flags_get(struct batadv_priv *bat_priv)
+{
+ const struct batadv_hard_iface *hard_iface;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
+ if (hard_iface->if_status != BATADV_IF_ACTIVE)
+ continue;
+
+ if (hard_iface->soft_iface != bat_priv->soft_iface)
+ continue;
+
+ if (hard_iface->net_dev->mtu < IPV6_MIN_MTU) {
+ rcu_read_unlock();
+ return BATADV_NO_FLAGS;
+ }
+ }
+ rcu_read_unlock();
+
+ return BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
+}
+
+/**
* batadv_mcast_mla_flags_get() - get the new multicast flags
* @bat_priv: the bat priv with all the soft interface information
*
@@ -256,6 +287,7 @@ batadv_mcast_mla_flags_get(struct batadv_priv *bat_priv)
mla_flags.enabled = 1;
mla_flags.tvlv_flags |= batadv_mcast_mla_rtr_flags_get(bat_priv,
bridge);
+ mla_flags.tvlv_flags |= batadv_mcast_mla_forw_flags_get(bat_priv);
if (!bridge)
return mla_flags;
@@ -806,23 +838,25 @@ static void batadv_mcast_flags_log(struct batadv_priv *bat_priv, u8 flags)
{
bool old_enabled = bat_priv->mcast.mla_flags.enabled;
u8 old_flags = bat_priv->mcast.mla_flags.tvlv_flags;
- char str_old_flags[] = "[.... . ]";
+ char str_old_flags[] = "[.... . .]";
- sprintf(str_old_flags, "[%c%c%c%s%s]",
+ sprintf(str_old_flags, "[%c%c%c%s%s%c]",
(old_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(old_flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(old_flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(old_flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ !(old_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
batadv_dbg(BATADV_DBG_MCAST, bat_priv,
- "Changing multicast flags from '%s' to '[%c%c%c%s%s]'\n",
+ "Changing multicast flags from '%s' to '[%c%c%c%s%s%c]'\n",
old_enabled ? str_old_flags : "<undefined>",
(flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) ? 'U' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV4) ? '4' : '.',
(flags & BATADV_MCAST_WANT_ALL_IPV6) ? '6' : '.',
!(flags & BATADV_MCAST_WANT_NO_RTR4) ? "R4" : ". ",
- !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ");
+ !(flags & BATADV_MCAST_WANT_NO_RTR6) ? "R6" : ". ",
+ !(flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) ? 'P' : '.');
}
/**
@@ -1136,16 +1170,61 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_forw_mode_by_count() - get forwarding mode by count
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to check
+ * @vid: the vlan identifier
+ * @is_routable: stores whether the destination is routable
+ * @count: the number of originators the multicast packet need to be sent to
+ *
+ * For a multicast packet with multiple destination originators, checks which
+ * mode to use. For BATADV_FORW_MCAST it also encapsulates the packet with a
+ * complete batman-adv multicast header.
+ *
+ * Return:
+ * BATADV_FORW_MCAST: If all nodes have multicast packet routing
+ * capabilities and an MTU >= 1280 on all hard interfaces (including us)
+ * and the encapsulated multicast packet with all destination addresses
+ * would still fit into an 1280 bytes batman-adv multicast packet
+ * (excluding the outer ethernet frame) and we could successfully push
+ * the full batman-adv multicast packet header.
+ * BATADV_FORW_UCASTS: If the packet cannot be sent in a batman-adv
+ * multicast packet and the amount of batman-adv unicast packets needed
+ * is smaller or equal to the configured multicast fanout.
+ * BATADV_FORW_BCAST: Otherwise.
+ */
+static enum batadv_forw_mode
+batadv_mcast_forw_mode_by_count(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ int is_routable, int count)
+{
+ unsigned int mcast_hdrlen = batadv_mcast_forw_packet_hdrlen(count);
+ u8 own_tvlv_flags = bat_priv->mcast.mla_flags.tvlv_flags;
+
+ if (!atomic_read(&bat_priv->mcast.num_no_mc_ptype_capa) &&
+ own_tvlv_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA &&
+ skb->len + mcast_hdrlen <= IPV6_MIN_MTU &&
+ batadv_mcast_forw_push(bat_priv, skb, vid, is_routable, count))
+ return BATADV_FORW_MCAST;
+
+ if (count <= atomic_read(&bat_priv->multicast_fanout))
+ return BATADV_FORW_UCASTS;
+
+ return BATADV_FORW_BCAST;
+}
+
+/**
* batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
* @skb: the multicast packet to check
+ * @vid: the vlan identifier
* @is_routable: stores whether the destination is routable
*
* Return: The forwarding mode as enum batadv_forw_mode.
*/
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
int ret, tt_count, ip_count, unsnoop_count, total_count;
bool is_unsnoopable = false;
@@ -1175,10 +1254,8 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
else if (unsnoop_count)
return BATADV_FORW_BCAST;
- if (total_count <= atomic_read(&bat_priv->multicast_fanout))
- return BATADV_FORW_UCASTS;
-
- return BATADV_FORW_BCAST;
+ return batadv_mcast_forw_mode_by_count(bat_priv, skb, vid, *is_routable,
+ total_count);
}
/**
@@ -1739,6 +1816,31 @@ static void batadv_mcast_want_rtr6_update(struct batadv_priv *bat_priv,
}
/**
+ * batadv_mcast_have_mc_ptype_update() - update multicast packet type counter
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig: the orig_node which multicast state might have changed of
+ * @mcast_flags: flags indicating the new multicast state
+ *
+ * If the BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag of this originator, orig, has
+ * toggled then this method updates the counter accordingly.
+ */
+static void batadv_mcast_have_mc_ptype_update(struct batadv_priv *bat_priv,
+ struct batadv_orig_node *orig,
+ u8 mcast_flags)
+{
+ lockdep_assert_held(&orig->mcast_handler_lock);
+
+ /* switched from flag set to unset */
+ if (!(mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA) &&
+ orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA)
+ atomic_inc(&bat_priv->mcast.num_no_mc_ptype_capa);
+ /* switched from flag unset to set */
+ else if (mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA &&
+ !(orig->mcast_flags & BATADV_MCAST_HAVE_MC_PTYPE_CAPA))
+ atomic_dec(&bat_priv->mcast.num_no_mc_ptype_capa);
+}
+
+/**
* batadv_mcast_tvlv_flags_get() - get multicast flags from an OGM TVLV
* @enabled: whether the originator has multicast TVLV support enabled
* @tvlv_value: tvlv buffer containing the multicast flags
@@ -1806,6 +1908,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr4_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_rtr6_update(bat_priv, orig, mcast_flags);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig, mcast_flags);
orig->mcast_flags = mcast_flags;
spin_unlock_bh(&orig->mcast_handler_lock);
@@ -1820,6 +1923,10 @@ void batadv_mcast_init(struct batadv_priv *bat_priv)
batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
NULL, NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
+ batadv_tvlv_handler_register(bat_priv, NULL, NULL,
+ batadv_mcast_forw_tracker_tvlv_handler,
+ BATADV_TVLV_MCAST_TRACKER, 1,
+ BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
batadv_mcast_start_timer(bat_priv);
@@ -2068,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
cancel_delayed_work_sync(&bat_priv->mcast.work);
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1);
batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
/* safely calling outside of worker, as worker was canceled above */
@@ -2091,6 +2199,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
BATADV_MCAST_WANT_NO_RTR4);
batadv_mcast_want_rtr6_update(bat_priv, orig,
BATADV_MCAST_WANT_NO_RTR6);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig,
+ BATADV_MCAST_HAVE_MC_PTYPE_CAPA);
spin_unlock_bh(&orig->mcast_handler_lock);
}
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index a9770d8d6d36..d97ee51d26f2 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -11,6 +11,7 @@
#include <linux/netlink.h>
#include <linux/skbuff.h>
+#include <linux/types.h>
/**
* enum batadv_forw_mode - the way a packet should be forwarded as
@@ -28,6 +29,12 @@ enum batadv_forw_mode {
*/
BATADV_FORW_UCASTS,
+ /**
+ * @BATADV_FORW_MCAST: forward the packet to some nodes via a
+ * batman-adv multicast packet
+ */
+ BATADV_FORW_MCAST,
+
/** @BATADV_FORW_NONE: don't forward, drop it */
BATADV_FORW_NONE,
};
@@ -36,7 +43,7 @@ enum batadv_forw_mode {
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable);
+ unsigned short vid, int *is_routable);
int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid, int is_routable);
@@ -52,11 +59,23 @@ void batadv_mcast_free(struct batadv_priv *bat_priv);
void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
+/* multicast_forw.c */
+
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+ struct sk_buff *skb);
+
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests);
+
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count);
+
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
#else
static inline enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- int *is_routable)
+ unsigned short vid, int *is_routable)
{
return BATADV_FORW_BCAST;
}
@@ -94,6 +113,13 @@ static inline void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node)
{
}
+static inline int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+}
+
#endif /* CONFIG_BATMAN_ADV_MCAST */
#endif /* _NET_BATMAN_ADV_MULTICAST_H_ */
diff --git a/net/batman-adv/multicast_forw.c b/net/batman-adv/multicast_forw.c
new file mode 100644
index 000000000000..fafd6ba8c056
--- /dev/null
+++ b/net/batman-adv/multicast_forw.c
@@ -0,0 +1,1178 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) B.A.T.M.A.N. contributors:
+ *
+ * Linus Lüssing
+ */
+
+#include "multicast.h"
+#include "main.h"
+
+#include <linux/bug.h>
+#include <linux/build_bug.h>
+#include <linux/byteorder/generic.h>
+#include <linux/compiler.h>
+#include <linux/errno.h>
+#include <linux/etherdevice.h>
+#include <linux/gfp.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/ipv6.h>
+#include <linux/limits.h>
+#include <linux/netdevice.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
+#include <linux/skbuff.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <uapi/linux/batadv_packet.h>
+
+#include "bridge_loop_avoidance.h"
+#include "originator.h"
+#include "send.h"
+#include "translation-table.h"
+
+#define batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) \
+ for (; num_dests; num_dests--, (dest) += ETH_ALEN)
+
+#define batadv_mcast_forw_tracker_for_each_dest2(dest1, dest2, num_dests) \
+ for (; num_dests; num_dests--, (dest1) += ETH_ALEN, (dest2) += ETH_ALEN)
+
+/**
+ * batadv_mcast_forw_skb_push() - skb_push and memorize amount of pushed bytes
+ * @skb: the skb to push onto
+ * @size: the amount of bytes to push
+ * @len: stores the total amount of bytes pushed
+ *
+ * Performs an skb_push() onto the given skb and adds the amount of pushed bytes
+ * to the given len pointer.
+ *
+ * Return: the return value of the skb_push() call.
+ */
+static void *batadv_mcast_forw_skb_push(struct sk_buff *skb, size_t size,
+ unsigned short *len)
+{
+ *len += size;
+ return skb_push(skb, size);
+}
+
+/**
+ * batadv_mcast_forw_push_padding() - push 2 padding bytes to skb's front
+ * @skb: the skb to push onto
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes two padding bytes to the front of the given skb.
+ *
+ * Return: On success a pointer to the first byte of the two pushed padding
+ * bytes within the skb. NULL otherwise.
+ */
+static char *
+batadv_mcast_forw_push_padding(struct sk_buff *skb, unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+ char *padding;
+
+ if (skb_headroom(skb) < pad_len)
+ return NULL;
+
+ padding = batadv_mcast_forw_skb_push(skb, pad_len, tvlv_len);
+ memset(padding, 0, pad_len);
+
+ return padding;
+}
+
+/**
+ * batadv_mcast_forw_push_est_padding() - push padding bytes if necessary
+ * @skb: the skb to potentially push the padding onto
+ * @count: the (estimated) number of originators the multicast packet needs to
+ * be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the number of destination entries is even then this adds two
+ * padding bytes to the end of the tracker TVLV.
+ *
+ * Return: true on success or if no padding is needed, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_est_padding(struct sk_buff *skb, int count,
+ unsigned short *tvlv_len)
+{
+ if (!(count % 2) && !batadv_mcast_forw_push_padding(skb, tvlv_len))
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_orig_entry() - get orig_node from an hlist node
+ * @node: the hlist node to get the orig_node from
+ * @entry_offset: the offset of the hlist node within the orig_node struct
+ *
+ * Return: The orig_node containing the hlist node on success, NULL on error.
+ */
+static struct batadv_orig_node *
+batadv_mcast_forw_orig_entry(struct hlist_node *node,
+ size_t entry_offset)
+{
+ /* sanity check */
+ switch (entry_offset) {
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_ipv6_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr4_node):
+ case offsetof(struct batadv_orig_node, mcast_want_all_rtr6_node):
+ break;
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ return (struct batadv_orig_node *)((void *)node - entry_offset);
+}
+
+/**
+ * batadv_mcast_forw_push_dest() - push an originator MAC address onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination address onto
+ * @vid: the vlan identifier
+ * @orig_node: the originator node to get the MAC address from
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * If the orig_node is a BLA backbone gateway, if there is not enough skb
+ * headroom available or if num_dests is already at its maximum (65535) then
+ * neither the skb nor num_dests is changed. Otherwise the originator's MAC
+ * address is pushed onto the given skb and num_dests incremented by one.
+ *
+ * Return: true if the orig_node is a backbone gateway or if an orig address
+ * was pushed successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_push_dest(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, unsigned short vid,
+ struct batadv_orig_node *orig_node,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ BUILD_BUG_ON(sizeof_field(struct batadv_tvlv_mcast_tracker, num_dests)
+ != sizeof(__be16));
+
+ /* Avoid sending to other BLA gateways - they already got the frame from
+ * the LAN side we share with them.
+ * TODO: Refactor to take BLA into account earlier in mode check.
+ */
+ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid))
+ return true;
+
+ if (skb_headroom(skb) < ETH_ALEN || *num_dests == U16_MAX)
+ return false;
+
+ batadv_mcast_forw_skb_push(skb, ETH_ALEN, tvlv_len);
+ ether_addr_copy(skb->data, orig_node->orig);
+ (*num_dests)++;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_dests_list() - push originators from list onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @head: the list to gather originators from
+ * @entry_offset: offset of an hlist node in an orig_node structure
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators in the given list onto the given
+ * skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static int batadv_mcast_forw_push_dests_list(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct hlist_head *head,
+ size_t entry_offset,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_node *node;
+ struct batadv_orig_node *orig_node;
+
+ rcu_read_lock();
+ __hlist_for_each_rcu(node, head) {
+ orig_node = batadv_mcast_forw_orig_entry(node, entry_offset);
+ if (!orig_node ||
+ !batadv_mcast_forw_push_dest(bat_priv, skb, vid, orig_node,
+ num_dests, tvlv_len)) {
+ rcu_read_unlock();
+ return false;
+ }
+ }
+ rcu_read_unlock();
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_tt() - push originators with interest through TT
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the translation table onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_tt(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+
+ struct batadv_tt_global_entry *tt_global;
+ const u8 *addr = eth_hdr(skb)->h_dest;
+
+ /* ok */
+ int ret = true;
+
+ tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid);
+ if (!tt_global)
+ goto out;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(orig_entry, &tt_global->orig_list, list) {
+ if (!batadv_mcast_forw_push_dest(bat_priv, skb, vid,
+ orig_entry->orig_node,
+ num_dests, tvlv_len)) {
+ ret = false;
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ batadv_tt_global_entry_put(tt_global);
+
+out:
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_want_all() - push originators with want-all flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_all(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_ipv4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_ipv6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_ipv6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_want_rtr() - push originators with want-router flag
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @num_dests: a pointer to store the number of pushed addresses in
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet through the want-all-rtr flag onto the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_want_rtr(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ unsigned short *num_dests,
+ unsigned short *tvlv_len)
+{
+ struct hlist_head *head = NULL;
+ size_t offset;
+ int ret;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ head = &bat_priv->mcast.want_all_rtr4_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr4_node);
+ break;
+ case htons(ETH_P_IPV6):
+ head = &bat_priv->mcast.want_all_rtr6_list;
+ offset = offsetof(struct batadv_orig_node,
+ mcast_want_all_rtr6_node);
+ break;
+ default:
+ return false;
+ }
+
+ ret = batadv_mcast_forw_push_dests_list(bat_priv, skb, vid, head,
+ offset, num_dests, tvlv_len);
+ if (!ret)
+ return false;
+
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_scrape() - remove bytes within skb data
+ * @skb: the skb to remove bytes from
+ * @offset: the offset from the skb data from which to scrape
+ * @len: the amount of bytes to scrape starting from the offset
+ *
+ * Scrapes/removes len bytes from the given skb at the given offset from the
+ * skb data.
+ *
+ * Caller needs to ensure that the region from the skb data's start up
+ * to/including the to be removed bytes are linearized.
+ */
+static void batadv_mcast_forw_scrape(struct sk_buff *skb,
+ unsigned short offset,
+ unsigned short len)
+{
+ char *to, *from;
+
+ SKB_LINEAR_ASSERT(skb);
+
+ to = skb_pull(skb, len);
+ from = to - len;
+
+ memmove(to, from, offset);
+}
+
+/**
+ * batadv_mcast_forw_push_scrape_padding() - remove TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Remove two padding bytes from the end of the multicast tracker TVLV,
+ * from before the payload data.
+ *
+ * Caller needs to ensure that the TVLV bytes are linearized.
+ */
+static void batadv_mcast_forw_push_scrape_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ const int pad_len = 2;
+
+ batadv_mcast_forw_scrape(skb, *tvlv_len - pad_len, pad_len);
+ *tvlv_len -= pad_len;
+}
+
+/**
+ * batadv_mcast_forw_push_insert_padding() - insert TVLV padding
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Inserts two padding bytes at the end of the multicast tracker TVLV,
+ * before the payload data in the given skb.
+ *
+ * Return: true on success, false otherwise.
+ */
+static bool batadv_mcast_forw_push_insert_padding(struct sk_buff *skb,
+ unsigned short *tvlv_len)
+{
+ unsigned short offset = *tvlv_len;
+ char *to, *from = skb->data;
+
+ to = batadv_mcast_forw_push_padding(skb, tvlv_len);
+ if (!to)
+ return false;
+
+ memmove(to, from, offset);
+ memset(to + offset, 0, *tvlv_len - offset);
+ return true;
+}
+
+/**
+ * batadv_mcast_forw_push_adjust_padding() - adjust padding if necessary
+ * @skb: the skb to potentially adjust the TVLV's padding on
+ * @count: the estimated number of originators the multicast packet needs to
+ * be sent to
+ * @num_dests_pushed: the number of originators that were actually added to the
+ * multicast packet's tracker TVLV
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Adjusts the padding in the multicast packet's tracker TVLV depending on the
+ * initially estimated amount of destinations versus the amount of destinations
+ * that were actually added to the tracker TVLV.
+ *
+ * If the initial estimate was correct or at least the oddness was the same then
+ * no padding adjustment is performed.
+ * If the initially estimated number was even, so padding was initially added,
+ * but it turned out to be odd then padding is removed.
+ * If the initially estimated number was odd, so no padding was initially added,
+ * but it turned out to be even then padding is added.
+ *
+ * Return: true if no padding adjustment is needed or the adjustment was
+ * successful, false otherwise.
+ */
+static bool
+batadv_mcast_forw_push_adjust_padding(struct sk_buff *skb, int *count,
+ unsigned short num_dests_pushed,
+ unsigned short *tvlv_len)
+{
+ int ret = true;
+
+ if (likely((num_dests_pushed % 2) == (*count % 2)))
+ goto out;
+
+ /**
+ * estimated even number of destinations, but turned out to be odd
+ * -> remove padding
+ */
+ if (!(*count % 2) && (num_dests_pushed % 2))
+ batadv_mcast_forw_push_scrape_padding(skb, tvlv_len);
+ /**
+ * estimated odd number of destinations, but turned out to be even
+ * -> add padding
+ */
+ else if ((*count % 2) && (!(num_dests_pushed % 2)))
+ ret = batadv_mcast_forw_push_insert_padding(skb, tvlv_len);
+
+out:
+ *count = num_dests_pushed;
+ return ret;
+}
+
+/**
+ * batadv_mcast_forw_push_dests() - push originator addresses onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the destination addresses onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Push the MAC addresses of all originators which have indicated interest in
+ * this multicast packet onto the given skb.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_dests(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int *count,
+ unsigned short *tvlv_len)
+{
+ unsigned short num_dests = 0;
+
+ if (!batadv_mcast_forw_push_est_padding(skb, *count, tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_tt(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_want_all(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (is_routable &&
+ !batadv_mcast_forw_push_want_rtr(bat_priv, skb, vid, &num_dests,
+ tvlv_len))
+ goto err;
+
+ if (!batadv_mcast_forw_push_adjust_padding(skb, count, num_dests,
+ tvlv_len))
+ goto err;
+
+ return 0;
+err:
+ return -ENOMEM;
+}
+
+/**
+ * batadv_mcast_forw_push_tracker() - push a multicast tracker TVLV header
+ * @skb: the skb to push the tracker TVLV onto
+ * @num_dests: the number of destination addresses to set in the header
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes a multicast tracker TVLV header onto the given skb, including the
+ * generic TVLV header but excluding the destination MAC addresses.
+ *
+ * The provided num_dests value is taken into consideration to set the
+ * num_dests field in the tracker header and to set the appropriate TVLV length
+ * value fields.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int batadv_mcast_forw_push_tracker(struct sk_buff *skb, int num_dests,
+ unsigned short *tvlv_len)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ unsigned int tvlv_value_len;
+
+ if (skb_headroom(skb) < sizeof(*mcast_tracker) + sizeof(*tvlv_hdr))
+ return -ENOMEM;
+
+ tvlv_value_len = sizeof(*mcast_tracker) + *tvlv_len;
+ if (tvlv_value_len + sizeof(*tvlv_hdr) > U16_MAX)
+ return -ENOMEM;
+
+ batadv_mcast_forw_skb_push(skb, sizeof(*mcast_tracker), tvlv_len);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb->data;
+ mcast_tracker->num_dests = htons(num_dests);
+
+ skb_reset_network_header(skb);
+
+ batadv_mcast_forw_skb_push(skb, sizeof(*tvlv_hdr), tvlv_len);
+ tvlv_hdr = (struct batadv_tvlv_hdr *)skb->data;
+ tvlv_hdr->type = BATADV_TVLV_MCAST_TRACKER;
+ tvlv_hdr->version = 1;
+ tvlv_hdr->len = htons(tvlv_value_len);
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_tvlvs() - push a multicast tracker TVLV onto an skb
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the skb to push the tracker TVLV onto
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ * @tvlv_len: stores the amount of currently pushed TVLV bytes
+ *
+ * Pushes a multicast tracker TVLV onto the given skb, including the collected
+ * destination MAC addresses and the generic TVLV header.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_tvlvs(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count,
+ unsigned short *tvlv_len)
+{
+ int ret;
+
+ ret = batadv_mcast_forw_push_dests(bat_priv, skb, vid, is_routable,
+ &count, tvlv_len);
+ if (ret < 0)
+ return ret;
+
+ ret = batadv_mcast_forw_push_tracker(skb, count, tvlv_len);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push_hdr() - push a multicast packet header onto an skb
+ * @skb: the skb to push the header onto
+ * @tvlv_len: the total TVLV length value to set in the header
+ *
+ * Pushes a batman-adv multicast packet header onto the given skb and sets
+ * the provided total TVLV length value in it.
+ *
+ * Caller needs to ensure enough skb headroom is available.
+ *
+ * Return: -ENOMEM if there is not enough skb headroom available. Otherwise, on
+ * success 0.
+ */
+static int
+batadv_mcast_forw_push_hdr(struct sk_buff *skb, unsigned short tvlv_len)
+{
+ struct batadv_mcast_packet *mcast_packet;
+
+ if (skb_headroom(skb) < sizeof(*mcast_packet))
+ return -ENOMEM;
+
+ skb_push(skb, sizeof(*mcast_packet));
+
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ mcast_packet->version = BATADV_COMPAT_VERSION;
+ mcast_packet->ttl = BATADV_TTL;
+ mcast_packet->packet_type = BATADV_MCAST;
+ mcast_packet->reserved = 0;
+ mcast_packet->tvlv_len = htons(tvlv_len);
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_scrub_dests() - scrub destinations in a tracker TVLV
+ * @bat_priv: the bat priv with all the soft interface information
+ * @comp_neigh: next hop neighbor to scrub+collect destinations for
+ * @dest: start MAC entry in original skb's tracker TVLV
+ * @next_dest: start MAC entry in to be sent skb's tracker TVLV
+ * @num_dests: number of remaining destination MAC entries to iterate over
+ *
+ * This sorts destination entries into either the original batman-adv
+ * multicast packet or the skb (copy) that is going to be sent to comp_neigh
+ * next.
+ *
+ * In preparation for the next, to be (unicast) transmitted batman-adv multicast
+ * packet skb to be sent to the given neighbor node, tries to collect all
+ * originator MAC addresses that have the given neighbor node as their next hop
+ * in the to be transmitted skb (copy), which next_dest points into. That is we
+ * zero all destination entries in next_dest which do not have comp_neigh as
+ * their next hop. And zero all destination entries in the original skb that
+ * would have comp_neigh as their next hop (to avoid redundant transmissions and
+ * duplicated payload later).
+ */
+static void
+batadv_mcast_forw_scrub_dests(struct batadv_priv *bat_priv,
+ struct batadv_neigh_node *comp_neigh, u8 *dest,
+ u8 *next_dest, u16 num_dests)
+{
+ struct batadv_neigh_node *next_neigh;
+
+ /* skip first entry, this is what we are comparing with */
+ eth_zero_addr(dest);
+ dest += ETH_ALEN;
+ next_dest += ETH_ALEN;
+ num_dests--;
+
+ batadv_mcast_forw_tracker_for_each_dest2(dest, next_dest, num_dests) {
+ if (is_zero_ether_addr(next_dest))
+ continue;
+
+ /* sanity check, we expect unicast destinations */
+ if (is_multicast_ether_addr(next_dest)) {
+ eth_zero_addr(dest);
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ next_neigh = batadv_orig_to_router(bat_priv, next_dest, NULL);
+ if (!next_neigh) {
+ eth_zero_addr(next_dest);
+ continue;
+ }
+
+ if (!batadv_compare_eth(next_neigh->addr, comp_neigh->addr)) {
+ eth_zero_addr(next_dest);
+ batadv_neigh_node_put(next_neigh);
+ continue;
+ }
+
+ /* found an entry for our next packet to transmit, so remove it
+ * from the original packet
+ */
+ eth_zero_addr(dest);
+ batadv_neigh_node_put(next_neigh);
+ }
+}
+
+/**
+ * batadv_mcast_forw_shrink_fill() - swap slot with next non-zero destination
+ * @slot: the to be filled zero-MAC destination entry in a tracker TVLV
+ * @num_dests_slot: remaining entries in tracker TVLV from/including slot
+ *
+ * Searches for the next non-zero-MAC destination entry in a tracker TVLV after
+ * the given slot pointer. And if found, swaps it with the zero-MAC destination
+ * entry which the slot points to.
+ *
+ * Return: true if slot was swapped/filled successfully, false otherwise.
+ */
+static bool batadv_mcast_forw_shrink_fill(u8 *slot, u16 num_dests_slot)
+{
+ u16 num_dests_filler;
+ u8 *filler;
+
+ /* sanity check, should not happen */
+ if (!num_dests_slot)
+ return false;
+
+ num_dests_filler = num_dests_slot - 1;
+ filler = slot + ETH_ALEN;
+
+ /* find a candidate to fill the empty slot */
+ batadv_mcast_forw_tracker_for_each_dest(filler, num_dests_filler) {
+ if (is_zero_ether_addr(filler))
+ continue;
+
+ ether_addr_copy(slot, filler);
+ eth_zero_addr(filler);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * batadv_mcast_forw_shrink_pack_dests() - pack destinations of a tracker TVLV
+ * @skb: the batman-adv multicast packet to compact destinations in
+ *
+ * Compacts the originator destination MAC addresses in the multicast tracker
+ * TVLV of the given multicast packet. This is done by moving all non-zero
+ * MAC addresses in direction of the skb head and all zero MAC addresses in skb
+ * tail direction, within the multicast tracker TVLV.
+ *
+ * Return: The number of consecutive zero MAC address destinations which are
+ * now at the end of the multicast tracker TVLV.
+ */
+static int batadv_mcast_forw_shrink_pack_dests(struct sk_buff *skb)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ unsigned char *skb_net_hdr;
+ u16 num_dests_slot;
+ u8 *slot;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests_slot = ntohs(mcast_tracker->num_dests);
+
+ slot = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+ batadv_mcast_forw_tracker_for_each_dest(slot, num_dests_slot) {
+ /* find an empty slot */
+ if (!is_zero_ether_addr(slot))
+ continue;
+
+ if (!batadv_mcast_forw_shrink_fill(slot, num_dests_slot))
+ /* could not find a filler, so we successfully packed
+ * and can stop - and must not reduce num_dests_slot!
+ */
+ break;
+ }
+
+ /* num_dests_slot is now the amount of reduced, zeroed
+ * destinations at the end of the tracker TVLV
+ */
+ return num_dests_slot;
+}
+
+/**
+ * batadv_mcast_forw_shrink_align_offset() - get new alignment offset
+ * @num_dests_old: the old, to be updated amount of destination nodes
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * Calculates the amount of potential extra alignment offset that is needed to
+ * adjust the TVLV padding after the change in destination nodes.
+ *
+ * Return:
+ * 0: If no change to padding is needed.
+ * 2: If padding needs to be removed.
+ * -2: If padding needs to be added.
+ */
+static short
+batadv_mcast_forw_shrink_align_offset(unsigned int num_dests_old,
+ unsigned int num_dests_reduce)
+{
+ /* even amount of removed destinations -> no alignment change */
+ if (!(num_dests_reduce % 2))
+ return 0;
+
+ /* even to odd amount of destinations -> remove padding */
+ if (!(num_dests_old % 2))
+ return 2;
+
+ /* odd to even amount of destinations -> add padding */
+ return -2;
+}
+
+/**
+ * batadv_mcast_forw_shrink_update_headers() - update shrunk mc packet headers
+ * @skb: the batman-adv multicast packet to update headers of
+ * @num_dests_reduce: the number of destinations that were removed
+ *
+ * This updates any fields of a batman-adv multicast packet that are affected
+ * by the reduced number of destinations in the multicast tracket TVLV. In
+ * particular this updates:
+ *
+ * The num_dest field of the multicast tracker TVLV.
+ * The TVLV length field of the according generic TVLV header.
+ * The batman-adv multicast packet's total TVLV length field.
+ *
+ * Return: The offset in skb's tail direction at which the new batman-adv
+ * multicast packet header needs to start.
+ */
+static unsigned int
+batadv_mcast_forw_shrink_update_headers(struct sk_buff *skb,
+ unsigned int num_dests_reduce)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_mcast_packet *mcast_packet;
+ struct batadv_tvlv_hdr *tvlv_hdr;
+ unsigned char *skb_net_hdr;
+ unsigned int offset;
+ short align_offset;
+ u16 num_dests;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+
+ align_offset = batadv_mcast_forw_shrink_align_offset(num_dests,
+ num_dests_reduce);
+ offset = ETH_ALEN * num_dests_reduce + align_offset;
+ num_dests -= num_dests_reduce;
+
+ /* update tracker header */
+ mcast_tracker->num_dests = htons(num_dests);
+
+ /* update tracker's tvlv header's length field */
+ tvlv_hdr = (struct batadv_tvlv_hdr *)(skb_network_header(skb) -
+ sizeof(*tvlv_hdr));
+ tvlv_hdr->len = htons(ntohs(tvlv_hdr->len) - offset);
+
+ /* update multicast packet header's tvlv length field */
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ mcast_packet->tvlv_len = htons(ntohs(mcast_packet->tvlv_len) - offset);
+
+ return offset;
+}
+
+/**
+ * batadv_mcast_forw_shrink_move_headers() - move multicast headers by offset
+ * @skb: the batman-adv multicast packet to move headers for
+ * @offset: a non-negative offset to move headers by, towards the skb tail
+ *
+ * Moves the batman-adv multicast packet header, its multicast tracker TVLV and
+ * any TVLVs in between by the given offset in direction towards the tail.
+ */
+static void
+batadv_mcast_forw_shrink_move_headers(struct sk_buff *skb, unsigned int offset)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ unsigned char *skb_net_hdr;
+ unsigned int len;
+ u16 num_dests;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+ len = skb_network_offset(skb) + sizeof(*mcast_tracker);
+ len += num_dests * ETH_ALEN;
+
+ batadv_mcast_forw_scrape(skb, len, offset);
+}
+
+/**
+ * batadv_mcast_forw_shrink_tracker() - remove zero addresses in a tracker tvlv
+ * @skb: the batman-adv multicast packet to (potentially) shrink
+ *
+ * Removes all destinations with a zero MAC addresses (00:00:00:00:00:00) from
+ * the given batman-adv multicast packet's tracker TVLV and updates headers
+ * accordingly to maintain a valid batman-adv multicast packet.
+ */
+static void batadv_mcast_forw_shrink_tracker(struct sk_buff *skb)
+{
+ unsigned int offset;
+ u16 dests_reduced;
+
+ dests_reduced = batadv_mcast_forw_shrink_pack_dests(skb);
+ if (!dests_reduced)
+ return;
+
+ offset = batadv_mcast_forw_shrink_update_headers(skb, dests_reduced);
+ batadv_mcast_forw_shrink_move_headers(skb, offset);
+}
+
+/**
+ * batadv_mcast_forw_packet() - forward a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received or locally generated batman-adv multicast packet
+ * @local_xmit: indicates that the packet was locally generated and not received
+ *
+ * Parses the tracker TVLV of a batman-adv multicast packet and forwards the
+ * packet as indicated in this TVLV.
+ *
+ * Caller needs to set the skb network header to the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport header
+ * to the next byte after this multicast tracker TVLV.
+ *
+ * Caller needs to free the skb.
+ *
+ * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error
+ * code on failure. NET_RX_SUCCESS if the received packet is supposed to be
+ * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise.
+ */
+static int batadv_mcast_forw_packet(struct batadv_priv *bat_priv,
+ struct sk_buff *skb, bool local_xmit)
+{
+ struct batadv_tvlv_mcast_tracker *mcast_tracker;
+ struct batadv_neigh_node *neigh_node;
+ unsigned long offset, num_dests_off;
+ struct sk_buff *nexthop_skb;
+ unsigned char *skb_net_hdr;
+ bool local_recv = false;
+ unsigned int tvlv_len;
+ bool xmitted = false;
+ u8 *dest, *next_dest;
+ u16 num_dests;
+ int ret;
+
+ /* (at least) TVLV part needs to be linearized */
+ SKB_LINEAR_ASSERT(skb);
+
+ /* check if num_dests is within skb length */
+ num_dests_off = offsetof(struct batadv_tvlv_mcast_tracker, num_dests);
+ if (num_dests_off > skb_network_header_len(skb))
+ return -EINVAL;
+
+ skb_net_hdr = skb_network_header(skb);
+ mcast_tracker = (struct batadv_tvlv_mcast_tracker *)skb_net_hdr;
+ num_dests = ntohs(mcast_tracker->num_dests);
+
+ dest = (u8 *)mcast_tracker + sizeof(*mcast_tracker);
+
+ /* check if full tracker tvlv is within skb length */
+ tvlv_len = sizeof(*mcast_tracker) + ETH_ALEN * num_dests;
+ if (tvlv_len > skb_network_header_len(skb))
+ return -EINVAL;
+
+ /* invalidate checksum: */
+ skb->ip_summed = CHECKSUM_NONE;
+
+ batadv_mcast_forw_tracker_for_each_dest(dest, num_dests) {
+ if (is_zero_ether_addr(dest))
+ continue;
+
+ /* only unicast originator addresses supported */
+ if (is_multicast_ether_addr(dest)) {
+ eth_zero_addr(dest);
+ continue;
+ }
+
+ if (batadv_is_my_mac(bat_priv, dest)) {
+ eth_zero_addr(dest);
+ local_recv = true;
+ continue;
+ }
+
+ neigh_node = batadv_orig_to_router(bat_priv, dest, NULL);
+ if (!neigh_node) {
+ eth_zero_addr(dest);
+ continue;
+ }
+
+ nexthop_skb = skb_copy(skb, GFP_ATOMIC);
+ if (!nexthop_skb) {
+ batadv_neigh_node_put(neigh_node);
+ return -ENOMEM;
+ }
+
+ offset = dest - skb->data;
+ next_dest = nexthop_skb->data + offset;
+
+ batadv_mcast_forw_scrub_dests(bat_priv, neigh_node, dest,
+ next_dest, num_dests);
+ batadv_mcast_forw_shrink_tracker(nexthop_skb);
+
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_TX_BYTES,
+ nexthop_skb->len + ETH_HLEN);
+ xmitted = true;
+ ret = batadv_send_unicast_skb(nexthop_skb, neigh_node);
+
+ batadv_neigh_node_put(neigh_node);
+
+ if (ret < 0)
+ return ret;
+ }
+
+ if (xmitted) {
+ if (local_xmit) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_TX_LOCAL);
+ batadv_add_counter(bat_priv,
+ BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+ skb->len -
+ skb_transport_offset(skb));
+ } else {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_FWD);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_FWD_BYTES,
+ skb->len + ETH_HLEN);
+ }
+ }
+
+ if (local_recv)
+ return NET_RX_SUCCESS;
+ else
+ return NET_RX_DROP;
+}
+
+/**
+ * batadv_mcast_forw_tracker_tvlv_handler() - handle an mcast tracker tvlv
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the received batman-adv multicast packet
+ *
+ * Parses the tracker TVLV of an incoming batman-adv multicast packet and
+ * forwards the packet as indicated in this TVLV.
+ *
+ * Caller needs to set the skb network header to the start of the multicast
+ * tracker TVLV (excluding the generic TVLV header) and the skb transport header
+ * to the next byte after this multicast tracker TVLV.
+ *
+ * Caller needs to free the skb.
+ *
+ * Return: NET_RX_SUCCESS or NET_RX_DROP on success or a negative error
+ * code on failure. NET_RX_SUCCESS if the received packet is supposed to be
+ * decapsulated and forwarded to the own soft interface, NET_RX_DROP otherwise.
+ */
+int batadv_mcast_forw_tracker_tvlv_handler(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ return batadv_mcast_forw_packet(bat_priv, skb, false);
+}
+
+/**
+ * batadv_mcast_forw_packet_hdrlen() - multicast packet header length
+ * @num_dests: number of destination nodes
+ *
+ * Calculates the total batman-adv multicast packet header length for a given
+ * number of destination nodes (excluding the outer ethernet frame).
+ *
+ * Return: The calculated total batman-adv multicast packet header length.
+ */
+unsigned int batadv_mcast_forw_packet_hdrlen(unsigned int num_dests)
+{
+ /**
+ * If the number of destination entries is even then we need to add
+ * two byte padding to the tracker TVLV.
+ */
+ int padding = (!(num_dests % 2)) ? 2 : 0;
+
+ return padding + num_dests * ETH_ALEN +
+ sizeof(struct batadv_tvlv_mcast_tracker) +
+ sizeof(struct batadv_tvlv_hdr) +
+ sizeof(struct batadv_mcast_packet);
+}
+
+/**
+ * batadv_mcast_forw_expand_head() - expand headroom for an mcast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to send
+ *
+ * Tries to expand an skb's headroom so that its head to tail is 1298
+ * bytes (minimum IPv6 MTU + vlan ethernet header size) large.
+ *
+ * Return: -EINVAL if the given skb's length is too large or -ENOMEM on memory
+ * allocation failure. Otherwise, on success, zero is returned.
+ */
+static int batadv_mcast_forw_expand_head(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ int hdr_size = VLAN_ETH_HLEN + IPV6_MIN_MTU - skb->len;
+
+ /* TODO: Could be tightened to actual number of destination nodes?
+ * But it's tricky, number of destinations might have increased since
+ * we last checked.
+ */
+ if (hdr_size < 0) {
+ /* batadv_mcast_forw_mode_check_count() should ensure we do not
+ * end up here
+ */
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if (skb_headroom(skb) < hdr_size &&
+ pskb_expand_head(skb, hdr_size, 0, GFP_ATOMIC) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * batadv_mcast_forw_push() - encapsulate skb in a batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ * @vid: the vlan identifier
+ * @is_routable: indicates whether the destination is routable
+ * @count: the number of originators the multicast packet needs to be sent to
+ *
+ * Encapsulates the given multicast packet in a batman-adv multicast packet.
+ * A multicast tracker TVLV with destination originator addresses for any node
+ * that signaled interest in it, that is either via the translation table or the
+ * according want-all flags, is attached accordingly.
+ *
+ * Return: true on success, false otherwise.
+ */
+bool batadv_mcast_forw_push(struct batadv_priv *bat_priv, struct sk_buff *skb,
+ unsigned short vid, int is_routable, int count)
+{
+ unsigned short tvlv_len = 0;
+ int ret;
+
+ if (batadv_mcast_forw_expand_head(bat_priv, skb) < 0)
+ goto err;
+
+ skb_reset_transport_header(skb);
+
+ ret = batadv_mcast_forw_push_tvlvs(bat_priv, skb, vid, is_routable,
+ count, &tvlv_len);
+ if (ret < 0)
+ goto err;
+
+ ret = batadv_mcast_forw_push_hdr(skb, tvlv_len);
+ if (ret < 0)
+ goto err;
+
+ return true;
+
+err:
+ if (tvlv_len)
+ skb_pull(skb, tvlv_len);
+
+ return false;
+}
+
+/**
+ * batadv_mcast_forw_mcsend() - send a self prepared batman-adv multicast packet
+ * @bat_priv: the bat priv with all the soft interface information
+ * @skb: the multicast packet to encapsulate and send
+ *
+ * Transmits a batman-adv multicast packet that was locally prepared and
+ * consumes/frees it.
+ *
+ * Return: NET_XMIT_DROP on memory allocation failure. NET_XMIT_SUCCESS
+ * otherwise.
+ */
+int batadv_mcast_forw_mcsend(struct batadv_priv *bat_priv,
+ struct sk_buff *skb)
+{
+ int ret = batadv_mcast_forw_packet(bat_priv, skb, true);
+
+ if (ret < 0) {
+ kfree_skb(skb);
+ return NET_XMIT_DROP;
+ }
+
+ consume_skb(skb);
+ return NET_XMIT_SUCCESS;
+}
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index 0c64d81a7761..1f7ed9d4f6fd 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -7,6 +7,7 @@
#include "netlink.h"
#include "main.h"
+#include <linux/array_size.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/bug.h>
@@ -20,7 +21,6 @@
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
-#include <linux/kernel.h>
#include <linux/limits.h>
#include <linux/list.h>
#include <linux/minmax.h>
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 34903df4fe93..71c143d4b6d0 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -312,6 +312,33 @@ batadv_orig_router_get(struct batadv_orig_node *orig_node,
}
/**
+ * batadv_orig_to_router() - get next hop neighbor to an orig address
+ * @bat_priv: the bat priv with all the soft interface information
+ * @orig_addr: the originator MAC address to search the best next hop router for
+ * @if_outgoing: the interface where the payload packet has been received or
+ * the OGM should be sent to
+ *
+ * Return: A neighbor node which is the best router towards the given originator
+ * address.
+ */
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing)
+{
+ struct batadv_neigh_node *neigh_node;
+ struct batadv_orig_node *orig_node;
+
+ orig_node = batadv_orig_hash_find(bat_priv, orig_addr);
+ if (!orig_node)
+ return NULL;
+
+ neigh_node = batadv_find_router(bat_priv, orig_node, if_outgoing);
+ batadv_orig_node_put(orig_node);
+
+ return neigh_node;
+}
+
+/**
* batadv_orig_ifinfo_get() - find the ifinfo from an orig_node
* @orig_node: the orig node to be queried
* @if_outgoing: the interface for which the ifinfo should be acquired
@@ -942,6 +969,7 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
#ifdef CONFIG_BATMAN_ADV_MCAST
orig_node->mcast_flags = BATADV_MCAST_WANT_NO_RTR4;
orig_node->mcast_flags |= BATADV_MCAST_WANT_NO_RTR6;
+ orig_node->mcast_flags |= BATADV_MCAST_HAVE_MC_PTYPE_CAPA;
INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node);
INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node);
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index ea3d69e4e670..db0c55128170 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -36,6 +36,9 @@ void batadv_neigh_node_release(struct kref *ref);
struct batadv_neigh_node *
batadv_orig_router_get(struct batadv_orig_node *orig_node,
const struct batadv_hard_iface *if_outgoing);
+struct batadv_neigh_node *
+batadv_orig_to_router(struct batadv_priv *bat_priv, u8 *orig_addr,
+ struct batadv_hard_iface *if_outgoing);
struct batadv_neigh_ifinfo *
batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh,
struct batadv_hard_iface *if_outgoing);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 163cd43c4821..f1061985149f 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1270,3 +1270,73 @@ out:
batadv_orig_node_put(orig_node);
return ret;
}
+
+#ifdef CONFIG_BATMAN_ADV_MCAST
+/**
+ * batadv_recv_mcast_packet() - process received batman-adv multicast packet
+ * @skb: the received batman-adv multicast packet
+ * @recv_if: interface that the skb is received on
+ *
+ * Parses the given, received batman-adv multicast packet. Depending on the
+ * contents of its TVLV forwards it and/or decapsulates it to hand it to the
+ * soft interface.
+ *
+ * Return: NET_RX_DROP if the skb is not consumed, NET_RX_SUCCESS otherwise.
+ */
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface);
+ struct batadv_mcast_packet *mcast_packet;
+ int hdr_size = sizeof(*mcast_packet);
+ unsigned char *tvlv_buff;
+ int ret = NET_RX_DROP;
+ u16 tvlv_buff_len;
+
+ if (batadv_check_unicast_packet(bat_priv, skb, hdr_size) < 0)
+ goto free_skb;
+
+ /* create a copy of the skb, if needed, to modify it. */
+ if (skb_cow(skb, ETH_HLEN) < 0)
+ goto free_skb;
+
+ /* packet needs to be linearized to access the tvlv content */
+ if (skb_linearize(skb) < 0)
+ goto free_skb;
+
+ mcast_packet = (struct batadv_mcast_packet *)skb->data;
+ if (mcast_packet->ttl-- < 2)
+ goto free_skb;
+
+ tvlv_buff = (unsigned char *)(skb->data + hdr_size);
+ tvlv_buff_len = ntohs(mcast_packet->tvlv_len);
+
+ if (tvlv_buff_len > skb->len - hdr_size)
+ goto free_skb;
+
+ ret = batadv_tvlv_containers_process(bat_priv, BATADV_MCAST, NULL, skb,
+ tvlv_buff, tvlv_buff_len);
+ if (ret >= 0) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_BYTES,
+ skb->len + ETH_HLEN);
+ }
+
+ hdr_size += tvlv_buff_len;
+
+ if (ret == NET_RX_SUCCESS && (skb->len - hdr_size >= ETH_HLEN)) {
+ batadv_inc_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL);
+ batadv_add_counter(bat_priv, BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+ skb->len - hdr_size);
+
+ batadv_interface_rx(bat_priv->soft_iface, skb, hdr_size, NULL);
+ /* skb was consumed */
+ skb = NULL;
+ }
+
+free_skb:
+ kfree_skb(skb);
+
+ return ret;
+}
+#endif /* CONFIG_BATMAN_ADV_MCAST */
diff --git a/net/batman-adv/routing.h b/net/batman-adv/routing.h
index afd15b3879f1..e9849f032a24 100644
--- a/net/batman-adv/routing.h
+++ b/net/batman-adv/routing.h
@@ -27,6 +27,17 @@ int batadv_recv_frag_packet(struct sk_buff *skb,
struct batadv_hard_iface *iface);
int batadv_recv_bcast_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
+#ifdef CONFIG_BATMAN_ADV_MCAST
+int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if);
+#else
+static inline int batadv_recv_mcast_packet(struct sk_buff *skb,
+ struct batadv_hard_iface *recv_if)
+{
+ kfree_skb(skb);
+ return NET_RX_DROP;
+}
+#endif
int batadv_recv_unicast_tvlv(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
int batadv_recv_unhandled_unicast_packet(struct sk_buff *skb,
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 1bf1232a4f75..89c51b3cf430 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -301,12 +301,13 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
send:
if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
- forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
+ forw_mode = batadv_mcast_forw_mode(bat_priv, skb, vid,
&mcast_is_routable);
switch (forw_mode) {
case BATADV_FORW_BCAST:
break;
case BATADV_FORW_UCASTS:
+ case BATADV_FORW_MCAST:
do_bcast = false;
break;
case BATADV_FORW_NONE:
@@ -365,6 +366,8 @@ send:
} else if (forw_mode == BATADV_FORW_UCASTS) {
ret = batadv_mcast_forw_send(bat_priv, skb, vid,
mcast_is_routable);
+ } else if (forw_mode == BATADV_FORW_MCAST) {
+ ret = batadv_mcast_forw_mcsend(bat_priv, skb);
} else {
if (batadv_dat_snoop_outgoing_arp_request(bat_priv,
skb))
@@ -762,6 +765,7 @@ static int batadv_softif_init_late(struct net_device *dev)
atomic_set(&bat_priv->mcast.num_want_all_unsnoopables, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv4, 0);
atomic_set(&bat_priv->mcast.num_want_all_ipv6, 0);
+ atomic_set(&bat_priv->mcast.num_no_mc_ptype_capa, 0);
#endif
atomic_set(&bat_priv->gw.mode, BATADV_GW_MODE_OFF);
atomic_set(&bat_priv->gw.bandwidth_down, 100);
@@ -925,6 +929,18 @@ static const struct {
{ "tt_response_rx" },
{ "tt_roam_adv_tx" },
{ "tt_roam_adv_rx" },
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ { "mcast_tx" },
+ { "mcast_tx_bytes" },
+ { "mcast_tx_local" },
+ { "mcast_tx_local_bytes" },
+ { "mcast_rx" },
+ { "mcast_rx_bytes" },
+ { "mcast_rx_local" },
+ { "mcast_rx_local_bytes" },
+ { "mcast_fwd" },
+ { "mcast_fwd_bytes" },
+#endif
#ifdef CONFIG_BATMAN_ADV_DAT
{ "dat_get_tx" },
{ "dat_get_rx" },
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 17d5ea1d8e84..00840d5784fe 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -862,6 +862,70 @@ enum batadv_counters {
*/
BATADV_CNT_TT_ROAM_ADV_RX,
+#ifdef CONFIG_BATMAN_ADV_MCAST
+ /**
+ * @BATADV_CNT_MCAST_TX: transmitted batman-adv multicast packets
+ * counter
+ */
+ BATADV_CNT_MCAST_TX,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_BYTES: transmitted batman-adv multicast packets
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_TX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL: counter for multicast packets which
+ * were locally encapsulated and transmitted as batman-adv multicast
+ * packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_TX_LOCAL_BYTES: bytes counter for multicast packets
+ * which were locally encapsulated and transmitted as batman-adv
+ * multicast packets
+ */
+ BATADV_CNT_MCAST_TX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX: received batman-adv multicast packet counter
+ */
+ BATADV_CNT_MCAST_RX,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_BYTES: received batman-adv multicast packet
+ * bytes counter
+ */
+ BATADV_CNT_MCAST_RX_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL: counter for received batman-adv multicast
+ * packets which were forwarded to the local soft interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL,
+
+ /**
+ * @BATADV_CNT_MCAST_RX_LOCAL_BYTES: bytes counter for received
+ * batman-adv multicast packets which were forwarded to the local soft
+ * interface
+ */
+ BATADV_CNT_MCAST_RX_LOCAL_BYTES,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD: counter for received batman-adv multicast
+ * packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD,
+
+ /**
+ * @BATADV_CNT_MCAST_FWD_BYTES: bytes counter for received batman-adv
+ * multicast packets which were forwarded to other, neighboring nodes
+ */
+ BATADV_CNT_MCAST_FWD_BYTES,
+#endif
+
#ifdef CONFIG_BATMAN_ADV_DAT
/**
* @BATADV_CNT_DAT_GET_TX: transmitted dht GET traffic packet counter
@@ -1279,6 +1343,12 @@ struct batadv_priv_mcast {
atomic_t num_want_all_rtr6;
/**
+ * @num_no_mc_ptype_capa: counter for number of nodes without the
+ * BATADV_MCAST_HAVE_MC_PTYPE_CAPA flag
+ */
+ atomic_t num_no_mc_ptype_capa;
+
+ /**
* @want_lists_lock: lock for protecting modifications to mcasts
* want_all_{unsnoopables,ipv4,ipv6}_list (traversals are rcu-locked)
*/
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 2cee330188ce..a41d2693f4d8 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -300,6 +300,13 @@ static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec)
__u8 vnd_len, *vnd_data = NULL;
struct hci_op_configure_data_path *cmd = NULL;
+ if (!codec->data_path || !hdev->get_codec_config_data)
+ return 0;
+
+ /* Do not take me as error */
+ if (!hdev->get_codec_config_data)
+ return 0;
+
err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len,
&vnd_data);
if (err < 0)
@@ -345,9 +352,7 @@ static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data)
bt_dev_dbg(hdev, "hcon %p", conn);
- /* for offload use case, codec needs to configured before opening SCO */
- if (conn->codec.data_path)
- configure_datapath_sync(hdev, &conn->codec);
+ configure_datapath_sync(hdev, &conn->codec);
conn->state = BT_CONNECT;
conn->out = true;
@@ -1086,8 +1091,9 @@ static void hci_conn_cleanup_child(struct hci_conn *conn, u8 reason)
hci_conn_failed(conn, reason);
break;
case ISO_LINK:
- if (conn->state != BT_CONNECTED &&
- !test_bit(HCI_CONN_CREATE_CIS, &conn->flags))
+ if ((conn->state != BT_CONNECTED &&
+ !test_bit(HCI_CONN_CREATE_CIS, &conn->flags)) ||
+ test_bit(HCI_CONN_BIG_CREATED, &conn->flags))
hci_conn_failed(conn, reason);
break;
}
@@ -2228,7 +2234,17 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
__u8 base_len, __u8 *base)
{
struct hci_conn *conn;
+ struct hci_conn *parent;
__u8 eir[HCI_MAX_PER_AD_LENGTH];
+ struct hci_link *link;
+
+ /* Look for any BIS that is open for rebinding */
+ conn = hci_conn_hash_lookup_big_state(hdev, qos->bcast.big, BT_OPEN);
+ if (conn) {
+ memcpy(qos, &conn->iso_qos, sizeof(*qos));
+ conn->state = BT_CONNECTED;
+ return conn;
+ }
if (base_len && base)
base_len = eir_append_service_data(eir, 0, 0x1851,
@@ -2256,6 +2272,20 @@ struct hci_conn *hci_bind_bis(struct hci_dev *hdev, bdaddr_t *dst,
conn->iso_qos = *qos;
conn->state = BT_BOUND;
+ /* Link BISes together */
+ parent = hci_conn_hash_lookup_big(hdev,
+ conn->iso_qos.bcast.big);
+ if (parent && parent != conn) {
+ link = hci_conn_link(parent, conn);
+ if (!link) {
+ hci_conn_drop(conn);
+ return ERR_PTR(-ENOLINK);
+ }
+
+ /* Link takes the refcount */
+ hci_conn_drop(conn);
+ }
+
return conn;
}
@@ -2287,6 +2317,9 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
if (IS_ERR(conn))
return conn;
+ if (conn->state == BT_CONNECTED)
+ return conn;
+
data.big = qos->bcast.big;
data.bis = qos->bcast.bis;
@@ -2421,12 +2454,10 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
sizeof(cp), &cp);
- /* If we're already encrypted set the REAUTH_PEND flag,
- * otherwise set the ENCRYPT_PEND.
+ /* Set the ENCRYPT_PEND to trigger encryption after
+ * authentication.
*/
- if (test_bit(HCI_CONN_ENCRYPT, &conn->flags))
- set_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
- else
+ if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
set_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 65601aa52e0d..2821a42cefdc 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1049,6 +1049,7 @@ static void hci_error_reset(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
+ hci_dev_hold(hdev);
BT_DBG("%s", hdev->name);
if (hdev->hw_error)
@@ -1056,10 +1057,10 @@ static void hci_error_reset(struct work_struct *work)
else
bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
- if (hci_dev_do_close(hdev))
- return;
+ if (!hci_dev_do_close(hdev))
+ hci_dev_do_open(hdev);
- hci_dev_do_open(hdev);
+ hci_dev_put(hdev);
}
void hci_uuids_clear(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 6b7741f6e95b..233453807b50 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -1046,10 +1046,12 @@ static int min_key_size_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE)
+ hci_dev_lock(hdev);
+ if (val > hdev->le_max_key_size || val < SMP_MIN_ENC_KEY_SIZE) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_min_key_size = val;
hci_dev_unlock(hdev);
@@ -1074,10 +1076,12 @@ static int max_key_size_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size)
+ hci_dev_lock(hdev);
+ if (val > SMP_MAX_ENC_KEY_SIZE || val < hdev->le_min_key_size) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_max_key_size = val;
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ebf17b51072f..2a5f5a7d2412 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3500,14 +3500,8 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data,
if (!ev->status) {
clear_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
-
- if (!hci_conn_ssp_enabled(conn) &&
- test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) {
- bt_dev_info(hdev, "re-auth of legacy device is not possible.");
- } else {
- set_bit(HCI_CONN_AUTH, &conn->flags);
- conn->sec_level = conn->pending_sec_level;
- }
+ set_bit(HCI_CONN_AUTH, &conn->flags);
+ conn->sec_level = conn->pending_sec_level;
} else {
if (ev->status == HCI_ERROR_PIN_OR_KEY_MISSING)
set_bit(HCI_CONN_AUTH_FAILURE, &conn->flags);
@@ -3516,7 +3510,6 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, void *data,
}
clear_bit(HCI_CONN_AUTH_PEND, &conn->flags);
- clear_bit(HCI_CONN_REAUTH_PEND, &conn->flags);
if (conn->state == BT_CONFIG) {
if (!ev->status && hci_conn_ssp_enabled(conn)) {
@@ -5336,9 +5329,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, void *data,
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn || !hci_conn_ssp_enabled(conn))
+ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
goto unlock;
+ /* Assume remote supports SSP since it has triggered this event */
+ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+
hci_conn_hold(conn);
if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -6801,6 +6797,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev, void *data,
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_UNKNOWN_CONN_ID);
+ if (max > hcon->le_conn_max_interval)
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+
if (hci_check_conn_params(min, max, latency, timeout))
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_INVALID_LL_PARAMS);
@@ -7427,10 +7427,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
* keep track of the bdaddr of the connection event that woke us up.
*/
if (event == HCI_EV_CONN_REQUEST) {
- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_CONN_COMPLETE) {
- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_LE_META) {
struct hci_ev_le_meta *le_ev = (void *)skb->data;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index d85a7091a116..5716345a26df 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -348,8 +348,6 @@ static void le_scan_disable(struct work_struct *work)
if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
goto _return;
- cancel_delayed_work(&hdev->le_scan_restart);
-
status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL);
if (status) {
bt_dev_err(hdev, "failed to disable LE scan: %d", status);
@@ -397,71 +395,6 @@ _return:
static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val,
u8 filter_dup);
-static int hci_le_scan_restart_sync(struct hci_dev *hdev)
-{
- /* If controller is not scanning we are done. */
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return 0;
-
- if (hdev->scanning_paused) {
- bt_dev_dbg(hdev, "Scanning is paused for suspend");
- return 0;
- }
-
- hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00);
- return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE,
- LE_SCAN_FILTER_DUP_ENABLE);
-}
-
-static void le_scan_restart(struct work_struct *work)
-{
- struct hci_dev *hdev = container_of(work, struct hci_dev,
- le_scan_restart.work);
- unsigned long timeout, duration, scan_start, now;
- int status;
-
- bt_dev_dbg(hdev, "");
-
- status = hci_le_scan_restart_sync(hdev);
- if (status) {
- bt_dev_err(hdev, "failed to restart LE scan: status %d",
- status);
- return;
- }
-
- hci_dev_lock(hdev);
-
- if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) ||
- !hdev->discovery.scan_start)
- goto unlock;
-
- /* When the scan was started, hdev->le_scan_disable has been queued
- * after duration from scan_start. During scan restart this job
- * has been canceled, and we need to queue it again after proper
- * timeout, to make sure that scan does not run indefinitely.
- */
- duration = hdev->discovery.scan_duration;
- scan_start = hdev->discovery.scan_start;
- now = jiffies;
- if (now - scan_start <= duration) {
- int elapsed;
-
- if (now >= scan_start)
- elapsed = now - scan_start;
- else
- elapsed = ULONG_MAX - scan_start + now;
-
- timeout = duration - elapsed;
- } else {
- timeout = 0;
- }
-
- queue_delayed_work(hdev->req_workqueue,
- &hdev->le_scan_disable, timeout);
-
-unlock:
- hci_dev_unlock(hdev);
-}
static int reenable_adv_sync(struct hci_dev *hdev, void *data)
{
@@ -630,7 +563,6 @@ void hci_cmd_sync_init(struct hci_dev *hdev)
INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work);
INIT_WORK(&hdev->reenable_adv_work, reenable_adv);
INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable);
- INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart);
INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire);
}
@@ -2274,8 +2206,11 @@ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
/* During suspend, only wakeable devices can be in acceptlist */
if (hdev->suspended &&
- !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP))
+ !(params->flags & HCI_CONN_FLAG_REMOTE_WAKEUP)) {
+ hci_le_del_accept_list_sync(hdev, &params->addr,
+ params->addr_type);
return 0;
+ }
/* Select filter policy to accept all advertising */
if (*num_entries >= hdev->le_accept_list_size)
@@ -3800,12 +3735,14 @@ static int hci_set_event_mask_sync(struct hci_dev *hdev)
if (lmp_bredr_capable(hdev)) {
events[4] |= 0x01; /* Flow Specification Complete */
- /* Don't set Disconnect Complete when suspended as that
- * would wakeup the host when disconnecting due to
- * suspend.
+ /* Don't set Disconnect Complete and mode change when
+ * suspended as that would wakeup the host when disconnecting
+ * due to suspend.
*/
- if (hdev->suspended)
+ if (hdev->suspended) {
events[0] &= 0xef;
+ events[2] &= 0xf7;
+ }
} else {
/* Use a different default for LE-only devices */
memset(events, 0, sizeof(events));
@@ -4960,7 +4897,6 @@ int hci_dev_close_sync(struct hci_dev *hdev)
cancel_delayed_work(&hdev->power_off);
cancel_delayed_work(&hdev->ncmd_timer);
cancel_delayed_work(&hdev->le_scan_disable);
- cancel_delayed_work(&hdev->le_scan_restart);
hci_request_cancel_all(hdev);
@@ -5178,7 +5114,6 @@ int hci_stop_discovery_sync(struct hci_dev *hdev)
if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) {
cancel_delayed_work(&hdev->le_scan_disable);
- cancel_delayed_work(&hdev->le_scan_restart);
err = hci_scan_disable_sync(hdev);
if (err)
@@ -5627,7 +5562,7 @@ static int hci_inquiry_sync(struct hci_dev *hdev, u8 length)
bt_dev_dbg(hdev, "");
- if (hci_dev_test_flag(hdev, HCI_INQUIRY))
+ if (test_bit(HCI_INQUIRY, &hdev->flags))
return 0;
hci_dev_lock(hdev);
@@ -5686,19 +5621,18 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval)
if (err < 0)
own_addr_type = ADDR_LE_DEV_PUBLIC;
- if (hci_is_adv_monitoring(hdev)) {
+ if (hci_is_adv_monitoring(hdev) ||
+ (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
+ hdev->discovery.result_filtering)) {
/* Duplicate filter should be disabled when some advertisement
* monitor is activated, otherwise AdvMon can only receive one
* advertisement for one peer(*) during active scanning, and
* might report loss to these peers.
*
- * Note that different controllers have different meanings of
- * |duplicate|. Some of them consider packets with the same
- * address as duplicate, and others consider packets with the
- * same address and the same RSSI as duplicate. Although in the
- * latter case we don't need to disable duplicate filter, but
- * it is common to have active scanning for a short period of
- * time, the power impact should be neglectable.
+ * If controller does strict duplicate filtering and the
+ * discovery requires result filtering disables controller based
+ * filtering since that can cause reports that would match the
+ * host filter to not be reported.
*/
filter_dup = LE_SCAN_FILTER_DUP_DISABLE;
}
@@ -5778,17 +5712,6 @@ int hci_start_discovery_sync(struct hci_dev *hdev)
bt_dev_dbg(hdev, "timeout %u ms", jiffies_to_msecs(timeout));
- /* When service discovery is used and the controller has a
- * strict duplicate filter, it is important to remember the
- * start and duration of the scan. This is required for
- * restarting scanning during the discovery phase.
- */
- if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) &&
- hdev->discovery.result_filtering) {
- hdev->discovery.scan_start = jiffies;
- hdev->discovery.scan_duration = timeout;
- }
-
queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_disable,
timeout);
return 0;
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 07b80e97aead..04f6572d35f1 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -54,6 +54,7 @@ static void iso_sock_kill(struct sock *sk);
enum {
BT_SK_BIG_SYNC,
BT_SK_PA_SYNC,
+ BT_SK_PA_SYNC_TERM,
};
struct iso_pinfo {
@@ -82,6 +83,11 @@ static bool iso_match_sid(struct sock *sk, void *data);
static bool iso_match_sync_handle(struct sock *sk, void *data);
static void iso_sock_disconn(struct sock *sk);
+typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
+
+static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
+ iso_sock_match_t match, void *data);
+
/* ---- ISO timers ---- */
#define ISO_CONN_TIMEOUT (HZ * 40)
#define ISO_DISCONN_TIMEOUT (HZ * 2)
@@ -190,10 +196,21 @@ static void iso_chan_del(struct sock *sk, int err)
sock_set_flag(sk, SOCK_ZAPPED);
}
+static bool iso_match_conn_sync_handle(struct sock *sk, void *data)
+{
+ struct hci_conn *hcon = data;
+
+ if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags))
+ return false;
+
+ return hcon->sync_handle == iso_pi(sk)->sync_handle;
+}
+
static void iso_conn_del(struct hci_conn *hcon, int err)
{
struct iso_conn *conn = hcon->iso_data;
struct sock *sk;
+ struct sock *parent;
if (!conn)
return;
@@ -209,6 +226,25 @@ static void iso_conn_del(struct hci_conn *hcon, int err)
if (sk) {
lock_sock(sk);
+
+ /* While a PA sync hcon is in the process of closing,
+ * mark parent socket with a flag, so that any residual
+ * BIGInfo adv reports that arrive before PA sync is
+ * terminated are not processed anymore.
+ */
+ if (test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
+ iso_match_conn_sync_handle,
+ hcon);
+
+ if (parent) {
+ set_bit(BT_SK_PA_SYNC_TERM,
+ &iso_pi(parent)->flags);
+ sock_put(parent);
+ }
+ }
+
iso_sock_clear_timer(sk);
iso_chan_del(sk, err);
release_sock(sk);
@@ -545,8 +581,6 @@ static struct sock *__iso_get_sock_listen_by_sid(bdaddr_t *ba, bdaddr_t *bc,
return NULL;
}
-typedef bool (*iso_sock_match_t)(struct sock *sk, void *data);
-
/* Find socket listening:
* source bdaddr (Unicast)
* destination bdaddr (Broadcast only)
@@ -574,19 +608,68 @@ static struct sock *iso_get_sock_listen(bdaddr_t *src, bdaddr_t *dst,
continue;
/* Exact match. */
- if (!bacmp(&iso_pi(sk)->src, src))
+ if (!bacmp(&iso_pi(sk)->src, src)) {
+ sock_hold(sk);
break;
+ }
/* Closest match */
- if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY))
+ if (!bacmp(&iso_pi(sk)->src, BDADDR_ANY)) {
+ if (sk1)
+ sock_put(sk1);
+
sk1 = sk;
+ sock_hold(sk1);
+ }
}
+ if (sk && sk1)
+ sock_put(sk1);
+
read_unlock(&iso_sk_list.lock);
return sk ? sk : sk1;
}
+static struct sock *iso_get_sock_big(struct sock *match_sk, bdaddr_t *src,
+ bdaddr_t *dst, uint8_t big)
+{
+ struct sock *sk = NULL;
+
+ read_lock(&iso_sk_list.lock);
+
+ sk_for_each(sk, &iso_sk_list.head) {
+ if (match_sk == sk)
+ continue;
+
+ /* Look for sockets that have already been
+ * connected to the BIG
+ */
+ if (sk->sk_state != BT_CONNECTED &&
+ sk->sk_state != BT_CONNECT)
+ continue;
+
+ /* Match Broadcast destination */
+ if (bacmp(&iso_pi(sk)->dst, dst))
+ continue;
+
+ /* Match BIG handle */
+ if (iso_pi(sk)->qos.bcast.big != big)
+ continue;
+
+ /* Match source address */
+ if (bacmp(&iso_pi(sk)->src, src))
+ continue;
+
+ sock_hold(sk);
+ break;
+ }
+
+ read_unlock(&iso_sk_list.lock);
+
+ return sk;
+}
+
static void iso_sock_destruct(struct sock *sk)
{
BT_DBG("sk %p", sk);
@@ -639,6 +722,28 @@ static void iso_sock_kill(struct sock *sk)
static void iso_sock_disconn(struct sock *sk)
{
+ struct sock *bis_sk;
+ struct hci_conn *hcon = iso_pi(sk)->conn->hcon;
+
+ if (test_bit(HCI_CONN_BIG_CREATED, &hcon->flags)) {
+ bis_sk = iso_get_sock_big(sk, &iso_pi(sk)->src,
+ &iso_pi(sk)->dst,
+ iso_pi(sk)->qos.bcast.big);
+
+ /* If there are any other connected sockets for the
+ * same BIG, just delete the sk and leave the bis
+ * hcon active, in case later rebinding is needed.
+ */
+ if (bis_sk) {
+ hcon->state = BT_OPEN;
+ iso_pi(sk)->conn->hcon = NULL;
+ iso_sock_clear_timer(sk);
+ iso_chan_del(sk, bt_to_errno(hcon->abort_reason));
+ sock_put(bis_sk);
+ return;
+ }
+ }
+
sk->sk_state = BT_DISCONN;
iso_sock_set_timer(sk, ISO_DISCONN_TIMEOUT);
iso_conn_lock(iso_pi(sk)->conn);
@@ -792,27 +897,75 @@ static int iso_sock_bind_bc(struct socket *sock, struct sockaddr *addr,
BT_DBG("sk %p bc_sid %u bc_num_bis %u", sk, sa->iso_bc->bc_sid,
sa->iso_bc->bc_num_bis);
- if (addr_len > sizeof(*sa) + sizeof(*sa->iso_bc))
+ if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc))
return -EINVAL;
bacpy(&iso_pi(sk)->dst, &sa->iso_bc->bc_bdaddr);
+
+ /* Check if the address type is of LE type */
+ if (!bdaddr_type_is_le(sa->iso_bc->bc_bdaddr_type))
+ return -EINVAL;
+
iso_pi(sk)->dst_type = sa->iso_bc->bc_bdaddr_type;
iso_pi(sk)->sync_handle = -1;
+
+ if (sa->iso_bc->bc_sid > 0x0f)
+ return -EINVAL;
+
iso_pi(sk)->bc_sid = sa->iso_bc->bc_sid;
+
+ if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS)
+ return -EINVAL;
+
iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis;
- for (i = 0; i < iso_pi(sk)->bc_num_bis; i++) {
+ for (i = 0; i < iso_pi(sk)->bc_num_bis; i++)
if (sa->iso_bc->bc_bis[i] < 0x01 ||
sa->iso_bc->bc_bis[i] > 0x1f)
return -EINVAL;
- memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis,
- iso_pi(sk)->bc_num_bis);
- }
+ memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis,
+ iso_pi(sk)->bc_num_bis);
return 0;
}
+static int iso_sock_bind_pa_sk(struct sock *sk, struct sockaddr_iso *sa,
+ int addr_len)
+{
+ int err = 0;
+
+ if (sk->sk_type != SOCK_SEQPACKET) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (addr_len != sizeof(*sa) + sizeof(*sa->iso_bc)) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ if (sa->iso_bc->bc_num_bis > ISO_MAX_NUM_BIS) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ iso_pi(sk)->bc_num_bis = sa->iso_bc->bc_num_bis;
+
+ for (int i = 0; i < iso_pi(sk)->bc_num_bis; i++)
+ if (sa->iso_bc->bc_bis[i] < 0x01 ||
+ sa->iso_bc->bc_bis[i] > 0x1f) {
+ err = -EINVAL;
+ goto done;
+ }
+
+ memcpy(iso_pi(sk)->bc_bis, sa->iso_bc->bc_bis,
+ iso_pi(sk)->bc_num_bis);
+
+done:
+ return err;
+}
+
static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
int addr_len)
{
@@ -828,6 +981,15 @@ static int iso_sock_bind(struct socket *sock, struct sockaddr *addr,
lock_sock(sk);
+ /* Allow the user to bind a PA sync socket to a number
+ * of BISes to sync to.
+ */
+ if (sk->sk_state == BT_CONNECT2 &&
+ test_bit(BT_SK_PA_SYNC, &iso_pi(sk)->flags)) {
+ err = iso_sock_bind_pa_sk(sk, sa, addr_len);
+ goto done;
+ }
+
if (sk->sk_state != BT_OPEN) {
err = -EBADFD;
goto done;
@@ -1694,6 +1856,7 @@ static void iso_conn_ready(struct iso_conn *conn)
parent->sk_data_ready(parent);
release_sock(parent);
+ sock_put(parent);
}
}
@@ -1759,9 +1922,20 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
/* Try to get PA sync listening socket, if it exists */
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
iso_match_pa_sync_flag, NULL);
- if (!sk)
+
+ if (!sk) {
sk = iso_get_sock_listen(&hdev->bdaddr, bdaddr,
iso_match_sync_handle, ev2);
+
+ /* If PA Sync is in process of terminating,
+ * do not handle any more BIGInfo adv reports.
+ */
+
+ if (sk && test_bit(BT_SK_PA_SYNC_TERM,
+ &iso_pi(sk)->flags))
+ return lm;
+ }
+
if (sk) {
int err;
@@ -1778,6 +1952,7 @@ int iso_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 *flags)
if (err) {
bt_dev_err(hdev, "hci_le_big_create_sync: %d",
err);
+ sock_put(sk);
sk = NULL;
}
}
@@ -1810,6 +1985,8 @@ done:
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags))
*flags |= HCI_PROTO_DEFER;
+ sock_put(sk);
+
return lm;
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index baeebee41cd9..656f49b299d2 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5613,7 +5613,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- err = hci_check_conn_params(min, max, latency, to_multiplier);
+ if (max > hcon->le_conn_max_interval) {
+ BT_DBG("requested connection interval exceeds current bounds.");
+ err = -EINVAL;
+ } else {
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
+ }
+
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
@@ -6526,7 +6532,8 @@ static inline void l2cap_sig_channel(struct l2cap_conn *conn,
if (len > skb->len || !cmd->ident) {
BT_DBG("corrupted command");
l2cap_sig_send_rej(conn, cmd->ident);
- break;
+ skb_pull(skb, len > skb->len ? skb->len : len);
+ continue;
}
err = l2cap_bredr_sig_cmd(conn, cmd, len, skb->data);
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index 53a796ac078c..43aa01fd07b9 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -30,6 +30,15 @@
#include <net/bluetooth/bluetooth.h>
+/**
+ * baswap() - Swaps the order of a bd address
+ * @dst: Pointer to a bdaddr_t struct that will store the swapped
+ * bd address.
+ * @src: Pointer to the bdaddr_t struct to be swapped.
+ *
+ * This function reverses the byte order of a Bluetooth device
+ * address.
+ */
void baswap(bdaddr_t *dst, const bdaddr_t *src)
{
const unsigned char *s = (const unsigned char *)src;
@@ -41,7 +50,19 @@ void baswap(bdaddr_t *dst, const bdaddr_t *src)
}
EXPORT_SYMBOL(baswap);
-/* Bluetooth error codes to Unix errno mapping */
+/**
+ * bt_to_errno() - Bluetooth error codes to standard errno
+ * @code: Bluetooth error code to be converted
+ *
+ * This function takes a Bluetooth error code as input and convets
+ * it to an equivalent Unix/standard errno value.
+ *
+ * Return:
+ *
+ * If the bt error code is known, an equivalent Unix errno value
+ * is returned.
+ * If the given bt error code is not known, ENOSYS is returned.
+ */
int bt_to_errno(__u16 code)
{
switch (code) {
@@ -135,10 +156,22 @@ int bt_to_errno(__u16 code)
}
EXPORT_SYMBOL(bt_to_errno);
-/* Unix errno to Bluetooth error codes mapping */
+/**
+ * bt_status() - Standard errno value to Bluetooth error code
+ * @err: Unix/standard errno value to be converted
+ *
+ * This function converts a standard/Unix errno value to an
+ * equivalent Bluetooth error code.
+ *
+ * Return: Bluetooth error code.
+ *
+ * If the given errno is not found, 0x1f is returned by default
+ * which indicates an unspecified error.
+ * For err >= 0, no conversion is performed, and the same value
+ * is immediately returned.
+ */
__u8 bt_status(int err)
{
- /* Don't convert if already positive value */
if (err >= 0)
return err;
@@ -206,6 +239,10 @@ __u8 bt_status(int err)
}
EXPORT_SYMBOL(bt_status);
+/**
+ * bt_info() - Log Bluetooth information message
+ * @format: Message's format string
+ */
void bt_info(const char *format, ...)
{
struct va_format vaf;
@@ -222,6 +259,10 @@ void bt_info(const char *format, ...)
}
EXPORT_SYMBOL(bt_info);
+/**
+ * bt_warn() - Log Bluetooth warning message
+ * @format: Message's format string
+ */
void bt_warn(const char *format, ...)
{
struct va_format vaf;
@@ -238,6 +279,10 @@ void bt_warn(const char *format, ...)
}
EXPORT_SYMBOL(bt_warn);
+/**
+ * bt_err() - Log Bluetooth error message
+ * @format: Message's format string
+ */
void bt_err(const char *format, ...)
{
struct va_format vaf;
@@ -267,6 +312,10 @@ bool bt_dbg_get(void)
return debug_enable;
}
+/**
+ * bt_dbg() - Log Bluetooth debugging message
+ * @format: Message's format string
+ */
void bt_dbg(const char *format, ...)
{
struct va_format vaf;
@@ -287,6 +336,13 @@ void bt_dbg(const char *format, ...)
EXPORT_SYMBOL(bt_dbg);
#endif
+/**
+ * bt_warn_ratelimited() - Log rate-limited Bluetooth warning message
+ * @format: Message's format string
+ *
+ * This functions works like bt_warn, but it uses rate limiting
+ * to prevent the message from being logged too often.
+ */
void bt_warn_ratelimited(const char *format, ...)
{
struct va_format vaf;
@@ -303,6 +359,13 @@ void bt_warn_ratelimited(const char *format, ...)
}
EXPORT_SYMBOL(bt_warn_ratelimited);
+/**
+ * bt_err_ratelimited() - Log rate-limited Bluetooth error message
+ * @format: Message's format string
+ *
+ * This functions works like bt_err, but it uses rate limiting
+ * to prevent the message from being logged too often.
+ */
void bt_err_ratelimited(const char *format, ...)
{
struct va_format vaf;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 9dd815b6603f..ee3b4aad8bd8 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1045,6 +1045,8 @@ static void rpa_expired(struct work_struct *work)
hci_cmd_sync_queue(hdev, rpa_expired_sync, NULL, NULL);
}
+static int set_discoverable_sync(struct hci_dev *hdev, void *data);
+
static void discov_off(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev,
@@ -1063,7 +1065,7 @@ static void discov_off(struct work_struct *work)
hci_dev_clear_flag(hdev, HCI_DISCOVERABLE);
hdev->discov_timeout = 0;
- hci_update_discoverable(hdev);
+ hci_cmd_sync_queue(hdev, set_discoverable_sync, NULL, NULL);
mgmt_new_settings(hdev);
@@ -10145,21 +10147,6 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16])
return false;
}
-static void restart_le_scan(struct hci_dev *hdev)
-{
- /* If controller is not scanning we are done. */
- if (!hci_dev_test_flag(hdev, HCI_LE_SCAN))
- return;
-
- if (time_after(jiffies + DISCOV_LE_RESTART_DELAY,
- hdev->discovery.scan_start +
- hdev->discovery.scan_duration))
- return;
-
- queue_delayed_work(hdev->req_workqueue, &hdev->le_scan_restart,
- DISCOV_LE_RESTART_DELAY);
-}
-
static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len)
{
@@ -10194,8 +10181,6 @@ static bool is_filter_match(struct hci_dev *hdev, s8 rssi, u8 *eir,
* scanning to ensure updated result with updated RSSI values.
*/
if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)) {
- restart_le_scan(hdev);
-
/* Validate RSSI value against the RSSI threshold once more. */
if (hdev->discovery.rssi != HCI_RSSI_INVALID &&
rssi < hdev->discovery.rssi)
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 053ef8f25fae..1d34d8497033 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1941,7 +1941,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
/* Get data directly from socket receive queue without copying it. */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
- if (!skb_linearize(skb)) {
+ if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) {
s = rfcomm_recv_frame(s, skb);
if (!s)
break;
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 94ec913dfb76..69c75c041fe1 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1041,7 +1041,7 @@ static void rfcomm_tty_flush_buffer(struct tty_struct *tty)
tty_wakeup(tty);
}
-static void rfcomm_tty_send_xchar(struct tty_struct *tty, char ch)
+static void rfcomm_tty_send_xchar(struct tty_struct *tty, u8 ch)
{
BT_DBG("tty %p ch %c", tty, ch);
}
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 5918d1b32e19..8906f7bdf4a9 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -12,6 +12,11 @@ extern struct bpf_struct_ops bpf_bpf_dummy_ops;
/* A common type for test_N with return value in bpf_dummy_ops */
typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
+static int dummy_ops_test_ret_function(struct bpf_dummy_ops_state *state, ...)
+{
+ return 0;
+}
+
struct bpf_dummy_ops_test_args {
u64 args[MAX_BPF_FUNC_ARGS];
struct bpf_dummy_ops_state state;
@@ -62,7 +67,7 @@ static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args)
static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
{
- dummy_ops_test_ret_fn test = (void *)image;
+ dummy_ops_test_ret_fn test = (void *)image + cfi_get_offset();
struct bpf_dummy_ops_state *state = NULL;
/* state needs to be NULL if args[0] is 0 */
@@ -101,12 +106,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
goto out;
}
- image = bpf_jit_alloc_exec(PAGE_SIZE);
+ image = arch_alloc_bpf_trampoline(PAGE_SIZE);
if (!image) {
err = -ENOMEM;
goto out;
}
- set_vm_flush_reset_perms(image);
link = kzalloc(sizeof(*link), GFP_USER);
if (!link) {
@@ -120,11 +124,12 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
op_idx = prog->expected_attach_type;
err = bpf_struct_ops_prepare_trampoline(tlinks, link,
&st_ops->func_models[op_idx],
+ &dummy_ops_test_ret_function,
image, image + PAGE_SIZE);
if (err < 0)
goto out;
- set_memory_rox((long)image, 1);
+ arch_protect_bpf_trampoline(image, PAGE_SIZE);
prog_ret = dummy_ops_call_op(image, args);
err = dummy_ops_copy_args(args);
@@ -134,7 +139,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
err = -EFAULT;
out:
kfree(args);
- bpf_jit_free_exec(image);
+ arch_free_bpf_trampoline(image, PAGE_SIZE);
if (link)
bpf_link_put(&link->link);
kfree(tlinks);
@@ -220,6 +225,28 @@ static void bpf_dummy_unreg(void *kdata)
{
}
+static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_2(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ return 0;
+}
+
+static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
+{
+ return 0;
+}
+
+static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
+ .test_1 = bpf_dummy_test_1,
+ .test_2 = bpf_dummy_test_2,
+ .test_sleepable = bpf_dummy_test_sleepable,
+};
+
struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
@@ -228,4 +255,5 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = {
.reg = bpf_dummy_reg,
.unreg = bpf_dummy_unreg,
.name = "bpf_dummy_ops",
+ .cfi_stubs = &__bpf_bpf_dummy_ops,
};
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index c9fdcc5cdce1..dfd919374017 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -542,7 +542,7 @@ struct bpf_fentry_test_t {
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
- asm volatile ("");
+ asm volatile ("": "+r"(arg));
return (long)arg;
}
@@ -600,10 +600,21 @@ __bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
refcount_dec(&p->cnt);
}
+__bpf_kfunc void bpf_kfunc_call_test_release_dtor(void *p)
+{
+ bpf_kfunc_call_test_release(p);
+}
+CFI_NOSEAL(bpf_kfunc_call_test_release_dtor);
+
__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
{
}
+__bpf_kfunc void bpf_kfunc_call_memb_release_dtor(void *p)
+{
+}
+CFI_NOSEAL(bpf_kfunc_call_memb_release_dtor);
+
__bpf_kfunc_end_defs();
BTF_SET8_START(bpf_test_modify_return_ids)
@@ -1671,9 +1682,9 @@ static const struct btf_kfunc_id_set bpf_prog_test_kfunc_set = {
BTF_ID_LIST(bpf_prog_test_dtor_kfunc_ids)
BTF_ID(struct, prog_test_ref_kfunc)
-BTF_ID(func, bpf_kfunc_call_test_release)
+BTF_ID(func, bpf_kfunc_call_test_release_dtor)
BTF_ID(struct, prog_test_member)
-BTF_ID(func, bpf_kfunc_call_memb_release)
+BTF_ID(func, bpf_kfunc_call_memb_release_dtor)
static int __init bpf_prog_test_run_init(void)
{
diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore
deleted file mode 100644
index f34e85ee8204..000000000000
--- a/net/bpfilter/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-bpfilter_umh
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
deleted file mode 100644
index 3d4a21462458..000000000000
--- a/net/bpfilter/Kconfig
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-menuconfig BPFILTER
- bool "BPF based packet filtering framework (BPFILTER)"
- depends on BPF && INET
- select USERMODE_DRIVER
- help
- This builds experimental bpfilter framework that is aiming to
- provide netfilter compatible functionality via BPF
-
-if BPFILTER
-config BPFILTER_UMH
- tristate "bpfilter kernel module with user mode helper"
- depends on CC_CAN_LINK
- depends on m || CC_CAN_LINK_STATIC
- default m
- help
- This builds bpfilter kernel module with embedded user mode helper
-
- Note: To compile this as built-in, your toolchain must support
- building static binaries, since rootfs isn't mounted at the time
- when __init functions are called and do_execv won't be able to find
- the elf interpreter.
-endif
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
deleted file mode 100644
index cdac82b8c53a..000000000000
--- a/net/bpfilter/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Makefile for the Linux BPFILTER layer.
-#
-
-userprogs := bpfilter_umh
-bpfilter_umh-objs := main.o
-userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi
-
-ifeq ($(CONFIG_BPFILTER_UMH), y)
-# builtin bpfilter_umh should be linked with -static
-# since rootfs isn't mounted at the time of __init
-# function is called and do_execv won't find elf interpreter
-userldflags += -static
-endif
-
-$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
-
-obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
-bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
deleted file mode 100644
index 97e129e3f31c..000000000000
--- a/net/bpfilter/bpfilter_kern.c
+++ /dev/null
@@ -1,136 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/umh.h>
-#include <linux/bpfilter.h>
-#include <linux/sched.h>
-#include <linux/sched/signal.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include "msgfmt.h"
-
-extern char bpfilter_umh_start;
-extern char bpfilter_umh_end;
-
-static void shutdown_umh(void)
-{
- struct umd_info *info = &bpfilter_ops.info;
- struct pid *tgid = info->tgid;
-
- if (tgid) {
- kill_pid(tgid, SIGKILL, 1);
- wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
- umd_cleanup_helper(info);
- }
-}
-
-static void __stop_umh(void)
-{
- if (IS_ENABLED(CONFIG_INET))
- shutdown_umh();
-}
-
-static int bpfilter_send_req(struct mbox_request *req)
-{
- struct mbox_reply reply;
- loff_t pos = 0;
- ssize_t n;
-
- if (!bpfilter_ops.info.tgid)
- return -EFAULT;
- pos = 0;
- n = kernel_write(bpfilter_ops.info.pipe_to_umh, req, sizeof(*req),
- &pos);
- if (n != sizeof(*req)) {
- pr_err("write fail %zd\n", n);
- goto stop;
- }
- pos = 0;
- n = kernel_read(bpfilter_ops.info.pipe_from_umh, &reply, sizeof(reply),
- &pos);
- if (n != sizeof(reply)) {
- pr_err("read fail %zd\n", n);
- goto stop;
- }
- return reply.status;
-stop:
- __stop_umh();
- return -EFAULT;
-}
-
-static int bpfilter_process_sockopt(struct sock *sk, int optname,
- sockptr_t optval, unsigned int optlen,
- bool is_set)
-{
- struct mbox_request req = {
- .is_set = is_set,
- .pid = current->pid,
- .cmd = optname,
- .addr = (uintptr_t)optval.user,
- .len = optlen,
- };
- if (sockptr_is_kernel(optval)) {
- pr_err("kernel access not supported\n");
- return -EFAULT;
- }
- return bpfilter_send_req(&req);
-}
-
-static int start_umh(void)
-{
- struct mbox_request req = { .pid = current->pid };
- int err;
-
- /* fork usermode process */
- err = fork_usermode_driver(&bpfilter_ops.info);
- if (err)
- return err;
- pr_info("Loaded bpfilter_umh pid %d\n", pid_nr(bpfilter_ops.info.tgid));
-
- /* health check that usermode process started correctly */
- if (bpfilter_send_req(&req) != 0) {
- shutdown_umh();
- return -EFAULT;
- }
-
- return 0;
-}
-
-static int __init load_umh(void)
-{
- int err;
-
- err = umd_load_blob(&bpfilter_ops.info,
- &bpfilter_umh_start,
- &bpfilter_umh_end - &bpfilter_umh_start);
- if (err)
- return err;
-
- mutex_lock(&bpfilter_ops.lock);
- err = start_umh();
- if (!err && IS_ENABLED(CONFIG_INET)) {
- bpfilter_ops.sockopt = &bpfilter_process_sockopt;
- bpfilter_ops.start = &start_umh;
- }
- mutex_unlock(&bpfilter_ops.lock);
- if (err)
- umd_unload_blob(&bpfilter_ops.info);
- return err;
-}
-
-static void __exit fini_umh(void)
-{
- mutex_lock(&bpfilter_ops.lock);
- if (IS_ENABLED(CONFIG_INET)) {
- shutdown_umh();
- bpfilter_ops.start = NULL;
- bpfilter_ops.sockopt = NULL;
- }
- mutex_unlock(&bpfilter_ops.lock);
-
- umd_unload_blob(&bpfilter_ops.info);
-}
-module_init(load_umh);
-module_exit(fini_umh);
-MODULE_LICENSE("GPL");
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
deleted file mode 100644
index 40311d10d2f2..000000000000
--- a/net/bpfilter/bpfilter_umh_blob.S
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
- .section .init.rodata, "a"
- .global bpfilter_umh_start
-bpfilter_umh_start:
- .incbin "net/bpfilter/bpfilter_umh"
- .global bpfilter_umh_end
-bpfilter_umh_end:
diff --git a/net/bpfilter/main.c b/net/bpfilter/main.c
deleted file mode 100644
index 291a92546246..000000000000
--- a/net/bpfilter/main.c
+++ /dev/null
@@ -1,64 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <sys/uio.h>
-#include <errno.h>
-#include <stdio.h>
-#include <sys/socket.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include "../../include/uapi/linux/bpf.h"
-#include <asm/unistd.h>
-#include "msgfmt.h"
-
-FILE *debug_f;
-
-static int handle_get_cmd(struct mbox_request *cmd)
-{
- switch (cmd->cmd) {
- case 0:
- return 0;
- default:
- break;
- }
- return -ENOPROTOOPT;
-}
-
-static int handle_set_cmd(struct mbox_request *cmd)
-{
- return -ENOPROTOOPT;
-}
-
-static void loop(void)
-{
- while (1) {
- struct mbox_request req;
- struct mbox_reply reply;
- int n;
-
- n = read(0, &req, sizeof(req));
- if (n != sizeof(req)) {
- fprintf(debug_f, "invalid request %d\n", n);
- return;
- }
-
- reply.status = req.is_set ?
- handle_set_cmd(&req) :
- handle_get_cmd(&req);
-
- n = write(1, &reply, sizeof(reply));
- if (n != sizeof(reply)) {
- fprintf(debug_f, "reply failed %d\n", n);
- return;
- }
- }
-}
-
-int main(void)
-{
- debug_f = fopen("/dev/kmsg", "w");
- setvbuf(debug_f, 0, _IOLBF, 0);
- fprintf(debug_f, "<5>Started bpfilter\n");
- loop();
- fclose(debug_f);
- return 0;
-}
diff --git a/net/bpfilter/msgfmt.h b/net/bpfilter/msgfmt.h
deleted file mode 100644
index 98d121c62945..000000000000
--- a/net/bpfilter/msgfmt.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _NET_BPFILTER_MSGFMT_H
-#define _NET_BPFILTER_MSGFMT_H
-
-struct mbox_request {
- __u64 addr;
- __u32 len;
- __u32 is_set;
- __u32 cmd;
- __u32 pid;
-};
-
-struct mbox_reply {
- __u32 status;
-};
-
-#endif
diff --git a/net/bridge/br_cfm_netlink.c b/net/bridge/br_cfm_netlink.c
index 5c4c369f8536..2faab44652e7 100644
--- a/net/bridge/br_cfm_netlink.c
+++ b/net/bridge/br_cfm_netlink.c
@@ -362,7 +362,7 @@ static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr,
memset(&tx_info, 0, sizeof(tx_info));
- instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]);
+ instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]);
nla_memcpy(&tx_info.dmac.addr,
tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC],
sizeof(tx_info.dmac.addr));
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8f40de3af154..65cee0ad3c1b 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -471,6 +471,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_fdb_get = br_fdb_get,
.ndo_mdb_add = br_mdb_add,
.ndo_mdb_del = br_mdb_del,
+ .ndo_mdb_del_bulk = br_mdb_del_bulk,
.ndo_mdb_dump = br_mdb_dump,
.ndo_mdb_get = br_mdb_get,
.ndo_bridge_getlink = br_getlink,
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 8cc526067bc2..bc37e47ad829 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -1412,6 +1412,139 @@ int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
return err;
}
+struct br_mdb_flush_desc {
+ u32 port_ifindex;
+ u16 vid;
+ u8 rt_protocol;
+ u8 state;
+ u8 state_mask;
+};
+
+static const struct nla_policy br_mdbe_attrs_del_bulk_pol[MDBE_ATTR_MAX + 1] = {
+ [MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
+ [MDBE_ATTR_STATE_MASK] = NLA_POLICY_MASK(NLA_U8, MDB_PERMANENT),
+};
+
+static int br_mdb_flush_desc_init(struct br_mdb_flush_desc *desc,
+ struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(tb[MDBA_SET_ENTRY]);
+ struct nlattr *mdbe_attrs[MDBE_ATTR_MAX + 1];
+ int err;
+
+ desc->port_ifindex = entry->ifindex;
+ desc->vid = entry->vid;
+ desc->state = entry->state;
+
+ if (!tb[MDBA_SET_ENTRY_ATTRS])
+ return 0;
+
+ err = nla_parse_nested(mdbe_attrs, MDBE_ATTR_MAX,
+ tb[MDBA_SET_ENTRY_ATTRS],
+ br_mdbe_attrs_del_bulk_pol, extack);
+ if (err)
+ return err;
+
+ if (mdbe_attrs[MDBE_ATTR_STATE_MASK])
+ desc->state_mask = nla_get_u8(mdbe_attrs[MDBE_ATTR_STATE_MASK]);
+
+ if (mdbe_attrs[MDBE_ATTR_RTPROT])
+ desc->rt_protocol = nla_get_u8(mdbe_attrs[MDBE_ATTR_RTPROT]);
+
+ return 0;
+}
+
+static void br_mdb_flush_host(struct net_bridge *br,
+ struct net_bridge_mdb_entry *mp,
+ const struct br_mdb_flush_desc *desc)
+{
+ u8 state;
+
+ if (desc->port_ifindex && desc->port_ifindex != br->dev->ifindex)
+ return;
+
+ if (desc->rt_protocol)
+ return;
+
+ state = br_group_is_l2(&mp->addr) ? MDB_PERMANENT : 0;
+ if (desc->state_mask && (state & desc->state_mask) != desc->state)
+ return;
+
+ br_multicast_host_leave(mp, true);
+ if (!mp->ports && netif_running(br->dev))
+ mod_timer(&mp->timer, jiffies);
+}
+
+static void br_mdb_flush_pgs(struct net_bridge *br,
+ struct net_bridge_mdb_entry *mp,
+ const struct br_mdb_flush_desc *desc)
+{
+ struct net_bridge_port_group __rcu **pp;
+ struct net_bridge_port_group *p;
+
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;) {
+ u8 state;
+
+ if (desc->port_ifindex &&
+ desc->port_ifindex != p->key.port->dev->ifindex) {
+ pp = &p->next;
+ continue;
+ }
+
+ if (desc->rt_protocol && desc->rt_protocol != p->rt_protocol) {
+ pp = &p->next;
+ continue;
+ }
+
+ state = p->flags & MDB_PG_FLAGS_PERMANENT ? MDB_PERMANENT : 0;
+ if (desc->state_mask &&
+ (state & desc->state_mask) != desc->state) {
+ pp = &p->next;
+ continue;
+ }
+
+ br_multicast_del_pg(mp, p, pp);
+ }
+}
+
+static void br_mdb_flush(struct net_bridge *br,
+ const struct br_mdb_flush_desc *desc)
+{
+ struct net_bridge_mdb_entry *mp;
+
+ spin_lock_bh(&br->multicast_lock);
+
+ /* Safe variant is not needed because entries are removed from the list
+ * upon group timer expiration or bridge deletion.
+ */
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
+ if (desc->vid && desc->vid != mp->addr.vid)
+ continue;
+
+ br_mdb_flush_host(br, mp, desc);
+ br_mdb_flush_pgs(br, mp, desc);
+ }
+
+ spin_unlock_bh(&br->multicast_lock);
+}
+
+int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge *br = netdev_priv(dev);
+ struct br_mdb_flush_desc desc = {};
+ int err;
+
+ err = br_mdb_flush_desc_init(&desc, tb, extack);
+ if (err)
+ return err;
+
+ br_mdb_flush(br, &desc);
+
+ return 0;
+}
+
static const struct nla_policy br_mdbe_attrs_get_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_SOURCE] = NLA_POLICY_RANGE(NLA_BINARY,
sizeof(struct in_addr),
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d7d021af1029..2d7b73242958 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t)
}
#endif
+static void br_multicast_query_delay_expired(struct timer_list *t)
+{
+}
+
static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
struct br_ip *ip,
struct sk_buff *skb)
@@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
- query->delay_time = jiffies + max_delay;
+ mod_timer(&query->delay_timer, jiffies + max_delay);
mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
}
@@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br,
brmctx->multicast_querier_interval = 255 * HZ;
brmctx->multicast_membership_interval = 260 * HZ;
- brmctx->ip4_other_query.delay_time = 0;
brmctx->ip4_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
brmctx->multicast_mld_version = 1;
- brmctx->ip6_other_query.delay_time = 0;
brmctx->ip6_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
#endif
@@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip4_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip4_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
@@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip6_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip6_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
@@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
{
del_timer_sync(&brmctx->ip4_mc_router_timer);
del_timer_sync(&brmctx->ip4_other_query.timer);
+ del_timer_sync(&brmctx->ip4_other_query.delay_timer);
del_timer_sync(&brmctx->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
del_timer_sync(&brmctx->ip6_mc_router_timer);
del_timer_sync(&brmctx->ip6_other_query.timer);
+ del_timer_sync(&brmctx->ip6_other_query.delay_timer);
del_timer_sync(&brmctx->ip6_own_query.timer);
#endif
}
@@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
max_delay = brmctx->multicast_query_response_interval;
if (!timer_pending(&brmctx->ip4_other_query.timer))
- brmctx->ip4_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip4_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
if (!timer_pending(&brmctx->ip6_other_query.timer))
- brmctx->ip6_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip6_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
#endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 6adcb45bca75..35e10c5a766d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -43,6 +43,10 @@
#include <linux/sysctl.h>
#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack_core.h>
+#endif
+
static unsigned int brnf_net_id __read_mostly;
struct brnf_net {
@@ -279,8 +283,17 @@ int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_
if ((READ_ONCE(neigh->nud_state) & NUD_CONNECTED) &&
READ_ONCE(neigh->hh.hh_len)) {
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(skb, net);
+ if (!br_indev) {
+ neigh_release(neigh);
+ goto free_skb;
+ }
+
neigh_hh_bridge(&neigh->hh, skb);
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
+
ret = br_handle_frame_finish(net, sk, skb);
} else {
/* the neighbour function below overwrites the complete
@@ -352,12 +365,18 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
*/
static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev, *br_indev;
struct iphdr *iph = ip_hdr(skb);
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
+ br_indev = nf_bridge_get_physindev(skb, net);
+ if (!br_indev) {
+ kfree_skb(skb);
+ return 0;
+ }
+
nf_bridge->frag_max_size = IPCB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
@@ -397,7 +416,7 @@ free_skb:
} else {
if (skb_dst(skb)->dev == dev) {
bridged_dnat:
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING,
@@ -410,7 +429,7 @@ bridged_dnat:
skb->pkt_type = PACKET_HOST;
}
} else {
- rt = bridge_parent_rtable(nf_bridge->physindev);
+ rt = bridge_parent_rtable(br_indev);
if (!rt) {
kfree_skb(skb);
return 0;
@@ -419,7 +438,7 @@ bridged_dnat:
skb_dst_set_noref(skb, &rt->dst);
}
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
@@ -456,7 +475,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb, const struct net *net)
}
nf_bridge->in_prerouting = 1;
- nf_bridge->physindev = skb->dev;
+ nf_bridge->physinif = skb->dev->ifindex;
skb->dev = brnf_get_logical_dev(skb, skb->dev, net);
if (skb->protocol == htons(ETH_P_8021Q))
@@ -538,6 +557,90 @@ static unsigned int br_nf_pre_routing(void *priv,
return NF_STOLEN;
}
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+/* conntracks' nf_confirm logic cannot handle cloned skbs referencing
+ * the same nf_conn entry, which will happen for multicast (broadcast)
+ * Frames on bridges.
+ *
+ * Example:
+ * macvlan0
+ * br0
+ * ethX ethY
+ *
+ * ethX (or Y) receives multicast or broadcast packet containing
+ * an IP packet, not yet in conntrack table.
+ *
+ * 1. skb passes through bridge and fake-ip (br_netfilter)Prerouting.
+ * -> skb->_nfct now references a unconfirmed entry
+ * 2. skb is broad/mcast packet. bridge now passes clones out on each bridge
+ * interface.
+ * 3. skb gets passed up the stack.
+ * 4. In macvlan case, macvlan driver retains clone(s) of the mcast skb
+ * and schedules a work queue to send them out on the lower devices.
+ *
+ * The clone skb->_nfct is not a copy, it is the same entry as the
+ * original skb. The macvlan rx handler then returns RX_HANDLER_PASS.
+ * 5. Normal conntrack hooks (in NF_INET_LOCAL_IN) confirm the orig skb.
+ *
+ * The Macvlan broadcast worker and normal confirm path will race.
+ *
+ * This race will not happen if step 2 already confirmed a clone. In that
+ * case later steps perform skb_clone() with skb->_nfct already confirmed (in
+ * hash table). This works fine.
+ *
+ * But such confirmation won't happen when eb/ip/nftables rules dropped the
+ * packets before they reached the nf_confirm step in postrouting.
+ *
+ * Work around this problem by explicit confirmation of the entry at
+ * LOCAL_IN time, before upper layer has a chance to clone the unconfirmed
+ * entry.
+ *
+ */
+static unsigned int br_nf_local_in(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conntrack *nfct = skb_nfct(skb);
+ const struct nf_ct_hook *ct_hook;
+ struct nf_conn *ct;
+ int ret;
+
+ if (!nfct || skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ if (likely(nf_ct_is_confirmed(ct)))
+ return NF_ACCEPT;
+
+ WARN_ON_ONCE(skb_shared(skb));
+ WARN_ON_ONCE(refcount_read(&nfct->use) != 1);
+
+ /* We can't call nf_confirm here, it would create a dependency
+ * on nf_conntrack module.
+ */
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (!ct_hook) {
+ skb->_nfct = 0ul;
+ nf_conntrack_put(nfct);
+ return NF_ACCEPT;
+ }
+
+ nf_bridge_pull_encap_header(skb);
+ ret = ct_hook->confirm(skb);
+ switch (ret & NF_VERDICT_MASK) {
+ case NF_STOLEN:
+ return NF_STOLEN;
+ default:
+ nf_bridge_push_encap_header(skb);
+ break;
+ }
+
+ ct = container_of(nfct, struct nf_conn, ct_general);
+ WARN_ON_ONCE(!nf_ct_is_confirmed(ct));
+
+ return ret;
+}
+#endif
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -553,7 +656,11 @@ static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff
if (skb->protocol == htons(ETH_P_IPV6))
nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
- in = nf_bridge->physindev;
+ in = nf_bridge_get_physindev(skb, net);
+ if (!in) {
+ kfree_skb(skb);
+ return 0;
+ }
if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
@@ -899,6 +1006,13 @@ static unsigned int ip_sabotage_in(void *priv,
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(skb, dev_net(skb->dev));
+ if (!br_indev) {
+ kfree_skb(skb);
+ return;
+ }
skb_pull(skb, ETH_HLEN);
nf_bridge->bridged_dnat = 0;
@@ -908,7 +1022,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
nf_bridge->neigh_header,
ETH_HLEN - ETH_ALEN);
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge->physoutdev = NULL;
br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
@@ -938,6 +1052,14 @@ static const struct nf_hook_ops br_nf_ops[] = {
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ {
+ .hook = br_nf_local_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_BR_PRI_LAST,
+ },
+#endif
{
.hook = br_nf_forward,
.pf = NFPROTO_BRIDGE,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 2e24a743f917..e0421eaa3abc 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -102,9 +102,15 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
- struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev, *br_indev;
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
+ br_indev = nf_bridge_get_physindev(skb, net);
+ if (!br_indev) {
+ kfree_skb(skb);
+ return 0;
+ }
+
nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
if (nf_bridge->pkt_otherhost) {
@@ -122,7 +128,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
}
if (skb_dst(skb)->dev == dev) {
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING,
@@ -133,7 +139,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
skb->pkt_type = PACKET_HOST;
} else {
- rt = bridge_parent_rtable(nf_bridge->physindev);
+ rt = bridge_parent_rtable(br_indev);
if (!rt) {
kfree_skb(skb);
return 0;
@@ -142,7 +148,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc
skb_dst_set_noref(skb, &rt->dst);
}
- skb->dev = nf_bridge->physindev;
+ skb->dev = br_indev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 6b7f36769d03..86ea5e6689b5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -78,7 +78,7 @@ struct bridge_mcast_own_query {
/* other querier */
struct bridge_mcast_other_query {
struct timer_list timer;
- unsigned long delay_time;
+ struct timer_list delay_timer;
};
/* selected querier */
@@ -186,6 +186,7 @@ enum {
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
+ * @tnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
* @priv_flags: private (in-kernel) bridge vlan flags
@@ -196,6 +197,7 @@ enum {
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
* @brvlan: if MASTER flag unset, this points to the global per-VLAN context
* for this VLAN entry
+ * @tinfo: bridge tunnel info
* @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
* @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
* context
@@ -1020,6 +1022,8 @@ int br_mdb_add(struct net_device *dev, struct nlattr *tb[], u16 nlmsg_flags,
struct netlink_ext_ack *extack);
int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack);
+int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack);
int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
struct netlink_callback *cb);
int br_mdb_get(struct net_device *dev, struct nlattr *tb[], u32 portid, u32 seq,
@@ -1155,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
own_querier_enabled = false;
}
- return time_is_before_jiffies(querier->delay_time) &&
+ return !timer_pending(&querier->delay_timer) &&
(own_querier_enabled || timer_pending(&querier->timer));
}
@@ -1428,6 +1432,12 @@ static inline int br_mdb_del(struct net_device *dev, struct nlattr *tb[],
return -EOPNOTSUPP;
}
+static inline int br_mdb_del_bulk(struct net_device *dev, struct nlattr *tb[],
+ struct netlink_ext_ack *extack)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int br_mdb_dump(struct net_device *dev, struct sk_buff *skb,
struct netlink_callback *cb)
{
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index ee84e783e1df..7b41ee8740cb 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -595,21 +595,40 @@ br_switchdev_mdb_replay_one(struct notifier_block *nb, struct net_device *dev,
}
static int br_switchdev_mdb_queue_one(struct list_head *mdb_list,
+ struct net_device *dev,
+ unsigned long action,
enum switchdev_obj_id id,
const struct net_bridge_mdb_entry *mp,
struct net_device *orig_dev)
{
- struct switchdev_obj_port_mdb *mdb;
+ struct switchdev_obj_port_mdb mdb = {
+ .obj = {
+ .id = id,
+ .orig_dev = orig_dev,
+ },
+ };
+ struct switchdev_obj_port_mdb *pmdb;
- mdb = kzalloc(sizeof(*mdb), GFP_ATOMIC);
- if (!mdb)
- return -ENOMEM;
+ br_switchdev_mdb_populate(&mdb, mp);
+
+ if (action == SWITCHDEV_PORT_OBJ_ADD &&
+ switchdev_port_obj_act_is_deferred(dev, action, &mdb.obj)) {
+ /* This event is already in the deferred queue of
+ * events, so this replay must be elided, lest the
+ * driver receives duplicate events for it. This can
+ * only happen when replaying additions, since
+ * modifications are always immediately visible in
+ * br->mdb_list, whereas actual event delivery may be
+ * delayed.
+ */
+ return 0;
+ }
- mdb->obj.id = id;
- mdb->obj.orig_dev = orig_dev;
- br_switchdev_mdb_populate(mdb, mp);
- list_add_tail(&mdb->obj.list, mdb_list);
+ pmdb = kmemdup(&mdb, sizeof(mdb), GFP_ATOMIC);
+ if (!pmdb)
+ return -ENOMEM;
+ list_add_tail(&pmdb->obj.list, mdb_list);
return 0;
}
@@ -677,51 +696,50 @@ br_switchdev_mdb_replay(struct net_device *br_dev, struct net_device *dev,
if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;
- /* We cannot walk over br->mdb_list protected just by the rtnl_mutex,
- * because the write-side protection is br->multicast_lock. But we
- * need to emulate the [ blocking ] calling context of a regular
- * switchdev event, so since both br->multicast_lock and RCU read side
- * critical sections are atomic, we have no choice but to pick the RCU
- * read side lock, queue up all our events, leave the critical section
- * and notify switchdev from blocking context.
+ if (adding)
+ action = SWITCHDEV_PORT_OBJ_ADD;
+ else
+ action = SWITCHDEV_PORT_OBJ_DEL;
+
+ /* br_switchdev_mdb_queue_one() will take care to not queue a
+ * replay of an event that is already pending in the switchdev
+ * deferred queue. In order to safely determine that, there
+ * must be no new deferred MDB notifications enqueued for the
+ * duration of the MDB scan. Therefore, grab the write-side
+ * lock to avoid racing with any concurrent IGMP/MLD snooping.
*/
- rcu_read_lock();
+ spin_lock_bh(&br->multicast_lock);
- hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
+ hlist_for_each_entry(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group __rcu * const *pp;
const struct net_bridge_port_group *p;
if (mp->host_joined) {
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_HOST_MDB,
mp, br_dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
- for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
+ for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
pp = &p->next) {
if (p->key.port->dev != dev)
continue;
- err = br_switchdev_mdb_queue_one(&mdb_list,
+ err = br_switchdev_mdb_queue_one(&mdb_list, dev, action,
SWITCHDEV_OBJ_ID_PORT_MDB,
mp, dev);
if (err) {
- rcu_read_unlock();
+ spin_unlock_bh(&br->multicast_lock);
goto out_free_mdb;
}
}
}
- rcu_read_unlock();
-
- if (adding)
- action = SWITCHDEV_PORT_OBJ_ADD;
- else
- action = SWITCHDEV_PORT_OBJ_DEL;
+ spin_unlock_bh(&br->multicast_lock);
list_for_each_entry(obj, &mdb_list, list) {
err = br_switchdev_mdb_replay_one(nb, dev,
@@ -786,6 +804,16 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
br_switchdev_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
br_switchdev_vlan_replay(br_dev, ctx, false, blocking_nb, NULL);
+
+ /* Make sure that the device leaving this bridge has seen all
+ * relevant events before it is disassociated. In the normal
+ * case, when the device is directly attached to the bridge,
+ * this is covered by del_nbp(). If the association was indirect
+ * however, e.g. via a team or bond, and the device is leaving
+ * that intermediate device, then the bridge port remains in
+ * place.
+ */
+ switchdev_deferred_process();
}
/* Let the bridge know that this port is offloaded, so that it can assign a
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index abb090f94ed2..6f877e31709b 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -291,6 +291,30 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
return nf_conntrack_in(skb, &bridge_state);
}
+static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+
+ if (skb->pkt_type == PACKET_HOST)
+ return NF_ACCEPT;
+
+ /* nf_conntrack_confirm() cannot handle concurrent clones,
+ * this happens for broad/multicast frames with e.g. macvlan on top
+ * of the bridge device.
+ */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+ return NF_ACCEPT;
+
+ /* let inet prerouting call conntrack again */
+ skb->_nfct = 0;
+ nf_ct_put(ct);
+
+ return NF_ACCEPT;
+}
+
static void nf_ct_bridge_frag_save(struct sk_buff *skb,
struct nf_bridge_frag_data *data)
{
@@ -386,6 +410,12 @@ static struct nf_hook_ops nf_ct_bridge_hook_ops[] __read_mostly = {
.priority = NF_IP_PRI_CONNTRACK,
},
{
+ .hook = nf_ct_bridge_in,
+ .pf = NFPROTO_BRIDGE,
+ .hooknum = NF_BR_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
.hook = nf_ct_bridge_post,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 6a0cba4fc29f..24e85c5487ef 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -27,6 +27,7 @@
#include <net/caif/cfcnfg.h>
#include <net/caif/cfserl.h>
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol support");
MODULE_LICENSE("GPL");
/* Used for local tracking of the CAIF net devices */
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 9c82698da4f5..039dfbd367c9 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -27,6 +27,7 @@
#include <net/caif/caif_dev.h>
#include <net/caif/cfpkt.h>
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol socket support (AF_CAIF)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(AF_CAIF);
diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
index bf61ea4b8132..5dc05a1e3178 100644
--- a/net/caif/caif_usb.c
+++ b/net/caif/caif_usb.c
@@ -20,6 +20,7 @@
#include <net/caif/cfpkt.h>
#include <net/caif/cfcnfg.h>
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol USB support");
MODULE_LICENSE("GPL");
#define CFUSB_PAD_DESCR_SZ 1 /* Alignment descriptor length */
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index f35fc87c453a..47901bd4def1 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -31,6 +31,7 @@
/*This list is protected by the rtnl lock. */
static LIST_HEAD(chnl_net_list);
+MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol GPRS network device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("caif");
diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h
index 16af1a7f80f6..31a93cae5111 100644
--- a/net/can/j1939/j1939-priv.h
+++ b/net/can/j1939/j1939-priv.h
@@ -86,7 +86,7 @@ struct j1939_priv {
unsigned int tp_max_packet_size;
/* lock for j1939_socks list */
- spinlock_t j1939_socks_lock;
+ rwlock_t j1939_socks_lock;
struct list_head j1939_socks;
struct kref rx_kref;
@@ -301,6 +301,7 @@ struct j1939_sock {
int ifindex;
struct j1939_addr addr;
+ spinlock_t filters_lock;
struct j1939_filter *filters;
int nfilters;
pgn_t pgn_rx_filter;
diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c
index ecff1c947d68..a6fb89fa6278 100644
--- a/net/can/j1939/main.c
+++ b/net/can/j1939/main.c
@@ -274,7 +274,7 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev)
return ERR_PTR(-ENOMEM);
j1939_tp_init(priv);
- spin_lock_init(&priv->j1939_socks_lock);
+ rwlock_init(&priv->j1939_socks_lock);
INIT_LIST_HEAD(&priv->j1939_socks);
mutex_lock(&j1939_netdev_lock);
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index 14c431663233..305dd72c844c 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -80,16 +80,16 @@ static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk)
jsk->state |= J1939_SOCK_BOUND;
j1939_priv_get(priv);
- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_add_tail(&jsk->list, &priv->j1939_socks);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);
}
static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk)
{
- spin_lock_bh(&priv->j1939_socks_lock);
+ write_lock_bh(&priv->j1939_socks_lock);
list_del_init(&jsk->list);
- spin_unlock_bh(&priv->j1939_socks_lock);
+ write_unlock_bh(&priv->j1939_socks_lock);
j1939_priv_put(priv);
jsk->state &= ~J1939_SOCK_BOUND;
@@ -262,12 +262,17 @@ static bool j1939_sk_match_dst(struct j1939_sock *jsk,
static bool j1939_sk_match_filter(struct j1939_sock *jsk,
const struct j1939_sk_buff_cb *skcb)
{
- const struct j1939_filter *f = jsk->filters;
- int nfilter = jsk->nfilters;
+ const struct j1939_filter *f;
+ int nfilter;
+
+ spin_lock_bh(&jsk->filters_lock);
+
+ f = jsk->filters;
+ nfilter = jsk->nfilters;
if (!nfilter)
/* receive all when no filters are assigned */
- return true;
+ goto filter_match_found;
for (; nfilter; ++f, --nfilter) {
if ((skcb->addr.pgn & f->pgn_mask) != f->pgn)
@@ -276,9 +281,15 @@ static bool j1939_sk_match_filter(struct j1939_sock *jsk,
continue;
if ((skcb->addr.src_name & f->name_mask) != f->name)
continue;
- return true;
+ goto filter_match_found;
}
+
+ spin_unlock_bh(&jsk->filters_lock);
return false;
+
+filter_match_found:
+ spin_unlock_bh(&jsk->filters_lock);
+ return true;
}
static bool j1939_sk_recv_match_one(struct j1939_sock *jsk,
@@ -329,13 +340,13 @@ bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb)
struct j1939_sock *jsk;
bool match = false;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
match = j1939_sk_recv_match_one(jsk, skcb);
if (match)
break;
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
return match;
}
@@ -344,11 +355,11 @@ void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb)
{
struct j1939_sock *jsk;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
j1939_sk_recv_one(jsk, skb);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}
static void j1939_sk_sock_destruct(struct sock *sk)
@@ -401,6 +412,7 @@ static int j1939_sk_init(struct sock *sk)
atomic_set(&jsk->skb_pending, 0);
spin_lock_init(&jsk->sk_session_queue_lock);
INIT_LIST_HEAD(&jsk->sk_session_queue);
+ spin_lock_init(&jsk->filters_lock);
/* j1939_sk_sock_destruct() depends on SOCK_RCU_FREE flag */
sock_set_flag(sk, SOCK_RCU_FREE);
@@ -703,9 +715,11 @@ static int j1939_sk_setsockopt(struct socket *sock, int level, int optname,
}
lock_sock(&jsk->sk);
+ spin_lock_bh(&jsk->filters_lock);
ofilters = jsk->filters;
jsk->filters = filters;
jsk->nfilters = count;
+ spin_unlock_bh(&jsk->filters_lock);
release_sock(&jsk->sk);
kfree(ofilters);
return 0;
@@ -1080,12 +1094,12 @@ void j1939_sk_errqueue(struct j1939_session *session,
}
/* spread RX notifications to all sockets subscribed to this session */
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
if (j1939_sk_recv_match_one(jsk, &session->skcb))
__j1939_sk_errqueue(session, &jsk->sk, type);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
};
void j1939_sk_send_loop_abort(struct sock *sk, int err)
@@ -1273,7 +1287,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
struct j1939_sock *jsk;
int error_code = ENETDOWN;
- spin_lock_bh(&priv->j1939_socks_lock);
+ read_lock_bh(&priv->j1939_socks_lock);
list_for_each_entry(jsk, &priv->j1939_socks, list) {
jsk->sk.sk_err = error_code;
if (!sock_flag(&jsk->sk, SOCK_DEAD))
@@ -1281,7 +1295,7 @@ void j1939_sk_netdev_event_netdown(struct j1939_priv *priv)
j1939_sk_queue_drop_all(priv, jsk, error_code);
}
- spin_unlock_bh(&priv->j1939_socks_lock);
+ read_unlock_bh(&priv->j1939_socks_lock);
}
static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd,
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index f9a50d7f0d20..0cb61c76b9b8 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
/* Initialize data cursor if it's not a sparse read */
- if (!msg->sparse_read)
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ u64 len = msg->sparse_read_total ? : data_len;
+
+ ceph_msg_data_cursor_init(&msg->cursor, msg, len);
}
/*
@@ -991,7 +992,7 @@ static inline int read_partial_message_section(struct ceph_connection *con,
return read_partial_message_chunk(con, section, sec_len, crc);
}
-static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
@@ -1026,7 +1027,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
return 1;
}
-static int read_sparse_msg_data(struct ceph_connection *con)
+static int read_partial_sparse_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
@@ -1036,31 +1037,31 @@ static int read_sparse_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
- do {
+ while (cursor->total_resid) {
if (con->v1.in_sr_kvec.iov_base)
ret = read_partial_message_chunk(con,
&con->v1.in_sr_kvec,
con->v1.in_sr_len,
&crc);
else if (cursor->sr_resid > 0)
- ret = read_sparse_msg_extent(con, &crc);
-
- if (ret <= 0) {
- if (do_datacrc)
- con->in_data_crc = crc;
- return ret;
- }
+ ret = read_partial_sparse_msg_extent(con, &crc);
+ if (ret <= 0)
+ break;
memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
ret = con->ops->sparse_read(con, cursor,
(char **)&con->v1.in_sr_kvec.iov_base);
+ if (ret <= 0) {
+ ret = ret ? ret : 1; /* must return > 0 to indicate success */
+ break;
+ }
con->v1.in_sr_len = ret;
- } while (ret > 0);
+ }
if (do_datacrc)
con->in_data_crc = crc;
- return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+ return ret;
}
static int read_partial_msg_data(struct ceph_connection *con)
@@ -1253,8 +1254,8 @@ static int read_partial_message(struct ceph_connection *con)
if (!m->num_data_items)
return -EIO;
- if (m->sparse_read)
- ret = read_sparse_msg_data(con);
+ if (m->sparse_read_total)
+ ret = read_partial_sparse_msg_data(con);
else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index f8ec60e1aba3..bd608ffa0627 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con)
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
struct page **pages = NULL;
- bool sparse = con->in_msg->sparse_read;
+ bool sparse = !!con->in_msg->sparse_read_total;
int dpos = 0;
int tail_len;
int ret;
@@ -2034,6 +2034,9 @@ static int prepare_sparse_read_data(struct ceph_connection *con)
if (!con_secure(con))
con->in_data_crc = -1;
+ ceph_msg_data_cursor_init(&con->v2.in_cursor, msg,
+ msg->sparse_read_total);
+
reset_in_kvecs(con);
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
con->v2.data_len_remain = data_len(msg);
@@ -2060,7 +2063,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
}
if (data_len(msg)) {
- if (msg->sparse_read)
+ if (msg->sparse_read_total)
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
else
con->v2.in_state = IN_S_PREPARE_READ_DATA;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index d3a759e052c8..9d078b37fe0b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}
m = ceph_msg_get(req->r_reply);
- m->sparse_read = (bool)srlen;
+ m->sparse_read_total = srlen;
dout("get_reply tid %lld %p\n", tid, m);
@@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con,
}
if (o->o_sparse_op_idx < 0) {
- u64 srlen = sparse_data_requested(req);
-
- dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
- __func__, o->o_osd, srlen);
- ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+ dout("%s: [%d] starting new sparse read req\n",
+ __func__, o->o_osd);
} else {
u64 end;
@@ -5850,8 +5847,6 @@ static inline void convert_extent_map(struct ceph_sparse_read *sr)
}
#endif
-#define MAX_EXTENTS 4096
-
static int osd_sparse_read(struct ceph_connection *con,
struct ceph_msg_data_cursor *cursor,
char **pbuf)
@@ -5859,8 +5854,8 @@ static int osd_sparse_read(struct ceph_connection *con,
struct ceph_osd *o = con->private;
struct ceph_sparse_read *sr = &o->o_sparse_read;
u32 count = sr->sr_count;
- u64 eoff, elen;
- int ret;
+ u64 eoff, elen, len = 0;
+ int i, ret;
switch (sr->sr_state) {
case CEPH_SPARSE_READ_HDR:
@@ -5882,23 +5877,16 @@ next_op:
if (count > 0) {
if (!sr->sr_extent || count > sr->sr_ext_len) {
- /*
- * Apply a hard cap to the number of extents.
- * If we have more, assume something is wrong.
- */
- if (count > MAX_EXTENTS) {
- dout("%s: OSD returned 0x%x extents in a single reply!\n",
- __func__, count);
- return -EREMOTEIO;
- }
-
/* no extent array provided, or too short */
kfree(sr->sr_extent);
sr->sr_extent = kmalloc_array(count,
sizeof(*sr->sr_extent),
GFP_NOIO);
- if (!sr->sr_extent)
+ if (!sr->sr_extent) {
+ pr_err("%s: failed to allocate %u extents\n",
+ __func__, count);
return -ENOMEM;
+ }
sr->sr_ext_len = count;
}
ret = count * sizeof(*sr->sr_extent);
@@ -5912,8 +5900,20 @@ next_op:
convert_extent_map(sr);
ret = sizeof(sr->sr_datalen);
*pbuf = (char *)&sr->sr_datalen;
- sr->sr_state = CEPH_SPARSE_READ_DATA;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
break;
+ case CEPH_SPARSE_READ_DATA_PRE:
+ /* Convert sr_datalen to host-endian */
+ sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
+ for (i = 0; i < count; i++)
+ len += sr->sr_extent[i].len;
+ if (sr->sr_datalen != len) {
+ pr_warn_ratelimited("data len %u != extent len %llu\n",
+ sr->sr_datalen, len);
+ return -EREMOTEIO;
+ }
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ fallthrough;
case CEPH_SPARSE_READ_DATA:
if (sr->sr_index >= count) {
sr->sr_state = CEPH_SPARSE_READ_HDR;
diff --git a/net/compat.c b/net/compat.c
index 6564720f32b7..485db8ee9b28 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -297,7 +297,7 @@ void scm_detach_fds_compat(struct msghdr *msg, struct scm_cookie *scm)
int err = 0, i;
for (i = 0; i < fdmax; i++) {
- err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+ err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err < 0)
break;
}
diff --git a/net/core/Makefile b/net/core/Makefile
index 0cb734cbc24b..821aec06abf1 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -18,7 +18,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
obj-y += net-sysfs.o
-obj-$(CONFIG_PAGE_POOL) += page_pool.o
+obj-$(CONFIG_PAGE_POOL) += page_pool.o page_pool_user.o
obj-$(CONFIG_PROC_FS) += net-procfs.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index cca7594be92e..6c4d90b24d46 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -275,9 +275,10 @@ BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
void *owner, u32 size)
{
- int optmem_max = READ_ONCE(sysctl_optmem_max);
struct sock *sk = (struct sock *)owner;
+ int optmem_max;
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
/* same check as in sock_kmalloc() */
if (size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 103d46fa0eeb..a8b625abe242 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -751,7 +751,7 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
size_t len, void *from, void *priv2)
{
__wsum *csum = priv2;
- __wsum next = csum_partial_copy_nocheck(from, iter_to, len);
+ __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);
*csum = csum_block_add(*csum, next, progress);
return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index ad20bebe153f..76e6438f4858 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -165,7 +165,6 @@ static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_extack(unsigned long val,
struct net_device *dev,
struct netlink_ext_ack *extack);
-static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
* The @dev_base_head list is protected by @dev_base_lock and the rtnl
@@ -337,7 +336,7 @@ int netdev_name_node_alt_create(struct net_device *dev, const char *name)
return -ENOMEM;
netdev_name_node_add(net, name_node);
/* The node that holds dev->name acts as a head of per-device list. */
- list_add_tail(&name_node->list, &dev->name_node->list);
+ list_add_tail_rcu(&name_node->list, &dev->name_node->list);
return 0;
}
@@ -3757,6 +3756,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);
+ tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP);
+
if (q->flags & TCQ_F_NOLOCK) {
if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
qdisc_run_begin(q)) {
@@ -3786,7 +3787,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
no_lock_out:
if (unlikely(to_free))
kfree_skb_list_reason(to_free,
- SKB_DROP_REASON_QDISC_DROP);
+ tcf_get_drop_reason(to_free));
return rc;
}
@@ -3841,7 +3842,8 @@ no_lock_out:
}
spin_unlock(root_lock);
if (unlikely(to_free))
- kfree_skb_list_reason(to_free, SKB_DROP_REASON_QDISC_DROP);
+ kfree_skb_list_reason(to_free,
+ tcf_get_drop_reason(to_free));
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3927,14 +3929,14 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
- res.drop_reason = *drop_reason;
+ tcf_set_drop_reason(skb, *drop_reason);
mini_qdisc_bstats_cpu_update(miniq, skb);
ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
/* Only tcf related quirks below. */
switch (ret) {
case TC_ACT_SHOT:
- *drop_reason = res.drop_reason;
+ *drop_reason = tcf_get_drop_reason(skb);
mini_qdisc_qstats_cpu_drop(miniq);
break;
case TC_ACT_OK:
@@ -6142,7 +6144,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
EXPORT_SYMBOL(napi_complete_done);
/* must be called under rcu_read_lock(), as we dont take a reference */
-static struct napi_struct *napi_by_id(unsigned int napi_id)
+struct napi_struct *napi_by_id(unsigned int napi_id)
{
unsigned int hash = napi_id % HASH_SIZE(napi_hash);
struct napi_struct *napi;
@@ -6403,6 +6405,43 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
+/**
+ * netif_queue_set_napi - Associate queue with the napi
+ * @dev: device to which NAPI and queue belong
+ * @queue_index: Index of queue
+ * @type: queue type as RX or TX
+ * @napi: NAPI context, pass NULL to clear previously set NAPI
+ *
+ * Set queue with its corresponding napi context. This should be done after
+ * registering the NAPI handler for the queue-vector and the queues have been
+ * mapped to the corresponding interrupt vector.
+ */
+void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+ enum netdev_queue_type type, struct napi_struct *napi)
+{
+ struct netdev_rx_queue *rxq;
+ struct netdev_queue *txq;
+
+ if (WARN_ON_ONCE(napi && !napi->dev))
+ return;
+ if (dev->reg_state >= NETREG_REGISTERED)
+ ASSERT_RTNL();
+
+ switch (type) {
+ case NETDEV_QUEUE_TYPE_RX:
+ rxq = __netif_get_rx_queue(dev, queue_index);
+ rxq->napi = napi;
+ return;
+ case NETDEV_QUEUE_TYPE_TX:
+ txq = netdev_get_tx_queue(dev, queue_index);
+ txq->napi = napi;
+ return;
+ default:
+ return;
+ }
+}
+EXPORT_SYMBOL(netif_queue_set_napi);
+
void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
@@ -6438,6 +6477,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
*/
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
+ netif_napi_set_irq(napi, -1);
}
EXPORT_SYMBOL(netif_napi_add_weight);
@@ -9034,28 +9074,6 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
}
EXPORT_SYMBOL(netdev_port_same_parent_id);
-static void netdev_dpll_pin_assign(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
-#if IS_ENABLED(CONFIG_DPLL)
- rtnl_lock();
- dev->dpll_pin = dpll_pin;
- rtnl_unlock();
-#endif
-}
-
-void netdev_dpll_pin_set(struct net_device *dev, struct dpll_pin *dpll_pin)
-{
- WARN_ON(!dpll_pin);
- netdev_dpll_pin_assign(dev, dpll_pin);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_set);
-
-void netdev_dpll_pin_clear(struct net_device *dev)
-{
- netdev_dpll_pin_assign(dev, NULL);
-}
-EXPORT_SYMBOL(netdev_dpll_pin_clear);
-
/**
* dev_change_proto_down - set carrier according to proto_down.
*
@@ -10514,7 +10532,7 @@ void netdev_run_todo(void)
write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
write_unlock(&dev_base_lock);
- linkwatch_forget_dev(dev);
+ linkwatch_sync_dev(dev);
}
while (!list_empty(&list)) {
@@ -11239,17 +11257,19 @@ int __dev_change_net_namespace(struct net_device *dev, struct net *net,
dev_net_set(dev, net);
dev->ifindex = new_ifindex;
- /* Send a netdev-add uevent to the new namespace */
- kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
- netdev_adjacent_add_links(dev);
-
if (new_name[0]) /* Rename the netdev to prepared name */
strscpy(dev->name, new_name, IFNAMSIZ);
/* Fixup kobjects */
+ dev_set_uevent_suppress(&dev->dev, 1);
err = device_rename(&dev->dev, dev->name);
+ dev_set_uevent_suppress(&dev->dev, 0);
WARN_ON(err);
+ /* Send a netdev-add uevent to the new namespace */
+ kobject_uevent(&dev->dev.kobj, KOBJ_ADD);
+ netdev_adjacent_add_links(dev);
+
/* Adapt owner in case owning user namespace of target network
* namespace is different from the original one.
*/
@@ -11509,6 +11529,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
static void __net_exit default_device_exit_net(struct net *net)
{
+ struct netdev_name_node *name_node, *tmp;
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
@@ -11531,6 +11552,14 @@ static void __net_exit default_device_exit_net(struct net *net)
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
if (netdev_name_in_use(&init_net, fb_name))
snprintf(fb_name, IFNAMSIZ, "dev%%d");
+
+ netdev_for_each_altname_safe(dev, name_node, tmp)
+ if (netdev_name_in_use(&init_net, name_node->name)) {
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
+ __netdev_name_node_alt_destroy(name_node);
+ }
+
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
pr_emerg("%s: failed to move %s to init_net: %d\n",
@@ -11573,6 +11602,62 @@ static struct pernet_operations __net_initdata default_device_ops = {
.exit_batch = default_device_exit_batch,
};
+static void __init net_dev_struct_check(void)
+{
+ /* TX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, priv_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, netdev_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, header_ops);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, _tx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, real_num_tx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tc_to_txq);
+#ifdef CONFIG_XPS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, xps_maps);
+#endif
+#ifdef CONFIG_NETFILTER_EGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, nf_hooks_egress);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160);
+
+ /* TXRX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, lstats);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, hard_header_len);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, features);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, ip6_ptr);
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_txrx, 38);
+
+ /* RX read-mostly hotpath */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ptype_specific);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, ifindex);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, real_num_rx_queues);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, _rx);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_flush_timeout);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, napi_defer_hard_irqs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, gro_ipv4_max_size);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, rx_handler_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, nd_net);
+#ifdef CONFIG_NETPOLL
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, npinfo);
+#endif
+#ifdef CONFIG_NET_XGRESS
+ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress);
+#endif
+ CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104);
+}
+
/*
* Initialize the DEV module. At boot time this walks the device list and
* unhooks any devices that fail to initialise (normally hardware not
@@ -11590,6 +11675,8 @@ static int __init net_dev_init(void)
BUG_ON(!dev_boot_phase);
+ net_dev_struct_check();
+
if (dev_proc_init())
goto out;
diff --git a/net/core/dev.h b/net/core/dev.h
index 5aa45f0fd4ae..7480b4c84298 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -30,7 +30,6 @@ int __init dev_proc_init(void);
#endif
void linkwatch_init_dev(struct net_device *dev);
-void linkwatch_forget_dev(struct net_device *dev);
void linkwatch_run_queue(void);
void dev_addr_flush(struct net_device *dev);
@@ -64,6 +63,9 @@ int dev_change_name(struct net_device *dev, const char *newname);
#define netdev_for_each_altname(dev, namenode) \
list_for_each_entry((namenode), &(dev)->name_node->list, list)
+#define netdev_for_each_altname_safe(dev, namenode, next) \
+ list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \
+ list)
int netdev_name_node_alt_create(struct net_device *dev, const char *name);
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
@@ -145,4 +147,6 @@ void xdp_do_check_flushed(struct napi_struct *napi);
#else
static inline void xdp_do_check_flushed(struct napi_struct *napi) { }
#endif
+
+struct napi_struct *napi_by_id(unsigned int napi_id);
#endif
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index feeddf95f450..9a66cf5015f2 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -322,9 +322,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr)
* frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in
* dev->priv_flags.
*/
-static int dev_set_hwtstamp_phylib(struct net_device *dev,
- struct kernel_hwtstamp_config *cfg,
- struct netlink_ext_ack *extack)
+int dev_set_hwtstamp_phylib(struct net_device *dev,
+ struct kernel_hwtstamp_config *cfg,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
bool phy_ts = phy_has_hwtstamp(dev->phydev);
@@ -363,6 +363,7 @@ static int dev_set_hwtstamp_phylib(struct net_device *dev,
return 0;
}
+EXPORT_SYMBOL_GPL(dev_set_hwtstamp_phylib);
static int dev_set_hwtstamp(struct net_device *dev, struct ifreq *ifr)
{
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b240d9aae4a6..b0f221d658be 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -183,7 +183,7 @@ out:
}
static const struct genl_multicast_group dropmon_mcgrps[] = {
- { .name = "events", .cap_sys_admin = 1 },
+ { .name = "events", .flags = GENL_MCAST_CAP_SYS_ADMIN, },
};
static void send_dm_alert(struct work_struct *work)
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 75282222e0b4..3f933ffcefc3 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -53,7 +53,7 @@ bool fib_rule_matchall(const struct fib_rule *rule)
EXPORT_SYMBOL_GPL(fib_rule_matchall);
int fib_default_rule_add(struct fib_rules_ops *ops,
- u32 pref, u32 table, u32 flags)
+ u32 pref, u32 table)
{
struct fib_rule *r;
@@ -65,7 +65,6 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
r->action = FR_ACT_TO_TBL;
r->pref = pref;
r->table = table;
- r->flags = flags;
r->proto = RTPROT_KERNEL;
r->fr_net = ops->fro_net;
r->uid_range = fib_kuid_range_unset;
@@ -594,7 +593,6 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (tb[FRA_TUN_ID])
nlrule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
- err = -EINVAL;
if (tb[FRA_L3MDEV] &&
fib_nl2rule_l3mdev(tb[FRA_L3MDEV], nlrule, extack) < 0)
goto errout_free;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1737884be52f..ef3e78b6a39c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,7 @@
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netkit.h>
#include <linux/un.h>
+#include <net/xdp_sock_drv.h>
#include "dev.h"
@@ -203,7 +204,7 @@ BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
return 0;
nla = (struct nlattr *) &skb->data[a];
- if (nla->nla_len > skb->len - a)
+ if (!nla_ok(nla, skb->len - a))
return 0;
nla = nla_find_nested(nla, x);
@@ -1219,8 +1220,8 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
*/
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
+ int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
u32 filter_size = bpf_prog_size(fp->prog->len);
- int optmem_max = READ_ONCE(sysctl_optmem_max);
/* same check as in sock_kmalloc() */
if (filter_size <= optmem_max &&
@@ -1550,12 +1551,13 @@ EXPORT_SYMBOL_GPL(sk_attach_filter);
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
struct bpf_prog *prog = __get_filter(fprog, sk);
- int err;
+ int err, optmem_max;
if (IS_ERR(prog))
return PTR_ERR(prog);
- if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ if (bpf_prog_size(prog->len) > optmem_max)
err = -ENOMEM;
else
err = reuseport_attach_prog(sk, prog);
@@ -1594,7 +1596,7 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
struct bpf_prog *prog;
- int err;
+ int err, optmem_max;
if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;
@@ -1622,7 +1624,8 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
}
} else {
/* BPF_PROG_TYPE_SOCKET_FILTER */
- if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
+ optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ if (bpf_prog_size(prog->len) > optmem_max) {
err = -ENOMEM;
goto err_prog_put;
}
@@ -4090,10 +4093,46 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
skb_frag_size_add(frag, offset);
sinfo->xdp_frags_size += offset;
+ if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+ xsk_buff_get_tail(xdp)->data_end += offset;
return 0;
}
+static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+ struct xdp_mem_info *mem_info, bool release)
+{
+ struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+
+ if (release) {
+ xsk_buff_del_tail(zc_frag);
+ __xdp_return(NULL, mem_info, false, zc_frag);
+ } else {
+ zc_frag->data_end -= shrink;
+ }
+}
+
+static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
+ int shrink)
+{
+ struct xdp_mem_info *mem_info = &xdp->rxq->mem;
+ bool release = skb_frag_size(frag) == shrink;
+
+ if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) {
+ bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release);
+ goto out;
+ }
+
+ if (release) {
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), mem_info, false, NULL);
+ }
+
+out:
+ return release;
+}
+
static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -4108,12 +4147,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink;
offset -= shrink;
-
- if (skb_frag_size(frag) == shrink) {
- struct page *page = skb_frag_page(frag);
-
- __xdp_return(page_address(page), &xdp->rxq->mem,
- false, NULL);
+ if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
n_frags_free++;
} else {
skb_frag_size_sub(frag, shrink);
@@ -7257,7 +7291,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
struct tcphdr *, th, u32, th_len)
{
#ifdef CONFIG_SYN_COOKIES
- u32 cookie;
int ret;
if (unlikely(!sk || th_len < sizeof(*th)))
@@ -7279,8 +7312,6 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (tcp_synq_no_recent_overflow(sk))
return -ENOENT;
- cookie = ntohl(th->ack_seq) - 1;
-
/* Both struct iphdr and struct ipv6hdr have the version field at the
* same offset so we can cast to the shorter header (struct iphdr).
*/
@@ -7289,7 +7320,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
return -EINVAL;
- ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
+ ret = __cookie_v4_check((struct iphdr *)iph, th);
break;
#if IS_BUILTIN(CONFIG_IPV6)
@@ -7300,7 +7331,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len
if (sk->sk_family != AF_INET6)
return -EINVAL;
- ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
+ ret = __cookie_v6_check((struct ipv6hdr *)iph, th);
break;
#endif /* CONFIG_IPV6 */
@@ -7753,9 +7784,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
struct tcphdr *, th)
{
- u32 cookie = ntohl(th->ack_seq) - 1;
-
- if (__cookie_v4_check(iph, th, cookie) > 0)
+ if (__cookie_v4_check(iph, th) > 0)
return 0;
return -EACCES;
@@ -7776,9 +7805,7 @@ BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
struct tcphdr *, th)
{
#if IS_BUILTIN(CONFIG_IPV6)
- u32 cookie = ntohl(th->ack_seq) - 1;
-
- if (__cookie_v6_check(iph, th, cookie) > 0)
+ if (__cookie_v6_check(iph, th) > 0)
return 0;
return -EACCES;
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index c469d1c4db5d..429571c258da 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -192,7 +192,10 @@ static void __linkwatch_run_queue(int urgent_only)
#define MAX_DO_DEV_PER_LOOP 100
int do_dev = MAX_DO_DEV_PER_LOOP;
- struct net_device *dev;
+ /* Use a local list here since we add non-urgent
+ * events back to the global one when called with
+ * urgent_only=1.
+ */
LIST_HEAD(wrk);
/* Give urgent case more budget */
@@ -218,6 +221,7 @@ static void __linkwatch_run_queue(int urgent_only)
list_splice_init(&lweventlist, &wrk);
while (!list_empty(&wrk) && do_dev > 0) {
+ struct net_device *dev;
dev = list_first_entry(&wrk, struct net_device, link_watch_list);
list_del_init(&dev->link_watch_list);
@@ -245,7 +249,7 @@ static void __linkwatch_run_queue(int urgent_only)
spin_unlock_irq(&lweventlist_lock);
}
-void linkwatch_forget_dev(struct net_device *dev)
+void linkwatch_sync_dev(struct net_device *dev)
{
unsigned long flags;
int clean = 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index fccaa5bac0ed..a09d507c5b03 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -193,11 +193,22 @@ static ssize_t carrier_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct net_device *netdev = to_net_dev(dev);
+ int ret = -EINVAL;
- if (netif_running(netdev))
- return sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
+ if (!rtnl_trylock())
+ return restart_syscall();
- return -EINVAL;
+ if (netif_running(netdev)) {
+ /* Synchronize carrier state with link watch,
+ * see also rtnl_getlink().
+ */
+ linkwatch_sync_dev(netdev);
+
+ ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev));
+ }
+ rtnl_unlock();
+
+ return ret;
}
static DEVICE_ATTR_RW(carrier);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index f4183c4c1ec8..72799533426b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -372,6 +372,10 @@ out_undo:
static int __net_init net_defaults_init_net(struct net *net)
{
net->core.sysctl_somaxconn = SOMAXCONN;
+ /* Limits per socket sk_omem_alloc usage.
+ * TCP zerocopy regular usage needs 128 KB.
+ */
+ net->core.sysctl_optmem_max = 128 * 1024;
net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
return 0;
@@ -1099,11 +1103,56 @@ out:
rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
+#ifdef CONFIG_NET_NS
+static void __init netns_ipv4_struct_check(void)
+{
+ /* TX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_early_retrans);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_win_divisor);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_tso_rtt_log);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_autocorking);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_snd_mss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_limit_output_bytes);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_min_rtt_wlen);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_tcp_wmem);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
+ sysctl_ip_fwd_use_pmtu);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);
+
+ /* TXRX readonly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
+ sysctl_tcp_moderate_rcvbuf);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);
+
+ /* RX readonly hotpath cache line */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_ip_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_early_demux);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
+ sysctl_tcp_rmem);
+ CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 18);
+}
+#endif
+
void __init net_ns_init(void)
{
struct net_generic *ng;
#ifdef CONFIG_NET_NS
+ netns_ipv4_struct_check();
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
SLAB_PANIC|SLAB_ACCOUNT, NULL);
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index ea9231378aa6..be7f2ebd61b2 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -10,11 +10,64 @@
#include <uapi/linux/netdev.h>
+/* Integer value ranges */
+static const struct netlink_range_validation netdev_a_page_pool_id_range = {
+ .min = 1ULL,
+ .max = 4294967295ULL,
+};
+
+static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = {
+ .min = 1ULL,
+ .max = 2147483647ULL,
+};
+
+/* Common nested types */
+const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = {
+ [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
+ [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
+};
+
/* NETDEV_CMD_DEV_GET - do */
static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
[NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
};
+/* NETDEV_CMD_PAGE_POOL_GET - do */
+#ifdef CONFIG_PAGE_POOL
+static const struct nla_policy netdev_page_pool_get_nl_policy[NETDEV_A_PAGE_POOL_ID + 1] = {
+ [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
+};
+#endif /* CONFIG_PAGE_POOL */
+
+/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */
+#ifdef CONFIG_PAGE_POOL_STATS
+static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAGE_POOL_STATS_INFO + 1] = {
+ [NETDEV_A_PAGE_POOL_STATS_INFO] = NLA_POLICY_NESTED(netdev_page_pool_info_nl_policy),
+};
+#endif /* CONFIG_PAGE_POOL_STATS */
+
+/* NETDEV_CMD_QUEUE_GET - do */
+static const struct nla_policy netdev_queue_get_do_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = {
+ [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+ [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
+ [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, },
+};
+
+/* NETDEV_CMD_QUEUE_GET - dump */
+static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IFINDEX + 1] = {
+ [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+/* NETDEV_CMD_NAPI_GET - do */
+static const struct nla_policy netdev_napi_get_do_nl_policy[NETDEV_A_NAPI_ID + 1] = {
+ [NETDEV_A_NAPI_ID] = { .type = NLA_U32, },
+};
+
+/* NETDEV_CMD_NAPI_GET - dump */
+static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFINDEX + 1] = {
+ [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@@ -29,10 +82,67 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.dumpit = netdev_nl_dev_get_dumpit,
.flags = GENL_CMD_CAP_DUMP,
},
+#ifdef CONFIG_PAGE_POOL
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_GET,
+ .doit = netdev_nl_page_pool_get_doit,
+ .policy = netdev_page_pool_get_nl_policy,
+ .maxattr = NETDEV_A_PAGE_POOL_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_GET,
+ .dumpit = netdev_nl_page_pool_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+#endif /* CONFIG_PAGE_POOL */
+#ifdef CONFIG_PAGE_POOL_STATS
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET,
+ .doit = netdev_nl_page_pool_stats_get_doit,
+ .policy = netdev_page_pool_stats_get_nl_policy,
+ .maxattr = NETDEV_A_PAGE_POOL_STATS_INFO,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET,
+ .dumpit = netdev_nl_page_pool_stats_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+#endif /* CONFIG_PAGE_POOL_STATS */
+ {
+ .cmd = NETDEV_CMD_QUEUE_GET,
+ .doit = netdev_nl_queue_get_doit,
+ .policy = netdev_queue_get_do_nl_policy,
+ .maxattr = NETDEV_A_QUEUE_TYPE,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_QUEUE_GET,
+ .dumpit = netdev_nl_queue_get_dumpit,
+ .policy = netdev_queue_get_dump_nl_policy,
+ .maxattr = NETDEV_A_QUEUE_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+ {
+ .cmd = NETDEV_CMD_NAPI_GET,
+ .doit = netdev_nl_napi_get_doit,
+ .policy = netdev_napi_get_do_nl_policy,
+ .maxattr = NETDEV_A_NAPI_ID,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_NAPI_GET,
+ .dumpit = netdev_nl_napi_get_dumpit,
+ .policy = netdev_napi_get_dump_nl_policy,
+ .maxattr = NETDEV_A_NAPI_IFINDEX,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {
[NETDEV_NLGRP_MGMT] = { "mgmt", },
+ [NETDEV_NLGRP_PAGE_POOL] = { "page-pool", },
};
struct genl_family netdev_nl_family __ro_after_init = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 7b370c073e7d..a47f2bcbe4fa 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -11,11 +11,27 @@
#include <uapi/linux/netdev.h>
+/* Common nested types */
+extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
+
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_queue_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
enum {
NETDEV_NLGRP_MGMT,
+ NETDEV_NLGRP_PAGE_POOL,
};
extern struct genl_family netdev_nl_family;
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
index fe61f85bcf33..fd98936da3ae 100644
--- a/net/core/netdev-genl.c
+++ b/net/core/netdev-genl.c
@@ -6,13 +6,32 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/xdp.h>
+#include <net/xdp_sock.h>
+#include <net/netdev_rx_queue.h>
+#include <net/busy_poll.h>
#include "netdev-genl-gen.h"
+#include "dev.h"
+
+struct netdev_nl_dump_ctx {
+ unsigned long ifindex;
+ unsigned int rxq_idx;
+ unsigned int txq_idx;
+ unsigned int napi_id;
+};
+
+static struct netdev_nl_dump_ctx *netdev_dump_ctx(struct netlink_callback *cb)
+{
+ NL_ASSERT_DUMP_CTX_FITS(struct netdev_nl_dump_ctx);
+
+ return (struct netdev_nl_dump_ctx *)cb->ctx;
+}
static int
netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
const struct genl_info *info)
{
+ u64 xsk_features = 0;
u64 xdp_rx_meta = 0;
void *hdr;
@@ -26,11 +45,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
XDP_METADATA_KFUNC_xxx
#undef XDP_METADATA_KFUNC
+ if (netdev->xsk_tx_metadata_ops) {
+ if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP;
+ if (netdev->xsk_tx_metadata_ops->tmo_request_checksum)
+ xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM;
+ }
+
if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
netdev->xdp_features, NETDEV_A_DEV_PAD) ||
nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES,
- xdp_rx_meta, NETDEV_A_DEV_PAD)) {
+ xdp_rx_meta, NETDEV_A_DEV_PAD) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES,
+ xsk_features, NETDEV_A_DEV_PAD)) {
genlmsg_cancel(rsp, hdr);
return -EINVAL;
}
@@ -111,12 +139,13 @@ err_free_msg:
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
struct net *net = sock_net(skb->sk);
struct net_device *netdev;
int err = 0;
rtnl_lock();
- for_each_netdev_dump(net, netdev, cb->args[0]) {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
err = netdev_nl_dev_fill(netdev, skb, genl_info_dump(cb));
if (err < 0)
break;
@@ -129,6 +158,317 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+static int
+netdev_nl_napi_fill_one(struct sk_buff *rsp, struct napi_struct *napi,
+ const struct genl_info *info)
+{
+ void *hdr;
+ pid_t pid;
+
+ if (WARN_ON_ONCE(!napi->dev))
+ return -EINVAL;
+ if (!(napi->dev->flags & IFF_UP))
+ return 0;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (napi->napi_id >= MIN_NAPI_ID &&
+ nla_put_u32(rsp, NETDEV_A_NAPI_ID, napi->napi_id))
+ goto nla_put_failure;
+
+ if (nla_put_u32(rsp, NETDEV_A_NAPI_IFINDEX, napi->dev->ifindex))
+ goto nla_put_failure;
+
+ if (napi->irq >= 0 && nla_put_u32(rsp, NETDEV_A_NAPI_IRQ, napi->irq))
+ goto nla_put_failure;
+
+ if (napi->thread) {
+ pid = task_pid_nr(napi->thread);
+ if (nla_put_u32(rsp, NETDEV_A_NAPI_PID, pid))
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct napi_struct *napi;
+ struct sk_buff *rsp;
+ u32 napi_id;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_NAPI_ID))
+ return -EINVAL;
+
+ napi_id = nla_get_u32(info->attrs[NETDEV_A_NAPI_ID]);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+ napi = napi_by_id(napi_id);
+ if (napi)
+ err = netdev_nl_napi_fill_one(rsp, napi, info);
+ else
+ err = -EINVAL;
+
+ rtnl_unlock();
+
+ if (err)
+ goto err_free_msg;
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+ return err;
+}
+
+static int
+netdev_nl_napi_dump_one(struct net_device *netdev, struct sk_buff *rsp,
+ const struct genl_info *info,
+ struct netdev_nl_dump_ctx *ctx)
+{
+ struct napi_struct *napi;
+ int err = 0;
+
+ if (!(netdev->flags & IFF_UP))
+ return err;
+
+ list_for_each_entry(napi, &netdev->napi_list, dev_list) {
+ if (ctx->napi_id && napi->napi_id >= ctx->napi_id)
+ continue;
+
+ err = netdev_nl_napi_fill_one(rsp, napi, info);
+ if (err)
+ return err;
+ ctx->napi_id = napi->napi_id;
+ }
+ return err;
+}
+
+int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ u32 ifindex = 0;
+ int err = 0;
+
+ if (info->attrs[NETDEV_A_NAPI_IFINDEX])
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_NAPI_IFINDEX]);
+
+ rtnl_lock();
+ if (ifindex) {
+ netdev = __dev_get_by_index(net, ifindex);
+ if (netdev)
+ err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+ else
+ err = -ENODEV;
+ } else {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ err = netdev_nl_napi_dump_one(netdev, skb, info, ctx);
+ if (err < 0)
+ break;
+ ctx->napi_id = 0;
+ }
+ }
+ rtnl_unlock();
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ return skb->len;
+}
+
+static int
+netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
+ u32 q_idx, u32 q_type, const struct genl_info *info)
+{
+ struct netdev_rx_queue *rxq;
+ struct netdev_queue *txq;
+ void *hdr;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx) ||
+ nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type) ||
+ nla_put_u32(rsp, NETDEV_A_QUEUE_IFINDEX, netdev->ifindex))
+ goto nla_put_failure;
+
+ switch (q_type) {
+ case NETDEV_QUEUE_TYPE_RX:
+ rxq = __netif_get_rx_queue(netdev, q_idx);
+ if (rxq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
+ rxq->napi->napi_id))
+ goto nla_put_failure;
+ break;
+ case NETDEV_QUEUE_TYPE_TX:
+ txq = netdev_get_tx_queue(netdev, q_idx);
+ if (txq->napi && nla_put_u32(rsp, NETDEV_A_QUEUE_NAPI_ID,
+ txq->napi->napi_id))
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+static int netdev_nl_queue_validate(struct net_device *netdev, u32 q_id,
+ u32 q_type)
+{
+ switch (q_type) {
+ case NETDEV_QUEUE_TYPE_RX:
+ if (q_id >= netdev->real_num_rx_queues)
+ return -EINVAL;
+ return 0;
+ case NETDEV_QUEUE_TYPE_TX:
+ if (q_id >= netdev->real_num_tx_queues)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int
+netdev_nl_queue_fill(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx,
+ u32 q_type, const struct genl_info *info)
+{
+ int err = 0;
+
+ if (!(netdev->flags & IFF_UP))
+ return err;
+
+ err = netdev_nl_queue_validate(netdev, q_idx, q_type);
+ if (err)
+ return err;
+
+ return netdev_nl_queue_fill_one(rsp, netdev, q_idx, q_type, info);
+}
+
+int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ u32 q_id, q_type, ifindex;
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_ID) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) ||
+ GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX))
+ return -EINVAL;
+
+ q_id = nla_get_u32(info->attrs[NETDEV_A_QUEUE_ID]);
+ q_type = nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]);
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (netdev)
+ err = netdev_nl_queue_fill(rsp, netdev, q_id, q_type, info);
+ else
+ err = -ENODEV;
+
+ rtnl_unlock();
+
+ if (err)
+ goto err_free_msg;
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+ return err;
+}
+
+static int
+netdev_nl_queue_dump_one(struct net_device *netdev, struct sk_buff *rsp,
+ const struct genl_info *info,
+ struct netdev_nl_dump_ctx *ctx)
+{
+ int err = 0;
+ int i;
+
+ if (!(netdev->flags & IFF_UP))
+ return err;
+
+ for (i = ctx->rxq_idx; i < netdev->real_num_rx_queues;) {
+ err = netdev_nl_queue_fill_one(rsp, netdev, i,
+ NETDEV_QUEUE_TYPE_RX, info);
+ if (err)
+ return err;
+ ctx->rxq_idx = i++;
+ }
+ for (i = ctx->txq_idx; i < netdev->real_num_tx_queues;) {
+ err = netdev_nl_queue_fill_one(rsp, netdev, i,
+ NETDEV_QUEUE_TYPE_TX, info);
+ if (err)
+ return err;
+ ctx->txq_idx = i++;
+ }
+
+ return err;
+}
+
+int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct netdev_nl_dump_ctx *ctx = netdev_dump_ctx(cb);
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ u32 ifindex = 0;
+ int err = 0;
+
+ if (info->attrs[NETDEV_A_QUEUE_IFINDEX])
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]);
+
+ rtnl_lock();
+ if (ifindex) {
+ netdev = __dev_get_by_index(net, ifindex);
+ if (netdev)
+ err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
+ else
+ err = -ENODEV;
+ } else {
+ for_each_netdev_dump(net, netdev, ctx->ifindex) {
+ err = netdev_nl_queue_dump_one(netdev, skb, info, ctx);
+ if (err < 0)
+ break;
+ ctx->rxq_idx = 0;
+ ctx->txq_idx = 0;
+ }
+ }
+ rtnl_unlock();
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ return skb->len;
+}
+
static int netdev_genl_netdevice_event(struct notifier_block *nb,
unsigned long event, void *ptr)
{
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index dec544337236..4933762e5a6b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -23,10 +23,12 @@
#include <trace/events/page_pool.h>
+#include "page_pool_priv.h"
+
#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (60 * HZ)
-#define BIAS_MAX LONG_MAX
+#define BIAS_MAX (LONG_MAX >> 1)
#ifdef CONFIG_PAGE_POOL_STATS
/* alloc_stat_inc is intended to be used in softirq context */
@@ -69,7 +71,7 @@ static const char pp_stats[][ETH_GSTRING_LEN] = {
* is passed to this API which is filled in. The caller can then report
* those stats to the user (perhaps via ethtool, debugfs, etc.).
*/
-bool page_pool_get_stats(struct page_pool *pool,
+bool page_pool_get_stats(const struct page_pool *pool,
struct page_pool_stats *stats)
{
int cpu = 0;
@@ -173,7 +175,8 @@ static int page_pool_init(struct page_pool *pool,
{
unsigned int ring_qsize = 1024; /* Default */
- memcpy(&pool->p, params, sizeof(pool->p));
+ memcpy(&pool->p, &params->fast, sizeof(pool->p));
+ memcpy(&pool->slow, &params->slow, sizeof(pool->slow));
/* Validate only known flags were used */
if (pool->p.flags & ~(PP_FLAG_ALL))
@@ -211,6 +214,8 @@ static int page_pool_init(struct page_pool *pool,
*/
}
+ pool->has_init_callback = !!pool->slow.init_callback;
+
#ifdef CONFIG_PAGE_POOL_STATS
pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
if (!pool->recycle_stats)
@@ -235,6 +240,18 @@ static int page_pool_init(struct page_pool *pool,
return 0;
}
+static void page_pool_uninit(struct page_pool *pool)
+{
+ ptr_ring_cleanup(&pool->ring, NULL);
+
+ if (pool->p.flags & PP_FLAG_DMA_MAP)
+ put_device(pool->p.dev);
+
+#ifdef CONFIG_PAGE_POOL_STATS
+ free_percpu(pool->recycle_stats);
+#endif
+}
+
/**
* page_pool_create() - create a page pool.
* @params: parameters, see struct page_pool_params
@@ -249,13 +266,21 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
return ERR_PTR(-ENOMEM);
err = page_pool_init(pool, params);
- if (err < 0) {
- pr_warn("%s() gave up with errno %d\n", __func__, err);
- kfree(pool);
- return ERR_PTR(err);
- }
+ if (err < 0)
+ goto err_free;
+
+ err = page_pool_list(pool);
+ if (err)
+ goto err_uninit;
return pool;
+
+err_uninit:
+ page_pool_uninit(pool);
+err_free:
+ pr_warn("%s() gave up with errno %d\n", __func__, err);
+ kfree(pool);
+ return ERR_PTR(err);
}
EXPORT_SYMBOL(page_pool_create);
@@ -388,8 +413,8 @@ static void page_pool_set_pp_info(struct page_pool *pool,
* the overhead is negligible.
*/
page_pool_fragment_page(page, 1);
- if (pool->p.init_callback)
- pool->p.init_callback(page, pool->p.init_arg);
+ if (pool->has_init_callback)
+ pool->slow.init_callback(page, pool->slow.init_arg);
}
static void page_pool_clear_pp_info(struct page *page)
@@ -504,7 +529,7 @@ EXPORT_SYMBOL(page_pool_alloc_pages);
*/
#define _distance(a, b) (s32)((a) - (b))
-static s32 page_pool_inflight(struct page_pool *pool)
+s32 page_pool_inflight(const struct page_pool *pool, bool strict)
{
u32 release_cnt = atomic_read(&pool->pages_state_release_cnt);
u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt);
@@ -512,27 +537,27 @@ static s32 page_pool_inflight(struct page_pool *pool)
inflight = _distance(hold_cnt, release_cnt);
- trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
- WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
+ if (strict) {
+ trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
+ WARN(inflight < 0, "Negative(%d) inflight packet-pages",
+ inflight);
+ } else {
+ inflight = max(0, inflight);
+ }
return inflight;
}
-/* Disconnects a page (from a page_pool). API users can have a need
- * to disconnect a page (from a page_pool), to allow it to be used as
- * a regular page (that will eventually be returned to the normal
- * page-allocator via put_page).
- */
-static void page_pool_return_page(struct page_pool *pool, struct page *page)
+static __always_inline
+void __page_pool_release_page_dma(struct page_pool *pool, struct page *page)
{
dma_addr_t dma;
- int count;
if (!(pool->p.flags & PP_FLAG_DMA_MAP))
/* Always account for inflight pages, even if we didn't
* map them
*/
- goto skip_dma_unmap;
+ return;
dma = page_pool_get_dma_addr(page);
@@ -541,7 +566,19 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
PAGE_SIZE << pool->p.order, pool->p.dma_dir,
DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
page_pool_set_dma_addr(page, 0);
-skip_dma_unmap:
+}
+
+/* Disconnects a page (from a page_pool). API users can have a need
+ * to disconnect a page (from a page_pool), to allow it to be used as
+ * a regular page (that will eventually be returned to the normal
+ * page-allocator via put_page).
+ */
+void page_pool_return_page(struct page_pool *pool, struct page *page)
+{
+ int count;
+
+ __page_pool_release_page_dma(pool, page);
+
page_pool_clear_pp_info(page);
/* This may be the last page returned, releasing the pool, so
@@ -647,8 +684,8 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
return NULL;
}
-void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
- unsigned int dma_sync_size, bool allow_direct)
+void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size, bool allow_direct)
{
page = __page_pool_put_page(pool, page, dma_sync_size, allow_direct);
if (page && !page_pool_recycle_in_ring(pool, page)) {
@@ -657,7 +694,7 @@ void page_pool_put_defragged_page(struct page_pool *pool, struct page *page,
page_pool_return_page(pool, page);
}
}
-EXPORT_SYMBOL(page_pool_put_defragged_page);
+EXPORT_SYMBOL(page_pool_put_unrefed_page);
/**
* page_pool_put_page_bulk() - release references on multiple pages
@@ -684,7 +721,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
struct page *page = virt_to_head_page(data[i]);
/* It is not the last user for the page frag case */
- if (!page_pool_is_last_frag(page))
+ if (!page_pool_is_last_ref(page))
continue;
page = __page_pool_put_page(pool, page, -1, false);
@@ -726,7 +763,7 @@ static struct page *page_pool_drain_frag(struct page_pool *pool,
long drain_count = BIAS_MAX - pool->frag_users;
/* Some user is still using the page frag */
- if (likely(page_pool_defrag_page(page, drain_count)))
+ if (likely(page_pool_unref_page(page, drain_count)))
return NULL;
if (page_ref_count(page) == 1 && !page_is_pfmemalloc(page)) {
@@ -747,7 +784,7 @@ static void page_pool_free_frag(struct page_pool *pool)
pool->frag_page = NULL;
- if (!page || page_pool_defrag_page(page, drain_count))
+ if (!page || page_pool_unref_page(page, drain_count))
return;
page_pool_return_page(pool, page);
@@ -818,14 +855,8 @@ static void __page_pool_destroy(struct page_pool *pool)
if (pool->disconnect)
pool->disconnect(pool);
- ptr_ring_cleanup(&pool->ring, NULL);
-
- if (pool->p.flags & PP_FLAG_DMA_MAP)
- put_device(pool->p.dev);
-
-#ifdef CONFIG_PAGE_POOL_STATS
- free_percpu(pool->recycle_stats);
-#endif
+ page_pool_unlist(pool);
+ page_pool_uninit(pool);
kfree(pool);
}
@@ -862,7 +893,7 @@ static int page_pool_release(struct page_pool *pool)
int inflight;
page_pool_scrub(pool);
- inflight = page_pool_inflight(pool);
+ inflight = page_pool_inflight(pool, true);
if (!inflight)
__page_pool_destroy(pool);
@@ -873,18 +904,21 @@ static void page_pool_release_retry(struct work_struct *wq)
{
struct delayed_work *dwq = to_delayed_work(wq);
struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw);
+ void *netdev;
int inflight;
inflight = page_pool_release(pool);
if (!inflight)
return;
- /* Periodic warning */
- if (time_after_eq(jiffies, pool->defer_warn)) {
+ /* Periodic warning for page pools the user can't see */
+ netdev = READ_ONCE(pool->slow.netdev);
+ if (time_after_eq(jiffies, pool->defer_warn) &&
+ (!netdev || netdev == NET_PTR_POISON)) {
int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ;
- pr_warn("%s() stalled pool shutdown %d inflight %d sec\n",
- __func__, inflight, sec);
+ pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n",
+ __func__, pool->user.id, inflight, sec);
pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
}
@@ -929,6 +963,7 @@ void page_pool_destroy(struct page_pool *pool)
if (!page_pool_release(pool))
return;
+ page_pool_detached(pool);
pool->defer_start = jiffies;
pool->defer_warn = jiffies + DEFER_WARN_INTERVAL;
diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h
new file mode 100644
index 000000000000..90665d40f1eb
--- /dev/null
+++ b/net/core/page_pool_priv.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PAGE_POOL_PRIV_H
+#define __PAGE_POOL_PRIV_H
+
+s32 page_pool_inflight(const struct page_pool *pool, bool strict);
+
+int page_pool_list(struct page_pool *pool);
+void page_pool_detached(struct page_pool *pool);
+void page_pool_unlist(struct page_pool *pool);
+
+#endif
diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c
new file mode 100644
index 000000000000..278294aca66a
--- /dev/null
+++ b/net/core/page_pool_user.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/xarray.h>
+#include <net/net_debug.h>
+#include <net/page_pool/types.h>
+#include <net/page_pool/helpers.h>
+#include <net/sock.h>
+
+#include "page_pool_priv.h"
+#include "netdev-genl-gen.h"
+
+static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1);
+/* Protects: page_pools, netdevice->page_pools, pool->slow.netdev, pool->user.
+ * Ordering: inside rtnl_lock
+ */
+static DEFINE_MUTEX(page_pools_lock);
+
+/* Page pools are only reachable from user space (via netlink) if they are
+ * linked to a netdev at creation time. Following page pool "visibility"
+ * states are possible:
+ * - normal
+ * - user.list: linked to real netdev, netdev: real netdev
+ * - orphaned - real netdev has disappeared
+ * - user.list: linked to lo, netdev: lo
+ * - invisible - either (a) created without netdev linking, (b) unlisted due
+ * to error, or (c) the entire namespace which owned this pool disappeared
+ * - user.list: unhashed, netdev: unknown
+ */
+
+typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info);
+
+static int
+netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill)
+{
+ struct page_pool *pool;
+ struct sk_buff *rsp;
+ int err;
+
+ mutex_lock(&page_pools_lock);
+ pool = xa_load(&page_pools, id);
+ if (!pool || hlist_unhashed(&pool->user.list) ||
+ !net_eq(dev_net(pool->slow.netdev), genl_info_net(info))) {
+ err = -ENOENT;
+ goto err_unlock;
+ }
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp) {
+ err = -ENOMEM;
+ goto err_unlock;
+ }
+
+ err = fill(rsp, pool, info);
+ if (err)
+ goto err_free_msg;
+
+ mutex_unlock(&page_pools_lock);
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+err_unlock:
+ mutex_unlock(&page_pools_lock);
+ return err;
+}
+
+struct page_pool_dump_cb {
+ unsigned long ifindex;
+ u32 pp_id;
+};
+
+static int
+netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
+ pp_nl_fill_cb fill)
+{
+ struct page_pool_dump_cb *state = (void *)cb->ctx;
+ const struct genl_info *info = genl_info_dump(cb);
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ struct page_pool *pool;
+ int err = 0;
+
+ rtnl_lock();
+ mutex_lock(&page_pools_lock);
+ for_each_netdev_dump(net, netdev, state->ifindex) {
+ hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
+ if (state->pp_id && state->pp_id < pool->user.id)
+ continue;
+
+ state->pp_id = pool->user.id;
+ err = fill(skb, pool, info);
+ if (err)
+ goto out;
+ }
+
+ state->pp_id = 0;
+ }
+out:
+ mutex_unlock(&page_pools_lock);
+ rtnl_unlock();
+
+ if (skb->len && err == -EMSGSIZE)
+ return skb->len;
+ return err;
+}
+
+static int
+page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info)
+{
+#ifdef CONFIG_PAGE_POOL_STATS
+ struct page_pool_stats stats = {};
+ struct nlattr *nest;
+ void *hdr;
+
+ if (!page_pool_get_stats(pool, &stats))
+ return 0;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO);
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) ||
+ (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
+ nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
+ pool->slow.netdev->ifindex)))
+ goto err_cancel_nest;
+
+ nla_nest_end(rsp, nest);
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST,
+ stats.alloc_stats.fast) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW,
+ stats.alloc_stats.slow) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER,
+ stats.alloc_stats.slow_high_order) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY,
+ stats.alloc_stats.empty) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL,
+ stats.alloc_stats.refill) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE,
+ stats.alloc_stats.waive) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED,
+ stats.recycle_stats.cached) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL,
+ stats.recycle_stats.cache_full) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING,
+ stats.recycle_stats.ring) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL,
+ stats.recycle_stats.ring_full) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT,
+ stats.recycle_stats.released_refcnt))
+ goto err_cancel_msg;
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+err_cancel_nest:
+ nla_nest_cancel(rsp, nest);
+err_cancel_msg:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+#else
+ GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS");
+ return -EOPNOTSUPP;
+#endif
+}
+
+int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)];
+ struct nlattr *nest;
+ int err;
+ u32 id;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO))
+ return -EINVAL;
+
+ nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO];
+ err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest,
+ netdev_page_pool_info_nl_policy,
+ info->extack);
+ if (err)
+ return err;
+
+ if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID))
+ return -EINVAL;
+ if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[NETDEV_A_PAGE_POOL_IFINDEX],
+ "selecting by ifindex not supported");
+ return -EINVAL;
+ }
+
+ id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]);
+
+ return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill);
+}
+
+int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill);
+}
+
+static int
+page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
+ const struct genl_info *info)
+{
+ size_t inflight, refsz;
+ void *hdr;
+
+ hdr = genlmsg_iput(rsp, info);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id))
+ goto err_cancel;
+
+ if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
+ nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
+ pool->slow.netdev->ifindex))
+ goto err_cancel;
+ if (pool->user.napi_id &&
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id))
+ goto err_cancel;
+
+ inflight = page_pool_inflight(pool, false);
+ refsz = PAGE_SIZE << pool->p.order;
+ if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) ||
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
+ inflight * refsz))
+ goto err_cancel;
+ if (pool->user.detach_time &&
+ nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME,
+ pool->user.detach_time))
+ goto err_cancel;
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+err_cancel:
+ genlmsg_cancel(rsp, hdr);
+ return -EMSGSIZE;
+}
+
+static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd)
+{
+ struct genl_info info;
+ struct sk_buff *ntf;
+ struct net *net;
+
+ lockdep_assert_held(&page_pools_lock);
+
+ /* 'invisible' page pools don't matter */
+ if (hlist_unhashed(&pool->user.list))
+ return;
+ net = dev_net(pool->slow.netdev);
+
+ if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL))
+ return;
+
+ genl_info_init_ntf(&info, &netdev_nl_family, cmd);
+
+ ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!ntf)
+ return;
+
+ if (page_pool_nl_fill(ntf, pool, &info)) {
+ nlmsg_free(ntf);
+ return;
+ }
+
+ genlmsg_multicast_netns(&netdev_nl_family, net, ntf,
+ 0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL);
+}
+
+int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ u32 id;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID))
+ return -EINVAL;
+
+ id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]);
+
+ return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill);
+}
+
+int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill);
+}
+
+int page_pool_list(struct page_pool *pool)
+{
+ static u32 id_alloc_next;
+ int err;
+
+ mutex_lock(&page_pools_lock);
+ err = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b,
+ &id_alloc_next, GFP_KERNEL);
+ if (err < 0)
+ goto err_unlock;
+
+ INIT_HLIST_NODE(&pool->user.list);
+ if (pool->slow.netdev) {
+ hlist_add_head(&pool->user.list,
+ &pool->slow.netdev->page_pools);
+ pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0;
+
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF);
+ }
+
+ mutex_unlock(&page_pools_lock);
+ return 0;
+
+err_unlock:
+ mutex_unlock(&page_pools_lock);
+ return err;
+}
+
+void page_pool_detached(struct page_pool *pool)
+{
+ mutex_lock(&page_pools_lock);
+ pool->user.detach_time = ktime_get_boottime_seconds();
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
+ mutex_unlock(&page_pools_lock);
+}
+
+void page_pool_unlist(struct page_pool *pool)
+{
+ mutex_lock(&page_pools_lock);
+ netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF);
+ xa_erase(&page_pools, pool->user.id);
+ if (!hlist_unhashed(&pool->user.list))
+ hlist_del(&pool->user.list);
+ mutex_unlock(&page_pools_lock);
+}
+
+static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
+{
+ struct page_pool *pool;
+ struct hlist_node *n;
+
+ mutex_lock(&page_pools_lock);
+ hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) {
+ hlist_del_init(&pool->user.list);
+ pool->slow.netdev = NET_PTR_POISON;
+ }
+ mutex_unlock(&page_pools_lock);
+}
+
+static void page_pool_unreg_netdev(struct net_device *netdev)
+{
+ struct page_pool *pool, *last;
+ struct net_device *lo;
+
+ lo = dev_net(netdev)->loopback_dev;
+
+ mutex_lock(&page_pools_lock);
+ last = NULL;
+ hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
+ pool->slow.netdev = lo;
+ netdev_nl_page_pool_event(pool,
+ NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
+ last = pool;
+ }
+ if (last)
+ hlist_splice_init(&netdev->page_pools, &last->user.list,
+ &lo->page_pools);
+ mutex_unlock(&page_pools_lock);
+}
+
+static int
+page_pool_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
+ if (hlist_empty(&netdev->page_pools))
+ return NOTIFY_OK;
+
+ if (netdev->ifindex != LOOPBACK_IFINDEX)
+ page_pool_unreg_netdev(netdev);
+ else
+ page_pool_unreg_netdev_wipe(netdev);
+ return NOTIFY_OK;
+}
+
+static struct notifier_block page_pool_netdevice_nb = {
+ .notifier_call = page_pool_netdevice_event,
+};
+
+static int __init page_pool_user_init(void)
+{
+ return register_netdevice_notifier(&page_pool_netdevice_nb);
+}
+
+subsys_initcall(page_pool_user_init);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 57cea67b7562..ea55a758a475 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3669,10 +3669,8 @@ static int pktgen_thread_worker(void *arg)
if (unlikely(!pkt_dev && t->control == 0)) {
if (t->net->pktgen_exiting)
break;
- wait_event_interruptible_timeout(t->queue,
- t->control != 0,
- HZ/10);
- try_to_freeze();
+ wait_event_freezable_timeout(t->queue,
+ t->control != 0, HZ / 10);
continue;
}
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index f35c2e998406..63de5c635842 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,9 +33,6 @@
void reqsk_queue_alloc(struct request_sock_queue *queue)
{
- spin_lock_init(&queue->rskq_lock);
-
- spin_lock_init(&queue->fastopenq.lock);
queue->fastopenq.rskq_rst_head = NULL;
queue->fastopenq.rskq_rst_tail = NULL;
queue->fastopenq.qlen = 0;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e8431c6c8490..bd50e9fe3234 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -342,8 +342,7 @@ int rtnl_unregister(int protocol, int msgtype)
return -ENOENT;
}
- link = rtnl_dereference(tab[msgindex]);
- RCU_INIT_POINTER(tab[msgindex], NULL);
+ link = rcu_replace_pointer_rtnl(tab[msgindex], NULL);
rtnl_unlock();
kfree_rcu(link, rcu);
@@ -368,18 +367,13 @@ void rtnl_unregister_all(int protocol)
BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
rtnl_lock();
- tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
+ tab = rcu_replace_pointer_rtnl(rtnl_msg_handlers[protocol], NULL);
if (!tab) {
rtnl_unlock();
return;
}
- RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
- link = rtnl_dereference(tab[msgindex]);
- if (!link)
- continue;
-
- RCU_INIT_POINTER(tab[msgindex], NULL);
+ link = rcu_replace_pointer_rtnl(tab[msgindex], NULL);
kfree_rcu(link, rcu);
}
rtnl_unlock();
@@ -1026,14 +1020,17 @@ static size_t rtnl_xdp_size(void)
static size_t rtnl_prop_list_size(const struct net_device *dev)
{
struct netdev_name_node *name_node;
- size_t size;
+ unsigned int cnt = 0;
- if (list_empty(&dev->name_node->list))
+ rcu_read_lock();
+ list_for_each_entry_rcu(name_node, &dev->name_node->list, list)
+ cnt++;
+ rcu_read_unlock();
+
+ if (!cnt)
return 0;
- size = nla_total_size(0);
- list_for_each_entry(name_node, &dev->name_node->list, list)
- size += nla_total_size(ALTIFNAMSIZ);
- return size;
+
+ return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ);
}
static size_t rtnl_proto_down_size(const struct net_device *dev)
@@ -1060,7 +1057,7 @@ static size_t rtnl_dpll_pin_size(const struct net_device *dev)
{
size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */
- size += dpll_msg_pin_handle_size(netdev_dpll_pin(dev));
+ size += dpll_netdev_pin_handle_size(dev);
return size;
}
@@ -1795,7 +1792,7 @@ static int rtnl_fill_dpll_pin(struct sk_buff *skb,
if (!dpll_pin_nest)
return -EMSGSIZE;
- ret = dpll_msg_add_pin_handle(skb, netdev_dpll_pin(dev));
+ ret = dpll_netdev_add_pin_handle(skb, dev);
if (ret < 0)
goto nest_cancel;
@@ -2905,13 +2902,6 @@ static int do_setlink(const struct sk_buff *skb,
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
}
- if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
- if (err)
- goto errout;
- status |= DO_SETLINK_MODIFIED;
- }
-
if (ifm->ifi_flags || ifm->ifi_change) {
err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
extack);
@@ -2919,6 +2909,13 @@ static int do_setlink(const struct sk_buff *skb,
goto errout;
}
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+ if (err)
+ goto errout;
+ status |= DO_SETLINK_MODIFIED;
+ }
+
if (tb[IFLA_CARRIER]) {
err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
if (err)
@@ -3849,10 +3846,18 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto out;
err = -ENOBUFS;
- nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL);
+ nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask));
if (nskb == NULL)
goto out;
+ /* Synchronize the carrier state so we don't report a state
+ * that we're not actually going to honour immediately; if
+ * the driver just did a carrier off->on transition, we can
+ * only TX if link watch work has run, but without this we'd
+ * already report carrier on, even if it doesn't work yet.
+ */
+ linkwatch_sync_dev(dev);
+
err = rtnl_fill_ifinfo(nskb, dev, net,
RTM_NEWLINK, NETLINK_CB(skb).portid,
nlh->nlmsg_seq, 0, 0, ext_filter_mask,
@@ -5164,10 +5169,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
- struct nlattr *br_spec, *attr = NULL;
+ struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
- bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
@@ -5185,11 +5189,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
- have_flags = true;
+ br_flags_attr = attr;
flags = nla_get_u16(attr);
}
@@ -5233,8 +5237,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- if (have_flags)
- memcpy(nla_data(attr), &flags, sizeof(flags));
+ if (br_flags_attr)
+ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
out:
return err;
}
@@ -6408,17 +6412,64 @@ static int rtnl_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,
return dev->netdev_ops->ndo_mdb_add(dev, tb, nlh->nlmsg_flags, extack);
}
+static int rtnl_validate_mdb_entry_del_bulk(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
+{
+ struct br_mdb_entry *entry = nla_data(attr);
+ struct br_mdb_entry zero_entry = {};
+
+ if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length");
+ return -EINVAL;
+ }
+
+ if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
+ NL_SET_ERR_MSG(extack, "Unknown entry state");
+ return -EINVAL;
+ }
+
+ if (entry->flags) {
+ NL_SET_ERR_MSG(extack, "Entry flags cannot be set");
+ return -EINVAL;
+ }
+
+ if (entry->vid >= VLAN_N_VID - 1) {
+ NL_SET_ERR_MSG(extack, "Invalid entry VLAN id");
+ return -EINVAL;
+ }
+
+ if (memcmp(&entry->addr, &zero_entry.addr, sizeof(entry->addr))) {
+ NL_SET_ERR_MSG(extack, "Entry address cannot be set");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = {
+ [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ rtnl_validate_mdb_entry_del_bulk,
+ sizeof(struct br_mdb_entry)),
+ [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED },
+};
+
static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
+ bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK);
struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1];
struct net *net = sock_net(skb->sk);
struct br_port_msg *bpm;
struct net_device *dev;
int err;
- err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
- MDBA_SET_ENTRY_MAX, mdba_policy, extack);
+ if (!del_bulk)
+ err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
+ MDBA_SET_ENTRY_MAX, mdba_policy,
+ extack);
+ else
+ err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX,
+ mdba_del_bulk_policy, extack);
if (err)
return err;
@@ -6439,6 +6490,14 @@ static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
return -EINVAL;
}
+ if (del_bulk) {
+ if (!dev->netdev_ops->ndo_mdb_del_bulk) {
+ NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion");
+ return -EOPNOTSUPP;
+ }
+ return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack);
+ }
+
if (!dev->netdev_ops->ndo_mdb_del) {
NL_SET_ERR_MSG(extack, "Device does not support MDB operations");
return -EOPNOTSUPP;
@@ -6684,5 +6743,6 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_GETMDB, rtnl_mdb_get, rtnl_mdb_dump, 0);
rtnl_register(PF_BRIDGE, RTM_NEWMDB, rtnl_mdb_add, NULL, 0);
- rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL, 0);
+ rtnl_register(PF_BRIDGE, RTM_DELMDB, rtnl_mdb_del, NULL,
+ RTNL_FLAG_BULK_DEL_SUPPORTED);
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 7dc47c17d863..d0e0852a24d5 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
/* don't allow io_uring files */
- if (io_uring_get_socket(file)) {
+ if (io_is_uring_fops(file)) {
fput(file);
return -EINVAL;
}
@@ -325,7 +325,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
}
for (i = 0; i < fdmax; i++) {
- err = receive_fd_user(scm->fp->fp[i], cmsg_data + i, o_flags);
+ err = scm_recv_one_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
if (err < 0)
break;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 94cc40a6f797..edbbef563d4d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -337,7 +337,7 @@ static struct sk_buff *napi_skb_cache_get(void)
}
skb = nc->skb_cache[--nc->skb_count];
- kasan_unpoison_object_data(skbuff_cache, skb);
+ kasan_mempool_unpoison_object(skb, kmem_cache_size(skbuff_cache));
return skb;
}
@@ -890,6 +890,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
skb_get(list);
}
+static bool is_pp_page(struct page *page)
+{
+ return (page->pp_magic & ~0x3UL) == PP_SIGNATURE;
+}
+
#if IS_ENABLED(CONFIG_PAGE_POOL)
bool napi_pp_put_page(struct page *page, bool napi_safe)
{
@@ -905,7 +910,7 @@ bool napi_pp_put_page(struct page *page, bool napi_safe)
* and page_is_pfmemalloc() is checked in __page_pool_put_page()
* to avoid recycling the pfmemalloc page.
*/
- if (unlikely((page->pp_magic & ~0x3UL) != PP_SIGNATURE))
+ if (unlikely(!is_pp_page(page)))
return false;
pp = page->pp;
@@ -942,6 +947,37 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
return napi_pp_put_page(virt_to_page(data), napi_safe);
}
+/**
+ * skb_pp_frag_ref() - Increase fragment references of a page pool aware skb
+ * @skb: page pool aware skb
+ *
+ * Increase the fragment reference count (pp_ref_count) of a skb. This is
+ * intended to gain fragment references only for page pool aware skbs,
+ * i.e. when skb->pp_recycle is true, and not for fragments in a
+ * non-pp-recycling skb. It has a fallback to increase references on normal
+ * pages, as page pool aware skbs may also have normal page fragments.
+ */
+static int skb_pp_frag_ref(struct sk_buff *skb)
+{
+ struct skb_shared_info *shinfo;
+ struct page *head_page;
+ int i;
+
+ if (!skb->pp_recycle)
+ return -EINVAL;
+
+ shinfo = skb_shinfo(skb);
+
+ for (i = 0; i < shinfo->nr_frags; i++) {
+ head_page = compound_head(skb_frag_page(&shinfo->frags[i]));
+ if (likely(is_pp_page(head_page)))
+ page_pool_ref_page(head_page);
+ else
+ page_ref_inc(head_page);
+ }
+ return 0;
+}
+
static void skb_kfree_head(void *head, unsigned int end_offset)
{
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
@@ -1309,13 +1345,15 @@ static void napi_skb_cache_put(struct sk_buff *skb)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
u32 i;
- kasan_poison_object_data(skbuff_cache, skb);
+ if (!kasan_mempool_poison_object(skb))
+ return;
+
nc->skb_cache[nc->skb_count++] = skb;
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
- kasan_unpoison_object_data(skbuff_cache,
- nc->skb_cache[i]);
+ kasan_mempool_unpoison_object(nc->skb_cache[i],
+ kmem_cache_size(skbuff_cache));
kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF,
nc->skb_cache + NAPI_SKB_CACHE_HALF);
@@ -5767,17 +5805,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return false;
/* In general, avoid mixing page_pool and non-page_pool allocated
- * pages within the same SKB. Additionally avoid dealing with clones
- * with page_pool pages, in case the SKB is using page_pool fragment
- * references (page_pool_alloc_frag()). Since we only take full page
- * references for cloned SKBs at the moment that would result in
- * inconsistent reference counts.
- * In theory we could take full references if @from is cloned and
- * !@to->pp_recycle but its tricky (due to potential race with
- * the clone disappearing) and rare, so not worth dealing with.
+ * pages within the same SKB. In theory we could take full
+ * references if @from is cloned and !@to->pp_recycle but its
+ * tricky (due to potential race with the clone disappearing) and
+ * rare, so not worth dealing with.
*/
- if (to->pp_recycle != from->pp_recycle ||
- (from->pp_recycle && skb_cloned(from)))
+ if (to->pp_recycle != from->pp_recycle)
return false;
if (len <= skb_tailroom(to)) {
@@ -5834,8 +5867,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
/* if the skb is not cloned this does nothing
* since we set nr_frags to 0.
*/
- for (i = 0; i < from_shinfo->nr_frags; i++)
- __skb_frag_ref(&from_shinfo->frags[i]);
+ if (skb_pp_frag_ref(from)) {
+ for (i = 0; i < from_shinfo->nr_frags; i++)
+ __skb_frag_ref(&from_shinfo->frags[i]);
+ }
to->truesize += delta;
to->len += len;
@@ -5962,6 +5997,31 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len)
}
EXPORT_SYMBOL(skb_ensure_writable);
+int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev)
+{
+ int needed_headroom = dev->needed_headroom;
+ int needed_tailroom = dev->needed_tailroom;
+
+ /* For tail taggers, we need to pad short frames ourselves, to ensure
+ * that the tail tag does not fail at its role of being at the end of
+ * the packet, once the conduit interface pads the frame. Account for
+ * that pad length here, and pad later.
+ */
+ if (unlikely(needed_tailroom && skb->len < ETH_ZLEN))
+ needed_tailroom += ETH_ZLEN - skb->len;
+ /* skb_headroom() returns unsigned int... */
+ needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0);
+ needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0);
+
+ if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb)))
+ /* No reallocation needed, yay! */
+ return 0;
+
+ return pskb_expand_head(skb, needed_headroom, needed_tailroom,
+ GFP_ATOMIC);
+}
+EXPORT_SYMBOL(skb_ensure_writable_head_tail);
+
/* remove VLAN header from packet and update csum accordingly.
* expects a non skb_vlan_tag_present skb with a vlan tag payload
*/
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 93ecfceac1bc..4d75ef9d24bf 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1226,8 +1226,11 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
rcu_read_lock();
psock = sk_psock(sk);
- if (psock)
- psock->saved_data_ready(sk);
+ if (psock) {
+ read_lock_bh(&sk->sk_callback_lock);
+ sk_psock_data_ready(sk, psock);
+ read_unlock_bh(&sk->sk_callback_lock);
+ }
rcu_read_unlock();
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index d02534c77413..5e78798456fd 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -107,6 +107,7 @@
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
@@ -283,10 +284,6 @@ EXPORT_SYMBOL(sysctl_rmem_max);
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
-/* Maximal space eaten by iovec or ancillary data plus some space */
-int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
-EXPORT_SYMBOL(sysctl_optmem_max);
-
int sysctl_tstamp_allow_data __read_mostly = 1;
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
@@ -1191,6 +1188,17 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
*/
WRITE_ONCE(sk->sk_txrehash, (u8)val);
return 0;
+ case SO_PEEK_OFF:
+ {
+ int (*set_peek_off)(struct sock *sk, int val);
+
+ set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
+ if (set_peek_off)
+ ret = set_peek_off(sk, val);
+ else
+ ret = -EOPNOTSUPP;
+ return ret;
+ }
}
sockopt_lock_sock(sk);
@@ -1433,18 +1441,6 @@ set_sndbuf:
sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
break;
- case SO_PEEK_OFF:
- {
- int (*set_peek_off)(struct sock *sk, int val);
-
- set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
- if (set_peek_off)
- ret = set_peek_off(sk, val);
- else
- ret = -EOPNOTSUPP;
- break;
- }
-
case SO_NOFCS:
sock_valbool_flag(sk, SOCK_NOFCS, valbool);
break;
@@ -2658,7 +2654,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
/* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
- READ_ONCE(sysctl_optmem_max))
+ READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return NULL;
skb = alloc_skb(size, priority);
@@ -2676,7 +2672,7 @@ struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
*/
void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
{
- int optmem_max = READ_ONCE(sysctl_optmem_max);
+ int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
if ((unsigned int)size <= optmem_max &&
atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
@@ -4148,8 +4144,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
{
struct sock *sk = p;
- return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
- sk_busy_loop_timeout(sk, start_time);
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ return true;
+
+ if (sk_is_udp(sk) &&
+ !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
+ return true;
+
+ return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 03f1edb948d7..0f0cb1465e08 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -509,13 +509,6 @@ static struct ctl_table net_core_table[] = {
.proc_handler = proc_dointvec,
},
{
- .procname = "optmem_max",
- .data = &sysctl_optmem_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- {
.procname = "tstamp_allow_data",
.data = &sysctl_tstamp_allow_data,
.maxlen = sizeof(int),
@@ -674,6 +667,14 @@ static struct ctl_table netns_core_table[] = {
.proc_handler = proc_dointvec_minmax
},
{
+ .procname = "optmem_max",
+ .data = &init_net.core.sysctl_optmem_max,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .extra1 = SYSCTL_ZERO,
+ .proc_handler = proc_dointvec_minmax
+ },
+ {
.procname = "txrehash",
.data = &init_net.core.sysctl_txrehash,
.maxlen = sizeof(u8),
diff --git a/net/core/xdp.c b/net/core/xdp.c
index b6f1d6dab3f2..4869c1c2d8f3 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -736,6 +736,39 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
return -EOPNOTSUPP;
}
+/**
+ * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag
+ * @ctx: XDP context pointer.
+ * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID).
+ * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP)
+ *
+ * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*,
+ * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use
+ * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)**
+ * and should be used as follows:
+ * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();``
+ *
+ * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag.
+ * Driver is expected to provide those in **host byte order (usually LE)**,
+ * so the bpf program should not perform byte conversion.
+ * According to 802.1Q standard, *VLAN TCI (Tag control information)*
+ * is a bit field that contains:
+ * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``,
+ * *Drop eligible indicator (DEI)* - 1 bit,
+ * *Priority code point (PCP)* - 3 bits.
+ * For detailed meaning of DEI and PCP, please refer to other sources.
+ *
+ * Return:
+ * * Returns 0 on success or ``-errno`` on error.
+ * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc
+ * * ``-ENODATA`` : VLAN tag was not stripped or is not available
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto, u16 *vlan_tci)
+{
+ return -EOPNOTSUPP;
+}
+
__bpf_kfunc_end_defs();
BTF_SET8_START(xdp_metadata_kfunc_ids)
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4550b680665a..ded07e09f813 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -669,7 +669,7 @@ discard:
ipv6_pktoptions:
if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- np->mcast_oif = inet6_iif(opt_skb);
+ WRITE_ONCE(np->mcast_oif, inet6_iif(opt_skb));
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit);
if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
@@ -889,7 +889,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
- SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
+ net_dbg_ratelimited("connect: ipv4 mapped\n");
if (ipv6_only_sock(sk))
return -ENETUNREACH;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 6984877e9f10..7f0b093208d7 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -46,7 +46,7 @@ struct devlink_rel {
u32 obj_index;
devlink_rel_notify_cb_t *notify_cb;
devlink_rel_cleanup_cb_t *cleanup_cb;
- struct work_struct notify_work;
+ struct delayed_work notify_work;
} nested_in;
};
@@ -70,7 +70,7 @@ static void __devlink_rel_put(struct devlink_rel *rel)
static void devlink_rel_nested_in_notify_work(struct work_struct *work)
{
struct devlink_rel *rel = container_of(work, struct devlink_rel,
- nested_in.notify_work);
+ nested_in.notify_work.work);
struct devlink *devlink;
devlink = devlinks_xa_get(rel->nested_in.devlink_index);
@@ -96,13 +96,13 @@ rel_put:
return;
reschedule_work:
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 1);
}
static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
{
__devlink_rel_get(rel);
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 0);
}
static struct devlink_rel *devlink_rel_alloc(void)
@@ -123,8 +123,8 @@ static struct devlink_rel *devlink_rel_alloc(void)
}
refcount_set(&rel->refcount, 1);
- INIT_WORK(&rel->nested_in.notify_work,
- &devlink_rel_nested_in_notify_work);
+ INIT_DELAYED_WORK(&rel->nested_in.notify_work,
+ &devlink_rel_nested_in_notify_work);
return rel;
}
@@ -503,14 +503,14 @@ static void __net_exit devlink_pernet_pre_exit(struct net *net)
* all devlink instances from this namespace into init_net.
*/
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, true);
err = 0;
if (devl_is_registered(devlink))
err = devlink_reload(devlink, &init_net,
DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
DEVLINK_RELOAD_LIMIT_UNSPEC,
&actions_performed, NULL);
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, true);
devlink_put(devlink);
if (err && err != -EOPNOTSUPP)
pr_warn("Failed to reload devlink instance into init_net\n");
@@ -529,14 +529,20 @@ static int __init devlink_init(void)
{
int err;
- err = genl_register_family(&devlink_nl_family);
- if (err)
- goto out;
err = register_pernet_subsys(&devlink_pernet_ops);
if (err)
goto out;
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out_unreg_pernet_subsys;
err = register_netdevice_notifier(&devlink_port_netdevice_nb);
+ if (!err)
+ return 0;
+
+ genl_unregister_family(&devlink_nl_family);
+out_unreg_pernet_subsys:
+ unregister_pernet_subsys(&devlink_pernet_ops);
out:
WARN_ON(err);
return err;
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 4fc7adb32663..19dbf540748a 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -4,6 +4,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include "devl_internal.h"
@@ -201,7 +202,10 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
int err;
WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+ WARN_ON(!devl_is_registered(devlink));
+
+ if (!devlink_nl_notify_need(devlink))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -213,8 +217,7 @@ static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
@@ -424,6 +427,18 @@ static void devlink_reload_netns_change(struct devlink *devlink,
devlink_rel_nested_in_notify(devlink);
}
+static void devlink_reload_reinit_sanity_check(struct devlink *devlink)
+{
+ WARN_ON(!list_empty(&devlink->trap_policer_list));
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
+ WARN_ON(!list_empty(&devlink->dpipe_table_list));
+ WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
+ WARN_ON(!xa_empty(&devlink->ports));
+}
+
int devlink_reload(struct devlink *devlink, struct net *dest_net,
enum devlink_reload_action action,
enum devlink_reload_limit limit,
@@ -433,6 +448,13 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
struct net *curr_net;
int err;
+ /* Make sure the reload operations are invoked with the device lock
+ * held to allow drivers to trigger functionality that expects it
+ * (e.g., PCI reset) and to close possible races between these
+ * operations and probe/remove.
+ */
+ device_lock_assert(devlink->dev);
+
memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats));
@@ -444,8 +466,10 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
if (dest_net && !net_eq(dest_net, curr_net))
devlink_reload_netns_change(devlink, curr_net, dest_net);
- if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
+ if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT) {
devlink_params_driverinit_load_new(devlink);
+ devlink_reload_reinit_sanity_check(devlink);
+ }
err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
devlink_reload_failed_set(devlink, !!err);
@@ -977,7 +1001,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -988,8 +1012,7 @@ static void __devlink_flash_update_notify(struct devlink *devlink,
if (err)
goto out_free_msg;
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
return;
out_free_msg:
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 183dbe3807ab..c7a8e13f917c 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -3,6 +3,7 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/device.h>
#include <linux/etherdevice.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
@@ -90,10 +91,29 @@ extern struct genl_family devlink_nl_family;
struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp);
+static inline bool __devl_is_registered(struct devlink *devlink)
+{
+ return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+}
+
static inline bool devl_is_registered(struct devlink *devlink)
{
devl_assert_locked(devlink);
- return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+ return __devl_is_registered(devlink);
+}
+
+static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock)
+{
+ if (dev_lock)
+ device_lock(devlink->dev);
+ devl_lock(devlink);
+}
+
+static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
+{
+ devl_unlock(devlink);
+ if (dev_lock)
+ device_unlock(devlink->dev);
}
typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
@@ -111,9 +131,6 @@ int devlink_rel_devlink_handle_put(struct sk_buff *msg, struct devlink *devlink,
bool *msg_updated);
/* Netlink */
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
-#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-
enum devlink_multicast_groups {
DEVLINK_MCGRP_CONFIG,
};
@@ -140,7 +157,8 @@ typedef int devlink_nl_dump_one_func_t(struct sk_buff *msg,
int flags);
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock);
int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb,
devlink_nl_dump_one_func_t *dump_one);
@@ -167,6 +185,58 @@ int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
struct devlink *devlink, int attrtype);
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
+static inline bool devlink_nl_notify_need(struct devlink *devlink)
+{
+ return genl_has_listeners(&devlink_nl_family, devlink_net(devlink),
+ DEVLINK_MCGRP_CONFIG);
+}
+
+struct devlink_obj_desc {
+ struct rcu_head rcu;
+ const char *bus_name;
+ const char *dev_name;
+ unsigned int port_index;
+ bool port_index_valid;
+ long data[];
+};
+
+static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc,
+ struct devlink *devlink)
+{
+ memset(desc, 0, sizeof(*desc));
+ desc->bus_name = devlink->dev->bus->name;
+ desc->dev_name = dev_name(devlink->dev);
+}
+
+static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc,
+ struct devlink_port *devlink_port)
+{
+ desc->port_index = devlink_port->index;
+ desc->port_index_valid = true;
+}
+
+int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data);
+
+static inline void devlink_nl_notify_send_desc(struct devlink *devlink,
+ struct sk_buff *msg,
+ struct devlink_obj_desc *desc)
+{
+ genlmsg_multicast_netns_filtered(&devlink_nl_family,
+ devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG,
+ GFP_KERNEL,
+ devlink_nl_notify_filter, desc);
+}
+
+static inline void devlink_nl_notify_send(struct devlink *devlink,
+ struct sk_buff *msg)
+{
+ struct devlink_obj_desc desc;
+
+ devlink_nl_obj_desc_init(&desc, devlink);
+ devlink_nl_notify_send_desc(devlink, msg, &desc);
+}
+
/* Notify */
void devlink_notify_register(struct devlink *devlink);
void devlink_notify_unregister(struct devlink *devlink);
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 695df61f8ac2..acb8c0e174bb 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -490,12 +490,16 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
enum devlink_command cmd)
{
struct devlink *devlink = reporter->devlink;
+ struct devlink_obj_desc desc;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
ASSERT_DEVLINK_REGISTERED(devlink);
+ if (!devlink_nl_notify_need(devlink))
+ return;
+
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return;
@@ -506,8 +510,10 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_obj_desc_init(&desc, devlink);
+ if (reporter->devlink_port)
+ devlink_nl_obj_desc_port_set(&desc, reporter->devlink_port);
+ devlink_nl_notify_send_desc(devlink, msg, &desc);
}
void
@@ -1151,7 +1157,8 @@ devlink_health_reporter_get_from_cb_lock(struct netlink_callback *cb)
struct nlattr **attrs = info->attrs;
struct devlink *devlink;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return NULL;
diff --git a/net/devlink/linecard.c b/net/devlink/linecard.c
index 2f1c317b64cd..67f70a621d27 100644
--- a/net/devlink/linecard.c
+++ b/net/devlink/linecard.c
@@ -136,7 +136,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard,
WARN_ON(cmd != DEVLINK_CMD_LINECARD_NEW &&
cmd != DEVLINK_CMD_LINECARD_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -150,8 +150,7 @@ static void devlink_linecard_notify(struct devlink_linecard *linecard,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_linecards_notify_register(struct devlink *devlink)
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
index d0b90ebc8b15..499885c8b9ca 100644
--- a/net/devlink/netlink.c
+++ b/net/devlink/netlink.c
@@ -9,10 +9,127 @@
#include "devl_internal.h"
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_DEV_LOCK BIT(2)
+
static const struct genl_multicast_group devlink_nl_mcgrps[] = {
[DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
};
+struct devlink_nl_sock_priv {
+ struct devlink_obj_desc __rcu *flt;
+ spinlock_t flt_lock; /* Protects flt. */
+};
+
+static void devlink_nl_sock_priv_init(void *priv)
+{
+ struct devlink_nl_sock_priv *sk_priv = priv;
+
+ spin_lock_init(&sk_priv->flt_lock);
+}
+
+static void devlink_nl_sock_priv_destroy(void *priv)
+{
+ struct devlink_nl_sock_priv *sk_priv = priv;
+ struct devlink_obj_desc *flt;
+
+ flt = rcu_dereference_protected(sk_priv->flt, true);
+ kfree_rcu(flt, rcu);
+}
+
+int devlink_nl_notify_filter_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink_nl_sock_priv *sk_priv;
+ struct nlattr **attrs = info->attrs;
+ struct devlink_obj_desc *flt;
+ size_t data_offset = 0;
+ size_t data_size = 0;
+ char *pos;
+
+ if (attrs[DEVLINK_ATTR_BUS_NAME])
+ data_size = size_add(data_size,
+ nla_len(attrs[DEVLINK_ATTR_BUS_NAME]) + 1);
+ if (attrs[DEVLINK_ATTR_DEV_NAME])
+ data_size = size_add(data_size,
+ nla_len(attrs[DEVLINK_ATTR_DEV_NAME]) + 1);
+
+ flt = kzalloc(size_add(sizeof(*flt), data_size), GFP_KERNEL);
+ if (!flt)
+ return -ENOMEM;
+
+ pos = (char *) flt->data;
+ if (attrs[DEVLINK_ATTR_BUS_NAME]) {
+ data_offset += nla_strscpy(pos,
+ attrs[DEVLINK_ATTR_BUS_NAME],
+ data_size) + 1;
+ flt->bus_name = pos;
+ pos += data_offset;
+ }
+ if (attrs[DEVLINK_ATTR_DEV_NAME]) {
+ nla_strscpy(pos, attrs[DEVLINK_ATTR_DEV_NAME],
+ data_size - data_offset);
+ flt->dev_name = pos;
+ }
+
+ if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
+ flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
+ flt->port_index_valid = true;
+ }
+
+ /* Don't attach empty filter. */
+ if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) {
+ kfree(flt);
+ flt = NULL;
+ }
+
+ sk_priv = genl_sk_priv_get(&devlink_nl_family, NETLINK_CB(skb).sk);
+ if (IS_ERR(sk_priv)) {
+ kfree(flt);
+ return PTR_ERR(sk_priv);
+ }
+ spin_lock(&sk_priv->flt_lock);
+ flt = rcu_replace_pointer(sk_priv->flt, flt,
+ lockdep_is_held(&sk_priv->flt_lock));
+ spin_unlock(&sk_priv->flt_lock);
+ kfree_rcu(flt, rcu);
+ return 0;
+}
+
+static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc,
+ const struct devlink_obj_desc *flt)
+{
+ if (desc->bus_name && flt->bus_name &&
+ strcmp(desc->bus_name, flt->bus_name))
+ return false;
+ if (desc->dev_name && flt->dev_name &&
+ strcmp(desc->dev_name, flt->dev_name))
+ return false;
+ if (desc->port_index_valid && flt->port_index_valid &&
+ desc->port_index != flt->port_index)
+ return false;
+ return true;
+}
+
+int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data)
+{
+ struct devlink_obj_desc *desc = data;
+ struct devlink_nl_sock_priv *sk_priv;
+ struct devlink_obj_desc *flt;
+ int ret = 0;
+
+ rcu_read_lock();
+ sk_priv = __genl_sk_priv_get(&devlink_nl_family, dsk);
+ if (!IS_ERR_OR_NULL(sk_priv)) {
+ flt = rcu_dereference(sk_priv->flt);
+ if (flt)
+ ret = !devlink_obj_desc_match(desc, flt);
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
struct devlink *devlink, int attrtype)
{
@@ -61,7 +178,8 @@ int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info)
}
struct devlink *
-devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs,
+ bool dev_lock)
{
struct devlink *devlink;
unsigned long index;
@@ -75,12 +193,12 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
devlinks_xa_for_each_registered_get(net, index, devlink) {
- devl_lock(devlink);
+ devl_dev_lock(devlink, dev_lock);
if (devl_is_registered(devlink) &&
strcmp(devlink->dev->bus->name, busname) == 0 &&
strcmp(dev_name(devlink->dev), devname) == 0)
return devlink;
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
@@ -90,11 +208,13 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink_port *devlink_port;
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs);
+ devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs,
+ dev_lock);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
@@ -114,7 +234,7 @@ static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info,
return 0;
unlock:
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
return err;
}
@@ -131,6 +251,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT);
}
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK);
+}
+
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info)
@@ -138,16 +264,30 @@ int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT);
}
-void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
+static void __devlink_nl_post_doit(struct sk_buff *skb, struct genl_info *info,
+ u8 flags)
{
+ bool dev_lock = flags & DEVLINK_NL_FLAG_NEED_DEV_LOCK;
struct devlink *devlink;
devlink = info->user_ptr[0];
- devl_unlock(devlink);
+ devl_dev_unlock(devlink, dev_lock);
devlink_put(devlink);
}
+void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, 0);
+}
+
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK);
+}
+
static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct netlink_callback *cb, int flags,
devlink_nl_dump_one_func_t *dump_one,
@@ -156,7 +296,7 @@ static int devlink_nl_inst_single_dumpit(struct sk_buff *msg,
struct devlink *devlink;
int err;
- devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(msg->sk), attrs, false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
err = dump_one(msg, devlink, cb, flags | NLM_F_DUMP_FILTERED);
@@ -229,4 +369,7 @@ struct genl_family devlink_nl_family __ro_after_init = {
.resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
.mcgrps = devlink_nl_mcgrps,
.n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
+ .sock_priv_size = sizeof(struct devlink_nl_sock_priv),
+ .sock_priv_init = devlink_nl_sock_priv_init,
+ .sock_priv_destroy = devlink_nl_sock_priv_destroy,
};
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index 788dfdc498a9..c81cf2dd154f 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -560,8 +560,15 @@ static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELF
[DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy),
};
+/* DEVLINK_CMD_NOTIFY_FILTER_SET - do */
+static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = {
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, },
+};
+
/* Ops table for devlink */
-const struct genl_split_ops devlink_nl_ops[73] = {
+const struct genl_split_ops devlink_nl_ops[74] = {
{
.cmd = DEVLINK_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT,
@@ -846,9 +853,9 @@ const struct genl_split_ops devlink_nl_ops[73] = {
{
.cmd = DEVLINK_CMD_RELOAD,
.validate = GENL_DONT_VALIDATE_STRICT,
- .pre_doit = devlink_nl_pre_doit,
+ .pre_doit = devlink_nl_pre_doit_dev_lock,
.doit = devlink_nl_reload_doit,
- .post_doit = devlink_nl_post_doit,
+ .post_doit = devlink_nl_post_doit_dev_lock,
.policy = devlink_reload_nl_policy,
.maxattr = DEVLINK_ATTR_RELOAD_LIMITS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
@@ -1233,4 +1240,11 @@ const struct genl_split_ops devlink_nl_ops[73] = {
.maxattr = DEVLINK_ATTR_SELFTESTS,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
+ {
+ .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET,
+ .doit = devlink_nl_notify_filter_set_doit,
+ .policy = devlink_notify_filter_set_nl_policy,
+ .maxattr = DEVLINK_ATTR_PORT_INDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
};
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index 0e9e89c31c31..8f2bd50ddf5e 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -16,18 +16,23 @@ extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_F
extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1];
/* Ops table for devlink */
-extern const struct genl_split_ops devlink_nl_ops[73];
+extern const struct genl_split_ops devlink_nl_ops[74];
int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
int devlink_nl_pre_doit_port(const struct genl_split_ops *ops,
struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
void
devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb,
struct genl_info *info);
+void
+devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
@@ -137,5 +142,7 @@ int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
int devlink_nl_selftests_get_dumpit(struct sk_buff *skb,
struct netlink_callback *cb);
int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_notify_filter_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
#endif /* _LINUX_DEVLINK_GEN_H */
diff --git a/net/devlink/param.c b/net/devlink/param.c
index d74df09311a9..22bc3b500518 100644
--- a/net/devlink/param.c
+++ b/net/devlink/param.c
@@ -343,7 +343,7 @@ static void devlink_param_notify(struct devlink *devlink,
* will replay the notifications if the params are added/removed
* outside of the lifetime of the instance.
*/
- if (!devl_is_registered(devlink))
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -356,8 +356,7 @@ static void devlink_param_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
static void devlink_params_notify(struct devlink *devlink,
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 7634f187fa50..4b2d46ccfe48 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -507,12 +507,13 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
enum devlink_command cmd)
{
struct devlink *devlink = devlink_port->devlink;
+ struct devlink_obj_desc desc;
struct sk_buff *msg;
int err;
WARN_ON(cmd != DEVLINK_CMD_PORT_NEW && cmd != DEVLINK_CMD_PORT_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -525,8 +526,9 @@ static void devlink_port_notify(struct devlink_port *devlink_port,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_obj_desc_init(&desc, devlink);
+ devlink_nl_obj_desc_port_set(&desc, devlink_port);
+ devlink_nl_notify_send_desc(devlink, msg, &desc);
}
static void devlink_ports_notify(struct devlink *devlink,
@@ -581,7 +583,7 @@ devlink_nl_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
+ DEVLINK_CMD_PORT_NEW,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, flags,
cb->extack);
@@ -672,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
return -EOPNOTSUPP;
}
if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE],
"Function does not support state setting");
return -EOPNOTSUPP;
}
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 94b289b93ff2..7139e67e93ae 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -146,7 +146,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
WARN_ON(cmd != DEVLINK_CMD_RATE_NEW && cmd != DEVLINK_CMD_RATE_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -159,8 +159,7 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_rates_notify_register(struct devlink *devlink)
diff --git a/net/devlink/region.c b/net/devlink/region.c
index 0aab7b82d678..7319127c5913 100644
--- a/net/devlink/region.c
+++ b/net/devlink/region.c
@@ -234,15 +234,15 @@ static void devlink_nl_region_notify(struct devlink_region *region,
struct sk_buff *msg;
WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!__devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
if (IS_ERR(msg))
return;
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_regions_notify_register(struct devlink *devlink)
@@ -883,7 +883,8 @@ int devlink_nl_region_read_dumpit(struct sk_buff *skb,
start_offset = state->start_offset;
- devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs,
+ false);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
diff --git a/net/devlink/trap.c b/net/devlink/trap.c
index c26313e7ca08..5d18c7424df1 100644
--- a/net/devlink/trap.c
+++ b/net/devlink/trap.c
@@ -1173,7 +1173,8 @@ devlink_trap_group_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW &&
cmd != DEVLINK_CMD_TRAP_GROUP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1187,8 +1188,7 @@ devlink_trap_group_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_trap_groups_notify_register(struct devlink *devlink)
@@ -1234,7 +1234,8 @@ static void devlink_trap_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW &&
cmd != DEVLINK_CMD_TRAP_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1247,8 +1248,7 @@ static void devlink_trap_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_traps_notify_register(struct devlink *devlink)
@@ -1710,7 +1710,8 @@ devlink_trap_policer_notify(struct devlink *devlink,
WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_POLICER_NEW &&
cmd != DEVLINK_CMD_TRAP_POLICER_DEL);
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+
+ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink))
return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
@@ -1724,8 +1725,7 @@ devlink_trap_policer_notify(struct devlink *devlink,
return;
}
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ devlink_nl_notify_send(devlink, msg);
}
void devlink_trap_policers_notify_register(struct devlink *devlink)
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
index 155b06163409..7c2dba273e35 100644
--- a/net/dns_resolver/Kconfig
+++ b/net/dns_resolver/Kconfig
@@ -23,6 +23,6 @@ config DNS_RESOLVER
information.
To compile this as a module, choose M here: the module will be called
- dnsresolver.
+ dns_resolver.
If unsure, say N.
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index f18ca02aa95a..c42ddd85ff1f 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -104,7 +104,7 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
const struct dns_server_list_v1_header *v1;
/* It may be a server list. */
- if (datalen <= sizeof(*v1))
+ if (datalen < sizeof(*v1))
return -EINVAL;
v1 = (const struct dns_server_list_v1_header *)data;
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
index 92ce67b93a58..cbb588ca73aa 100644
--- a/net/dsa/tag_ar9331.c
+++ b/net/dsa/tag_ar9331.c
@@ -89,6 +89,7 @@ static const struct dsa_device_ops ar9331_netdev_ops = {
.needed_headroom = AR9331_HDR_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Atheros AR9331 SoC with built-in switch");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_AR9331, AR9331_NAME);
module_dsa_tag_driver(ar9331_netdev_ops);
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 83d283a5d27e..8c3c068728e5 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -335,4 +335,5 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = {
module_dsa_tag_drivers(dsa_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for Broadcom switches using in-frame headers");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 8ed52dd663ab..2a2c4fb61a65 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -406,4 +406,5 @@ static struct dsa_tag_driver *dsa_tag_drivers[] = {
module_dsa_tag_drivers(dsa_tag_drivers);
+MODULE_DESCRIPTION("DSA tag driver for Marvell switches using DSA headers");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c
index 3539141b5350..51a1f46a567f 100644
--- a/net/dsa/tag_gswip.c
+++ b/net/dsa/tag_gswip.c
@@ -107,6 +107,7 @@ static const struct dsa_device_ops gswip_netdev_ops = {
.needed_headroom = GSWIP_RX_HEADER_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Lantiq / Intel GSWIP switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_GSWIP, GSWIP_NAME);
diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c
index 6e233cd0aa38..663b25785d95 100644
--- a/net/dsa/tag_hellcreek.c
+++ b/net/dsa/tag_hellcreek.c
@@ -67,6 +67,7 @@ static const struct dsa_device_ops hellcreek_netdev_ops = {
.needed_tailroom = HELLCREEK_TAG_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Hirschmann Hellcreek TSN switches");
MODULE_LICENSE("Dual MIT/GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_HELLCREEK, HELLCREEK_NAME);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 9be341fa88f0..ee7b272ab715 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -459,4 +459,5 @@ static struct dsa_tag_driver *dsa_tag_driver_array[] = {
module_dsa_tag_drivers(dsa_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for Microchip 8795/937x/9477/9893 families of switches");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c
index 1ed8ee24855d..258e5d7dc5ef 100644
--- a/net/dsa/tag_lan9303.c
+++ b/net/dsa/tag_lan9303.c
@@ -119,6 +119,7 @@ static const struct dsa_device_ops lan9303_netdev_ops = {
.needed_headroom = LAN9303_TAG_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for SMSC/Microchip LAN9303 family of switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_LAN9303, LAN9303_NAME);
diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c
index 2483785f6ab1..b670e3c53e91 100644
--- a/net/dsa/tag_mtk.c
+++ b/net/dsa/tag_mtk.c
@@ -102,6 +102,7 @@ static const struct dsa_device_ops mtk_netdev_ops = {
.needed_headroom = MTK_HDR_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Mediatek switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_MTK, MTK_NAME);
diff --git a/net/dsa/tag_none.c b/net/dsa/tag_none.c
index 9a473624db50..e9c9670a9c44 100644
--- a/net/dsa/tag_none.c
+++ b/net/dsa/tag_none.c
@@ -27,4 +27,5 @@ static const struct dsa_device_ops none_ops = {
module_dsa_tag_driver(none_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_NONE, NONE_NAME);
+MODULE_DESCRIPTION("DSA no-op tag driver");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index ef2f8fffb2c7..e0e4300bfbd3 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -217,4 +217,5 @@ static struct dsa_tag_driver *ocelot_tag_driver_array[] = {
module_dsa_tag_drivers(ocelot_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for Ocelot family of switches, using NPI port");
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 210039320888..b059381310fe 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -133,6 +133,7 @@ static const struct dsa_device_ops ocelot_8021q_netdev_ops = {
.promisc_on_conduit = true,
};
+MODULE_DESCRIPTION("DSA tag driver for Ocelot family of switches, using VLAN");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_OCELOT_8021Q, OCELOT_8021Q_NAME);
diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c
index 6514aa7993ce..0cf61286b426 100644
--- a/net/dsa/tag_qca.c
+++ b/net/dsa/tag_qca.c
@@ -119,6 +119,7 @@ static const struct dsa_device_ops qca_netdev_ops = {
.promisc_on_conduit = true,
};
+MODULE_DESCRIPTION("DSA tag driver for Qualcomm Atheros QCA8K switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_QCA, QCA_NAME);
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 4da5bad1a7aa..feaefa0e179b 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -23,7 +23,6 @@
#define RTL4_A_NAME "rtl4a"
#define RTL4_A_HDR_LEN 4
-#define RTL4_A_ETHERTYPE 0x8899
#define RTL4_A_PROTOCOL_SHIFT 12
/*
* 0x1 = Realtek Remote Control protocol (RRCP)
@@ -54,7 +53,7 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
/* Set Ethertype */
p = (__be16 *)tag;
- *p = htons(RTL4_A_ETHERTYPE);
+ *p = htons(ETH_P_REALTEK);
out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT);
/* The lower bits indicate the port number */
@@ -82,7 +81,7 @@ static struct sk_buff *rtl4a_tag_rcv(struct sk_buff *skb,
tag = dsa_etype_header_pos_rx(skb);
p = (__be16 *)tag;
etype = ntohs(*p);
- if (etype != RTL4_A_ETHERTYPE) {
+ if (etype != ETH_P_REALTEK) {
/* Not custom, just pass through */
netdev_dbg(dev, "non-realtek ethertype 0x%04x\n", etype);
return skb;
@@ -122,5 +121,6 @@ static const struct dsa_device_ops rtl4a_netdev_ops = {
};
module_dsa_tag_driver(rtl4a_netdev_ops);
+MODULE_DESCRIPTION("DSA tag driver for Realtek 4 byte protocol A tags");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_RTL4_A, RTL4_A_NAME);
diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c
index 07e857debabf..15c2bae2b429 100644
--- a/net/dsa/tag_rtl8_4.c
+++ b/net/dsa/tag_rtl8_4.c
@@ -258,4 +258,5 @@ static struct dsa_tag_driver *dsa_tag_drivers[] = {
};
module_dsa_tag_drivers(dsa_tag_drivers);
+MODULE_DESCRIPTION("DSA tag driver for Realtek 8 byte protocol 4 tags");
MODULE_LICENSE("GPL");
diff --git a/net/dsa/tag_rzn1_a5psw.c b/net/dsa/tag_rzn1_a5psw.c
index 2ce866b45615..69d51221b1e5 100644
--- a/net/dsa/tag_rzn1_a5psw.c
+++ b/net/dsa/tag_rzn1_a5psw.c
@@ -110,6 +110,7 @@ static const struct dsa_device_ops a5psw_netdev_ops = {
.needed_headroom = A5PSW_TAG_LEN,
};
+MODULE_DESCRIPTION("DSA tag driver for Renesas RZ/N1 A5PSW switch");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_A5PSW, A5PSW_NAME);
module_dsa_tag_driver(a5psw_netdev_ops);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 1fffe8c2b589..2717e9d7b612 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -806,4 +806,5 @@ static struct dsa_tag_driver *sja1105_tag_driver_array[] = {
module_dsa_tag_drivers(sja1105_tag_driver_array);
+MODULE_DESCRIPTION("DSA tag driver for NXP SJA1105 switches");
MODULE_LICENSE("GPL v2");
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index 1ebb25a8b140..22742a53d6f4 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -59,6 +59,7 @@ static const struct dsa_device_ops trailer_netdev_ops = {
.needed_tailroom = 4,
};
+MODULE_DESCRIPTION("DSA tag driver for switches using a trailer tag");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_TRAILER, TRAILER_NAME);
diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c
index c9c163598ef2..68d4633ddd5e 100644
--- a/net/dsa/tag_xrs700x.c
+++ b/net/dsa/tag_xrs700x.c
@@ -60,6 +60,7 @@ static const struct dsa_device_ops xrs700x_netdev_ops = {
.needed_tailroom = 1,
};
+MODULE_DESCRIPTION("DSA tag driver for XRS700x switches");
MODULE_LICENSE("GPL");
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_XRS700X, XRS700X_NAME);
diff --git a/net/dsa/user.c b/net/dsa/user.c
index d438884a4eb0..b15e71cc342c 100644
--- a/net/dsa/user.c
+++ b/net/dsa/user.c
@@ -920,30 +920,6 @@ netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev)
}
EXPORT_SYMBOL_GPL(dsa_enqueue_skb);
-static int dsa_realloc_skb(struct sk_buff *skb, struct net_device *dev)
-{
- int needed_headroom = dev->needed_headroom;
- int needed_tailroom = dev->needed_tailroom;
-
- /* For tail taggers, we need to pad short frames ourselves, to ensure
- * that the tail tag does not fail at its role of being at the end of
- * the packet, once the conduit interface pads the frame. Account for
- * that pad length here, and pad later.
- */
- if (unlikely(needed_tailroom && skb->len < ETH_ZLEN))
- needed_tailroom += ETH_ZLEN - skb->len;
- /* skb_headroom() returns unsigned int... */
- needed_headroom = max_t(int, needed_headroom - skb_headroom(skb), 0);
- needed_tailroom = max_t(int, needed_tailroom - skb_tailroom(skb), 0);
-
- if (likely(!needed_headroom && !needed_tailroom && !skb_cloned(skb)))
- /* No reallocation needed, yay! */
- return 0;
-
- return pskb_expand_head(skb, needed_headroom, needed_tailroom,
- GFP_ATOMIC);
-}
-
static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_user_priv *p = netdev_priv(dev);
@@ -956,13 +932,14 @@ static netdev_tx_t dsa_user_xmit(struct sk_buff *skb, struct net_device *dev)
/* Handle tx timestamp if any */
dsa_skb_tx_timestamp(p, skb);
- if (dsa_realloc_skb(skb, dev)) {
+ if (skb_ensure_writable_head_tail(skb, dev)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
/* needed_tailroom should still be 'warm' in the cache line from
- * dsa_realloc_skb(), which has also ensured that padding is safe.
+ * skb_ensure_writable_head_tail(), which has also ensured that
+ * padding is safe.
*/
if (dev->needed_tailroom)
eth_skb_pad(skb);
@@ -2829,13 +2806,14 @@ EXPORT_SYMBOL_GPL(dsa_user_dev_check);
static int dsa_user_changeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
- struct dsa_port *dp = dsa_user_to_port(dev);
struct netlink_ext_ack *extack;
int err = NOTIFY_DONE;
+ struct dsa_port *dp;
if (!dsa_user_dev_check(dev))
return err;
+ dp = dsa_user_to_port(dev);
extack = netdev_notifier_info_to_extack(&info->info);
if (netif_is_bridge_master(info->upper_dev)) {
@@ -2888,11 +2866,13 @@ static int dsa_user_changeupper(struct net_device *dev,
static int dsa_user_prechangeupper(struct net_device *dev,
struct netdev_notifier_changeupper_info *info)
{
- struct dsa_port *dp = dsa_user_to_port(dev);
+ struct dsa_port *dp;
if (!dsa_user_dev_check(dev))
return NOTIFY_DONE;
+ dp = dsa_user_to_port(dev);
+
if (netif_is_bridge_master(info->upper_dev) && !info->linking)
dsa_port_pre_bridge_leave(dp, info->upper_dev);
else if (netif_is_lag_master(info->upper_dev) && !info->linking)
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index b4419fb6df6a..6b2a360dcdf0 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -589,8 +589,8 @@ err_free_info:
int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
{
+ struct ethtool_rxfh_param rxfh = {};
u32 dev_size, current_max = 0;
- u32 *indir;
int ret;
if (!dev->ethtool_ops->get_rxfh_indir_size ||
@@ -600,21 +600,21 @@ int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max)
if (dev_size == 0)
return -EOPNOTSUPP;
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
+ rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER);
+ if (!rxfh.indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
if (ret)
goto out;
while (dev_size--)
- current_max = max(current_max, indir[dev_size]);
+ current_max = max(current_max, rxfh.indir[dev_size]);
*max = current_max;
out:
- kfree(indir);
+ kfree(rxfh.indir);
return ret;
}
@@ -661,6 +661,12 @@ int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index)
}
EXPORT_SYMBOL(ethtool_get_phc_vclocks);
+int ethtool_get_ts_info_by_layer(struct net_device *dev, struct ethtool_ts_info *info)
+{
+ return __ethtool_get_ts_info(dev, info);
+}
+EXPORT_SYMBOL(ethtool_get_ts_info_by_layer);
+
const struct ethtool_phy_ops *ethtool_phy_ops;
void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops)
diff --git a/net/ethtool/features.c b/net/ethtool/features.c
index a79af8c25a07..b6cb101d7f19 100644
--- a/net/ethtool/features.c
+++ b/net/ethtool/features.c
@@ -234,17 +234,20 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info)
dev = req_info.dev;
rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
ethnl_features_to_bitmap(old_active, dev->features);
ethnl_features_to_bitmap(old_wanted, dev->wanted_features);
ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT,
tb[ETHTOOL_A_FEATURES_WANTED],
netdev_features_strings, info->extack);
if (ret < 0)
- goto out_rtnl;
+ goto out_ops;
if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) {
GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features");
ret = -EINVAL;
- goto out_rtnl;
+ goto out_ops;
}
/* set req_wanted bits not in req_mask from old_wanted */
@@ -281,6 +284,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info)
if (mod)
netdev_features_change(dev);
+out_ops:
+ ethnl_ops_complete(dev);
out_rtnl:
rtnl_unlock();
ethnl_parse_header_dev_put(&req_info);
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 0b0ce4f81c01..7519b0818b91 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -58,6 +58,9 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev)
u32 ethtool_op_get_link(struct net_device *dev)
{
+ /* Synchronize carrier state with link watch, see also rtnl_getlink() */
+ linkwatch_sync_dev(dev);
+
return netif_carrier_ok(dev) ? 1 : 0;
}
EXPORT_SYMBOL(ethtool_op_get_link);
@@ -969,18 +972,38 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr,
static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
u32 cmd, void __user *useraddr)
{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
struct ethtool_rxnfc info;
size_t info_size = sizeof(info);
int rc;
- if (!dev->ethtool_ops->set_rxnfc)
+ if (!ops->set_rxnfc)
return -EOPNOTSUPP;
rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr);
if (rc)
return rc;
- rc = dev->ethtool_ops->set_rxnfc(dev, &info);
+ if (ops->get_rxfh) {
+ struct ethtool_rxfh_param rxfh = {};
+
+ rc = ops->get_rxfh(dev, &rxfh);
+ if (rc)
+ return rc;
+
+ /* Sanity check: if symmetric-xor is set, then:
+ * 1 - no other fields besides IP src/dst and/or L4 src/dst
+ * 2 - If src is set, dst must also be set
+ */
+ if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
+ ((info.data & ~(RXH_IP_SRC | RXH_IP_DST |
+ RXH_L4_B_0_1 | RXH_L4_B_2_3)) ||
+ (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) ||
+ (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3))))
+ return -EINVAL;
+ }
+
+ rc = ops->set_rxnfc(dev, &info);
if (rc)
return rc;
@@ -1058,15 +1081,15 @@ EXPORT_SYMBOL(netdev_rss_key_fill);
static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
- u32 user_size, dev_size;
- u32 *indir;
+ struct ethtool_rxfh_param rxfh = {};
+ u32 user_size;
int ret;
if (!dev->ethtool_ops->get_rxfh_indir_size ||
!dev->ethtool_ops->get_rxfh)
return -EOPNOTSUPP;
- dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
- if (dev_size == 0)
+ rxfh.indir_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+ if (rxfh.indir_size == 0)
return -EOPNOTSUPP;
if (copy_from_user(&user_size,
@@ -1075,41 +1098,41 @@ static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
return -EFAULT;
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
- &dev_size, sizeof(dev_size)))
+ &rxfh.indir_size, sizeof(rxfh.indir_size)))
return -EFAULT;
/* If the user buffer size is 0, this is just a query for the
* device table size. Otherwise, if it's smaller than the
* device table size it's an error.
*/
- if (user_size < dev_size)
+ if (user_size < rxfh.indir_size)
return user_size == 0 ? 0 : -EINVAL;
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
+ rxfh.indir = kcalloc(rxfh.indir_size, sizeof(rxfh.indir[0]), GFP_USER);
+ if (!rxfh.indir)
return -ENOMEM;
- ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL);
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh);
if (ret)
goto out;
-
if (copy_to_user(useraddr +
offsetof(struct ethtool_rxfh_indir, ring_index[0]),
- indir, dev_size * sizeof(indir[0])))
+ rxfh.indir, rxfh.indir_size * sizeof(*rxfh.indir)))
ret = -EFAULT;
out:
- kfree(indir);
+ kfree(rxfh.indir);
return ret;
}
static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
void __user *useraddr)
{
- struct ethtool_rxnfc rx_rings;
- u32 user_size, dev_size, i;
- u32 *indir;
const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxfh_param rxfh_dev = {};
+ struct netlink_ext_ack *extack = NULL;
+ struct ethtool_rxnfc rx_rings;
+ u32 user_size, i;
int ret;
u32 ringidx_offset = offsetof(struct ethtool_rxfh_indir, ring_index[0]);
@@ -1117,8 +1140,8 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
!ops->get_rxnfc)
return -EOPNOTSUPP;
- dev_size = ops->get_rxfh_indir_size(dev);
- if (dev_size == 0)
+ rxfh_dev.indir_size = ops->get_rxfh_indir_size(dev);
+ if (rxfh_dev.indir_size == 0)
return -EOPNOTSUPP;
if (copy_from_user(&user_size,
@@ -1126,11 +1149,12 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
sizeof(user_size)))
return -EFAULT;
- if (user_size != 0 && user_size != dev_size)
+ if (user_size != 0 && user_size != rxfh_dev.indir_size)
return -EINVAL;
- indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
- if (!indir)
+ rxfh_dev.indir = kcalloc(rxfh_dev.indir_size,
+ sizeof(rxfh_dev.indir[0]), GFP_USER);
+ if (!rxfh_dev.indir)
return -ENOMEM;
rx_rings.cmd = ETHTOOL_GRXRINGS;
@@ -1139,18 +1163,21 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
goto out;
if (user_size == 0) {
- for (i = 0; i < dev_size; i++)
+ u32 *indir = rxfh_dev.indir;
+
+ for (i = 0; i < rxfh_dev.indir_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
} else {
- ret = ethtool_copy_validate_indir(indir,
+ ret = ethtool_copy_validate_indir(rxfh_dev.indir,
useraddr + ringidx_offset,
&rx_rings,
- dev_size);
+ rxfh_dev.indir_size);
if (ret)
goto out;
}
- ret = ops->set_rxfh(dev, indir, NULL, ETH_RSS_HASH_NO_CHANGE);
+ rxfh_dev.hfunc = ETH_RSS_HASH_NO_CHANGE;
+ ret = ops->set_rxfh(dev, &rxfh_dev, extack);
if (ret)
goto out;
@@ -1161,32 +1188,29 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
dev->priv_flags |= IFF_RXFH_CONFIGURED;
out:
- kfree(indir);
+ kfree(rxfh_dev.indir);
return ret;
}
static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
void __user *useraddr)
{
- int ret;
const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_rxfh_param rxfh_dev = {};
u32 user_indir_size, user_key_size;
- u32 dev_indir_size = 0, dev_key_size = 0;
struct ethtool_rxfh rxfh;
- u32 total_size;
u32 indir_bytes;
- u32 *indir = NULL;
- u8 dev_hfunc = 0;
- u8 *hkey = NULL;
u8 *rss_config;
+ u32 total_size;
+ int ret;
if (!ops->get_rxfh)
return -EOPNOTSUPP;
if (ops->get_rxfh_indir_size)
- dev_indir_size = ops->get_rxfh_indir_size(dev);
+ rxfh_dev.indir_size = ops->get_rxfh_indir_size(dev);
if (ops->get_rxfh_key_size)
- dev_key_size = ops->get_rxfh_key_size(dev);
+ rxfh_dev.key_size = ops->get_rxfh_key_size(dev);
if (copy_from_user(&rxfh, useraddr, sizeof(rxfh)))
return -EFAULT;
@@ -1194,44 +1218,46 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev,
user_key_size = rxfh.key_size;
/* Check that reserved fields are 0 for now */
- if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->get_rxfh_context)
+ if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
return -EOPNOTSUPP;
- rxfh.indir_size = dev_indir_size;
- rxfh.key_size = dev_key_size;
+ rxfh.indir_size = rxfh_dev.indir_size;
+ rxfh.key_size = rxfh_dev.key_size;
if (copy_to_user(useraddr, &rxfh, sizeof(rxfh)))
return -EFAULT;
- if ((user_indir_size && (user_indir_size != dev_indir_size)) ||
- (user_key_size && (user_key_size != dev_key_size)))
+ if ((user_indir_size && user_indir_size != rxfh_dev.indir_size) ||
+ (user_key_size && user_key_size != rxfh_dev.key_size))
return -EINVAL;
- indir_bytes = user_indir_size * sizeof(indir[0]);
+ indir_bytes = user_indir_size * sizeof(rxfh_dev.indir[0]);
total_size = indir_bytes + user_key_size;
rss_config = kzalloc(total_size, GFP_USER);
if (!rss_config)
return -ENOMEM;
if (user_indir_size)
- indir = (u32 *)rss_config;
+ rxfh_dev.indir = (u32 *)rss_config;
if (user_key_size)
- hkey = rss_config + indir_bytes;
+ rxfh_dev.key = rss_config + indir_bytes;
- if (rxfh.rss_context)
- ret = dev->ethtool_ops->get_rxfh_context(dev, indir, hkey,
- &dev_hfunc,
- rxfh.rss_context);
- else
- ret = dev->ethtool_ops->get_rxfh(dev, indir, hkey, &dev_hfunc);
+ rxfh_dev.rss_context = rxfh.rss_context;
+
+ ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev);
if (ret)
goto out;
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc),
- &dev_hfunc, sizeof(rxfh.hfunc))) {
+ &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) {
+ ret = -EFAULT;
+ } else if (copy_to_user(useraddr +
+ offsetof(struct ethtool_rxfh, input_xfrm),
+ &rxfh_dev.input_xfrm,
+ sizeof(rxfh.input_xfrm))) {
ret = -EFAULT;
} else if (copy_to_user(useraddr +
offsetof(struct ethtool_rxfh, rss_config[0]),
@@ -1247,16 +1273,16 @@ out:
static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
void __user *useraddr)
{
- int ret;
+ u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
const struct ethtool_ops *ops = dev->ethtool_ops;
+ u32 dev_indir_size = 0, dev_key_size = 0, i;
+ struct ethtool_rxfh_param rxfh_dev = {};
+ struct netlink_ext_ack *extack = NULL;
struct ethtool_rxnfc rx_rings;
struct ethtool_rxfh rxfh;
- u32 dev_indir_size = 0, dev_key_size = 0, i;
- u32 *indir = NULL, indir_bytes = 0;
- u8 *hkey = NULL;
+ u32 indir_bytes = 0;
u8 *rss_config;
- u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
- bool delete = false;
+ int ret;
if (!ops->get_rxnfc || !ops->set_rxfh)
return -EOPNOTSUPP;
@@ -1270,25 +1296,34 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
return -EFAULT;
/* Check that reserved fields are 0 for now */
- if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32)
+ if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32)
return -EINVAL;
/* Most drivers don't handle rss_context, check it's 0 as well */
- if (rxfh.rss_context && !ops->set_rxfh_context)
+ if (rxfh.rss_context && !ops->cap_rss_ctx_supported)
+ return -EOPNOTSUPP;
+ /* Check input data transformation capabilities */
+ if (rxfh.input_xfrm && rxfh.input_xfrm != RXH_XFRM_SYM_XOR &&
+ rxfh.input_xfrm != RXH_XFRM_NO_CHANGE)
+ return -EINVAL;
+ if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) &&
+ !ops->cap_rss_sym_xor_supported)
return -EOPNOTSUPP;
/* If either indir, hash key or function is valid, proceed further.
- * Must request at least one change: indir size, hash key or function.
+ * Must request at least one change: indir size, hash key, function
+ * or input transformation.
*/
if ((rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE &&
rxfh.indir_size != dev_indir_size) ||
(rxfh.key_size && (rxfh.key_size != dev_key_size)) ||
(rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE &&
- rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE))
+ rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE &&
+ rxfh.input_xfrm == RXH_XFRM_NO_CHANGE))
return -EINVAL;
if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE)
- indir_bytes = dev_indir_size * sizeof(indir[0]);
+ indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]);
rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER);
if (!rss_config)
@@ -1305,8 +1340,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
*/
if (rxfh.indir_size &&
rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) {
- indir = (u32 *)rss_config;
- ret = ethtool_copy_validate_indir(indir,
+ rxfh_dev.indir = (u32 *)rss_config;
+ rxfh_dev.indir_size = dev_indir_size;
+ ret = ethtool_copy_validate_indir(rxfh_dev.indir,
useraddr + rss_cfg_offset,
&rx_rings,
rxfh.indir_size);
@@ -1314,17 +1350,22 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
goto out;
} else if (rxfh.indir_size == 0) {
if (rxfh.rss_context == 0) {
- indir = (u32 *)rss_config;
+ u32 *indir;
+
+ rxfh_dev.indir = (u32 *)rss_config;
+ rxfh_dev.indir_size = dev_indir_size;
+ indir = rxfh_dev.indir;
for (i = 0; i < dev_indir_size; i++)
indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
} else {
- delete = true;
+ rxfh_dev.rss_delete = true;
}
}
if (rxfh.key_size) {
- hkey = rss_config + indir_bytes;
- if (copy_from_user(hkey,
+ rxfh_dev.key_size = dev_key_size;
+ rxfh_dev.key = rss_config + indir_bytes;
+ if (copy_from_user(rxfh_dev.key,
useraddr + rss_cfg_offset + indir_bytes,
rxfh.key_size)) {
ret = -EFAULT;
@@ -1332,19 +1373,19 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
}
}
- if (rxfh.rss_context)
- ret = ops->set_rxfh_context(dev, indir, hkey, rxfh.hfunc,
- &rxfh.rss_context, delete);
- else
- ret = ops->set_rxfh(dev, indir, hkey, rxfh.hfunc);
+ rxfh_dev.hfunc = rxfh.hfunc;
+ rxfh_dev.rss_context = rxfh.rss_context;
+ rxfh_dev.input_xfrm = rxfh.input_xfrm;
+
+ ret = ops->set_rxfh(dev, &rxfh_dev, extack);
if (ret)
goto out;
if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context),
- &rxfh.rss_context, sizeof(rxfh.rss_context)))
+ &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context)))
ret = -EFAULT;
- if (!rxfh.rss_context) {
+ if (!rxfh_dev.rss_context) {
/* indicate whether rxfh was set to default */
if (rxfh.indir_size == 0)
dev->priv_flags &= ~IFF_RXFH_CONFIGURED;
@@ -1991,6 +2032,13 @@ __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...)
}
EXPORT_SYMBOL(ethtool_sprintf);
+void ethtool_puts(u8 **data, const char *str)
+{
+ strscpy(*data, str, ETH_GSTRING_LEN);
+ *data += ETH_GSTRING_LEN;
+}
+EXPORT_SYMBOL(ethtool_puts);
+
static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
{
struct ethtool_value id;
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index fb09f774ea01..b7865a14fdf8 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -124,6 +124,8 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 },
[ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1),
+ [ETHTOOL_A_RINGS_TCP_DATA_SPLIT] =
+ NLA_POLICY_MAX(NLA_U8, ETHTOOL_TCP_DATA_SPLIT_ENABLED),
[ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1),
[ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
@@ -145,6 +147,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info,
return -EOPNOTSUPP;
}
+ if (tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_TCP_DATA_SPLIT)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT],
+ "setting TCP data split is not supported");
+ return -EOPNOTSUPP;
+ }
+
if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
NL_SET_ERR_MSG_ATTR(info->extack,
@@ -202,6 +212,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod);
ethnl_update_u32(&kernel_ringparam.rx_buf_len,
tb[ETHTOOL_A_RINGS_RX_BUF_LEN], &mod);
+ ethnl_update_u8(&kernel_ringparam.tcp_data_split,
+ tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], &mod);
ethnl_update_u32(&kernel_ringparam.cqe_size,
tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
ethnl_update_u8(&kernel_ringparam.tx_push,
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index 5764202e6cb6..71679137eff2 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -13,6 +13,7 @@ struct rss_reply_data {
u32 indir_size;
u32 hkey_size;
u32 hfunc;
+ u32 input_xfrm;
u32 *indir_table;
u8 *hkey;
};
@@ -48,9 +49,9 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
struct rss_reply_data *data = RSS_REPDATA(reply_base);
struct rss_req_info *request = RSS_REQINFO(req_base);
struct net_device *dev = reply_base->dev;
+ struct ethtool_rxfh_param rxfh = {};
const struct ethtool_ops *ops;
u32 total_size, indir_bytes;
- u8 dev_hfunc = 0;
u8 *rss_config;
int ret;
@@ -59,7 +60,7 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
return -EOPNOTSUPP;
/* Some drivers don't handle rss_context */
- if (request->rss_context && !ops->get_rxfh_context)
+ if (request->rss_context && !ops->cap_rss_ctx_supported)
return -EOPNOTSUPP;
ret = ethnl_ops_begin(dev);
@@ -83,21 +84,21 @@ rss_prepare_data(const struct ethnl_req_info *req_base,
if (data->indir_size)
data->indir_table = (u32 *)rss_config;
-
if (data->hkey_size)
data->hkey = rss_config + indir_bytes;
- if (request->rss_context)
- ret = ops->get_rxfh_context(dev, data->indir_table, data->hkey,
- &dev_hfunc, request->rss_context);
- else
- ret = ops->get_rxfh(dev, data->indir_table, data->hkey,
- &dev_hfunc);
+ rxfh.indir_size = data->indir_size;
+ rxfh.indir = data->indir_table;
+ rxfh.key_size = data->hkey_size;
+ rxfh.key = data->hkey;
+ rxfh.rss_context = request->rss_context;
+ ret = ops->get_rxfh(dev, &rxfh);
if (ret)
goto out_ops;
- data->hfunc = dev_hfunc;
+ data->hfunc = rxfh.hfunc;
+ data->input_xfrm = rxfh.input_xfrm;
out_ops:
ethnl_ops_complete(dev);
return ret;
@@ -111,6 +112,7 @@ rss_reply_size(const struct ethnl_req_info *req_base,
int len;
len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */
+ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */
nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */
nla_total_size(data->hkey_size); /* _RSS_HKEY */
@@ -125,6 +127,8 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
if ((data->hfunc &&
nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
+ (data->input_xfrm &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) ||
(data->indir_size &&
nla_put(skb, ETHTOOL_A_RSS_INDIR,
sizeof(u32) * data->indir_size, data->indir_table)) ||
diff --git a/net/handshake/handshake-test.c b/net/handshake/handshake-test.c
index 16ed7bfd29e4..34fd1d9b2db8 100644
--- a/net/handshake/handshake-test.c
+++ b/net/handshake/handshake-test.c
@@ -471,7 +471,10 @@ static void handshake_req_destroy_test1(struct kunit *test)
handshake_req_cancel(sock->sk);
/* Act */
- fput(filp);
+ /* Ensure the close/release/put process has run to
+ * completion before checking the result.
+ */
+ __fput_sync(filp);
/* Assert */
KUNIT_EXPECT_PTR_EQ(test, handshake_req_destroy_test, req);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 306f942c3b28..9d71b66183da 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -173,7 +173,24 @@ static int hsr_dev_open(struct net_device *dev)
static int hsr_dev_close(struct net_device *dev)
{
- /* Nothing to do here. */
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ dev_uc_unsync(port->dev, dev);
+ dev_mc_unsync(port->dev, dev);
+ break;
+ default:
+ break;
+ }
+ }
+
return 0;
}
@@ -291,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "HSR: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
return;
}
@@ -338,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "PRP: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
return;
}
@@ -404,12 +421,60 @@ void hsr_del_ports(struct hsr_priv *hsr)
hsr_del_port(port);
}
+static void hsr_set_rx_mode(struct net_device *dev)
+{
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ dev_mc_sync_multiple(port->dev, dev);
+ dev_uc_sync_multiple(port->dev, dev);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+static void hsr_change_rx_flags(struct net_device *dev, int change)
+{
+ struct hsr_port *port;
+ struct hsr_priv *hsr;
+
+ hsr = netdev_priv(dev);
+
+ hsr_for_each_port(hsr, port) {
+ if (port->type == HSR_PT_MASTER)
+ continue;
+ switch (port->type) {
+ case HSR_PT_SLAVE_A:
+ case HSR_PT_SLAVE_B:
+ if (change & IFF_ALLMULTI)
+ dev_set_allmulti(port->dev,
+ dev->flags &
+ IFF_ALLMULTI ? 1 : -1);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
static const struct net_device_ops hsr_device_ops = {
.ndo_change_mtu = hsr_dev_change_mtu,
.ndo_open = hsr_dev_open,
.ndo_stop = hsr_dev_close,
.ndo_start_xmit = hsr_dev_xmit,
+ .ndo_change_rx_flags = hsr_change_rx_flags,
.ndo_fix_features = hsr_fix_features,
+ .ndo_set_rx_mode = hsr_set_rx_mode,
};
static struct device_type hsr_type = {
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index 80cdc6f6b34c..5d68cb181695 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -83,7 +83,7 @@ static bool is_supervision_frame(struct hsr_priv *hsr, struct sk_buff *skb)
return false;
/* Get next tlv */
- total_length += sizeof(struct hsr_sup_tlv) + hsr_sup_tag->tlv.HSR_TLV_length;
+ total_length += hsr_sup_tag->tlv.HSR_TLV_length;
if (!pskb_may_pull(skb, total_length))
return false;
skb_pull(skb, total_length);
@@ -435,7 +435,7 @@ static void hsr_forward_do(struct hsr_frame_info *frame)
continue;
/* Don't send frame over port where it has been sent before.
- * Also fro SAN, this shouldn't be done.
+ * Also for SAN, this shouldn't be done.
*/
if (!frame->is_from_san &&
hsr_register_frame_out(port, frame->node_src,
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index b099c3150150..cb83c8feb746 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -167,4 +167,5 @@ static void __exit hsr_exit(void)
module_init(hsr_init);
module_exit(hsr_exit);
+MODULE_DESCRIPTION("High-availability Seamless Redundancy (HSR) driver");
MODULE_LICENSE("GPL");
diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile
index f05b7bdae2aa..7bce67673e83 100644
--- a/net/ieee802154/Makefile
+++ b/net/ieee802154/Makefile
@@ -4,7 +4,7 @@ obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o
obj-y += 6lowpan/
ieee802154-y := netlink.o nl-mac.o nl-phy.o nl_policy.o core.o \
- header_ops.o sysfs.o nl802154.o trace.o
+ header_ops.o sysfs.o nl802154.o trace.o pan.o
ieee802154_socket-y := socket.o
CFLAGS_trace.o := -I$(src)
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index 57546e07e06a..60e8fff1347e 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -198,6 +198,25 @@ void wpan_phy_free(struct wpan_phy *phy)
}
EXPORT_SYMBOL(wpan_phy_free);
+static void cfg802154_free_peer_structures(struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_pan_device *child, *tmp;
+
+ mutex_lock(&wpan_dev->association_lock);
+
+ kfree(wpan_dev->parent);
+ wpan_dev->parent = NULL;
+
+ list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) {
+ list_del(&child->node);
+ kfree(child);
+ }
+
+ wpan_dev->nchildren = 0;
+
+ mutex_unlock(&wpan_dev->association_lock);
+}
+
int cfg802154_switch_netns(struct cfg802154_registered_device *rdev,
struct net *net)
{
@@ -276,6 +295,9 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
wpan_dev->identifier = ++rdev->wpan_dev_id;
list_add_rcu(&wpan_dev->list, &rdev->wpan_dev_list);
rdev->devlist_generation++;
+ mutex_init(&wpan_dev->association_lock);
+ INIT_LIST_HEAD(&wpan_dev->children);
+ wpan_dev->max_associations = SZ_16K;
wpan_dev->netdev = dev;
break;
@@ -291,6 +313,8 @@ static int cfg802154_netdev_notifier_call(struct notifier_block *nb,
rdev->opencount++;
break;
case NETDEV_UNREGISTER:
+ cfg802154_free_peer_structures(wpan_dev);
+
/* It is possible to get NETDEV_UNREGISTER
* multiple times. To detect that, check
* that the interface is still on the list
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 1a265a421308..7eb37de3add2 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -234,6 +234,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
NL802154_SCAN_DONE_REASON_ABORTED),
[NL802154_ATTR_BEACON_INTERVAL] =
NLA_POLICY_MAX(NLA_U8, IEEE802154_ACTIVE_SCAN_DURATION),
+ [NL802154_ATTR_MAX_ASSOCIATIONS] = { .type = NLA_U32 },
+ [NL802154_ATTR_PEER] = { .type = NLA_NESTED },
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
[NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
@@ -248,7 +250,6 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
};
-#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static int
nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
struct netlink_callback *cb,
@@ -307,7 +308,6 @@ nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev)
{
rtnl_unlock();
}
-#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
/* message building helper */
static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
@@ -1087,15 +1087,14 @@ static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info)
pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]);
- /* TODO
- * I am not sure about to check here on broadcast pan_id.
- * Broadcast is a valid setting, comment from 802.15.4:
- * If this value is 0xffff, the device is not associated.
- *
- * This could useful to simple deassociate an device.
+ /* Only allow changing the PAN ID when the device has no more
+ * associations ongoing to avoid confusing peers.
*/
- if (pan_id == cpu_to_le16(IEEE802154_PAN_ID_BROADCAST))
+ if (cfg802154_device_is_associated(wpan_dev)) {
+ NL_SET_ERR_MSG(info->extack,
+ "Existing associations, changing PAN ID forbidden");
return -EINVAL;
+ }
return rdev_set_pan_id(rdev, wpan_dev, pan_id);
}
@@ -1123,20 +1122,17 @@ static int nl802154_set_short_addr(struct sk_buff *skb, struct genl_info *info)
short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]);
- /* TODO
- * I am not sure about to check here on broadcast short_addr.
- * Broadcast is a valid setting, comment from 802.15.4:
- * A value of 0xfffe indicates that the device has
- * associated but has not been allocated an address. A
- * value of 0xffff indicates that the device does not
- * have a short address.
- *
- * I think we should allow to set these settings but
- * don't allow to allow socket communication with it.
+ /* The short address only has a meaning when part of a PAN, after a
+ * proper association procedure. However, we want to still offer the
+ * possibility to create static networks so changing the short address
+ * is only allowed when not already associated to other devices with
+ * the official handshake.
*/
- if (short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC) ||
- short_addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST))
+ if (cfg802154_device_is_associated(wpan_dev)) {
+ NL_SET_ERR_MSG(info->extack,
+ "Existing associations, changing short address forbidden");
return -EINVAL;
+ }
return rdev_set_short_addr(rdev, wpan_dev, short_addr);
}
@@ -1638,6 +1634,189 @@ nl802154_stop_beacons(struct sk_buff *skb, struct genl_info *info)
return rdev_stop_beacons(rdev, wpan_dev);
}
+static int nl802154_associate(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev;
+ struct wpan_phy *wpan_phy;
+ struct ieee802154_addr coord;
+ int err;
+
+ wpan_dev = dev->ieee802154_ptr;
+ wpan_phy = &rdev->wpan_phy;
+
+ if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) {
+ NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams");
+ return -EOPNOTSUPP;
+ }
+
+ if (!info->attrs[NL802154_ATTR_PAN_ID] ||
+ !info->attrs[NL802154_ATTR_EXTENDED_ADDR])
+ return -EINVAL;
+
+ coord.pan_id = nla_get_le16(info->attrs[NL802154_ATTR_PAN_ID]);
+ coord.mode = IEEE802154_ADDR_LONG;
+ coord.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
+
+ mutex_lock(&wpan_dev->association_lock);
+ err = rdev_associate(rdev, wpan_dev, &coord);
+ mutex_unlock(&wpan_dev->association_lock);
+ if (err)
+ pr_err("Association with PAN ID 0x%x failed (%d)\n",
+ le16_to_cpu(coord.pan_id), err);
+
+ return err;
+}
+
+static int nl802154_disassociate(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+ struct wpan_phy *wpan_phy = &rdev->wpan_phy;
+ struct ieee802154_addr target;
+
+ if (wpan_phy->flags & WPAN_PHY_FLAG_DATAGRAMS_ONLY) {
+ NL_SET_ERR_MSG(info->extack, "PHY only supports datagrams");
+ return -EOPNOTSUPP;
+ }
+
+ target.pan_id = wpan_dev->pan_id;
+
+ if (info->attrs[NL802154_ATTR_EXTENDED_ADDR]) {
+ target.mode = IEEE802154_ADDR_LONG;
+ target.extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
+ } else if (info->attrs[NL802154_ATTR_SHORT_ADDR]) {
+ target.mode = IEEE802154_ADDR_SHORT;
+ target.short_addr = nla_get_le16(info->attrs[NL802154_ATTR_SHORT_ADDR]);
+ } else {
+ NL_SET_ERR_MSG(info->extack, "Device address is missing");
+ return -EINVAL;
+ }
+
+ mutex_lock(&wpan_dev->association_lock);
+ rdev_disassociate(rdev, wpan_dev, &target);
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return 0;
+}
+
+static int nl802154_set_max_associations(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+ unsigned int max_assoc;
+
+ if (!info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]) {
+ NL_SET_ERR_MSG(info->extack, "No maximum number of association given");
+ return -EINVAL;
+ }
+
+ max_assoc = nla_get_u32(info->attrs[NL802154_ATTR_MAX_ASSOCIATIONS]);
+
+ mutex_lock(&wpan_dev->association_lock);
+ cfg802154_set_max_associations(wpan_dev, max_assoc);
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return 0;
+}
+
+static int nl802154_send_peer_info(struct sk_buff *msg,
+ struct netlink_callback *cb,
+ u32 seq, int flags,
+ struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_pan_device *peer,
+ enum nl802154_peer_type type)
+{
+ struct nlattr *nla;
+ void *hdr;
+
+ ASSERT_RTNL();
+
+ hdr = nl802154hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
+ NL802154_CMD_LIST_ASSOCIATIONS);
+ if (!hdr)
+ return -ENOBUFS;
+
+ genl_dump_check_consistent(cb, hdr);
+
+ nla = nla_nest_start_noflag(msg, NL802154_ATTR_PEER);
+ if (!nla)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_PEER_TYPE, type))
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, NL802154_DEV_ADDR_ATTR_MODE, peer->mode))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL802154_DEV_ADDR_ATTR_SHORT,
+ IEEE802154_SHORT_ADDR_LEN, &peer->short_addr))
+ goto nla_put_failure;
+
+ if (nla_put(msg, NL802154_DEV_ADDR_ATTR_EXTENDED,
+ IEEE802154_EXTENDED_ADDR_LEN, &peer->extended_addr))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, nla);
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+ nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int nl802154_list_associations(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct cfg802154_registered_device *rdev;
+ struct ieee802154_pan_device *child;
+ struct wpan_dev *wpan_dev;
+ int err;
+
+ err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
+ if (err)
+ return err;
+
+ mutex_lock(&wpan_dev->association_lock);
+
+ if (cb->args[2])
+ goto out;
+
+ if (wpan_dev->parent) {
+ err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, rdev, wpan_dev,
+ wpan_dev->parent,
+ NL802154_PEER_TYPE_PARENT);
+ if (err < 0)
+ goto out_err;
+ }
+
+ list_for_each_entry(child, &wpan_dev->children, node) {
+ err = nl802154_send_peer_info(skb, cb, cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, rdev, wpan_dev,
+ child,
+ NL802154_PEER_TYPE_CHILD);
+ if (err < 0)
+ goto out_err;
+ }
+
+ cb->args[2] = 1;
+out:
+ err = skb->len;
+out_err:
+ mutex_unlock(&wpan_dev->association_lock);
+
+ nl802154_finish_wpan_dev_dump(rdev);
+
+ return err;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
[NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
@@ -2759,6 +2938,34 @@ static const struct genl_ops nl802154_ops[] = {
NL802154_FLAG_CHECK_NETDEV_UP |
NL802154_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL802154_CMD_ASSOCIATE,
+ .doit = nl802154_associate,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_DISASSOCIATE,
+ .doit = nl802154_disassociate,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_SET_MAX_ASSOCIATIONS,
+ .doit = nl802154_set_max_associations,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_LIST_ASSOCIATIONS,
+ .dumpit = nl802154_list_associations,
+ /* can be retrieved by unprivileged users */
+ },
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
{
.cmd = NL802154_CMD_SET_SEC_PARAMS,
diff --git a/net/ieee802154/pan.c b/net/ieee802154/pan.c
new file mode 100644
index 000000000000..249df7364b3e
--- /dev/null
+++ b/net/ieee802154/pan.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * IEEE 802.15.4 PAN management
+ *
+ * Copyright (C) 2023 Qorvo US, Inc
+ * Authors:
+ * - David Girault <david.girault@qorvo.com>
+ * - Miquel Raynal <miquel.raynal@bootlin.com>
+ */
+
+#include <linux/kernel.h>
+#include <net/cfg802154.h>
+#include <net/af_ieee802154.h>
+
+/* Checks whether a device address matches one from the PAN list.
+ * This helper is meant to be used only during PAN management, when we expect
+ * extended addresses to be used.
+ */
+static bool cfg802154_pan_device_is_matching(struct ieee802154_pan_device *pan_dev,
+ struct ieee802154_addr *ext_dev)
+{
+ if (!pan_dev || !ext_dev)
+ return false;
+
+ if (ext_dev->mode == IEEE802154_ADDR_SHORT)
+ return false;
+
+ return pan_dev->extended_addr == ext_dev->extended_addr;
+}
+
+bool cfg802154_device_is_associated(struct wpan_dev *wpan_dev)
+{
+ bool is_assoc;
+
+ mutex_lock(&wpan_dev->association_lock);
+ is_assoc = !list_empty(&wpan_dev->children) || wpan_dev->parent;
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return is_assoc;
+}
+
+bool cfg802154_device_is_parent(struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ return cfg802154_pan_device_is_matching(wpan_dev->parent, target);
+}
+EXPORT_SYMBOL_GPL(cfg802154_device_is_parent);
+
+struct ieee802154_pan_device *
+cfg802154_device_is_child(struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ struct ieee802154_pan_device *child;
+
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ list_for_each_entry(child, &wpan_dev->children, node)
+ if (cfg802154_pan_device_is_matching(child, target))
+ return child;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(cfg802154_device_is_child);
+
+__le16 cfg802154_get_free_short_addr(struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_pan_device *child;
+ __le16 addr;
+
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ do {
+ get_random_bytes(&addr, 2);
+ if (addr == cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST) ||
+ addr == cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC))
+ continue;
+
+ if (wpan_dev->short_addr == addr)
+ continue;
+
+ if (wpan_dev->parent && wpan_dev->parent->short_addr == addr)
+ continue;
+
+ list_for_each_entry(child, &wpan_dev->children, node)
+ if (child->short_addr == addr)
+ continue;
+
+ break;
+ } while (1);
+
+ return addr;
+}
+EXPORT_SYMBOL_GPL(cfg802154_get_free_short_addr);
+
+unsigned int cfg802154_set_max_associations(struct wpan_dev *wpan_dev,
+ unsigned int max)
+{
+ unsigned int old_max;
+
+ lockdep_assert_held(&wpan_dev->association_lock);
+
+ old_max = wpan_dev->max_associations;
+ wpan_dev->max_associations = max;
+
+ return old_max;
+}
+EXPORT_SYMBOL_GPL(cfg802154_set_max_associations);
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 5eaae15c610e..64071ef6f57b 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -265,6 +265,36 @@ static inline int rdev_stop_beacons(struct cfg802154_registered_device *rdev,
return ret;
}
+static inline int rdev_associate(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *coord)
+{
+ int ret;
+
+ if (!rdev->ops->associate)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_associate(&rdev->wpan_phy, wpan_dev, coord);
+ ret = rdev->ops->associate(&rdev->wpan_phy, wpan_dev, coord);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int rdev_disassociate(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ int ret;
+
+ if (!rdev->ops->disassociate)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_disassociate(&rdev->wpan_phy, wpan_dev, target);
+ ret = rdev->ops->disassociate(&rdev->wpan_phy, wpan_dev, target);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
/* TODO this is already a nl802154, so move into ieee802154 */
static inline void
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index c16db0b326fa..62aa6465253a 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -356,6 +356,44 @@ DEFINE_EVENT(802154_wdev_template, 802154_rdev_stop_beacons,
TP_ARGS(wpan_phy, wpan_dev)
);
+TRACE_EVENT(802154_rdev_associate,
+ TP_PROTO(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *coord),
+ TP_ARGS(wpan_phy, wpan_dev, coord),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(__le64, addr)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->addr = coord->extended_addr;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", associating with: 0x%llx",
+ WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr)
+);
+
+TRACE_EVENT(802154_rdev_disassociate,
+ TP_PROTO(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target),
+ TP_ARGS(wpan_phy, wpan_dev, target),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(__le64, addr)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->addr = target->extended_addr;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT ", disassociating with: 0x%llx",
+ WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG, __entry->addr)
+);
+
TRACE_EVENT(802154_rdev_return_int,
TP_PROTO(struct wpan_phy *wpan_phy, int ret),
TP_ARGS(wpan_phy, ret),
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index e144a02a6a61..ec36d2ec059e 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -16,8 +16,6 @@ obj-y := route.o inetpeer.o protocol.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \
metrics.o netlink.o nexthop.o udp_tunnel_stub.o
-obj-$(CONFIG_BPFILTER) += bpfilter/
-
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o
obj-$(CONFIG_PROC_FS) += proc.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fb81de10d332..a5a820ee2026 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -330,6 +330,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
@@ -1625,14 +1628,17 @@ EXPORT_SYMBOL(inet_current_timestamp);
int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
- if (sk->sk_family == AF_INET)
+ unsigned int family = READ_ONCE(sk->sk_family);
+
+ if (family == AF_INET)
return ip_recv_error(sk, msg, len, addr_len);
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
+ if (family == AF_INET6)
return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
#endif
return -EINVAL;
}
+EXPORT_SYMBOL(inet_recv_error);
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
@@ -1847,9 +1853,7 @@ static __net_init int inet_init_net(struct net *net)
/*
* Set defaults for local port range
*/
- seqlock_init(&net->ipv4.ip_local_ports.lock);
- net->ipv4.ip_local_ports.range[0] = 32768;
- net->ipv4.ip_local_ports.range[1] = 60999;
+ net->ipv4.ip_local_ports.range = 60999u << 16 | 32768u;
seqlock_init(&net->ipv4.ping_group_range.lock);
/*
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a2e6e1fdf82b..64aec3dff8ec 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -597,5 +597,6 @@ static void __exit ah4_fini(void)
module_init(ah4_init);
module_exit(ah4_fini);
+MODULE_DESCRIPTION("IPv4 AH transformation library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 9456f5bb35e5..0d0d725b46ad 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1125,7 +1125,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
if (neigh) {
if (!(READ_ONCE(neigh->nud_state) & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, sizeof(r->arp_ha.sa_data_min)));
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 39dcccf0f174..ae8b15e6896f 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -271,6 +271,74 @@ static int bpf_tcp_ca_validate(void *kdata)
return tcp_validate_congestion_control(kdata);
}
+static u32 bpf_tcp_ca_ssthresh(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+{
+}
+
+static void bpf_tcp_ca_set_state(struct sock *sk, u8 new_state)
+{
+}
+
+static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+{
+}
+
+static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
+{
+}
+
+static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample)
+{
+}
+
+static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
+{
+ return 0;
+}
+
+static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+{
+}
+
+static u32 bpf_tcp_ca_undo_cwnd(struct sock *sk)
+{
+ return 0;
+}
+
+static u32 bpf_tcp_ca_sndbuf_expand(struct sock *sk)
+{
+ return 0;
+}
+
+static void __bpf_tcp_ca_init(struct sock *sk)
+{
+}
+
+static void __bpf_tcp_ca_release(struct sock *sk)
+{
+}
+
+static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
+ .ssthresh = bpf_tcp_ca_ssthresh,
+ .cong_avoid = bpf_tcp_ca_cong_avoid,
+ .set_state = bpf_tcp_ca_set_state,
+ .cwnd_event = bpf_tcp_ca_cwnd_event,
+ .in_ack_event = bpf_tcp_ca_in_ack_event,
+ .pkts_acked = bpf_tcp_ca_pkts_acked,
+ .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+ .cong_control = bpf_tcp_ca_cong_control,
+ .undo_cwnd = bpf_tcp_ca_undo_cwnd,
+ .sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
+
+ .init = __bpf_tcp_ca_init,
+ .release = __bpf_tcp_ca_release,
+};
+
struct bpf_struct_ops bpf_tcp_congestion_ops = {
.verifier_ops = &bpf_tcp_ca_verifier_ops,
.reg = bpf_tcp_ca_reg,
@@ -281,6 +349,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
.init = bpf_tcp_ca_init,
.validate = bpf_tcp_ca_validate,
.name = "tcp_congestion_ops",
+ .cfi_stubs = &__bpf_ops_tcp_congestion_ops,
};
static int __init bpf_tcp_ca_kfunc_init(void)
diff --git a/net/ipv4/bpfilter/Makefile b/net/ipv4/bpfilter/Makefile
deleted file mode 100644
index 00af5305e05a..000000000000
--- a/net/ipv4/bpfilter/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_BPFILTER) += sockopt.o
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
deleted file mode 100644
index 193bcc2acccc..000000000000
--- a/net/ipv4/bpfilter/sockopt.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <linux/bpfilter.h>
-#include <uapi/linux/bpf.h>
-#include <linux/wait.h>
-#include <linux/kmod.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-
-struct bpfilter_umh_ops bpfilter_ops;
-EXPORT_SYMBOL_GPL(bpfilter_ops);
-
-static int bpfilter_mbox_request(struct sock *sk, int optname, sockptr_t optval,
- unsigned int optlen, bool is_set)
-{
- int err;
- mutex_lock(&bpfilter_ops.lock);
- if (!bpfilter_ops.sockopt) {
- mutex_unlock(&bpfilter_ops.lock);
- request_module("bpfilter");
- mutex_lock(&bpfilter_ops.lock);
-
- if (!bpfilter_ops.sockopt) {
- err = -ENOPROTOOPT;
- goto out;
- }
- }
- if (bpfilter_ops.info.tgid &&
- thread_group_exited(bpfilter_ops.info.tgid))
- umd_cleanup_helper(&bpfilter_ops.info);
-
- if (!bpfilter_ops.info.tgid) {
- err = bpfilter_ops.start();
- if (err)
- goto out;
- }
- err = bpfilter_ops.sockopt(sk, optname, optval, optlen, is_set);
-out:
- mutex_unlock(&bpfilter_ops.lock);
- return err;
-}
-
-int bpfilter_ip_set_sockopt(struct sock *sk, int optname, sockptr_t optval,
- unsigned int optlen)
-{
- return bpfilter_mbox_request(sk, optname, optval, optlen, true);
-}
-
-int bpfilter_ip_get_sockopt(struct sock *sk, int optname, char __user *optval,
- int __user *optlen)
-{
- int len;
-
- if (get_user(len, optlen))
- return -EFAULT;
-
- return bpfilter_mbox_request(sk, optname, USER_SOCKPTR(optval), len,
- false);
-}
-
-static int __init bpfilter_sockopt_init(void)
-{
- mutex_init(&bpfilter_ops.lock);
- bpfilter_ops.info.tgid = NULL;
- bpfilter_ops.info.driver_name = "bpfilter_umh";
-
- return 0;
-}
-device_initcall(bpfilter_sockopt_init);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ca0ff15dc8fa..bc74f131fe4d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1825,6 +1825,21 @@ done:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
@@ -1876,8 +1891,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
head = &tgt_net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
- tgt_net->dev_base_seq;
+ cb->seq = inet_base_seq(tgt_net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -2278,8 +2292,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 4ccfc104f13a..4dd9e5040672 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1247,5 +1247,6 @@ static void __exit esp4_fini(void)
module_init(esp4_init);
module_exit(esp4_fini);
+MODULE_DESCRIPTION("IPv4 ESP transformation library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_ESP);
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 513f475c6a53..5bdd1c016009 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -395,13 +395,13 @@ static int fib_default_rules_init(struct fib_rules_ops *ops)
{
int err;
- err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL, 0);
+ err = fib_default_rule_add(ops, 0, RT_TABLE_LOCAL);
if (err < 0)
return err;
- err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN, 0);
+ err = fib_default_rule_add(ops, 0x7FFE, RT_TABLE_MAIN);
if (err < 0)
return err;
- err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT, 0);
+ err = fib_default_rule_add(ops, 0x7FFF, RT_TABLE_DEFAULT);
if (err < 0)
return err;
return 0;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 9bdfdab906fe..3ff35f811765 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -52,6 +52,7 @@
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
+#include <linux/rcupdate_wait.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 394a498c2823..459af1f89739 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,37 +117,39 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-void inet_get_local_port_range(const struct net *net, int *low, int *high)
-{
- unsigned int seq;
-
- do {
- seq = read_seqbegin(&net->ipv4.ip_local_ports.lock);
-
- *low = net->ipv4.ip_local_ports.range[0];
- *high = net->ipv4.ip_local_ports.range[1];
- } while (read_seqretry(&net->ipv4.ip_local_ports.lock, seq));
-}
-EXPORT_SYMBOL(inet_get_local_port_range);
-
-void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
+/**
+ * inet_sk_get_local_port_range - fetch ephemeral ports range
+ * @sk: socket
+ * @low: pointer to low port
+ * @high: pointer to high port
+ *
+ * Fetch netns port range (/proc/sys/net/ipv4/ip_local_port_range)
+ * Range can be overridden if socket got IP_LOCAL_PORT_RANGE option.
+ * Returns true if IP_LOCAL_PORT_RANGE was set on this socket.
+ */
+bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
{
- const struct inet_sock *inet = inet_sk(sk);
- const struct net *net = sock_net(sk);
int lo, hi, sk_lo, sk_hi;
+ bool local_range = false;
+ u32 sk_range;
- inet_get_local_port_range(net, &lo, &hi);
+ inet_get_local_port_range(sock_net(sk), &lo, &hi);
- sk_lo = inet->local_port_range.lo;
- sk_hi = inet->local_port_range.hi;
+ sk_range = READ_ONCE(inet_sk(sk)->local_port_range);
+ if (unlikely(sk_range)) {
+ sk_lo = sk_range & 0xffff;
+ sk_hi = sk_range >> 16;
- if (unlikely(lo <= sk_lo && sk_lo <= hi))
- lo = sk_lo;
- if (unlikely(lo <= sk_hi && sk_hi <= hi))
- hi = sk_hi;
+ if (lo <= sk_lo && sk_lo <= hi)
+ lo = sk_lo;
+ if (lo <= sk_hi && sk_hi <= hi)
+ hi = sk_hi;
+ local_range = true;
+ }
*low = lo;
*high = hi;
+ return local_range;
}
EXPORT_SYMBOL(inet_sk_get_local_port_range);
@@ -157,8 +159,11 @@ static bool inet_use_bhash2_on_bind(const struct sock *sk)
if (sk->sk_family == AF_INET6) {
int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
+ if (addr_type == IPV6_ADDR_ANY)
+ return false;
+
+ if (addr_type != IPV6_ADDR_MAPPED)
+ return true;
}
#endif
return sk->sk_rcv_saddr != htonl(INADDR_ANY);
@@ -211,18 +216,9 @@ static bool inet_bhash2_conflict(const struct sock *sk,
bool relax, bool reuseport_cb_ok,
bool reuseport_ok)
{
- struct inet_timewait_sock *tw2;
struct sock *sk2;
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
-
- twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
- sk2 = (struct sock *)tw2;
-
+ sk_for_each_bound(sk2, &tb2->owners) {
if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
reuseport_cb_ok, reuseport_ok))
return true;
@@ -231,15 +227,20 @@ static bool inet_bhash2_conflict(const struct sock *sk,
return false;
}
+#define sk_for_each_bound_bhash(__sk, __tb2, __tb) \
+ hlist_for_each_entry(__tb2, &(__tb)->bhash2, bhash_node) \
+ sk_for_each_bound(sk2, &(__tb2)->owners)
+
/* This should be called only when the tb and tb2 hashbuckets' locks are held */
static int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb,
const struct inet_bind2_bucket *tb2, /* may be null */
bool relax, bool reuseport_ok)
{
- bool reuseport_cb_ok;
- struct sock_reuseport *reuseport_cb;
kuid_t uid = sock_i_uid((struct sock *)sk);
+ struct sock_reuseport *reuseport_cb;
+ bool reuseport_cb_ok;
+ struct sock *sk2;
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
@@ -247,32 +248,29 @@ static int inet_csk_bind_conflict(const struct sock *sk,
reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
rcu_read_unlock();
- /*
- * Unlike other sk lookup places we do not check
+ /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
+ * ipv4) should have been checked already. We need to do these two
+ * checks separately because their spinlocks have to be acquired/released
+ * independently of each other, to prevent possible deadlocks
+ */
+ if (inet_use_bhash2_on_bind(sk))
+ return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
+ reuseport_cb_ok, reuseport_ok);
+
+ /* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
* in tb->owners and tb2->owners list belong
* to the same net - the one this bucket belongs to.
*/
+ sk_for_each_bound_bhash(sk2, tb2, tb) {
+ if (!inet_bind_conflict(sk, sk2, uid, relax, reuseport_cb_ok, reuseport_ok))
+ continue;
- if (!inet_use_bhash2_on_bind(sk)) {
- struct sock *sk2;
-
- sk_for_each_bound(sk2, &tb->owners)
- if (inet_bind_conflict(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
-
- return false;
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ return true;
}
- /* Conflicts with an existing IPV6_ADDR_ANY (if ipv6) or INADDR_ANY (if
- * ipv4) should have been checked already. We need to do these two
- * checks separately because their spinlocks have to be acquired/released
- * independently of each other, to prevent possible deadlocks
- */
- return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok);
+ return false;
}
/* Determine if there is a bind conflict with an existing IPV6_ADDR_ANY (if ipv6) or
@@ -455,7 +453,7 @@ void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
kuid_t uid = sock_i_uid(sk);
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->bhash2)) {
tb->fastreuse = reuse;
if (sk->sk_reuseport) {
tb->fastreuseport = FASTREUSEPORT_ANY;
@@ -547,7 +545,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
}
if (!found_port) {
- if (!hlist_empty(&tb->owners)) {
+ if (!hlist_empty(&tb->bhash2)) {
if (sk->sk_reuse == SK_FORCE_REUSE ||
(tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
@@ -567,7 +565,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
if (!tb2) {
tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
+ net, head2, tb, sk);
if (!tb2)
goto fail_unlock;
bhash2_created = true;
@@ -589,11 +587,10 @@ success:
fail_unlock:
if (ret) {
+ if (bhash2_created)
+ inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, tb2);
if (bhash_created)
inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
}
if (head2_lock_acquired)
spin_unlock(&head2->lock);
@@ -730,6 +727,10 @@ out:
}
if (req)
reqsk_put(req);
+
+ if (newsk)
+ inet_init_csk_locks(newsk);
+
return newsk;
out_err:
newsk = NULL;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7d0e7aaa71e0..8e6b6aa0579e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1077,10 +1077,94 @@ skip_listen_ht:
s_i = num = s_num = 0;
}
+/* Process a maximum of SKARR_SZ sockets at a time when walking hash buckets
+ * with bh disabled.
+ */
+#define SKARR_SZ 16
+
+ /* Dump bound but inactive (not listening, connecting, etc.) sockets */
+ if (cb->args[0] == 1) {
+ if (!(idiag_states & TCPF_BOUND_INACTIVE))
+ goto skip_bind_ht;
+
+ for (i = s_i; i < hashinfo->bhash_size; i++) {
+ struct inet_bind_hashbucket *ibb;
+ struct inet_bind2_bucket *tb2;
+ struct sock *sk_arr[SKARR_SZ];
+ int num_arr[SKARR_SZ];
+ int idx, accum, res;
+
+resume_bind_walk:
+ num = 0;
+ accum = 0;
+ ibb = &hashinfo->bhash2[i];
+
+ spin_lock_bh(&ibb->lock);
+ inet_bind_bucket_for_each(tb2, &ibb->chain) {
+ if (!net_eq(ib2_net(tb2), net))
+ continue;
+
+ sk_for_each_bound(sk, &tb2->owners) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ if (num < s_num)
+ goto next_bind;
+
+ if (sk->sk_state != TCP_CLOSE ||
+ !inet->inet_num)
+ goto next_bind;
+
+ if (r->sdiag_family != AF_UNSPEC &&
+ r->sdiag_family != sk->sk_family)
+ goto next_bind;
+
+ if (!inet_diag_bc_sk(bc, sk))
+ goto next_bind;
+
+ sock_hold(sk);
+ num_arr[accum] = num;
+ sk_arr[accum] = sk;
+ if (++accum == SKARR_SZ)
+ goto pause_bind_walk;
+next_bind:
+ num++;
+ }
+ }
+pause_bind_walk:
+ spin_unlock_bh(&ibb->lock);
+
+ res = 0;
+ for (idx = 0; idx < accum; idx++) {
+ if (res >= 0) {
+ res = inet_sk_diag_fill(sk_arr[idx],
+ NULL, skb, cb,
+ r, NLM_F_MULTI,
+ net_admin);
+ if (res < 0)
+ num = num_arr[idx];
+ }
+ sock_put(sk_arr[idx]);
+ }
+ if (res < 0)
+ goto done;
+
+ cond_resched();
+
+ if (accum == SKARR_SZ) {
+ s_num = num + 1;
+ goto resume_bind_walk;
+ }
+
+ s_num = 0;
+ }
+skip_bind_ht:
+ cb->args[0] = 2;
+ s_i = num = s_num = 0;
+ }
+
if (!(idiag_states & ~TCPF_LISTEN))
goto out;
-#define SKARR_SZ 16
for (i = s_i; i <= hashinfo->ehash_mask; i++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[i];
spinlock_t *lock = inet_ehash_lockp(hashinfo, i);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index a532f749e477..308ff34002ea 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -76,7 +76,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
tb->port = snum;
tb->fastreuse = 0;
tb->fastreuseport = 0;
- INIT_HLIST_HEAD(&tb->owners);
+ INIT_HLIST_HEAD(&tb->bhash2);
hlist_add_head(&tb->node, &head->chain);
}
return tb;
@@ -87,7 +87,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
*/
void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket *tb)
{
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->bhash2)) {
__hlist_del(&tb->node);
kmem_cache_free(cachep, tb);
}
@@ -100,47 +100,52 @@ bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net
tb->l3mdev == l3mdev;
}
-static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
+static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb2,
struct net *net,
struct inet_bind_hashbucket *head,
- unsigned short port, int l3mdev,
+ struct inet_bind_bucket *tb,
const struct sock *sk)
{
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
+ write_pnet(&tb2->ib_net, net);
+ tb2->l3mdev = tb->l3mdev;
+ tb2->port = tb->port;
#if IS_ENABLED(CONFIG_IPV6)
- tb->family = sk->sk_family;
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
+ BUILD_BUG_ON(USHRT_MAX < (IPV6_ADDR_ANY | IPV6_ADDR_MAPPED));
+ if (sk->sk_family == AF_INET6) {
+ tb2->addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+ tb2->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ } else {
+ tb2->addr_type = IPV6_ADDR_MAPPED;
+ ipv6_addr_set_v4mapped(sk->sk_rcv_saddr, &tb2->v6_rcv_saddr);
+ }
+#else
+ tb2->rcv_saddr = sk->sk_rcv_saddr;
#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- INIT_HLIST_HEAD(&tb->deathrow);
- hlist_add_head(&tb->node, &head->chain);
+ INIT_HLIST_HEAD(&tb2->owners);
+ hlist_add_head(&tb2->node, &head->chain);
+ hlist_add_head(&tb2->bhash_node, &tb->bhash2);
}
struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
struct net *net,
struct inet_bind_hashbucket *head,
- unsigned short port,
- int l3mdev,
+ struct inet_bind_bucket *tb,
const struct sock *sk)
{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
+ struct inet_bind2_bucket *tb2 = kmem_cache_alloc(cachep, GFP_ATOMIC);
- if (tb)
- inet_bind2_bucket_init(tb, net, head, port, l3mdev, sk);
+ if (tb2)
+ inet_bind2_bucket_init(tb2, net, head, tb, sk);
- return tb;
+ return tb2;
}
/* Caller must hold hashbucket lock for this tb with local BH disabled */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
- if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
+ if (hlist_empty(&tb->owners)) {
__hlist_del(&tb->node);
+ __hlist_del(&tb->bhash_node);
kmem_cache_free(cachep, tb);
}
}
@@ -149,18 +154,11 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb2->family) {
- if (sk->sk_family == AF_INET)
- return ipv6_addr_v4mapped(&tb2->v6_rcv_saddr) &&
- tb2->v6_rcv_saddr.s6_addr32[3] == sk->sk_rcv_saddr;
-
- return ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr) &&
- sk->sk_v6_rcv_saddr.s6_addr32[3] == tb2->rcv_saddr;
- }
-
if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
+ return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
+
+ if (tb2->addr_type != IPV6_ADDR_MAPPED)
+ return false;
#endif
return tb2->rcv_saddr == sk->sk_rcv_saddr;
}
@@ -169,10 +167,9 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
struct inet_bind2_bucket *tb2, unsigned short port)
{
inet_sk(sk)->inet_num = port;
- sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
inet_csk(sk)->icsk_bind2_hash = tb2;
+ sk_add_bind_node(sk, &tb2->owners);
}
/*
@@ -192,21 +189,20 @@ static void __inet_put_port(struct sock *sk)
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- __sk_del_bind_node(sk);
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
- inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
spin_lock(&head2->lock);
if (inet_csk(sk)->icsk_bind2_hash) {
struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
+ __sk_del_bind_node(sk);
inet_csk(sk)->icsk_bind2_hash = NULL;
inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
}
spin_unlock(&head2->lock);
+ inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
spin_unlock(&head->lock);
}
@@ -275,8 +271,7 @@ bhash2_find:
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child);
if (!tb2) {
tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head2, port,
- l3mdev, child);
+ net, head2, tb, child);
if (!tb2)
goto error;
}
@@ -836,16 +831,15 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const
return false;
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family != tb->family) {
- if (sk->sk_family == AF_INET)
- return ipv6_addr_any(&tb->v6_rcv_saddr) ||
- ipv6_addr_v4mapped_any(&tb->v6_rcv_saddr);
+ if (tb->addr_type == IPV6_ADDR_ANY)
+ return true;
+ if (tb->addr_type != IPV6_ADDR_MAPPED)
return false;
- }
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_any(&tb->v6_rcv_saddr);
+ if (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return false;
#endif
return tb->rcv_saddr == 0;
}
@@ -942,7 +936,7 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family,
spin_lock_bh(&head->lock);
spin_lock(&head2->lock);
- __sk_del_bind2_node(sk);
+ __sk_del_bind_node(sk);
inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep, inet_csk(sk)->icsk_bind2_hash);
spin_unlock(&head2->lock);
@@ -957,10 +951,10 @@ static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family,
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
if (!tb2) {
tb2 = new_tb2;
- inet_bind2_bucket_init(tb2, net, head2, port, l3mdev, sk);
+ inet_bind2_bucket_init(tb2, net, head2, inet_csk(sk)->icsk_bind_hash, sk);
}
- sk_add_bind2_node(sk, &tb2->owners);
inet_csk(sk)->icsk_bind2_hash = tb2;
+ sk_add_bind_node(sk, &tb2->owners);
spin_unlock(&head2->lock);
spin_unlock_bh(&head->lock);
@@ -1012,7 +1006,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
- int l3mdev;
+ bool local_ports;
+ int step, l3mdev;
u32 index;
if (port) {
@@ -1024,10 +1019,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
l3mdev = inet_sk_bound_l3mdev(sk);
- inet_sk_get_local_port_range(sk, &low, &high);
+ local_ports = inet_sk_get_local_port_range(sk, &low, &high);
+ step = local_ports ? 1 : 2;
+
high++; /* [32768, 60999] -> [32768, 61000[ */
remaining = high - low;
- if (likely(remaining > 1))
+ if (!local_ports && remaining > 1)
remaining &= ~1U;
get_random_sleepable_once(table_perturb,
@@ -1040,10 +1037,11 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
/* In first pass we try ports of @low parity.
* inet_csk_get_port() does the opposite choice.
*/
- offset &= ~1U;
+ if (!local_ports)
+ offset &= ~1U;
other_parity_scan:
port = low + offset;
- for (i = 0; i < remaining; i += 2, port += 2) {
+ for (i = 0; i < remaining; i += step, port += step) {
if (unlikely(port >= high))
port -= remaining;
if (inet_is_local_reserved_port(net, port))
@@ -1060,7 +1058,7 @@ other_parity_scan:
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
- WARN_ON(hlist_empty(&tb->owners));
+ WARN_ON(hlist_empty(&tb->bhash2));
if (!check_established(death_row, sk,
port, &tw))
goto ok;
@@ -1083,10 +1081,11 @@ next_port:
cond_resched();
}
- offset++;
- if ((offset & 1) && remaining > 1)
- goto other_parity_scan;
-
+ if (!local_ports) {
+ offset++;
+ if ((offset & 1) && remaining > 1)
+ goto other_parity_scan;
+ }
return -EADDRNOTAVAIL;
ok:
@@ -1099,7 +1098,7 @@ ok:
tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
if (!tb2) {
tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
+ head2, tb, sk);
if (!tb2)
goto error;
}
@@ -1109,8 +1108,8 @@ ok:
* on low contention the randomness is maximal and on high contention
* it may be inexistent.
*/
- i = max_t(int, i, get_random_u32_below(8) * 2);
- WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
+ i = max_t(int, i, get_random_u32_below(8) * step);
+ WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step);
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, tb2, port);
@@ -1131,10 +1130,33 @@ ok:
return 0;
error:
+ if (sk_hashed(sk)) {
+ spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash);
+
+ sock_prot_inuse_add(net, sk->sk_prot, -1);
+
+ spin_lock(lock);
+ sk_nulls_del_node_init_rcu(sk);
+ spin_unlock(lock);
+
+ sk->sk_hash = 0;
+ inet_sk(sk)->inet_sport = 0;
+ inet_sk(sk)->inet_num = 0;
+
+ if (tw)
+ inet_twsk_bind_unhash(tw, hinfo);
+ }
+
spin_unlock(&head2->lock);
if (tb_created)
inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
+ spin_unlock(&head->lock);
+
+ if (tw)
+ inet_twsk_deschedule_put(tw);
+
+ local_bh_enable();
+
return -ENOMEM;
}
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index dd37a5bf6881..5befa4de5b24 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -35,13 +35,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
if (!tb)
return;
- __hlist_del(&tw->tw_bind_node);
+ __sk_del_bind_node((struct sock *)tw);
tw->tw_tb = NULL;
- inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- __hlist_del(&tw->tw_bind2_node);
tw->tw_tb2 = NULL;
inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+ inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
__sock_put((struct sock *)tw);
}
@@ -94,18 +92,6 @@ static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
hlist_nulls_add_head_rcu(&tw->tw_node, list);
}
-static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
- struct hlist_head *list)
-{
- hlist_add_head(&tw->tw_bind_node, list);
-}
-
-static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
- struct hlist_head *list)
-{
- hlist_add_head(&tw->tw_bind2_node, list);
-}
-
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -133,11 +119,10 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
- inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
tw->tw_tb2 = icsk->icsk_bind2_hash;
WARN_ON(!icsk->icsk_bind2_hash);
- inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow);
+ sk_add_bind_node((struct sock *)tw, &tw->tw_tb2->owners);
spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5169c3c72cff..6b9cf5a24c19 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1793,6 +1793,7 @@ static void __exit ipgre_fini(void)
module_init(ipgre_init);
module_exit(ipgre_fini);
+MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b06f678b03a1..67d846622365 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -972,8 +972,8 @@ static int __ip_append_data(struct sock *sk,
unsigned int maxfraglen, fragheaderlen, maxnonfragsize;
int csummode = CHECKSUM_NONE;
struct rtable *rt = (struct rtable *)cork->dst;
+ bool paged, hold_tskey, extra_uref = false;
unsigned int wmem_alloc_delta = 0;
- bool paged, extra_uref = false;
u32 tskey = 0;
skb = skb_peek_tail(queue);
@@ -982,10 +982,6 @@ static int __ip_append_data(struct sock *sk,
mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
paged = !!cork->gso_size;
- if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
@@ -1052,6 +1048,11 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
+ hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+ if (hold_tskey)
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
/* So, what's going on in the loop below?
*
* We use calculated fragment length to generate chained skb,
@@ -1274,6 +1275,8 @@ error:
cork->length -= length;
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+ if (hold_tskey)
+ atomic_dec(&sk->sk_tskey);
return err;
}
@@ -1287,6 +1290,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
if (unlikely(!rt))
return -EFAULT;
+ cork->fragsize = ip_sk_use_pmtu(sk) ?
+ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+ if (!inetdev_valid_mtu(cork->fragsize))
+ return -ENETUNREACH;
+
/*
* setup for corking.
*/
@@ -1303,12 +1312,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->addr = ipc->addr;
}
- cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
-
- if (!inetdev_valid_mtu(cork->fragsize))
- return -ENETUNREACH;
-
cork->gso_size = ipc->gso_size;
cork->dst = &rt->dst;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 2efc53526a38..21d2ffa919e9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -47,8 +47,6 @@
#include <linux/errqueue.h>
#include <linux/uaccess.h>
-#include <linux/bpfilter.h>
-
/*
* SOL_IP control messages.
*/
@@ -775,7 +773,7 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max))
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -811,7 +809,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -1055,6 +1053,19 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
case IP_TOS: /* This sets both TOS and Precedence */
ip_sock_set_tos(sk, val);
return 0;
+ case IP_LOCAL_PORT_RANGE:
+ {
+ u16 lo = val;
+ u16 hi = val >> 16;
+
+ if (optlen != sizeof(u32))
+ return -EINVAL;
+ if (lo != 0 && hi != 0 && lo > hi)
+ return -EINVAL;
+
+ WRITE_ONCE(inet->local_port_range, val);
+ return 0;
+ }
}
err = 0;
@@ -1241,7 +1252,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
if (optlen < IP_MSFILTER_SIZE(0))
goto e_inval;
- if (optlen > READ_ONCE(sysctl_optmem_max)) {
+ if (optlen > READ_ONCE(net->core.sysctl_optmem_max)) {
err = -ENOBUFS;
break;
}
@@ -1332,20 +1343,6 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
- case IP_LOCAL_PORT_RANGE:
- {
- const __u16 lo = val;
- const __u16 hi = val >> 16;
-
- if (optlen != sizeof(__u32))
- goto e_inval;
- if (lo != 0 && hi != 0 && lo > hi)
- goto e_inval;
-
- inet->local_port_range.lo = lo;
- inet->local_port_range.hi = hi;
- break;
- }
default:
err = -ENOPROTOOPT;
break;
@@ -1366,12 +1363,13 @@ e_inval:
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
* @skb: buffer
+ * @drop_dst: if true, drops skb dst
*
* To support IP_CMSG_PKTINFO option, we store rt_iif and specific
* destination in skb->cb[] before dst drop.
* This way, receiver doesn't make cache line misses to read rtable.
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
bool prepare = inet_test_bit(PKTINFO, sk) ||
@@ -1400,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0;
}
- skb_dst_drop(skb);
+ if (drop_dst)
+ skb_dst_drop(skb);
}
int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
@@ -1412,11 +1411,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#if IS_ENABLED(CONFIG_BPFILTER_UMH)
- if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
- optname < BPFILTER_IPT_SET_MAX)
- err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
-#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
@@ -1692,6 +1686,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
return -EFAULT;
return 0;
}
+ case IP_LOCAL_PORT_RANGE:
+ val = READ_ONCE(inet->local_port_range);
+ goto copyval;
}
if (needs_rtnl)
@@ -1721,9 +1718,6 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
else
err = ip_get_mcast_msfilter(sk, optval, optlen, len);
goto out;
- case IP_LOCAL_PORT_RANGE:
- val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
- break;
case IP_PROTOCOL:
val = inet_sk(sk)->inet_num;
break;
@@ -1764,11 +1758,6 @@ int ip_getsockopt(struct sock *sk, int level,
err = do_ip_getsockopt(sk, level, optname,
USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
-#if IS_ENABLED(CONFIG_BPFILTER_UMH)
- if (optname >= BPFILTER_IPT_SO_GET_INFO &&
- optname < BPFILTER_IPT_GET_MAX)
- err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
-#endif
#ifdef CONFIG_NETFILTER
/* we need to exclude all possible ENOPROTOOPTs except default case */
if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index beeae624c412..1b6981de3f29 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -554,6 +554,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+ /* we must cap headroom to some upperlimit, else pskb_expand_head
+ * will overflow header offsets in skb_headers_offset_update().
+ */
+ static const unsigned int max_allowed = 512;
+
+ if (headroom > max_allowed)
+ headroom = max_allowed;
+
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
@@ -632,13 +646,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -818,16 +832,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
DEV_STATS_INC(dev, tx_dropped);
kfree_skb(skb);
return;
}
+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -1298,4 +1312,5 @@ void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
+MODULE_DESCRIPTION("IPv4 tunnel implementation library");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 586b1b3e35b8..80ccd6661aa3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
};
skb_reset_network_header(skb);
- csum = csum_partial(icmp6h, len, 0);
+ csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
IPPROTO_ICMPV6, csum);
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 9ab9b3ebe0cd..d1d6bb28ed6e 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -721,6 +721,7 @@ static void __exit vti_fini(void)
module_init(vti_init);
module_exit(vti_fini);
+MODULE_DESCRIPTION("Virtual (secure) IP tunneling library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 27b8f83c6ea2..03afa3871efc 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -658,6 +658,7 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
+MODULE_DESCRIPTION("IP/IP protocol decoder library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9e222a57bc2b..362229836510 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -253,7 +253,7 @@ static int __net_init ipmr_rules_init(struct net *net)
goto err1;
}
- err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
+ err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT);
if (err < 0)
goto err2;
@@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt,
struct sk_buff *skb;
int ret;
+ mroute_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute_sk)
+ return -EINVAL;
+
if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
else
@@ -1069,7 +1073,8 @@ static int ipmr_cache_report(const struct mr_table *mrt,
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
- skb_dst_set(skb, dst_clone(skb_dst(pkt)));
+ ipv4_pktinfo_prepare(mroute_sk, pkt, false);
+ memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
/* Add our header */
igmp = skb_put(skb, sizeof(struct igmphdr));
igmp->type = assert;
@@ -1079,12 +1084,6 @@ static int ipmr_cache_report(const struct mr_table *mrt,
skb->transport_header = skb->network_header;
}
- mroute_sk = rcu_dereference(mrt->mroute_sk);
- if (!mroute_sk) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
igmpmsg_netlink_event(mrt, skb);
/* Deliver to mrouted */
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index f01b038fc1cd..04504b2b51df 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -239,7 +239,6 @@ static int nf_reject_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct iphdr *niph;
const struct tcphdr *oth;
@@ -289,9 +288,13 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
- br_indev = nf_bridge_get_physindev(oldskb);
- if (br_indev) {
+ if (nf_bridge_info_exists(oldskb)) {
struct ethhdr *oeth = eth_hdr(oldskb);
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(oldskb, net);
+ if (!br_indev)
+ goto free_nskb;
nskb->dev = br_indev;
niph->tot_len = htons(nskb->len);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 27da9d7294c0..aea89326c697 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* Charge it to the socket. */
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d37282c06e3d..61f1c96cfe63 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -189,12 +189,14 @@ __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
* Check if a ack sequence number is a valid syncookie.
* Return the decoded mss if it is, or 0 if not.
*/
-int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
- u32 cookie)
+int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th)
{
+ __u32 cookie = ntohl(th->ack_seq) - 1;
__u32 seq = ntohl(th->seq) - 1;
- __u32 mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
- th->source, th->dest, seq);
+ __u32 mssind;
+
+ mssind = check_tcp_syn_cookie(cookie, iph->saddr, iph->daddr,
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
@@ -202,7 +204,7 @@ EXPORT_SYMBOL_GPL(__cookie_v4_check);
struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
- struct dst_entry *dst, u32 tsoff)
+ struct dst_entry *dst)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
@@ -212,7 +214,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
NULL, &own_req);
if (child) {
refcount_set(&req->rsk_refcnt, 1);
- tcp_sk(child)->tsoffset = tsoff;
sock_rps_save_rxhash(child, skb);
if (rsk_drop_req(req)) {
@@ -269,26 +270,46 @@ bool cookie_timestamp_decode(const struct net *net,
}
EXPORT_SYMBOL(cookie_timestamp_decode);
-bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
- const struct net *net, const struct dst_entry *dst)
+static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req)
{
- bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
+ struct inet_request_sock *ireq = inet_rsk(req);
+ struct tcp_request_sock *treq = tcp_rsk(req);
+ const struct tcphdr *th = tcp_hdr(skb);
- if (!ecn_ok)
- return false;
+ req->num_retrans = 0;
- if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
- return true;
+ ireq->ir_num = ntohs(th->dest);
+ ireq->ir_rmt_port = th->source;
+ ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
+ ireq->ir_mark = inet_request_mark(sk, skb);
+
+ if (IS_ENABLED(CONFIG_SMC))
+ ireq->smc_ok = 0;
- return dst_feature(dst, RTAX_FEATURE_ECN);
+ treq->snt_synack = 0;
+ treq->tfo_listener = false;
+ treq->txhash = net_tx_rndhash();
+ treq->rcv_isn = ntohl(th->seq) - 1;
+ treq->snt_isn = ntohl(th->ack_seq) - 1;
+ treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
+ treq->req_usec_ts = false;
+
+#if IS_ENABLED(CONFIG_MPTCP)
+ treq->is_mptcp = sk_is_mptcp(sk);
+ if (treq->is_mptcp)
+ return mptcp_subflow_init_cookie_req(req, sk, skb);
+#endif
+
+ return 0;
}
-EXPORT_SYMBOL(cookie_ecn_ok);
struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
- const struct tcp_request_sock_ops *af_ops,
- struct sock *sk,
- struct sk_buff *skb)
+ struct sock *sk, struct sk_buff *skb,
+ struct tcp_options_received *tcp_opt,
+ int mss, u32 tsoff)
{
+ struct inet_request_sock *ireq;
struct tcp_request_sock *treq;
struct request_sock *req;
@@ -300,126 +321,109 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
if (!req)
return NULL;
- treq = tcp_rsk(req);
+ if (cookie_tcp_reqsk_init(sk, skb, req)) {
+ reqsk_free(req);
+ return NULL;
+ }
- /* treq->af_specific might be used to perform TCP_MD5 lookup */
- treq->af_specific = af_ops;
+ ireq = inet_rsk(req);
+ treq = tcp_rsk(req);
- treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
- treq->req_usec_ts = false;
+ req->mss = mss;
+ req->ts_recent = tcp_opt->saw_tstamp ? tcp_opt->rcv_tsval : 0;
-#if IS_ENABLED(CONFIG_MPTCP)
- treq->is_mptcp = sk_is_mptcp(sk);
- if (treq->is_mptcp) {
- int err = mptcp_subflow_init_cookie_req(req, sk, skb);
+ ireq->snd_wscale = tcp_opt->snd_wscale;
+ ireq->tstamp_ok = tcp_opt->saw_tstamp;
+ ireq->sack_ok = tcp_opt->sack_ok;
+ ireq->wscale_ok = tcp_opt->wscale_ok;
+ ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN);
- if (err) {
- reqsk_free(req);
- return NULL;
- }
- }
-#endif
+ treq->ts_off = tsoff;
return req;
}
EXPORT_SYMBOL_GPL(cookie_tcp_reqsk_alloc);
-/* On input, sk is a listener.
- * Output is listener if incoming packet would not create a child
- * NULL if memory could not be allocated.
- */
-struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
- struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
struct tcp_options_received tcp_opt;
- struct inet_request_sock *ireq;
- struct tcp_request_sock *treq;
- struct tcp_sock *tp = tcp_sk(sk);
- const struct tcphdr *th = tcp_hdr(skb);
- __u32 cookie = ntohl(th->ack_seq) - 1;
- struct sock *ret = sk;
- struct request_sock *req;
- int full_space, mss;
- struct rtable *rt;
- __u8 rcv_wscale;
- struct flowi4 fl4;
u32 tsoff = 0;
- int l3index;
-
- if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
- !th->ack || th->rst)
- goto out;
+ int mss;
if (tcp_synq_no_recent_overflow(sk))
goto out;
- mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
- if (mss == 0) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ mss = __cookie_v4_check(ip_hdr(skb), tcp_hdr(skb));
+ if (!mss) {
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcp_ts_off(sock_net(sk),
+ tsoff = secure_tcp_ts_off(net,
ip_hdr(skb)->daddr,
ip_hdr(skb)->saddr);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
+ if (!cookie_timestamp_decode(net, &tcp_opt))
goto out;
- ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp_request_sock_ops,
- &tcp_request_sock_ipv4_ops, sk, skb);
- if (!req)
+ return cookie_tcp_reqsk_alloc(&tcp_request_sock_ops, sk, skb,
+ &tcp_opt, mss, tsoff);
+out:
+ return ERR_PTR(-EINVAL);
+}
+
+/* On input, sk is a listener.
+ * Output is listener if incoming packet would not create a child
+ * NULL if memory could not be allocated.
+ */
+struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
+{
+ struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_request_sock *ireq;
+ struct net *net = sock_net(sk);
+ struct request_sock *req;
+ struct sock *ret = sk;
+ struct flowi4 fl4;
+ struct rtable *rt;
+ __u8 rcv_wscale;
+ int full_space;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ if (!req)
+ goto out_drop;
+
ireq = inet_rsk(req);
- treq = tcp_rsk(req);
- treq->rcv_isn = ntohl(th->seq) - 1;
- treq->snt_isn = cookie;
- treq->ts_off = 0;
- treq->txhash = net_tx_rndhash();
- req->mss = mss;
- ireq->ir_num = ntohs(th->dest);
- ireq->ir_rmt_port = th->source;
+
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->ir_mark = inet_request_mark(sk, skb);
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = 0;
- treq->tfo_listener = false;
-
- if (IS_ENABLED(CONFIG_SMC))
- ireq->smc_ok = 0;
-
- ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
-
- l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
- tcp_ao_syncookie(sk, skb, treq, AF_INET, l3index);
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb));
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb));
- if (security_inet_conn_request(sk, skb, req)) {
- reqsk_free(req);
- goto out;
- }
+ if (security_inet_conn_request(sk, skb, req))
+ goto out_free;
- req->num_retrans = 0;
+ tcp_ao_syncookie(sk, skb, req, AF_INET);
/*
* We need to lookup the route here to get at the correct
@@ -433,11 +437,9 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
- rt = ip_route_output_key(sock_net(sk), &fl4);
- if (IS_ERR(rt)) {
- reqsk_free(req);
- goto out;
- }
+ rt = ip_route_output_key(net, &fl4);
+ if (IS_ERR(rt))
+ goto out_free;
/* Try to redo what tcp_v4_send_synack did. */
req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
@@ -453,13 +455,18 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
dst_metric(&rt->dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
+ ireq->ecn_ok &= cookie_ecn_ok(net, &rt->dst);
- ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst, tsoff);
+ ret = tcp_get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
* Normal sockets get it right from inet_csk_route_child_sock()
*/
if (ret)
inet_sk(ret)->cork.fl.u.ip4 = fl4;
-out: return ret;
+out:
+ return ret;
+out_free:
+ reqsk_free(req);
+out_drop:
+ return NULL;
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index f63a545a7374..7e4f16a7dcc1 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -50,26 +50,22 @@ static int tcp_plb_max_cong_thresh = 256;
static int sysctl_tcp_low_latency __read_mostly;
/* Update system visible IP port range */
-static void set_local_port_range(struct net *net, int range[2])
+static void set_local_port_range(struct net *net, unsigned int low, unsigned int high)
{
- bool same_parity = !((range[0] ^ range[1]) & 1);
+ bool same_parity = !((low ^ high) & 1);
- write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
if (same_parity && !net->ipv4.ip_local_ports.warned) {
net->ipv4.ip_local_ports.warned = true;
pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
}
- net->ipv4.ip_local_ports.range[0] = range[0];
- net->ipv4.ip_local_ports.range[1] = range[1];
- write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
+ WRITE_ONCE(net->ipv4.ip_local_ports.range, high << 16 | low);
}
/* Validate changes from /proc interface. */
static int ipv4_local_port_range(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net =
- container_of(table->data, struct net, ipv4.ip_local_ports.range);
+ struct net *net = table->data;
int ret;
int range[2];
struct ctl_table tmp = {
@@ -93,7 +89,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
(range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
- set_local_port_range(net, range);
+ set_local_port_range(net, range[0], range[1]);
}
return ret;
@@ -733,8 +729,8 @@ static struct ctl_table ipv4_net_table[] = {
},
{
.procname = "ip_local_port_range",
- .maxlen = sizeof(init_net.ipv4.ip_local_ports.range),
- .data = &init_net.ipv4.ip_local_ports.range,
+ .maxlen = 0,
+ .data = &init_net,
.mode = 0644,
.proc_handler = ipv4_local_port_range,
},
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ff6838ca2e58..c82dc42f57c6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -722,6 +722,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now,
if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+ smp_mb__after_atomic();
}
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED.
@@ -1785,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
static bool can_map_frag(const skb_frag_t *frag)
{
- return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag);
+ struct page *page;
+
+ if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
+ return false;
+
+ page = skb_frag_page(frag);
+
+ if (PageCompound(page) || page->mapping)
+ return false;
+
+ return true;
}
static int find_next_mappable_frag(const skb_frag_t *frag,
@@ -1849,7 +1860,6 @@ static int receive_fallback_to_copy(struct sock *sk,
{
unsigned long copy_address = (unsigned long)zc->copybuf_address;
struct msghdr msg = {};
- struct iovec iov;
int err;
zc->length = 0;
@@ -1858,8 +1868,8 @@ static int receive_fallback_to_copy(struct sock *sk,
if (copy_address != zc->copybuf_address)
return -EINVAL;
- err = import_single_range(ITER_DEST, (void __user *)copy_address,
- inq, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_DEST, (void __user *)copy_address, inq,
+ &msg.msg_iter);
if (err)
return err;
@@ -1886,14 +1896,13 @@ static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc,
{
unsigned long copy_address = (unsigned long)zc->copybuf_address;
struct msghdr msg = {};
- struct iovec iov;
int err;
if (copy_address != zc->copybuf_address)
return -EINVAL;
- err = import_single_range(ITER_DEST, (void __user *)copy_address,
- copylen, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_DEST, (void __user *)copy_address, copylen,
+ &msg.msg_iter);
if (err)
return err;
err = skb_copy_datagram_msg(skb, *offset, &msg, copylen);
@@ -2605,6 +2614,7 @@ void tcp_set_state(struct sock *sk, int state)
BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN);
BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING);
BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV);
+ BUILD_BUG_ON((int)BPF_TCP_BOUND_INACTIVE != (int)TCP_BOUND_INACTIVE);
BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES);
/* bpf uapi header bpf.h defines an anonymous enum with values
@@ -4585,6 +4595,98 @@ static void __init tcp_init_mem(void)
sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; /* 9.37 % */
}
+static void __init tcp_struct_check(void)
+{
+ /* TX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, max_window);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, rcv_ssthresh);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, reordering);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, notsent_lowat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, gso_segs);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, lost_skb_hint);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_tx, retransmit_skb_hint);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_tx, 40);
+
+ /* TXRX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, tsoffset);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, mss_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, snd_cwnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, prr_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, lost_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, sacked_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_txrx, scaling_ratio);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_txrx, 32);
+
+ /* RX read-mostly hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, copied_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rcv_tstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_wl1);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tlp_high_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rttvar_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, retrans_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, advmss);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, urg_data);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, lost);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
+
+ /* TX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, data_segs_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, bytes_sent);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, snd_sml);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_start);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, chrono_stat);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, write_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, pushed_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, lsndtime);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, mdev_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_wstamp_ns);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_clock_cache);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tcp_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, rtt_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, tsorted_sent_queue);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, highest_sack);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, ecn_flags);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_tx, 113);
+
+ /* TXRX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, pred_flags);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_nxt);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_una);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, window_clamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, srtt_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, snd_up);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, delivered_ce);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_txrx, 76);
+
+ /* RX read-write hotpath cache lines */
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_received);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, data_segs_in);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_wup);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, max_packets_out);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, cwnd_usage_seq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_delivered);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rate_interval_us);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_last_tsecr);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, first_tx_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, delivered_mstamp);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, bytes_acked);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcv_rtt_est);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, rcvq_space);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_write_rx, 99);
+}
+
void __init tcp_init(void)
{
int max_rshare, max_wshare, cnt;
@@ -4595,6 +4697,8 @@ void __init tcp_init(void)
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
sizeof_field(struct sk_buff, cb));
+ tcp_struct_check();
+
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE);
diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
index f8308d3f565e..87db432c6bb4 100644
--- a/net/ipv4/tcp_ao.c
+++ b/net/ipv4/tcp_ao.c
@@ -844,18 +844,30 @@ static struct tcp_ao_key *tcp_ao_inbound_lookup(unsigned short int family,
}
void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
- struct tcp_request_sock *treq,
- unsigned short int family, int l3index)
+ struct request_sock *req, unsigned short int family)
{
+ struct tcp_request_sock *treq = tcp_rsk(req);
const struct tcphdr *th = tcp_hdr(skb);
const struct tcp_ao_hdr *aoh;
struct tcp_ao_key *key;
+ int l3index;
+
+ /* treq->af_specific is used to perform TCP_AO lookup
+ * in tcp_create_openreq_child().
+ */
+#if IS_ENABLED(CONFIG_IPV6)
+ if (family == AF_INET6)
+ treq->af_specific = &tcp_request_sock_ipv6_ops;
+ else
+#endif
+ treq->af_specific = &tcp_request_sock_ipv4_ops;
treq->used_tcp_ao = false;
if (tcp_parse_auth_options(th, NULL, &aoh) || !aoh)
return;
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), inet_rsk(req)->ir_iif);
key = tcp_ao_inbound_lookup(family, sk, skb, -1, aoh->keyid, l3index);
if (!key)
/* Key not found, continue without TCP-AO */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 701cb87043f2..df7b13f0e5e0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -202,23 +202,17 @@ static void bpf_skops_established(struct sock *sk, int bpf_op,
}
#endif
-static void tcp_gro_dev_warn(struct sock *sk, const struct sk_buff *skb,
- unsigned int len)
+static __cold void tcp_gro_dev_warn(const struct sock *sk, const struct sk_buff *skb,
+ unsigned int len)
{
- static bool __once __read_mostly;
+ struct net_device *dev;
- if (!__once) {
- struct net_device *dev;
-
- __once = true;
-
- rcu_read_lock();
- dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
- if (!dev || len >= dev->mtu)
- pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
- dev ? dev->name : "Unknown driver");
- rcu_read_unlock();
- }
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
+ if (!dev || len >= READ_ONCE(dev->mtu))
+ pr_warn("%s: Driver has suspect GRO implementation, TCP performance may be compromised.\n",
+ dev ? dev->name : "Unknown driver");
+ rcu_read_unlock();
}
/* Adapt the MSS value used to make delayed ack decision to the
@@ -250,9 +244,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
tcp_sk(sk)->advmss);
/* Account for possibly-removed options */
- if (unlikely(len > icsk->icsk_ack.rcv_mss +
- MAX_TCP_OPTION_SPACE))
- tcp_gro_dev_warn(sk, skb, len);
+ DO_ONCE_LITE_IF(len > icsk->icsk_ack.rcv_mss + MAX_TCP_OPTION_SPACE,
+ tcp_gro_dev_warn, sk, skb, len);
/* If the skb has a len of exactly 1*MSS and has the PSH bit
* set then it is likely the end of an application write. So
* more data may not be arriving soon, and yet the data sender
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1f9f6c1c196b..d1ad20ce1c8c 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -626,7 +626,6 @@ void tcp_retransmit_timer(struct sock *sk)
* implemented ftp to mars will work nicely. We will have to fix
* the 120 second clamps though!
*/
- icsk->icsk_backoff++;
out_reset_timer:
/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
@@ -647,11 +646,12 @@ out_reset_timer:
tcp_rto_min(sk),
TCP_RTO_MAX);
} else if (sk->sk_state != TCP_SYN_SENT ||
- icsk->icsk_backoff >
+ tp->total_rto >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
/* Use normal (exponential) backoff unless linear timeouts are
* activated.
*/
+ icsk->icsk_backoff++;
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 5048c47c79b2..4c1f836aae38 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -294,4 +294,5 @@ static void __exit tunnel4_fini(void)
module_init(tunnel4_init);
module_exit(tunnel4_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel library");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 89e5a806b82e..e474b201900f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -805,7 +805,7 @@ void udp_flush_pending_frames(struct sock *sk)
if (up->pending) {
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
ip_flush_pending_frames(sk);
}
}
@@ -993,7 +993,7 @@ int udp_push_pending_frames(struct sock *sk)
out:
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
return err;
}
EXPORT_SYMBOL(udp_push_pending_frames);
@@ -1070,7 +1070,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
fl4 = &inet->cork.fl.u.ip4;
- if (up->pending) {
+ if (READ_ONCE(up->pending)) {
/*
* There are pending frames.
* The socket lock must be held while it's corked.
@@ -1269,7 +1269,7 @@ back_from_confirm:
fl4->saddr = saddr;
fl4->fl4_dport = dport;
fl4->fl4_sport = inet->inet_sport;
- up->pending = AF_INET;
+ WRITE_ONCE(up->pending, AF_INET);
do_append_data:
up->len += ulen;
@@ -1281,7 +1281,7 @@ do_append_data:
else if (!corkreq)
err = udp_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
release_sock(sk);
out:
@@ -1319,7 +1319,7 @@ void udp_splice_eof(struct socket *sock)
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- if (!up->pending || udp_test_bit(CORK, sk))
+ if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk))
return;
lock_sock(sk);
@@ -1589,12 +1589,7 @@ int udp_init_sock(struct sock *sk)
void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
{
- if (unlikely(READ_ONCE(sk->sk_peek_off) >= 0)) {
- bool slow = lock_sock_fast(sk);
-
- sk_peek_offset_bwd(sk, len);
- unlock_sock_fast(sk, slow);
- }
+ sk_peek_offset_bwd(sk, len);
if (!skb_unref(skb))
return;
@@ -2169,7 +2164,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_csum_pull_header(skb);
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
return __udp_queue_rcv_skb(sk, skb);
csum_error:
@@ -3137,16 +3132,18 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
struct bpf_udp_iter_state *iter = seq->private;
struct udp_iter_state *state = &iter->state;
struct net *net = seq_file_net(seq);
+ int resume_bucket, resume_offset;
struct udp_table *udptable;
unsigned int batch_sks = 0;
bool resized = false;
struct sock *sk;
+ resume_bucket = state->bucket;
+ resume_offset = iter->offset;
+
/* The current batch is done, so advance the bucket. */
- if (iter->st_bucket_done) {
+ if (iter->st_bucket_done)
state->bucket++;
- iter->offset = 0;
- }
udptable = udp_get_table_seq(seq, net);
@@ -3166,19 +3163,19 @@ again:
for (; state->bucket <= udptable->mask; state->bucket++) {
struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
- if (hlist_empty(&hslot2->head)) {
- iter->offset = 0;
+ if (hlist_empty(&hslot2->head))
continue;
- }
+ iter->offset = 0;
spin_lock_bh(&hslot2->lock);
udp_portaddr_for_each_entry(sk, &hslot2->head) {
if (seq_sk_match(seq, sk)) {
/* Resume from the last iterated socket at the
* offset in the bucket before iterator was stopped.
*/
- if (iter->offset) {
- --iter->offset;
+ if (state->bucket == resume_bucket &&
+ iter->offset < resume_offset) {
+ ++iter->offset;
continue;
}
if (iter->end_sk < iter->max_sk) {
@@ -3192,9 +3189,6 @@ again:
if (iter->end_sk)
break;
-
- /* Reset the current bucket's offset before moving to the next bucket. */
- iter->offset = 0;
}
/* All done: no batch made. */
@@ -3213,7 +3207,6 @@ again:
/* After allocating a larger batch, retry one more time to grab
* the whole bucket.
*/
- state->bucket--;
goto again;
}
done:
diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c
index a87defb2b167..860aff5f8599 100644
--- a/net/ipv4/udp_tunnel_core.c
+++ b/net/ipv4/udp_tunnel_core.c
@@ -253,4 +253,5 @@ struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup);
+MODULE_DESCRIPTION("IPv4 Foo over UDP tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 8489fa106583..8cb266af1393 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -114,5 +114,6 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
+MODULE_DESCRIPTION("IPv4 XFRM tunnel driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_IPIP);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 733ace18806c..055230b669cf 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -708,6 +708,22 @@ errout:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet6_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
+
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -741,8 +757,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet6_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -5362,7 +5377,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
}
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
+ cb->seq = inet6_base_seq(tgt_net);
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &tgt_net->dev_index_head[h];
@@ -5494,9 +5509,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr)
- return -EINVAL;
-
+ if (!addr) {
+ err = -EINVAL;
+ goto errout;
+ }
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
dev = dev_get_by_index(tgt_net, ifm->ifa_index);
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 507a8353a6bd..c008d21925d7 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
EXPORT_SYMBOL_GPL(ipv6_stub);
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LOOPBACK_INIT;
EXPORT_SYMBOL(in6addr_loopback);
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_ANY_INIT;
EXPORT_SYMBOL(in6addr_any);
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allnodes);
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allrouters);
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
static void snmp6_free_dev(struct inet6_dev *idev)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 13a1833a4df5..959bfd9f6344 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -199,6 +199,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 2016e90e6e1d..eb474f0987ae 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -800,5 +800,6 @@ static void __exit ah6_fini(void)
module_init(ah6_init);
module_exit(ah6_fini);
+MODULE_DESCRIPTION("IPv6 AH transformation helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_AH);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cc6a502db39d..fff78496803d 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -60,9 +60,9 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6,
if (!oif) {
if (ipv6_addr_is_multicast(&fl6->daddr))
- oif = np->mcast_oif;
+ oif = READ_ONCE(np->mcast_oif);
else
- oif = np->ucast_oif;
+ oif = READ_ONCE(np->ucast_oif);
}
fl6->flowi6_oif = oif;
@@ -229,7 +229,7 @@ ipv4_connected:
}
if (!sk->sk_bound_dev_if && (addr_type & IPV6_ADDR_MULTICAST))
- WRITE_ONCE(sk->sk_bound_dev_if, np->mcast_oif);
+ WRITE_ONCE(sk->sk_bound_dev_if, READ_ONCE(np->mcast_oif));
/* Connect to link-local address requires an interface */
if (!sk->sk_bound_dev_if) {
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 2cc1a45742d8..6e6efe026cdc 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -1301,5 +1301,6 @@ static void __exit esp6_fini(void)
module_init(esp6_init);
module_exit(esp6_fini);
+MODULE_DESCRIPTION("IPv6 ESP transformation helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ESP);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4952ae792450..02e9ffb63af1 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -177,6 +177,8 @@ static bool ip6_parse_tlv(bool hopbyhop,
case IPV6_TLV_IOAM:
if (!ipv6_hop_ioam(skb, off))
return false;
+
+ nh = skb_network_header(skb);
break;
case IPV6_TLV_JUMBO:
if (!ipv6_hop_jumbo(skb, off))
@@ -943,6 +945,14 @@ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
if (!skb_valid_dst(skb))
ip6_route_input(skb);
+ /* About to mangle packet header */
+ if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len))
+ goto drop;
+
+ /* Trace pointer may have changed */
+ trace = (struct ioam6_trace_hdr *)(skb_network_header(skb)
+ + optoff + sizeof(*hdr));
+
ioam6_fill_trace_data(skb, ns, trace, true);
break;
default:
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index 06750d65d480..4c00398f4dca 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -16,6 +16,10 @@ static const struct net_offload dstopt_offload = {
.flags = INET6_PROTO_GSO_EXTHDR,
};
+static const struct net_offload hbh_offload = {
+ .flags = INET6_PROTO_GSO_EXTHDR,
+};
+
int __init ipv6_exthdrs_offload_init(void)
{
int ret;
@@ -28,9 +32,16 @@ int __init ipv6_exthdrs_offload_init(void)
if (ret)
goto out_rt;
+ ret = inet6_add_offload(&hbh_offload, IPPROTO_HOPOPTS);
+ if (ret)
+ goto out_dstopts;
+
out:
return ret;
+out_dstopts:
+ inet6_del_offload(&dstopt_offload, IPPROTO_DSTOPTS);
+
out_rt:
inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
goto out;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 7c2003833010..7523c4baef35 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -475,11 +475,11 @@ static int __net_init fib6_rules_net_init(struct net *net)
if (IS_ERR(ops))
return PTR_ERR(ops);
- err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0);
+ err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL);
if (err)
goto out_fib6_rules_ops;
- err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0);
+ err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN);
if (err)
goto out_fib6_rules_ops;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f62427097126..1635da07285f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -584,9 +584,9 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
tmp_hdr.icmp6_pointer = htonl(info);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
ipcm6_init_sk(&ipc6, sk);
ipc6.sockc.mark = mark;
@@ -770,9 +770,9 @@ static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
np = inet6_sk(sk);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
if (ip6_dst_lookup(net, sk, &dst, &fl6))
goto out;
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index d6314287338d..cca64c7809be 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -37,6 +37,40 @@
INDIRECT_CALL_L4(cb, f2, f1, head, skb); \
})
+static int ipv6_gro_pull_exthdrs(struct sk_buff *skb, int off, int proto)
+{
+ const struct net_offload *ops = NULL;
+ struct ipv6_opt_hdr *opth;
+
+ for (;;) {
+ int len;
+
+ ops = rcu_dereference(inet6_offloads[proto]);
+
+ if (unlikely(!ops))
+ break;
+
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+
+ opth = skb_gro_header(skb, off + sizeof(*opth), off);
+ if (unlikely(!opth))
+ break;
+
+ len = ipv6_optlen(opth);
+
+ opth = skb_gro_header(skb, off + len, off);
+ if (unlikely(!opth))
+ break;
+ proto = opth->nexthdr;
+
+ off += len;
+ }
+
+ skb_gro_pull(skb, off - skb_network_offset(skb));
+ return proto;
+}
+
static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
{
const struct net_offload *ops = NULL;
@@ -45,15 +79,13 @@ static int ipv6_gso_pull_exthdrs(struct sk_buff *skb, int proto)
struct ipv6_opt_hdr *opth;
int len;
- if (proto != NEXTHDR_HOP) {
- ops = rcu_dereference(inet6_offloads[proto]);
+ ops = rcu_dereference(inet6_offloads[proto]);
- if (unlikely(!ops))
- break;
+ if (unlikely(!ops))
+ break;
- if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ if (!(ops->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
if (unlikely(!pskb_may_pull(skb, 8)))
break;
@@ -171,13 +203,12 @@ static int ipv6_exthdrs_len(struct ipv6hdr *iph,
proto = iph->nexthdr;
for (;;) {
- if (proto != NEXTHDR_HOP) {
- *opps = rcu_dereference(inet6_offloads[proto]);
- if (unlikely(!(*opps)))
- break;
- if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
- break;
- }
+ *opps = rcu_dereference(inet6_offloads[proto]);
+ if (unlikely(!(*opps)))
+ break;
+ if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR))
+ break;
+
opth = (void *)opth + optlen;
optlen = ipv6_optlen(opth);
len += optlen;
@@ -206,28 +237,25 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
goto out;
skb_set_network_header(skb, off);
- skb_gro_pull(skb, sizeof(*iph));
- skb_set_transport_header(skb, skb_gro_offset(skb));
- flush += ntohs(iph->payload_len) != skb_gro_len(skb);
+ flush += ntohs(iph->payload_len) != skb->len - hlen;
proto = iph->nexthdr;
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive) {
- pskb_pull(skb, skb_gro_offset(skb));
- skb_gro_frag0_invalidate(skb);
- proto = ipv6_gso_pull_exthdrs(skb, proto);
- skb_gro_pull(skb, -skb_transport_offset(skb));
- skb_reset_transport_header(skb);
- __skb_push(skb, skb_gro_offset(skb));
+ proto = ipv6_gro_pull_exthdrs(skb, hlen, proto);
ops = rcu_dereference(inet6_offloads[proto]);
if (!ops || !ops->callbacks.gro_receive)
goto out;
- iph = ipv6_hdr(skb);
+ iph = skb_gro_network_header(skb);
+ } else {
+ skb_gro_pull(skb, sizeof(*iph));
}
+ skb_set_transport_header(skb, skb_gro_offset(skb));
+
NAPI_GRO_CB(skb)->proto = proto;
flush--;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a722a43dd668..31b86fe661aa 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1424,11 +1424,11 @@ static int __ip6_append_data(struct sock *sk,
bool zc = false;
u32 tskey = 0;
struct rt6_info *rt = (struct rt6_info *)cork->dst;
+ bool paged, hold_tskey, extra_uref = false;
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
unsigned int wmem_alloc_delta = 0;
- bool paged, extra_uref = false;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1440,10 +1440,6 @@ static int __ip6_append_data(struct sock *sk,
mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
orig_mtu = mtu;
- if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
- tskey = atomic_inc_return(&sk->sk_tskey) - 1;
-
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
@@ -1538,6 +1534,11 @@ emsgsize:
flags &= ~MSG_SPLICE_PAGES;
}
+ hold_tskey = cork->tx_flags & SKBTX_ANY_TSTAMP &&
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID;
+ if (hold_tskey)
+ tskey = atomic_inc_return(&sk->sk_tskey) - 1;
+
/*
* Let's try using as much space as possible.
* Use MTU if total length of the message fits into the MTU.
@@ -1794,6 +1795,8 @@ error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
+ if (hold_tskey)
+ atomic_dec(&sk->sk_tskey);
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5e80e517f071..9bbabf750a21 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -399,7 +399,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
unsigned int nhoff = raw - skb->data;
unsigned int off = nhoff + sizeof(*ipv6h);
- u8 next, nexthdr = ipv6h->nexthdr;
+ u8 nexthdr = ipv6h->nexthdr;
while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
struct ipv6_opt_hdr *hdr;
@@ -410,25 +410,25 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
hdr = (struct ipv6_opt_hdr *)(skb->data + off);
if (nexthdr == NEXTHDR_FRAGMENT) {
- struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
- if (frag_hdr->frag_off)
- break;
optlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
optlen = ipv6_authlen(hdr);
} else {
optlen = ipv6_optlen(hdr);
}
- /* cache hdr->nexthdr, since pskb_may_pull() might
- * invalidate hdr
- */
- next = hdr->nexthdr;
- if (nexthdr == NEXTHDR_DEST) {
- u16 i = 2;
- /* Remember : hdr is no longer valid at this point. */
- if (!pskb_may_pull(skb, off + optlen))
+ if (!pskb_may_pull(skb, off + optlen))
+ break;
+
+ hdr = (struct ipv6_opt_hdr *)(skb->data + off);
+ if (nexthdr == NEXTHDR_FRAGMENT) {
+ struct frag_hdr *frag_hdr = (struct frag_hdr *)hdr;
+
+ if (frag_hdr->frag_off)
break;
+ }
+ if (nexthdr == NEXTHDR_DEST) {
+ u16 i = 2;
while (1) {
struct ipv6_tlv_tnl_enc_lim *tel;
@@ -449,7 +449,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
i++;
}
}
- nexthdr = next;
+ nexthdr = hdr->nexthdr;
off += optlen;
}
return 0;
@@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
struct sk_buff *skb),
bool log_ecn_err)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- int err;
+ const struct ipv6hdr *ipv6h;
+ int nh, err;
if ((!(tpi->flags & TUNNEL_CSUM) &&
(tunnel->parms.i_flags & TUNNEL_CSUM)) ||
@@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
goto drop;
}
- ipv6h = ipv6_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
@@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_reset_mac_header(skb);
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_reset_network_header(skb);
+
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+
+ /* Get the outer header. */
+ ipv6h = (struct ipv6hdr *)(skb->head + nh);
+
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index a7bf0327b380..c99053189ea8 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -182,4 +182,5 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup);
+MODULE_DESCRIPTION("IPv6 Foo over UDP tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 30ca064b76ef..9782c180fee6 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -242,7 +242,7 @@ static int __net_init ip6mr_rules_init(struct net *net)
goto err1;
}
- err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
+ err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT);
if (err < 0)
goto err2;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 7d661735cb9d..56c3c467f9de 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -210,7 +210,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < GROUP_FILTER_SIZE(0))
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max))
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max))
return -ENOBUFS;
gsf = memdup_sockptr(optval, optlen);
@@ -244,7 +244,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (optlen < size0)
return -EINVAL;
- if (optlen > READ_ONCE(sysctl_optmem_max) - 4)
+ if (optlen > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max) - 4)
return -ENOBUFS;
p = kmalloc(optlen + 4, GFP_KERNEL);
@@ -509,6 +509,59 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
return ip6_sock_set_addr_preferences(sk, val);
+ case IPV6_MULTICAST_IF:
+ if (sk->sk_type == SOCK_STREAM)
+ return -ENOPROTOOPT;
+ if (optlen < sizeof(int))
+ return -EINVAL;
+ if (val) {
+ struct net_device *dev;
+ int bound_dev_if, midx;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, val);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
+ midx = l3mdev_master_ifindex_rcu(dev);
+
+ rcu_read_unlock();
+
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if &&
+ bound_dev_if != val &&
+ (!midx || midx != bound_dev_if))
+ return -EINVAL;
+ }
+ WRITE_ONCE(np->mcast_oif, val);
+ return 0;
+ case IPV6_UNICAST_IF:
+ {
+ struct net_device *dev;
+ int ifindex;
+
+ if (optlen != sizeof(int))
+ return -EINVAL;
+
+ ifindex = (__force int)ntohl((__force __be32)val);
+ if (!ifindex) {
+ WRITE_ONCE(np->ucast_oif, 0);
+ return 0;
+ }
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev)
+ return -EADDRNOTAVAIL;
+ dev_put(dev);
+
+ if (READ_ONCE(sk->sk_bound_dev_if))
+ return -EINVAL;
+
+ WRITE_ONCE(np->ucast_oif, ifindex);
+ return 0;
+ }
}
if (needs_rtnl)
rtnl_lock();
@@ -829,67 +882,6 @@ done:
break;
}
-
- case IPV6_UNICAST_IF:
- {
- struct net_device *dev = NULL;
- int ifindex;
-
- if (optlen != sizeof(int))
- goto e_inval;
-
- ifindex = (__force int)ntohl((__force __be32)val);
- if (ifindex == 0) {
- np->ucast_oif = 0;
- retv = 0;
- break;
- }
-
- dev = dev_get_by_index(net, ifindex);
- retv = -EADDRNOTAVAIL;
- if (!dev)
- break;
- dev_put(dev);
-
- retv = -EINVAL;
- if (sk->sk_bound_dev_if)
- break;
-
- np->ucast_oif = ifindex;
- retv = 0;
- break;
- }
-
- case IPV6_MULTICAST_IF:
- if (sk->sk_type == SOCK_STREAM)
- break;
- if (optlen < sizeof(int))
- goto e_inval;
-
- if (val) {
- struct net_device *dev;
- int midx;
-
- rcu_read_lock();
-
- dev = dev_get_by_index_rcu(net, val);
- if (!dev) {
- rcu_read_unlock();
- retv = -ENODEV;
- break;
- }
- midx = l3mdev_master_ifindex_rcu(dev);
-
- rcu_read_unlock();
-
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != val &&
- (!midx || midx != sk->sk_bound_dev_if))
- goto e_inval;
- }
- np->mcast_oif = val;
- retv = 0;
- break;
case IPV6_ADD_MEMBERSHIP:
case IPV6_DROP_MEMBERSHIP:
{
@@ -1161,10 +1153,12 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
sockopt_release_sock(sk);
if (!skb) {
if (np->rxopt.bits.rxinfo) {
+ int mcast_oif = READ_ONCE(np->mcast_oif);
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+
+ src_info.ipi6_ifindex = mcast_oif ? :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxhlim) {
@@ -1178,11 +1172,13 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);
}
if (np->rxopt.bits.rxoinfo) {
+ int mcast_oif = READ_ONCE(np->mcast_oif);
struct in6_pktinfo src_info;
- src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
+
+ src_info.ipi6_ifindex = mcast_oif ? :
np->sticky_pktinfo.ipi6_ifindex;
- src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr :
- np->sticky_pktinfo.ipi6_addr;
+ src_info.ipi6_addr = mcast_oif ? sk->sk_v6_daddr :
+ np->sticky_pktinfo.ipi6_addr;
put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
}
if (np->rxopt.bits.rxohlim) {
@@ -1359,7 +1355,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_MULTICAST_IF:
- val = np->mcast_oif;
+ val = READ_ONCE(np->mcast_oif);
break;
case IPV6_MULTICAST_ALL:
@@ -1367,7 +1363,7 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_UNICAST_IF:
- val = (__force int)htonl((__u32) np->ucast_oif);
+ val = (__force int)htonl((__u32) READ_ONCE(np->ucast_oif));
break;
case IPV6_MTU_DISCOVER:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index b75d3c9d41bb..bc6e0a0bad3c 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2722,8 +2722,12 @@ void ipv6_mc_down(struct inet6_dev *idev)
synchronize_net();
mld_query_stop_work(idev);
mld_report_stop_work(idev);
+
+ mutex_lock(&idev->mc_lock);
mld_ifc_stop_work(idev);
mld_gq_stop_work(idev);
+ mutex_unlock(&idev->mc_lock);
+
mld_dad_stop_work(idev);
}
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 83d2a8be263f..6a16a5bd0d91 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -405,6 +405,7 @@ static void __exit mip6_fini(void)
module_init(mip6_init);
module_exit(mip6_fini);
+MODULE_DESCRIPTION("IPv6 Mobility driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_DSTOPTS);
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_ROUTING);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index d45bc54b7ea5..196dd4ecb5e2 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -278,7 +278,6 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
int hook)
{
- struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
struct tcphdr _otcph;
const struct tcphdr *otcph;
@@ -354,9 +353,15 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
- br_indev = nf_bridge_get_physindev(oldskb);
- if (br_indev) {
+ if (nf_bridge_info_exists(oldskb)) {
struct ethhdr *oeth = eth_hdr(oldskb);
+ struct net_device *br_indev;
+
+ br_indev = nf_bridge_get_physindev(oldskb, net);
+ if (!br_indev) {
+ kfree_skb(nskb);
+ return;
+ }
nskb->dev = br_indev;
nskb->protocol = htons(ETH_P_IPV6);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index d2098dd4ceae..ef2059c88955 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -107,9 +107,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
oif = np->sticky_pktinfo.ipi6_ifindex;
if (!oif && ipv6_addr_is_multicast(daddr))
- oif = np->mcast_oif;
+ oif = READ_ONCE(np->mcast_oif);
else if (!oif)
- oif = np->ucast_oif;
+ oif = READ_ONCE(np->ucast_oif);
addr_type = ipv6_addr_type(daddr);
if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) ||
@@ -157,9 +157,9 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
rt = (struct rt6_info *) dst;
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
pfh.icmph.type = user_icmph.icmp6_type;
pfh.icmph.code = user_icmph.icmp6_code;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dd0a4e73e602..03dbb874c363 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -876,9 +876,9 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
final_p = fl6_update_dst(&fl6, opt, &final);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
if (hdrincl)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea1dec8448fc..ef815ba583a8 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5332,19 +5332,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
- fib6_info_release(nh->fib6_info);
-
- if (!err) {
- /* save reference to last route successfully inserted */
- rt_last = nh->fib6_info;
-
- /* save reference to first route for notification */
- if (!rt_notif)
- rt_notif = nh->fib6_info;
- }
- /* nh->fib6_info is used or freed at this point, reset to NULL*/
- nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
NL_SET_ERR_MSG_MOD(extack,
@@ -5352,6 +5340,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = nh;
goto add_errout;
}
+ /* save reference to last route successfully inserted */
+ rt_last = nh->fib6_info;
+
+ /* save reference to first route for notification */
+ if (!rt_notif)
+ rt_notif = nh->fib6_info;
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
@@ -5412,8 +5406,7 @@ add_errout:
cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
- if (nh->fib6_info)
- fib6_info_release(nh->fib6_info);
+ fib6_info_release(nh->fib6_info);
list_del(&nh->next);
kfree(nh);
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 29346a6eec9f..35508abd76f4 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -512,22 +512,24 @@ int __init seg6_init(void)
{
int err;
- err = genl_register_family(&seg6_genl_family);
+ err = register_pernet_subsys(&ip6_segments_ops);
if (err)
goto out;
- err = register_pernet_subsys(&ip6_segments_ops);
+ err = genl_register_family(&seg6_genl_family);
if (err)
- goto out_unregister_genl;
+ goto out_unregister_pernet;
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
- goto out_unregister_pernet;
+ goto out_unregister_genl;
err = seg6_local_init();
- if (err)
- goto out_unregister_pernet;
+ if (err) {
+ seg6_iptunnel_exit();
+ goto out_unregister_genl;
+ }
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
@@ -548,11 +550,11 @@ out_unregister_iptun:
#endif
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
-out_unregister_pernet:
- unregister_pernet_subsys(&ip6_segments_ops);
-#endif
out_unregister_genl:
genl_unregister_family(&seg6_genl_family);
+#endif
+out_unregister_pernet:
+ unregister_pernet_subsys(&ip6_segments_ops);
goto out;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cc24cefdb85c..5e9f625b76e3 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1956,6 +1956,7 @@ xfrm_tunnel_failed:
module_init(sit_init);
module_exit(sit_cleanup);
+MODULE_DESCRIPTION("IPv6-in-IPv4 tunnel SIT driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("sit");
MODULE_ALIAS_NETDEV("sit0");
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 12eedc6ca2cc..c8d2ca27220c 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -114,76 +114,82 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
return __cookie_v6_init_sequence(iph, th, mssp);
}
-int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,
- __u32 cookie)
+int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th)
{
+ __u32 cookie = ntohl(th->ack_seq) - 1;
__u32 seq = ntohl(th->seq) - 1;
- __u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
- th->source, th->dest, seq);
+ __u32 mssind;
+
+ mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr,
+ th->source, th->dest, seq);
return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;
}
EXPORT_SYMBOL_GPL(__cookie_v6_check);
-struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct tcp_options_received tcp_opt;
- struct inet_request_sock *ireq;
- struct tcp_request_sock *treq;
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- const struct tcphdr *th = tcp_hdr(skb);
- __u32 cookie = ntohl(th->ack_seq) - 1;
- struct sock *ret = sk;
- struct request_sock *req;
- int full_space, mss;
- struct dst_entry *dst;
- __u8 rcv_wscale;
u32 tsoff = 0;
- int l3index;
-
- if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
- !th->ack || th->rst)
- goto out;
+ int mss;
if (tcp_synq_no_recent_overflow(sk))
goto out;
- mss = __cookie_v6_check(ipv6_hdr(skb), th, cookie);
- if (mss == 0) {
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
+ mss = __cookie_v6_check(ipv6_hdr(skb), tcp_hdr(skb));
+ if (!mss) {
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_SYNCOOKIESRECV);
+ __NET_INC_STATS(net, LINUX_MIB_SYNCOOKIESRECV);
/* check for timestamp cookie support */
memset(&tcp_opt, 0, sizeof(tcp_opt));
- tcp_parse_options(sock_net(sk), skb, &tcp_opt, 0, NULL);
+ tcp_parse_options(net, skb, &tcp_opt, 0, NULL);
if (tcp_opt.saw_tstamp && tcp_opt.rcv_tsecr) {
- tsoff = secure_tcpv6_ts_off(sock_net(sk),
+ tsoff = secure_tcpv6_ts_off(net,
ipv6_hdr(skb)->daddr.s6_addr32,
ipv6_hdr(skb)->saddr.s6_addr32);
tcp_opt.rcv_tsecr -= tsoff;
}
- if (!cookie_timestamp_decode(sock_net(sk), &tcp_opt))
+ if (!cookie_timestamp_decode(net, &tcp_opt))
goto out;
- ret = NULL;
- req = cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops,
- &tcp_request_sock_ipv6_ops, sk, skb);
- if (!req)
+ return cookie_tcp_reqsk_alloc(&tcp6_request_sock_ops, sk, skb,
+ &tcp_opt, mss, tsoff);
+out:
+ return ERR_PTR(-EINVAL);
+}
+
+struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
+{
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_request_sock *ireq;
+ struct net *net = sock_net(sk);
+ struct request_sock *req;
+ struct dst_entry *dst;
+ struct sock *ret = sk;
+ __u8 rcv_wscale;
+ int full_space;
+
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
+ req = cookie_tcp_check(net, sk, skb);
+ if (IS_ERR(req))
+ goto out;
+ if (!req)
+ goto out_drop;
+
ireq = inet_rsk(req);
- treq = tcp_rsk(req);
- treq->tfo_listener = false;
- req->mss = mss;
- ireq->ir_rmt_port = th->source;
- ireq->ir_num = ntohs(th->dest);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
@@ -197,31 +203,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->pktopts = skb;
}
- ireq->ir_iif = inet_request_bound_dev_if(sk, skb);
/* So that link locals have meaning */
if (!sk->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
- ireq->ir_mark = inet_request_mark(sk, skb);
-
- req->num_retrans = 0;
- ireq->snd_wscale = tcp_opt.snd_wscale;
- ireq->sack_ok = tcp_opt.sack_ok;
- ireq->wscale_ok = tcp_opt.wscale_ok;
- ireq->tstamp_ok = tcp_opt.saw_tstamp;
- req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = 0;
- treq->rcv_isn = ntohl(th->seq) - 1;
- treq->snt_isn = cookie;
- treq->ts_off = 0;
- treq->txhash = net_tx_rndhash();
-
- l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
- tcp_ao_syncookie(sk, skb, treq, AF_INET6, l3index);
-
- if (IS_ENABLED(CONFIG_SMC))
- ireq->smc_ok = 0;
+ tcp_ao_syncookie(sk, skb, req, AF_INET6);
/*
* We need to lookup the dst_entry to get the correct window size.
@@ -243,7 +230,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
fl6.flowi6_uid = sk->sk_uid;
security_req_classify_flow(req, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
if (IS_ERR(dst))
goto out_free;
}
@@ -261,12 +248,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
dst_metric(dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
- ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst);
+ ireq->ecn_ok &= cookie_ecn_ok(net, dst);
- ret = tcp_get_cookie_sock(sk, skb, req, dst, tsoff);
+ ret = tcp_get_cookie_sock(sk, skb, req, dst);
out:
return ret;
out_free:
reqsk_free(req);
+out_drop:
return NULL;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8c6623496dd7..57b25b1fc9d9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1699,7 +1699,7 @@ ipv6_pktoptions:
if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
- np->mcast_oif = tcp_v6_iif(opt_skb);
+ WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb));
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
WRITE_ONCE(np->mcast_hops,
ipv6_hdr(opt_skb)->hop_limit);
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 00e8d8b1c9a7..dc4ea9b11794 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -302,4 +302,5 @@ static void __exit tunnel6_fini(void)
module_init(tunnel6_init);
module_exit(tunnel6_fini);
+MODULE_DESCRIPTION("IP-in-IPv6 tunnel driver");
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 622b10a549f7..3f2249b4cd5f 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1135,7 +1135,7 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
udp_flush_pending_frames(sk);
else if (up->pending) {
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
ip6_flush_pending_frames(sk);
}
}
@@ -1313,7 +1313,7 @@ static int udp_v6_push_pending_frames(struct sock *sk)
&inet_sk(sk)->cork.base);
out:
up->len = 0;
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
return err;
}
@@ -1370,7 +1370,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
default:
return -EINVAL;
}
- } else if (!up->pending) {
+ } else if (!READ_ONCE(up->pending)) {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = &sk->sk_v6_daddr;
@@ -1401,8 +1401,8 @@ do_udp_sendmsg:
return -EMSGSIZE;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
- if (up->pending) {
- if (up->pending == AF_INET)
+ if (READ_ONCE(up->pending)) {
+ if (READ_ONCE(up->pending) == AF_INET)
return udp_sendmsg(sk, msg, len);
/*
* There are pending frames.
@@ -1541,10 +1541,10 @@ do_udp_sendmsg:
connected = false;
if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr)) {
- fl6->flowi6_oif = np->mcast_oif;
+ fl6->flowi6_oif = READ_ONCE(np->mcast_oif);
connected = false;
} else if (!fl6->flowi6_oif)
- fl6->flowi6_oif = np->ucast_oif;
+ fl6->flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
@@ -1593,7 +1593,7 @@ back_from_confirm:
goto out;
}
- up->pending = AF_INET6;
+ WRITE_ONCE(up->pending, AF_INET6);
do_append_data:
if (ipc6.dontfrag < 0)
@@ -1607,7 +1607,7 @@ do_append_data:
else if (!corkreq)
err = udp_v6_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
- up->pending = 0;
+ WRITE_ONCE(up->pending, 0);
if (err > 0)
err = inet6_test_bit(RECVERR6, sk) ? net_xmit_errno(err) : 0;
@@ -1648,7 +1648,7 @@ static void udpv6_splice_eof(struct socket *sock)
struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- if (!up->pending || udp_test_bit(CORK, sk))
+ if (!READ_ONCE(up->pending) || udp_test_bit(CORK, sk))
return;
lock_sock(sk);
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 1323f2f6928e..f6cb94f82cc3 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -401,5 +401,6 @@ static void __exit xfrm6_tunnel_fini(void)
module_init(xfrm6_tunnel_init);
module_exit(xfrm6_tunnel_fini);
+MODULE_DESCRIPTION("IPv6 XFRM tunnel driver");
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_IPV6);
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 0ed6e34d6edd..b0b3e9c5af44 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -67,7 +67,7 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv)
return 0;
}
-struct bus_type iucv_bus = {
+const struct bus_type iucv_bus = {
.name = "iucv",
.match = iucv_bus_match,
};
@@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID";
static LIST_HEAD(iucv_handler_list);
/*
- * iucv_path_table: an array of iucv_path structures.
+ * iucv_path_table: array of pointers to iucv_path structures.
*/
static struct iucv_path **iucv_path_table;
static unsigned long iucv_max_pathid;
@@ -544,7 +544,7 @@ static int iucv_enable(void)
cpus_read_lock();
rc = -ENOMEM;
- alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
+ alloc_size = iucv_max_pathid * sizeof(*iucv_path_table);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
if (!iucv_path_table)
goto out;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 65d1f6755f98..1184d40167b8 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -634,7 +634,7 @@ retry:
msize = 0;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
- msize += skb_shinfo(skb)->frags[i].bv_len;
+ msize += skb_frag_size(&skb_shinfo(skb)->frags[i]);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d68d01804dc7..f79fb99271ed 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3924,5 +3924,6 @@ out_unregister_key_proto:
module_init(ipsec_pfkey_init);
module_exit(ipsec_pfkey_exit);
+MODULE_DESCRIPTION("PF_KEY socket helpers");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_KEY);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index bb373e249237..7bf14cf9ffaa 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -599,9 +599,9 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
final_p = fl6_update_dst(&fl6, opt, &final);
if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
- fl6.flowi6_oif = np->mcast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->mcast_oif);
else if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->ucast_oif;
+ fl6.flowi6_oif = READ_ONCE(np->ucast_oif);
security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
@@ -627,7 +627,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
- ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
+ ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 9b06c380866b..fde1140d899e 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock)
}
netdev_put(llc->dev, &llc->dev_tracker);
sock_put(sk);
+ sock_orphan(sk);
+ sock->sk = NULL;
llc_sk_free(sk);
out:
return 0;
@@ -928,14 +930,15 @@ copy_uaddr:
*/
static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
+ DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
+ int rc = -EINVAL, copied = 0, hdrlen, hh_len;
struct sk_buff *skb = NULL;
+ struct net_device *dev;
size_t size = 0;
- int rc = -EINVAL, copied = 0, hdrlen;
dprintk("%s: sending from %02X to %02X\n", __func__,
llc->laddr.lsap, llc->daddr.lsap);
@@ -955,22 +958,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (rc)
goto out;
}
- hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
+ dev = llc->dev;
+ hh_len = LL_RESERVED_SPACE(dev);
+ hdrlen = llc_ui_header_len(sk, addr);
size = hdrlen + len;
- if (size > llc->dev->mtu)
- size = llc->dev->mtu;
+ size = min_t(size_t, size, READ_ONCE(dev->mtu));
copied = size - hdrlen;
rc = -EINVAL;
if (copied < 0)
goto out;
release_sock(sk);
- skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+ skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc);
lock_sock(sk);
if (!skb)
goto out;
- skb->dev = llc->dev;
+ if (sock_flag(sk, SOCK_ZAPPED) ||
+ llc->dev != dev ||
+ hdrlen != llc_ui_header_len(sk, addr) ||
+ hh_len != LL_RESERVED_SPACE(dev) ||
+ size > READ_ONCE(dev->mtu))
+ goto out;
+ skb->dev = dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
- skb_reserve(skb, hdrlen);
+ skb_reserve(skb, hh_len + hdrlen);
rc = memcpy_from_msg(skb_put(skb, copied), msg, copied);
if (rc)
goto out;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 6e387aadffce..4f16d9c88350 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = {
.func = llc_rcv,
};
-static struct packet_type llc_tr_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_TR_802_2),
- .func = llc_rcv,
-};
-
static int __init llc_init(void)
{
dev_add_pack(&llc_packet_type);
- dev_add_pack(&llc_tr_packet_type);
return 0;
}
static void __exit llc_exit(void)
{
dev_remove_pack(&llc_packet_type);
- dev_remove_pack(&llc_tr_packet_type);
}
module_init(llc_init);
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index cb0291decf2e..13438cc0a6b1 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -62,7 +62,6 @@ config MAC80211_KUNIT_TEST
depends on KUNIT
depends on MAC80211
default KUNIT_ALL_TESTS
- depends on !KERNEL_6_2
help
Enable this option to test mac80211 internals with kunit.
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index c9eb52768133..4406b4f8f3b9 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -67,4 +67,6 @@ mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y)
obj-y += tests/
+mac80211-y += wbrf.o
+
ccflags-y += -DDEBUG
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index eb1d3ef84353..327682995c92 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -987,7 +987,8 @@ static int
ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
struct cfg80211_unsol_bcast_probe_resp *params,
struct ieee80211_link_data *link,
- struct ieee80211_bss_conf *link_conf)
+ struct ieee80211_bss_conf *link_conf,
+ u64 *changed)
{
struct unsol_bcast_probe_resp_data *new, *old = NULL;
@@ -1011,7 +1012,8 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
}
- return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ return 0;
}
static int ieee80211_set_ftm_responder_params(
@@ -1270,7 +1272,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
return -EALREADY;
if (params->smps_mode != NL80211_SMPS_OFF)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
link->smps_mode = IEEE80211_SMPS_OFF;
@@ -1450,10 +1452,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
goto error;
- changed |= err;
err = drv_start_ap(sdata->local, sdata, link_conf);
if (err) {
@@ -1525,10 +1526,9 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
return err;
- changed |= err;
if (beacon->he_bss_color_valid &&
beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
@@ -1869,6 +1869,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
sband->band);
}
+ ieee80211_sta_set_rx_nss(link_sta);
+
return ret;
}
@@ -2556,7 +2558,7 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
* devices that report signal in dBm.
*/
if (!ieee80211_hw_check(&sdata->local->hw, SIGNAL_DBM))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
conf->rssi_threshold = nconf->rssi_threshold;
}
if (_chg_mesh_attr(NL80211_MESHCONF_HT_OPMODE, mask)) {
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 1d928f29ad6f..ef4c2cebc080 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -507,11 +507,16 @@ static void _ieee80211_change_chanctx(struct ieee80211_local *local,
WARN_ON(!cfg80211_chandef_compatible(&ctx->conf.def, chandef));
+ ieee80211_remove_wbrf(local, &ctx->conf.def);
+
ctx->conf.def = *chandef;
/* check if min chanctx also changed */
changed = IEEE80211_CHANCTX_CHANGE_WIDTH |
_ieee80211_recalc_chanctx_min_def(local, ctx, rsvd_for);
+
+ ieee80211_add_wbrf(local, &ctx->conf.def);
+
drv_change_chanctx(local, ctx, changed);
if (!local->use_chanctx) {
@@ -667,6 +672,8 @@ static int ieee80211_add_chanctx(struct ieee80211_local *local,
lockdep_assert_wiphy(local->hw.wiphy);
+ ieee80211_add_wbrf(local, &ctx->conf.def);
+
if (!local->use_chanctx)
local->hw.conf.radar_enabled = ctx->conf.radar_enabled;
@@ -746,6 +753,8 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local,
}
ieee80211_recalc_idle(local);
+
+ ieee80211_remove_wbrf(local, &ctx->conf.def);
}
static void ieee80211_free_chanctx(struct ieee80211_local *local,
@@ -858,7 +867,7 @@ static int ieee80211_assign_link_chanctx(struct ieee80211_link_data *link,
int ret = 0;
if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
conf = rcu_dereference_protected(link->conf->chanctx_conf,
lockdep_is_held(&local->hw.wiphy->mtx));
@@ -1106,7 +1115,7 @@ int ieee80211_link_reserve_chanctx(struct ieee80211_link_data *link,
curr_ctx = ieee80211_link_get_chanctx(link);
if (curr_ctx && local->use_chanctx && !local->ops->switch_vif_chanctx)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
new_ctx = ieee80211_find_reservation_chanctx(local, chandef, mode);
if (!new_ctx) {
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b575ae90e57f..74be49191e70 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -497,6 +497,7 @@ static const char *hw_flag_names[] = {
FLAG(SUPPORTS_CONC_MON_RX_DECAP),
FLAG(DETECTS_COLOR_COLLISION),
FLAG(MLO_MCAST_MULTI_LINK_TX),
+ FLAG(DISALLOW_PUNCTURING),
#undef FLAG
};
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index dce5606ed66d..68596ef78b15 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -997,8 +997,8 @@ static void add_link_files(struct ieee80211_link_data *link,
}
}
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif)
+static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
+ bool mld_vif)
{
char buf[10+IFNAMSIZ];
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index b226b1aae88a..a02ec0a413f6 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -11,8 +11,6 @@
#include "ieee80211_i.h"
#ifdef CONFIG_MAC80211_DEBUGFS
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif);
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata,
@@ -24,9 +22,6 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link);
#else
-static inline void ieee80211_debugfs_add_netdev(
- struct ieee80211_sub_if_data *sdata, bool mld_vif)
-{}
static inline void ieee80211_debugfs_remove_netdev(
struct ieee80211_sub_if_data *sdata)
{}
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 5bf507ebb096..1e9389c49a57 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -16,7 +16,7 @@
#include "sta_info.h"
#include "driver-ops.h"
-/* sta attributtes */
+/* sta attributes */
#define STA_READ(name, field, format_string) \
static ssize_t sta_ ##name## _read(struct file *file, \
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index f690c385a345..eb482fb8c3af 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -805,7 +805,7 @@ drv_cancel_remain_on_channel(struct ieee80211_local *local,
static inline int drv_set_ringparam(struct ieee80211_local *local,
u32 tx, u32 rx)
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
might_sleep();
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1666,6 +1666,26 @@ static inline int drv_net_setup_tc(struct ieee80211_local *local,
return ret;
}
+static inline bool drv_can_activate_links(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 active_links)
+{
+ bool ret = true;
+
+ lockdep_assert_wiphy(local->hw.wiphy);
+
+ if (!check_sdata_in_driver(sdata))
+ return false;
+
+ trace_drv_can_activate_links(local, sdata, active_links);
+ if (local->ops->can_activate_links)
+ ret = local->ops->can_activate_links(&local->hw, &sdata->vif,
+ active_links);
+ trace_drv_return_bool(local, ret);
+
+ return ret;
+}
+
int drv_change_vif_links(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
u16 old_links, u16 new_links,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 8b1e02f2f9ae..8f2b445a5ec3 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -485,7 +485,7 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
ifibss->ssid_len, IEEE80211_BSS_TYPE_IBSS,
IEEE80211_PRIVACY(ifibss->privacy));
- if (WARN_ON(!cbss))
+ if (unlikely(!cbss))
return -EINVAL;
rcu_read_lock();
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 84df104f272b..0b2b53550bd9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -92,11 +92,14 @@ enum ieee80211_status_data {
IEEE80211_STATUS_SUBDATA_MASK = 0xff0,
};
-/*
- * Keep a station's queues on the active list for deficit accounting purposes
- * if it was active or queued during the last 100ms
- */
-#define AIRTIME_ACTIVE_DURATION (HZ / 10)
+static inline bool
+ieee80211_sta_keep_active(struct sta_info *sta, u8 ac)
+{
+ /* Keep a station's queues on the active list for deficit accounting
+ * purposes if it was active or queued during the last 100ms.
+ */
+ return time_before_eq(jiffies, sta->airtime[ac].last_active + HZ / 10);
+}
struct ieee80211_bss {
u32 device_ts_beacon, device_ts_presp;
@@ -436,6 +439,7 @@ struct ieee80211_mgd_assoc_data {
bool need_beacon;
bool synced;
bool timeout_started;
+ bool comeback; /* whether the AP has requested association comeback */
bool s1g;
unsigned int assoc_link_id;
@@ -1559,6 +1563,8 @@ struct ieee80211_local {
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
+
+ bool wbrf_supported;
};
static inline struct ieee80211_sub_if_data *
@@ -1770,10 +1776,7 @@ static inline bool txq_has_queue(struct ieee80211_txq *txq)
static inline bool
ieee80211_have_rx_timestamp(struct ieee80211_rx_status *status)
{
- WARN_ON_ONCE(status->flag & RX_FLAG_MACTIME_START &&
- status->flag & RX_FLAG_MACTIME_END);
- return !!(status->flag & (RX_FLAG_MACTIME_START | RX_FLAG_MACTIME_END |
- RX_FLAG_MACTIME_PLCP_START));
+ return status->flag & RX_FLAG_MACTIME;
}
void ieee80211_vif_inc_num_mcast(struct ieee80211_sub_if_data *sdata);
@@ -2600,4 +2603,19 @@ ieee80211_eht_cap_ie_to_sta_eht_cap(struct ieee80211_sub_if_data *sdata,
const struct ieee80211_eht_cap_elem *eht_cap_ie_elem,
u8 eht_cap_len,
struct link_sta_info *link_sta);
+
+void ieee80211_check_wbrf_support(struct ieee80211_local *local);
+void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef);
+void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef);
+
+#if IS_ENABLED(CONFIG_MAC80211_KUNIT_TEST)
+#define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym)
+#define VISIBLE_IF_MAC80211_KUNIT
+ieee80211_rx_result
+ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx);
+#else
+#define EXPORT_SYMBOL_IF_MAC80211_KUNIT(sym)
+#define VISIBLE_IF_MAC80211_KUNIT static
+#endif
+
#endif /* IEEE80211_I_H */
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e4e7c0b38cb6..11c4caa4748e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1783,7 +1783,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
/* need to do this after the switch so vif.type is correct */
ieee80211_link_setup(&sdata->deflink);
- ieee80211_debugfs_add_netdev(sdata, false);
+ ieee80211_debugfs_recreate_netdev(sdata, false);
}
static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index bf7bd880d062..d4f86955afa6 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -444,6 +444,9 @@ int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links)
lockdep_assert_wiphy(local->hw.wiphy);
+ if (!drv_can_activate_links(local, sdata, active_links))
+ return -EINVAL;
+
old_active = sdata->vif.active_links;
if (old_active & active_links) {
/*
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 033a5261ac3a..f2ece7793573 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1405,6 +1405,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
debugfs_hw_add(local);
rate_control_add_debugfs(local);
+ ieee80211_check_wbrf_support(local);
+
rtnl_lock();
wiphy_lock(hw->wiphy);
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 775d52561c54..024f48db6b05 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -151,7 +151,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
break;
default:
kfree_skb(skb);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
*pos++ = ie_len;
*pos++ = flags;
diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c
index 8a3f44ce3e04..735edde1bd81 100644
--- a/net/mac80211/mesh_pathtbl.c
+++ b/net/mac80211/mesh_pathtbl.c
@@ -676,10 +676,10 @@ struct mesh_path *mesh_path_add(struct ieee80211_sub_if_data *sdata,
if (ether_addr_equal(dst, sdata->vif.addr))
/* never add ourselves as neighbours */
- return ERR_PTR(-ENOTSUPP);
+ return ERR_PTR(-EOPNOTSUPP);
if (is_multicast_ether_addr(dst))
- return ERR_PTR(-ENOTSUPP);
+ return ERR_PTR(-EOPNOTSUPP);
if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0)
return ERR_PTR(-ENOSPC);
@@ -719,10 +719,10 @@ int mpp_path_add(struct ieee80211_sub_if_data *sdata,
if (ether_addr_equal(dst, sdata->vif.addr))
/* never add ourselves as neighbours */
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (is_multicast_ether_addr(dst))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
new_mpath = mesh_path_new(sdata, dst, GFP_ATOMIC);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c8998cf01b7a..2022a26eb881 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#include <linux/delay.h>
@@ -43,6 +43,9 @@
#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
#define IEEE80211_ASSOC_MAX_TRIES 3
+#define IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS msecs_to_jiffies(100)
+#define IEEE80211_ADV_TTLM_ST_UNDERFLOW 0xff00
+
static int max_nullfunc_tries = 2;
module_param(max_nullfunc_tries, int, 0644);
MODULE_PARM_DESC(max_nullfunc_tries,
@@ -135,6 +138,7 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
u16 bitmap, u64 *changed)
{
struct cfg80211_chan_def *chandef = &link->conf->chandef;
+ struct ieee80211_local *local = link->sdata->local;
u16 extracted;
u64 _changed = 0;
@@ -147,7 +151,9 @@ ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
bitmap);
if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- chandef))
+ chandef) &&
+ !(bitmap && ieee80211_hw_check(&local->hw,
+ DISALLOW_PUNCTURING)))
break;
link->u.mgd.conn_flags |=
ieee80211_chandef_downgrade(chandef);
@@ -595,6 +601,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
return ret;
}
+ cfg80211_schedule_channels_check(&sdata->wdev);
return 0;
}
@@ -1382,7 +1389,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
struct ieee80211_mgmt *mgmt;
u8 *pos, qos_info, *ie_start;
size_t offset, noffset;
- u16 capab = WLAN_CAPABILITY_ESS, link_capab;
+ u16 capab = 0, link_capab;
__le16 listen_int;
struct element *ext_capa = NULL;
enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
@@ -1529,6 +1536,17 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
*pos++ = assoc_data->ssid_len;
memcpy(pos, assoc_data->ssid, assoc_data->ssid_len);
+ /*
+ * This bit is technically reserved, so it shouldn't matter for either
+ * the AP or us, but it also means we shouldn't set it. However, we've
+ * always set it in the past, and apparently some EHT APs check that
+ * we don't set it. To avoid interoperability issues with old APs that
+ * for some reason check it and want it to be set, set the bit for all
+ * pre-EHT connections as we used to do.
+ */
+ if (link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)
+ capab |= WLAN_CAPABILITY_ESS;
+
/* add the elements for the assoc (main) link */
link_capab = capab;
offset = ieee80211_assoc_link_elems(sdata, skb, &link_capab,
@@ -2900,6 +2918,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* other links will be destroyed */
sdata->deflink.u.mgd.bss = NULL;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
netif_carrier_off(sdata->dev);
@@ -5027,9 +5046,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
if (!link)
return 0;
- /* will change later if needed */
- link->smps_mode = IEEE80211_SMPS_OFF;
-
/*
* If this fails (possibly due to channel context sharing
* on incompatible channels, e.g. 80+80 and 160 sharing the
@@ -5367,6 +5383,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
assoc_data->ap_addr, tu, ms);
assoc_data->timeout = jiffies + msecs_to_jiffies(ms);
assoc_data->timeout_started = true;
+ assoc_data->comeback = true;
if (ms > IEEE80211_ASSOC_TIMEOUT)
run_again(sdata, assoc_data->timeout);
goto notify_driver;
@@ -5388,33 +5405,24 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
}
if (ieee80211_vif_is_mld(&sdata->vif)) {
+ struct ieee80211_mle_basic_common_info *common;
+
if (!elems->ml_basic) {
sdata_info(sdata,
- "MLO association with %pM but no multi-link element in response!\n",
+ "MLO association with %pM but no (basic) multi-link element in response!\n",
assoc_data->ap_addr);
goto abandon_assoc;
}
- if (le16_get_bits(elems->ml_basic->control,
- IEEE80211_ML_CONTROL_TYPE) !=
- IEEE80211_ML_CONTROL_TYPE_BASIC) {
+ common = (void *)elems->ml_basic->variable;
+
+ if (memcmp(assoc_data->ap_addr,
+ common->mld_mac_addr, ETH_ALEN)) {
sdata_info(sdata,
- "bad multi-link element (control=0x%x)\n",
- le16_to_cpu(elems->ml_basic->control));
+ "AP MLD MAC address mismatch: got %pM expected %pM\n",
+ common->mld_mac_addr,
+ assoc_data->ap_addr);
goto abandon_assoc;
- } else {
- struct ieee80211_mle_basic_common_info *common;
-
- common = (void *)elems->ml_basic->variable;
-
- if (memcmp(assoc_data->ap_addr,
- common->mld_mac_addr, ETH_ALEN)) {
- sdata_info(sdata,
- "AP MLD MAC address mismatch: got %pM expected %pM\n",
- common->mld_mac_addr,
- assoc_data->ap_addr);
- goto abandon_assoc;
- }
}
}
@@ -5682,6 +5690,7 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link,
const struct ieee80211_eht_operation *eht_oper,
u64 *changed)
{
+ struct ieee80211_local *local = link->sdata->local;
u16 bitmap = 0, extracted;
if ((eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
@@ -5713,6 +5722,9 @@ static bool ieee80211_config_puncturing(struct ieee80211_link_data *link,
return false;
}
+ if (bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING))
+ return false;
+
ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, changed);
return true;
}
@@ -5946,6 +5958,13 @@ ieee80211_parse_adv_t2l(struct ieee80211_sub_if_data *sdata,
pos++;
ttlm_info->switch_time = get_unaligned_le16(pos);
+
+ /* Since ttlm_info->switch_time == 0 means no switch time, bump it
+ * by 1.
+ */
+ if (!ttlm_info->switch_time)
+ ttlm_info->switch_time = 1;
+
pos += 2;
if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) {
@@ -6040,25 +6059,46 @@ static void ieee80211_process_adv_ttlm(struct ieee80211_sub_if_data *sdata,
}
if (ttlm_info.switch_time) {
- u32 st_us, delay = 0;
- u32 ts_l26 = beacon_ts & GENMASK(25, 0);
+ u16 beacon_ts_tu, st_tu, delay;
+ u32 delay_jiffies;
+ u64 mask;
/* The t2l map switch time is indicated with a partial
- * TSF value, convert it to TSF and calc the delay
- * to the start time.
+ * TSF value (bits 10 to 25), get the partial beacon TS
+ * as well, and calc the delay to the start time.
*/
- st_us = ieee80211_tu_to_usec(ttlm_info.switch_time);
- if (st_us > ts_l26)
- delay = st_us - ts_l26;
+ mask = GENMASK_ULL(25, 10);
+ beacon_ts_tu = (beacon_ts & mask) >> 10;
+ st_tu = ttlm_info.switch_time;
+ delay = st_tu - beacon_ts_tu;
+
+ /*
+ * If the switch time is far in the future, then it
+ * could also be the previous switch still being
+ * announced.
+ * We can simply ignore it for now, if it is a future
+ * switch the AP will continue to announce it anyway.
+ */
+ if (delay > IEEE80211_ADV_TTLM_ST_UNDERFLOW)
+ return;
+
+ delay_jiffies = TU_TO_JIFFIES(delay);
+
+ /* Link switching can take time, so schedule it
+ * 100ms before to be ready on time
+ */
+ if (delay_jiffies > IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS)
+ delay_jiffies -=
+ IEEE80211_ADV_TTLM_SAFETY_BUFFER_MS;
else
- continue;
+ delay_jiffies = 0;
sdata->u.mgd.ttlm_info = ttlm_info;
wiphy_delayed_work_cancel(sdata->local->hw.wiphy,
&sdata->u.mgd.ttlm_work);
wiphy_delayed_work_queue(sdata->local->hw.wiphy,
&sdata->u.mgd.ttlm_work,
- usecs_to_jiffies(delay));
+ delay_jiffies);
return;
}
}
@@ -6702,8 +6742,18 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
}
ifmgd->auth_data->timeout_started = true;
} else if (ifmgd->assoc_data &&
+ !ifmgd->assoc_data->comeback &&
(ieee80211_is_assoc_req(fc) ||
ieee80211_is_reassoc_req(fc))) {
+ /*
+ * Update association timeout based on the TX status
+ * for the (Re)Association Request frame. Skip this if
+ * we have already processed a (Re)Association Response
+ * frame that indicated need for association comeback
+ * at a specific time in the future. This could happen
+ * if the TX status information is delayed enough for
+ * the response to be received and processed first.
+ */
if (status_acked) {
ifmgd->assoc_data->timeout =
jiffies + IEEE80211_ASSOC_TIMEOUT_SHORT;
@@ -7044,6 +7094,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
link->u.mgd.p2p_noa_index = -1;
link->u.mgd.conn_flags = 0;
link->conf->bssid = link->u.mgd.bssid;
+ link->smps_mode = IEEE80211_SMPS_OFF;
wiphy_work_init(&link->u.mgd.request_smps_work,
ieee80211_request_smps_mgd_work);
@@ -7257,6 +7308,75 @@ out_err:
return err;
}
+static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_bss_ies *ies,
+ u8 cur_channel, bool ignore_ecsa)
+{
+ const struct element *csa_elem, *ecsa_elem;
+ struct ieee80211_channel_sw_ie *csa = NULL;
+ struct ieee80211_ext_chansw_ie *ecsa = NULL;
+
+ if (!ies)
+ return false;
+
+ csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH,
+ ies->data, ies->len);
+ if (csa_elem && csa_elem->datalen == sizeof(*csa))
+ csa = (void *)csa_elem->data;
+
+ ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ ies->data, ies->len);
+ if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa))
+ ecsa = (void *)ecsa_elem->data;
+
+ if (csa && csa->count == 0)
+ csa = NULL;
+ if (csa && !csa->mode && csa->new_ch_num == cur_channel)
+ csa = NULL;
+
+ if (ecsa && ecsa->count == 0)
+ ecsa = NULL;
+ if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel)
+ ecsa = NULL;
+
+ if (ignore_ecsa && ecsa) {
+ sdata_info(sdata,
+ "Ignoring ECSA in probe response - was considered stuck!\n");
+ return csa;
+ }
+
+ return csa || ecsa;
+}
+
+static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_bss *bss)
+{
+ u8 cur_channel;
+ bool ret;
+
+ cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq);
+
+ rcu_read_lock();
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->beacon_ies),
+ cur_channel, false)) {
+ ret = true;
+ goto out;
+ }
+
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->proberesp_ies),
+ cur_channel, bss->proberesp_ecsa_stuck)) {
+ ret = true;
+ goto out;
+ }
+
+ ret = false;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
/* config hooks */
int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct cfg80211_auth_request *req)
@@ -7265,7 +7385,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_auth_data *auth_data;
struct ieee80211_link_data *link;
- const struct element *csa_elem, *ecsa_elem;
u16 auth_alg;
int err;
bool cont_auth;
@@ -7308,21 +7427,10 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (ifmgd->assoc_data)
return -EBUSY;
- rcu_read_lock();
- csa_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(req->bss,
- WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- rcu_read_unlock();
+ if (ieee80211_mgd_csa_in_process(sdata, req->bss)) {
sdata_info(sdata, "AP is in CSA process, reject auth\n");
return -EINVAL;
}
- rcu_read_unlock();
auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
req->ie_len, GFP_KERNEL);
@@ -7586,7 +7694,8 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
bitmap = get_unaligned_le16(disable_subchannel_bitmap);
if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
- &link->conf->chandef))
+ &link->conf->chandef) &&
+ !(bitmap && ieee80211_hw_check(&local->hw, DISALLOW_PUNCTURING)))
ieee80211_handle_puncturing_bitmap(link,
eht_oper,
bitmap,
@@ -7631,7 +7740,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_assoc_data *assoc_data;
- const struct element *ssid_elem, *csa_elem, *ecsa_elem;
+ const struct element *ssid_elem;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
ieee80211_conn_flags_t conn_flags = 0;
struct ieee80211_link_data *link;
@@ -7654,23 +7763,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss;
- rcu_read_lock();
- ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
- if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
- rcu_read_unlock();
+ if (ieee80211_mgd_csa_in_process(sdata, cbss)) {
+ sdata_info(sdata, "AP is in CSA process, reject assoc\n");
kfree(assoc_data);
return -EINVAL;
}
- csa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- sdata_info(sdata, "AP is in CSA process, reject assoc\n");
+ rcu_read_lock();
+ ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
+ if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
kfree(assoc_data);
return -EINVAL;
@@ -7945,8 +8046,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
-
- if (beacon_ies) {
+ if (!beacon_ies) {
/*
* Wait up to one beacon interval ...
* should this be more if we miss one?
@@ -8027,6 +8127,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index d5ea5f5bcf3a..9d33fd2377c8 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -119,7 +119,8 @@ void rate_control_rate_update(struct ieee80211_local *local,
rcu_read_unlock();
}
- drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+ if (sta->uploaded)
+ drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
}
int ieee80211_rate_control_register(const struct rate_control_ops *ops)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 64352e4e6d00..0bf72928ccfc 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -19,6 +19,7 @@
#include <linux/export.h>
#include <linux/kcov.h>
#include <linux/bitops.h>
+#include <kunit/visibility.h>
#include <net/mac80211.h>
#include <net/ieee80211_radiotap.h>
#include <asm/unaligned.h>
@@ -566,7 +567,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
if (local->hw.radiotap_timestamp.units_pos >= 0) {
u16 accuracy = 0;
- u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
+ u8 flags;
+ u64 ts;
rthdr->it_present |=
cpu_to_le32(BIT(IEEE80211_RADIOTAP_TIMESTAMP));
@@ -575,7 +577,15 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
while ((pos - (u8 *)rthdr) & 7)
pos++;
- put_unaligned_le64(status->device_timestamp, pos);
+ if (status->flag & RX_FLAG_MACTIME_IS_RTAP_TS64) {
+ flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT;
+ ts = status->mactime;
+ } else {
+ flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT;
+ ts = status->device_timestamp;
+ }
+
+ put_unaligned_le64(ts, pos);
pos += sizeof(u64);
if (local->hw.radiotap_timestamp.accuracy >= 0) {
@@ -920,7 +930,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
* Drivers always need to pass packets that are aligned to two-byte boundaries
* to the stack.
*
- * Additionally, should, if possible, align the payload data in a way that
+ * Additionally, they should, if possible, align the payload data in a way that
* guarantees that the contained IP header is aligned to a four-byte
* boundary. In the case of regular frames, this simply means aligning the
* payload to a four-byte boundary (because either the IP header is directly
@@ -936,7 +946,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
* subframe to a length that is a multiple of four.
*
* Padding like Atheros hardware adds which is between the 802.11 header and
- * the payload is not supported, the driver is required to move the 802.11
+ * the payload is not supported; the driver is required to move the 802.11
* header to be directly in front of the payload in that case.
*/
static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx)
@@ -2405,7 +2415,7 @@ static int ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
return 0;
}
-static ieee80211_rx_result
+VISIBLE_IF_MAC80211_KUNIT ieee80211_rx_result
ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
{
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
@@ -2484,6 +2494,7 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
return RX_CONTINUE;
}
+EXPORT_SYMBOL_IF_MAC80211_KUNIT(ieee80211_drop_unencrypted_mgmt);
static ieee80211_rx_result
__ieee80211_data_to_8023(struct ieee80211_rx_data *rx, bool *port_control)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 24fa06105378..f9d5842601fa 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -194,11 +194,32 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
if (scan_sdata && scan_sdata->vif.type == NL80211_IFTYPE_STATION &&
scan_sdata->vif.cfg.assoc &&
ieee80211_have_rx_timestamp(rx_status)) {
- bss_meta.parent_tsf =
- ieee80211_calculate_rx_timestamp(local, rx_status,
- len + FCS_LEN, 24);
- ether_addr_copy(bss_meta.parent_bssid,
- scan_sdata->vif.bss_conf.bssid);
+ struct ieee80211_bss_conf *link_conf = NULL;
+
+ /* for an MLO connection, set the TSF data only in case we have
+ * an indication on which of the links the frame was received
+ */
+ if (ieee80211_vif_is_mld(&scan_sdata->vif)) {
+ if (rx_status->link_valid) {
+ s8 link_id = rx_status->link_id;
+
+ link_conf =
+ rcu_dereference(scan_sdata->vif.link_conf[link_id]);
+ }
+ } else {
+ link_conf = &scan_sdata->vif.bss_conf;
+ }
+
+ if (link_conf) {
+ bss_meta.parent_tsf =
+ ieee80211_calculate_rx_timestamp(local,
+ rx_status,
+ len + FCS_LEN,
+ 24);
+
+ ether_addr_copy(bss_meta.parent_bssid,
+ link_conf->bssid);
+ }
}
rcu_read_unlock();
@@ -216,14 +237,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
}
static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
u32 scan_flags, const u8 *da)
{
if (!sdata)
return false;
- /* accept broadcast for OCE */
- if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
- is_broadcast_ether_addr(da))
+
+ /* accept broadcast on 6 GHz and for OCE */
+ if (is_broadcast_ether_addr(da) &&
+ (channel->band == NL80211_BAND_6GHZ ||
+ scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP))
return true;
+
if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
return true;
return ether_addr_equal(da, sdata->vif.addr);
@@ -272,6 +297,12 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
}
+ channel = ieee80211_get_channel_khz(local->hw.wiphy,
+ ieee80211_rx_status_to_khz(rx_status));
+
+ if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ return;
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -289,19 +320,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
/* ignore ProbeResp to foreign address or non-bcast (OCE)
* unless scanning with randomised address
*/
- if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ if (!ieee80211_scan_accept_presp(sdata1, channel,
+ scan_req_flags,
mgmt->da) &&
- !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ !ieee80211_scan_accept_presp(sdata2, channel,
+ sched_scan_req_flags,
mgmt->da))
return;
}
- channel = ieee80211_get_channel_khz(local->hw.wiphy,
- ieee80211_rx_status_to_khz(rx_status));
-
- if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
- return;
-
bss = ieee80211_bss_info_update(local, rx_status,
mgmt, skb->len,
channel);
@@ -666,6 +693,21 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
if (local->scan_req)
return -EBUSY;
+ /* For an MLO connection, if a link ID was specified, validate that it
+ * is indeed active. If no link ID was specified, select one of the
+ * active links.
+ */
+ if (ieee80211_vif_is_mld(&sdata->vif)) {
+ if (req->tsf_report_link_id >= 0) {
+ if (!(sdata->vif.active_links &
+ BIT(req->tsf_report_link_id)))
+ return -EINVAL;
+ } else {
+ req->tsf_report_link_id =
+ __ffs(sdata->vif.active_links);
+ }
+ }
+
if (!__ieee80211_can_leave_ch(sdata))
return -EBUSY;
@@ -714,6 +756,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
local->hw_scan_req->req.duration = req->duration;
local->hw_scan_req->req.duration_mandatory =
req->duration_mandatory;
+ local->hw_scan_req->req.tsf_report_link_id =
+ req->tsf_report_link_id;
local->hw_scan_band = 0;
local->hw_scan_req->req.n_6ghz_params = req->n_6ghz_params;
@@ -1251,7 +1295,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
iebufsz = local->scan_ies_len + req->ie_len;
if (!local->ops->sched_scan_start)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
for (i = 0; i < NUM_NL80211_BANDS; i++) {
if (local->hw.wiphy->bands[i]) {
@@ -1316,7 +1360,7 @@ int ieee80211_request_sched_scan_stop(struct ieee80211_local *local)
lockdep_assert_wiphy(local->hw.wiphy);
if (!local->ops->sched_scan_stop)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
/* We don't want to restart sched scan anymore. */
RCU_INIT_POINTER(local->sched_scan_req, NULL);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 0ba613dd1cc4..4391d8dd634b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -40,7 +40,7 @@
* either sta_info_insert() or sta_info_insert_rcu(); only in the latter
* case (which acquires an rcu read section but must not be called from
* within one) will the pointer still be valid after the call. Note that
- * the caller may not do much with the STA info before inserting it, in
+ * the caller may not do much with the STA info before inserting it; in
* particular, it may not start any mesh peer link management or add
* encryption keys.
*
@@ -58,7 +58,7 @@
* In order to remove a STA info structure, various sta_info_destroy_*()
* calls are available.
*
- * There is no concept of ownership on a STA entry, each structure is
+ * There is no concept of ownership on a STA entry; each structure is
* owned by the global hash table/list until it is removed. All users of
* the structure need to be RCU protected so that the structure won't be
* freed before they are done using it.
@@ -404,7 +404,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
int i;
for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
- if (!(sta->sta.valid_links & BIT(i)))
+ struct link_sta_info *link_sta;
+
+ link_sta = rcu_access_pointer(sta->link[i]);
+ if (!link_sta)
continue;
sta_remove_link(sta, i, false);
@@ -910,6 +913,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
if (ieee80211_vif_is_mesh(&sdata->vif))
mesh_accept_plinks_update(sdata);
+ ieee80211_check_fast_xmit(sta);
+
return 0;
out_remove:
if (sta->sta.valid_links)
@@ -2268,7 +2273,6 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
struct ieee80211_local *local = sta->sdata->local;
u8 ac = ieee80211_ac_from_tid(tid);
u32 airtime = 0;
- u32 diff;
if (sta->local->airtime_flags & AIRTIME_USE_TX)
airtime += tx_airtime;
@@ -2279,8 +2283,7 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
sta->airtime[ac].tx_airtime += tx_airtime;
sta->airtime[ac].rx_airtime += rx_airtime;
- diff = (u32)jiffies - sta->airtime[ac].last_active;
- if (diff <= AIRTIME_ACTIVE_DURATION)
+ if (ieee80211_sta_keep_active(sta, ac))
sta->airtime[ac].deficit -= airtime;
spin_unlock_bh(&local->active_txq_lock[ac]);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 7acf2223e47a..5ef1554f991f 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -138,7 +138,7 @@ enum ieee80211_agg_stop_reason {
struct airtime_info {
u64 rx_airtime;
u64 tx_airtime;
- u32 last_active;
+ unsigned long last_active;
s32 deficit;
atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
u32 aql_limit_low;
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 05a7dff69fe9..49730b424141 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1001,7 +1001,7 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata,
skb);
break;
default:
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
break;
}
@@ -1071,7 +1071,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
/* any value is ok */
break;
default:
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
break;
}
@@ -1177,7 +1177,7 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev,
smps_mode != IEEE80211_SMPS_OFF) {
tdls_dbg(sdata, "Aborting TDLS setup due to SMPS mode %d\n",
smps_mode);
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
lockdep_assert_wiphy(local->hw.wiphy);
@@ -1289,7 +1289,7 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
int ret;
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
/* make sure we are in managed mode, and associated */
if (sdata->vif.type != NL80211_IFTYPE_STATION ||
@@ -1446,7 +1446,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
lockdep_assert_wiphy(local->hw.wiphy);
if (!(wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS))
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (sdata->vif.type != NL80211_IFTYPE_STATION)
return -EINVAL;
@@ -1459,7 +1459,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
case NL80211_TDLS_SETUP:
case NL80211_TDLS_DISCOVERY_REQ:
/* We don't support in-driver setup/teardown/discovery */
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
/* protect possible bss_conf changes and avoid concurrency in
@@ -1510,7 +1510,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
return ret;
break;
default:
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
if (ether_addr_equal(sdata->u.mgd.tdls_peer, peer)) {
@@ -1673,7 +1673,7 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
if (!test_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH)) {
tdls_dbg(sdata, "TDLS channel switch unsupported by %pM\n",
addr);
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
goto out;
}
@@ -1993,7 +1993,7 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
if (!sta->sta.deflink.ht_cap.ht_supported && elems->sec_chan_offs &&
elems->sec_chan_offs->sec_chan_offs) {
tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n");
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
goto out;
}
diff --git a/net/mac80211/tests/Makefile b/net/mac80211/tests/Makefile
index 4814584f8a14..4fdaf3feaca3 100644
--- a/net/mac80211/tests/Makefile
+++ b/net/mac80211/tests/Makefile
@@ -1,3 +1,3 @@
-mac80211-tests-y += module.o elems.o
+mac80211-tests-y += module.o elems.o mfp.o
obj-$(CONFIG_MAC80211_KUNIT_TEST) += mac80211-tests.o
diff --git a/net/mac80211/tests/mfp.c b/net/mac80211/tests/mfp.c
new file mode 100644
index 000000000000..a8dc1601da60
--- /dev/null
+++ b/net/mac80211/tests/mfp.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for management frame acceptance
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <kunit/test.h>
+#include <kunit/skbuff.h>
+#include "../ieee80211_i.h"
+#include "../sta_info.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+static const struct mfp_test_case {
+ const char *desc;
+ bool sta, mfp, decrypted, unicast, assoc;
+ u8 category;
+ u8 stype;
+ u8 action;
+ ieee80211_rx_result result;
+} accept_mfp_cases[] = {
+ /* regular public action */
+ {
+ .desc = "public action: accept unicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: accept multicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: accept unicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: accept multicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "public action: drop unicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_UNICAST_PUB_ACTION,
+ },
+ {
+ .desc = "public action: accept multicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PUBLIC,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .mfp = true,
+ .result = RX_CONTINUE,
+ },
+ /* protected dual of public action */
+ {
+ .desc = "protected dual: drop unicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop multicast from unknown peer",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop unicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop multicast without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop undecrypted unicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: drop undecrypted multicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .unicast = false,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_DUAL,
+ },
+ {
+ .desc = "protected dual: accept unicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .decrypted = true,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "protected dual: accept multicast with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION,
+ .action = WLAN_PUB_ACTION_DSE_ENABLEMENT,
+ .decrypted = true,
+ .unicast = false,
+ .sta = true,
+ .mfp = true,
+ .result = RX_CONTINUE,
+ },
+ /* deauth/disassoc before keys are set */
+ {
+ .desc = "deauth: accept unicast with MFP but w/o key",
+ .stype = IEEE80211_STYPE_DEAUTH,
+ .sta = true,
+ .mfp = true,
+ .unicast = true,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "disassoc: accept unicast with MFP but w/o key",
+ .stype = IEEE80211_STYPE_DEAUTH,
+ .sta = true,
+ .mfp = true,
+ .unicast = true,
+ .result = RX_CONTINUE,
+ },
+ /* non-public robust action frame ... */
+ {
+ .desc = "BA action: drop unicast before assoc",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .result = RX_DROP_U_UNPROT_ROBUST_ACTION,
+ },
+ {
+ .desc = "BA action: drop unprotected after assoc",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_UCAST_MGMT,
+ },
+ {
+ .desc = "BA action: accept unprotected without MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .assoc = true,
+ .mfp = false,
+ .result = RX_CONTINUE,
+ },
+ {
+ .desc = "BA action: drop unprotected with MFP",
+ .stype = IEEE80211_STYPE_ACTION,
+ .category = WLAN_CATEGORY_BACK,
+ .unicast = true,
+ .sta = true,
+ .mfp = true,
+ .result = RX_DROP_U_UNPROT_UCAST_MGMT,
+ },
+};
+
+KUNIT_ARRAY_PARAM_DESC(accept_mfp, accept_mfp_cases, desc);
+
+static void accept_mfp(struct kunit *test)
+{
+ static struct sta_info sta;
+ const struct mfp_test_case *params = test->param_value;
+ struct ieee80211_rx_data rx = {
+ .sta = params->sta ? &sta : NULL,
+ };
+ struct ieee80211_rx_status *status;
+ struct ieee80211_hdr_3addr hdr = {
+ .frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
+ params->stype),
+ .addr1 = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+ .addr2 = { 0x12, 0x22, 0x33, 0x44, 0x55, 0x66 },
+ /* A3/BSSID doesn't matter here */
+ };
+
+ memset(&sta, 0, sizeof(sta));
+
+ if (!params->sta) {
+ KUNIT_ASSERT_FALSE(test, params->mfp);
+ KUNIT_ASSERT_FALSE(test, params->decrypted);
+ }
+
+ if (params->mfp)
+ set_sta_flag(&sta, WLAN_STA_MFP);
+
+ if (params->assoc)
+ set_bit(WLAN_STA_ASSOC, &sta._flags);
+
+ rx.skb = kunit_zalloc_skb(test, 128, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, rx.skb);
+ status = IEEE80211_SKB_RXCB(rx.skb);
+
+ if (params->decrypted) {
+ status->flag |= RX_FLAG_DECRYPTED;
+ if (params->unicast)
+ hdr.frame_control |=
+ cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+ }
+
+ if (params->unicast)
+ hdr.addr1[0] = 0x02;
+
+ skb_put_data(rx.skb, &hdr, sizeof(hdr));
+
+ switch (params->stype) {
+ case IEEE80211_STYPE_ACTION:
+ skb_put_u8(rx.skb, params->category);
+ skb_put_u8(rx.skb, params->action);
+ break;
+ case IEEE80211_STYPE_DEAUTH:
+ case IEEE80211_STYPE_DISASSOC: {
+ __le16 reason = cpu_to_le16(WLAN_REASON_UNSPECIFIED);
+
+ skb_put_data(rx.skb, &reason, sizeof(reason));
+ }
+ break;
+ }
+
+ KUNIT_EXPECT_EQ(test,
+ (__force u32)ieee80211_drop_unencrypted_mgmt(&rx),
+ (__force u32)params->result);
+}
+
+static struct kunit_case mfp_test_cases[] = {
+ KUNIT_CASE_PARAM(accept_mfp, accept_mfp_gen_params),
+ {}
+};
+
+static struct kunit_suite mfp = {
+ .name = "mac80211-mfp",
+ .test_cases = mfp_test_cases,
+};
+
+kunit_test_suite(mfp);
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 032718d5b298..06835ed4c44f 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -2512,6 +2512,31 @@ TRACE_EVENT(drv_net_setup_tc,
)
);
+TRACE_EVENT(drv_can_activate_links,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u16 active_links),
+
+ TP_ARGS(local, sdata, active_links),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(u16, active_links)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->active_links = active_links;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT " requested active_links:0x%04x\n",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->active_links
+ )
+);
+
TRACE_EVENT(drv_change_vif_links,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index ed4fdf655343..6fbb15b65902 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*
* Transmit and frame generation functions.
*/
@@ -3048,7 +3048,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
sdata->vif.type == NL80211_IFTYPE_STATION)
goto out;
- if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded)
goto out;
if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -3100,10 +3100,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* DA SA BSSID */
build.da_offs = offsetof(struct ieee80211_hdr, addr1);
build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ rcu_read_lock();
link = rcu_dereference(sdata->link[tdls_link_id]);
- if (WARN_ON_ONCE(!link))
- break;
- memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ if (!WARN_ON_ONCE(!link))
+ memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ rcu_read_unlock();
build.hdr_len = 24;
break;
}
@@ -3926,6 +3927,7 @@ begin:
goto begin;
skb = __skb_dequeue(&tx.skbs);
+ info = IEEE80211_SKB_CB(skb);
if (!skb_queue_empty(&tx.skbs)) {
spin_lock_bh(&fq->lock);
@@ -3970,7 +3972,7 @@ begin:
}
encap_out:
- IEEE80211_SKB_CB(skb)->control.vif = vif;
+ info->control.vif = vif;
if (tx.sta &&
wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
@@ -4013,14 +4015,13 @@ ieee80211_txq_set_active(struct txq_info *txqi)
return;
sta = container_of(txqi->txq.sta, struct sta_info, sta);
- sta->airtime[txqi->txq.ac].last_active = (u32)jiffies;
+ sta->airtime[txqi->txq.ac].last_active = jiffies;
}
static bool
ieee80211_txq_keep_active(struct txq_info *txqi)
{
struct sta_info *sta;
- u32 diff;
if (!txqi->txq.sta)
return false;
@@ -4029,9 +4030,7 @@ ieee80211_txq_keep_active(struct txq_info *txqi)
if (ieee80211_sta_deficit(sta, txqi->txq.ac) >= 0)
return false;
- diff = (u32)jiffies - sta->airtime[txqi->txq.ac].last_active;
-
- return diff <= AIRTIME_ACTIVE_DURATION;
+ return ieee80211_sta_keep_active(sta, txqi->txq.ac);
}
struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ed680120d5a7..643c54855be6 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -4176,6 +4176,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
unsigned int mpdu_offset)
{
u64 ts = status->mactime;
+ bool mactime_plcp_start;
struct rate_info ri;
u16 rate;
u8 n_ltf;
@@ -4183,6 +4184,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
if (WARN_ON(!ieee80211_have_rx_timestamp(status)))
return 0;
+ mactime_plcp_start = (status->flag & RX_FLAG_MACTIME) ==
+ RX_FLAG_MACTIME_PLCP_START;
+
memset(&ri, 0, sizeof(ri));
ri.bw = status->bw;
@@ -4197,7 +4201,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
/* TODO/FIXME: is this right? handle other PPDUs */
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
ts += 36;
}
@@ -4214,7 +4218,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
* See P802.11ax_D6.0, section 27.3.4 for
* VHT PPDU format.
*/
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
ts += 36;
@@ -4238,7 +4242,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
* See P802.11REVmd_D3.0, section 19.3.2 for
* HT PPDU format.
*/
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
if (status->enc_flags & RX_ENC_FLAG_HT_GF)
ts += 24;
@@ -4266,7 +4270,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
* See P802.11REVmd_D3.0, section 21.3.2 for
* VHT PPDU format.
*/
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
mpdu_offset += 2;
ts += 36;
@@ -4288,7 +4292,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
sband = local->hw.wiphy->bands[status->band];
ri.legacy = sband->bitrates[status->rate_idx].bitrate;
- if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ if (mactime_plcp_start) {
if (status->band == NL80211_BAND_5GHZ) {
ts += 20;
mpdu_offset += 2;
@@ -4310,7 +4314,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
return 0;
/* rewind from end of MPDU */
- if (status->flag & RX_FLAG_MACTIME_END)
+ if ((status->flag & RX_FLAG_MACTIME) == RX_FLAG_MACTIME_END)
ts -= mpdu_len * 8 * 10 / rate;
ts += mpdu_offset * 8 * 10 / rate;
diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c
new file mode 100644
index 000000000000..3a8612309137
--- /dev/null
+++ b/net/mac80211/wbrf.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wifi Band Exclusion Interface for WLAN
+ * Copyright (C) 2023 Advanced Micro Devices
+ *
+ */
+
+#include <linux/acpi_amd_wbrf.h>
+#include <linux/units.h>
+#include <net/cfg80211.h>
+#include "ieee80211_i.h"
+
+void ieee80211_check_wbrf_support(struct ieee80211_local *local)
+{
+ struct wiphy *wiphy = local->hw.wiphy;
+ struct device *dev;
+
+ if (!wiphy)
+ return;
+
+ dev = wiphy->dev.parent;
+ if (!dev)
+ return;
+
+ local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev);
+}
+
+static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end)
+{
+ bandwidth *= KHZ_PER_MHZ;
+ center_freq *= KHZ_PER_MHZ;
+
+ *start = center_freq - bandwidth / 2;
+ *end = center_freq + bandwidth / 2;
+
+ /* Frequency in Hz is expected */
+ *start = *start * HZ_PER_KHZ;
+ *end = *end * HZ_PER_KHZ;
+}
+
+static void get_ranges_from_chandef(struct cfg80211_chan_def *chandef,
+ struct wbrf_ranges_in_out *ranges_in)
+{
+ u64 start_freq1, end_freq1;
+ u64 start_freq2, end_freq2;
+ int bandwidth;
+
+ bandwidth = nl80211_chan_width_to_mhz(chandef->width);
+
+ get_chan_freq_boundary(chandef->center_freq1, bandwidth, &start_freq1, &end_freq1);
+
+ ranges_in->band_list[0].start = start_freq1;
+ ranges_in->band_list[0].end = end_freq1;
+ ranges_in->num_of_ranges = 1;
+
+ if (chandef->width == NL80211_CHAN_WIDTH_80P80) {
+ get_chan_freq_boundary(chandef->center_freq2, bandwidth, &start_freq2, &end_freq2);
+
+ ranges_in->band_list[1].start = start_freq2;
+ ranges_in->band_list[1].end = end_freq2;
+ ranges_in->num_of_ranges++;
+ }
+}
+
+void ieee80211_add_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef)
+{
+ struct wbrf_ranges_in_out ranges_in = {0};
+ struct device *dev;
+
+ if (!local->wbrf_supported)
+ return;
+
+ dev = local->hw.wiphy->dev.parent;
+
+ get_ranges_from_chandef(chandef, &ranges_in);
+
+ acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_ADD, &ranges_in);
+}
+
+void ieee80211_remove_wbrf(struct ieee80211_local *local, struct cfg80211_chan_def *chandef)
+{
+ struct wbrf_ranges_in_out ranges_in = {0};
+ struct device *dev;
+
+ if (!local->wbrf_supported)
+ return;
+
+ dev = local->hw.wiphy->dev.parent;
+
+ get_ranges_from_chandef(chandef, &ranges_in);
+
+ acpi_amd_wbrf_add_remove(dev, WBRF_RECORD_REMOVE, &ranges_in);
+}
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index 5c3cb019f751..ef7f23af043f 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -315,6 +315,179 @@ static int mac802154_stop_beacons(struct wpan_phy *wpan_phy,
return mac802154_stop_beacons_locked(local, sdata);
}
+static int mac802154_associate(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *coord)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ u64 ceaddr = swab64((__force u64)coord->extended_addr);
+ struct ieee802154_sub_if_data *sdata;
+ struct ieee802154_pan_device *parent;
+ __le16 short_addr;
+ int ret;
+
+ ASSERT_RTNL();
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ if (wpan_dev->parent) {
+ dev_err(&sdata->dev->dev,
+ "Device %8phC is already associated\n", &ceaddr);
+ return -EPERM;
+ }
+
+ if (coord->mode == IEEE802154_SHORT_ADDRESSING)
+ return -EINVAL;
+
+ parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+ if (!parent)
+ return -ENOMEM;
+
+ parent->pan_id = coord->pan_id;
+ parent->mode = coord->mode;
+ parent->extended_addr = coord->extended_addr;
+ parent->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST);
+
+ /* Set the PAN ID hardware address filter beforehand to avoid dropping
+ * the association response with a destination PAN ID field set to the
+ * "new" PAN ID.
+ */
+ if (local->hw.flags & IEEE802154_HW_AFILT) {
+ ret = drv_set_pan_id(local, coord->pan_id);
+ if (ret < 0)
+ goto free_parent;
+ }
+
+ ret = mac802154_perform_association(sdata, parent, &short_addr);
+ if (ret)
+ goto reset_panid;
+
+ if (local->hw.flags & IEEE802154_HW_AFILT) {
+ ret = drv_set_short_addr(local, short_addr);
+ if (ret < 0)
+ goto reset_panid;
+ }
+
+ wpan_dev->pan_id = coord->pan_id;
+ wpan_dev->short_addr = short_addr;
+ wpan_dev->parent = parent;
+
+ return 0;
+
+reset_panid:
+ if (local->hw.flags & IEEE802154_HW_AFILT)
+ drv_set_pan_id(local, cpu_to_le16(IEEE802154_PAN_ID_BROADCAST));
+
+free_parent:
+ kfree(parent);
+ return ret;
+}
+
+static int mac802154_disassociate_from_parent(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ struct ieee802154_pan_device *child, *tmp;
+ struct ieee802154_sub_if_data *sdata;
+ unsigned int max_assoc;
+ u64 eaddr;
+ int ret;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ /* Start by disassociating all the children and preventing new ones to
+ * attempt associations.
+ */
+ max_assoc = cfg802154_set_max_associations(wpan_dev, 0);
+ list_for_each_entry_safe(child, tmp, &wpan_dev->children, node) {
+ ret = mac802154_send_disassociation_notif(sdata, child,
+ IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE);
+ if (ret) {
+ eaddr = swab64((__force u64)child->extended_addr);
+ dev_err(&sdata->dev->dev,
+ "Disassociation with %8phC may have failed (%d)\n",
+ &eaddr, ret);
+ }
+
+ list_del(&child->node);
+ }
+
+ ret = mac802154_send_disassociation_notif(sdata, wpan_dev->parent,
+ IEEE802154_DEVICE_WISHES_TO_LEAVE);
+ if (ret) {
+ eaddr = swab64((__force u64)wpan_dev->parent->extended_addr);
+ dev_err(&sdata->dev->dev,
+ "Disassociation from %8phC may have failed (%d)\n",
+ &eaddr, ret);
+ }
+
+ ret = 0;
+
+ kfree(wpan_dev->parent);
+ wpan_dev->parent = NULL;
+ wpan_dev->pan_id = cpu_to_le16(IEEE802154_PAN_ID_BROADCAST);
+ wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST);
+
+ if (local->hw.flags & IEEE802154_HW_AFILT) {
+ ret = drv_set_pan_id(local, wpan_dev->pan_id);
+ if (ret < 0)
+ goto reset_mac_assoc;
+
+ ret = drv_set_short_addr(local, wpan_dev->short_addr);
+ if (ret < 0)
+ goto reset_mac_assoc;
+ }
+
+reset_mac_assoc:
+ cfg802154_set_max_associations(wpan_dev, max_assoc);
+
+ return ret;
+}
+
+static int mac802154_disassociate_child(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_pan_device *child)
+{
+ struct ieee802154_sub_if_data *sdata;
+ int ret;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ ret = mac802154_send_disassociation_notif(sdata, child,
+ IEEE802154_COORD_WISHES_DEVICE_TO_LEAVE);
+ if (ret)
+ return ret;
+
+ list_del(&child->node);
+ wpan_dev->nchildren--;
+ kfree(child);
+
+ return 0;
+}
+
+static int mac802154_disassociate(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_addr *target)
+{
+ u64 teaddr = swab64((__force u64)target->extended_addr);
+ struct ieee802154_pan_device *pan_device;
+
+ ASSERT_RTNL();
+
+ if (cfg802154_device_is_parent(wpan_dev, target))
+ return mac802154_disassociate_from_parent(wpan_phy, wpan_dev);
+
+ pan_device = cfg802154_device_is_child(wpan_dev, target);
+ if (pan_device)
+ return mac802154_disassociate_child(wpan_phy, wpan_dev,
+ pan_device);
+
+ dev_err(&wpan_dev->netdev->dev,
+ "Device %8phC is not associated with us\n", &teaddr);
+
+ return -EINVAL;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static void
ieee802154_get_llsec_table(struct wpan_phy *wpan_phy,
@@ -526,6 +699,8 @@ const struct cfg802154_ops mac802154_config_ops = {
.abort_scan = mac802154_abort_scan,
.send_beacons = mac802154_send_beacons,
.stop_beacons = mac802154_stop_beacons,
+ .associate = mac802154_associate,
+ .disassociate = mac802154_disassociate,
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
.get_llsec_table = ieee802154_get_llsec_table,
.lock_llsec_table = ieee802154_lock_llsec_table,
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index c347ec9ff8c9..08dd521a51a5 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -24,6 +24,7 @@
enum ieee802154_ongoing {
IEEE802154_IS_SCANNING = BIT(0),
IEEE802154_IS_BEACONING = BIT(1),
+ IEEE802154_IS_ASSOCIATING = BIT(2),
};
/* mac802154 device private data */
@@ -74,6 +75,13 @@ struct ieee802154_local {
struct list_head rx_mac_cmd_list;
struct work_struct rx_mac_cmd_work;
+ /* Association */
+ struct ieee802154_pan_device *assoc_dev;
+ struct completion assoc_done;
+ __le16 assoc_addr;
+ u8 assoc_status;
+ struct work_struct assoc_work;
+
bool started;
bool suspended;
unsigned long ongoing;
@@ -296,6 +304,25 @@ static inline bool mac802154_is_beaconing(struct ieee802154_local *local)
void mac802154_rx_mac_cmd_worker(struct work_struct *work);
+int mac802154_perform_association(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *coord,
+ __le16 *short_addr);
+int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
+
+static inline bool mac802154_is_associating(struct ieee802154_local *local)
+{
+ return test_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing);
+}
+
+int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *target,
+ u8 reason);
+int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
+int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
+
/* interface handling */
int ieee802154_iface_init(void);
void ieee802154_iface_exit(void);
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 357ece67432b..9ab7396668d2 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -103,6 +103,8 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker);
INIT_WORK(&local->rx_mac_cmd_work, mac802154_rx_mac_cmd_worker);
+ init_completion(&local->assoc_done);
+
/* init supported flags with 802.15.4 default ranges */
phy->supported.max_minbe = 8;
phy->supported.min_maxbe = 3;
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index e2434b4fe514..e40a988d6c80 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -93,6 +93,31 @@ void mac802154_rx_mac_cmd_worker(struct work_struct *work)
queue_delayed_work(local->mac_wq, &local->beacon_work, 0);
break;
+
+ case IEEE802154_CMD_ASSOCIATION_RESP:
+ dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC RESP\n");
+ if (!mac802154_is_associating(local))
+ break;
+
+ mac802154_process_association_resp(mac_pkt->sdata, mac_pkt->skb);
+ break;
+
+ case IEEE802154_CMD_ASSOCIATION_REQ:
+ dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC REQ\n");
+ if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD)
+ break;
+
+ mac802154_process_association_req(mac_pkt->sdata, mac_pkt->skb);
+ break;
+
+ case IEEE802154_CMD_DISASSOCIATION_NOTIFY:
+ dev_dbg(&mac_pkt->sdata->dev->dev, "processing DISASSOC NOTIF\n");
+ if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD)
+ break;
+
+ mac802154_process_disassociation_notif(mac_pkt->sdata, mac_pkt->skb);
+ break;
+
default:
break;
}
@@ -131,12 +156,15 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
switch (mac_cb(skb)->dest.mode) {
case IEEE802154_ADDR_NONE:
- if (hdr->source.mode != IEEE802154_ADDR_NONE)
- /* FIXME: check if we are PAN coordinator */
- skb->pkt_type = PACKET_OTHERHOST;
- else
+ if (hdr->source.mode == IEEE802154_ADDR_NONE)
/* ACK comes with both addresses empty */
skb->pkt_type = PACKET_HOST;
+ else if (!wpan_dev->parent)
+ /* No dest means PAN coordinator is the recipient */
+ skb->pkt_type = PACKET_HOST;
+ else
+ /* We are not the PAN coordinator, just relaying */
+ skb->pkt_type = PACKET_OTHERHOST;
break;
case IEEE802154_ADDR_LONG:
if (mac_cb(skb)->dest.pan_id != span &&
diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c
index d9658f2c4ae6..1c0eeaa76560 100644
--- a/net/mac802154/scan.c
+++ b/net/mac802154/scan.c
@@ -466,6 +466,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
struct cfg802154_beacon_request *request)
{
struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
ASSERT_RTNL();
@@ -495,8 +496,7 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
local->beacon.mac_pl.superframe_order = request->interval;
local->beacon.mac_pl.final_cap_slot = 0xf;
local->beacon.mac_pl.battery_life_ext = 0;
- /* TODO: Fill this field with the coordinator situation in the network */
- local->beacon.mac_pl.pan_coordinator = 1;
+ local->beacon.mac_pl.pan_coordinator = !wpan_dev->parent;
local->beacon.mac_pl.assoc_permit = 1;
if (request->interval == IEEE802154_ACTIVE_SCAN_DURATION)
@@ -510,3 +510,406 @@ int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
return 0;
}
+
+int mac802154_perform_association(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *coord,
+ __le16 *short_addr)
+{
+ u64 ceaddr = swab64((__force u64)coord->extended_addr);
+ struct ieee802154_association_req_frame frame = {};
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct sk_buff *skb;
+ int ret;
+
+ frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD;
+ frame.mhr.fc.security_enabled = 0;
+ frame.mhr.fc.frame_pending = 0;
+ frame.mhr.fc.ack_request = 1; /* We always expect an ack here */
+ frame.mhr.fc.intra_pan = 0;
+ frame.mhr.fc.dest_addr_mode = (coord->mode == IEEE802154_ADDR_LONG) ?
+ IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING;
+ frame.mhr.fc.version = IEEE802154_2003_STD;
+ frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.source.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.source.pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
+ frame.mhr.source.extended_addr = wpan_dev->extended_addr;
+ frame.mhr.dest.mode = coord->mode;
+ frame.mhr.dest.pan_id = coord->pan_id;
+ if (coord->mode == IEEE802154_ADDR_LONG)
+ frame.mhr.dest.extended_addr = coord->extended_addr;
+ else
+ frame.mhr.dest.short_addr = coord->short_addr;
+ frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF;
+ frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_REQ;
+ frame.assoc_req_pl.device_type = 1;
+ frame.assoc_req_pl.power_source = 1;
+ frame.assoc_req_pl.rx_on_when_idle = 1;
+ frame.assoc_req_pl.alloc_addr = 1;
+
+ skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.assoc_req_pl),
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ skb->dev = sdata->dev;
+
+ ret = ieee802154_mac_cmd_push(skb, &frame, &frame.assoc_req_pl,
+ sizeof(frame.assoc_req_pl));
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ local->assoc_dev = coord;
+ reinit_completion(&local->assoc_done);
+ set_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing);
+
+ ret = ieee802154_mlme_tx_one_locked(local, sdata, skb);
+ if (ret) {
+ if (ret > 0)
+ ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO;
+ dev_warn(&sdata->dev->dev,
+ "No ASSOC REQ ACK received from %8phC\n", &ceaddr);
+ goto clear_assoc;
+ }
+
+ ret = wait_for_completion_killable_timeout(&local->assoc_done, 10 * HZ);
+ if (ret <= 0) {
+ dev_warn(&sdata->dev->dev,
+ "No ASSOC RESP received from %8phC\n", &ceaddr);
+ ret = -ETIMEDOUT;
+ goto clear_assoc;
+ }
+
+ if (local->assoc_status != IEEE802154_ASSOCIATION_SUCCESSFUL) {
+ if (local->assoc_status == IEEE802154_PAN_AT_CAPACITY)
+ ret = -ERANGE;
+ else
+ ret = -EPERM;
+
+ dev_warn(&sdata->dev->dev,
+ "Negative ASSOC RESP received from %8phC: %s\n", &ceaddr,
+ local->assoc_status == IEEE802154_PAN_AT_CAPACITY ?
+ "PAN at capacity" : "access denied");
+ }
+
+ ret = 0;
+ *short_addr = local->assoc_addr;
+
+clear_assoc:
+ clear_bit(IEEE802154_IS_ASSOCIATING, &local->ongoing);
+ local->assoc_dev = NULL;
+
+ return ret;
+}
+
+int mac802154_process_association_resp(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee802154_addr *src = &mac_cb(skb)->source;
+ struct ieee802154_addr *dest = &mac_cb(skb)->dest;
+ u64 deaddr = swab64((__force u64)dest->extended_addr);
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct ieee802154_assoc_resp_pl resp_pl = {};
+
+ if (skb->len != sizeof(resp_pl))
+ return -EINVAL;
+
+ if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING ||
+ dest->mode != IEEE802154_EXTENDED_ADDRESSING))
+ return -EINVAL;
+
+ if (unlikely(dest->extended_addr != wpan_dev->extended_addr ||
+ src->extended_addr != local->assoc_dev->extended_addr))
+ return -ENODEV;
+
+ memcpy(&resp_pl, skb->data, sizeof(resp_pl));
+ local->assoc_addr = resp_pl.short_addr;
+ local->assoc_status = resp_pl.status;
+
+ dev_dbg(&skb->dev->dev,
+ "ASSOC RESP 0x%x received from %8phC, getting short address %04x\n",
+ local->assoc_status, &deaddr, local->assoc_addr);
+
+ complete(&local->assoc_done);
+
+ return 0;
+}
+
+int mac802154_send_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *target,
+ u8 reason)
+{
+ struct ieee802154_disassociation_notif_frame frame = {};
+ u64 teaddr = swab64((__force u64)target->extended_addr);
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct sk_buff *skb;
+ int ret;
+
+ frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD;
+ frame.mhr.fc.security_enabled = 0;
+ frame.mhr.fc.frame_pending = 0;
+ frame.mhr.fc.ack_request = 1;
+ frame.mhr.fc.intra_pan = 1;
+ frame.mhr.fc.dest_addr_mode = (target->mode == IEEE802154_ADDR_LONG) ?
+ IEEE802154_EXTENDED_ADDRESSING : IEEE802154_SHORT_ADDRESSING;
+ frame.mhr.fc.version = IEEE802154_2003_STD;
+ frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.source.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.source.pan_id = wpan_dev->pan_id;
+ frame.mhr.source.extended_addr = wpan_dev->extended_addr;
+ frame.mhr.dest.mode = target->mode;
+ frame.mhr.dest.pan_id = wpan_dev->pan_id;
+ if (target->mode == IEEE802154_ADDR_LONG)
+ frame.mhr.dest.extended_addr = target->extended_addr;
+ else
+ frame.mhr.dest.short_addr = target->short_addr;
+ frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF;
+ frame.mac_pl.cmd_id = IEEE802154_CMD_DISASSOCIATION_NOTIFY;
+ frame.disassoc_pl = reason;
+
+ skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(frame.disassoc_pl),
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ skb->dev = sdata->dev;
+
+ ret = ieee802154_mac_cmd_push(skb, &frame, &frame.disassoc_pl,
+ sizeof(frame.disassoc_pl));
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ ret = ieee802154_mlme_tx_one_locked(local, sdata, skb);
+ if (ret) {
+ dev_warn(&sdata->dev->dev,
+ "No DISASSOC ACK received from %8phC\n", &teaddr);
+ if (ret > 0)
+ ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO;
+ return ret;
+ }
+
+ dev_dbg(&sdata->dev->dev, "DISASSOC ACK received from %8phC\n", &teaddr);
+ return 0;
+}
+
+static int
+mac802154_send_association_resp_locked(struct ieee802154_sub_if_data *sdata,
+ struct ieee802154_pan_device *target,
+ struct ieee802154_assoc_resp_pl *assoc_resp_pl)
+{
+ u64 teaddr = swab64((__force u64)target->extended_addr);
+ struct ieee802154_association_resp_frame frame = {};
+ struct ieee802154_local *local = sdata->local;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct sk_buff *skb;
+ int ret;
+
+ frame.mhr.fc.type = IEEE802154_FC_TYPE_MAC_CMD;
+ frame.mhr.fc.security_enabled = 0;
+ frame.mhr.fc.frame_pending = 0;
+ frame.mhr.fc.ack_request = 1; /* We always expect an ack here */
+ frame.mhr.fc.intra_pan = 1;
+ frame.mhr.fc.dest_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.fc.version = IEEE802154_2003_STD;
+ frame.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ frame.mhr.source.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.source.extended_addr = wpan_dev->extended_addr;
+ frame.mhr.dest.mode = IEEE802154_ADDR_LONG;
+ frame.mhr.dest.pan_id = wpan_dev->pan_id;
+ frame.mhr.dest.extended_addr = target->extended_addr;
+ frame.mhr.seq = atomic_inc_return(&wpan_dev->dsn) & 0xFF;
+ frame.mac_pl.cmd_id = IEEE802154_CMD_ASSOCIATION_RESP;
+
+ skb = alloc_skb(IEEE802154_MAC_CMD_SKB_SZ + sizeof(*assoc_resp_pl),
+ GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ skb->dev = sdata->dev;
+
+ ret = ieee802154_mac_cmd_push(skb, &frame, assoc_resp_pl,
+ sizeof(*assoc_resp_pl));
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ ret = ieee802154_mlme_tx_locked(local, sdata, skb);
+ if (ret) {
+ dev_warn(&sdata->dev->dev,
+ "No ASSOC RESP ACK received from %8phC\n", &teaddr);
+ if (ret > 0)
+ ret = (ret == IEEE802154_NO_ACK) ? -EREMOTEIO : -EIO;
+ return ret;
+ }
+
+ return 0;
+}
+
+int mac802154_process_association_req(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct ieee802154_addr *src = &mac_cb(skb)->source;
+ struct ieee802154_addr *dest = &mac_cb(skb)->dest;
+ struct ieee802154_assoc_resp_pl assoc_resp_pl = {};
+ struct ieee802154_assoc_req_pl assoc_req_pl;
+ struct ieee802154_pan_device *child, *exchild;
+ struct ieee802154_addr tmp = {};
+ u64 ceaddr;
+ int ret;
+
+ if (skb->len != sizeof(assoc_req_pl))
+ return -EINVAL;
+
+ if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING))
+ return -EINVAL;
+
+ if (unlikely(dest->pan_id != wpan_dev->pan_id))
+ return -ENODEV;
+
+ if (dest->mode == IEEE802154_EXTENDED_ADDRESSING &&
+ unlikely(dest->extended_addr != wpan_dev->extended_addr))
+ return -ENODEV;
+ else if (dest->mode == IEEE802154_SHORT_ADDRESSING &&
+ unlikely(dest->short_addr != wpan_dev->short_addr))
+ return -ENODEV;
+
+ if (wpan_dev->parent) {
+ dev_dbg(&sdata->dev->dev,
+ "Ignoring ASSOC REQ, not the PAN coordinator\n");
+ return -ENODEV;
+ }
+
+ mutex_lock(&wpan_dev->association_lock);
+
+ memcpy(&assoc_req_pl, skb->data, sizeof(assoc_req_pl));
+ if (assoc_req_pl.assoc_type) {
+ dev_err(&skb->dev->dev, "Fast associations not supported yet\n");
+ ret = -EOPNOTSUPP;
+ goto unlock;
+ }
+
+ child = kzalloc(sizeof(*child), GFP_KERNEL);
+ if (!child) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ child->extended_addr = src->extended_addr;
+ child->mode = IEEE802154_EXTENDED_ADDRESSING;
+ ceaddr = swab64((__force u64)child->extended_addr);
+
+ if (wpan_dev->nchildren >= wpan_dev->max_associations) {
+ if (!wpan_dev->max_associations)
+ assoc_resp_pl.status = IEEE802154_PAN_ACCESS_DENIED;
+ else
+ assoc_resp_pl.status = IEEE802154_PAN_AT_CAPACITY;
+ assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_BROADCAST);
+ dev_dbg(&sdata->dev->dev,
+ "Refusing ASSOC REQ from child %8phC, %s\n", &ceaddr,
+ assoc_resp_pl.status == IEEE802154_PAN_ACCESS_DENIED ?
+ "access denied" : "too many children");
+ } else {
+ assoc_resp_pl.status = IEEE802154_ASSOCIATION_SUCCESSFUL;
+ if (assoc_req_pl.alloc_addr) {
+ assoc_resp_pl.short_addr = cfg802154_get_free_short_addr(wpan_dev);
+ child->mode = IEEE802154_SHORT_ADDRESSING;
+ } else {
+ assoc_resp_pl.short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC);
+ }
+ child->short_addr = assoc_resp_pl.short_addr;
+ dev_dbg(&sdata->dev->dev,
+ "Accepting ASSOC REQ from child %8phC, providing short address 0x%04x\n",
+ &ceaddr, le16_to_cpu(child->short_addr));
+ }
+
+ ret = mac802154_send_association_resp_locked(sdata, child, &assoc_resp_pl);
+ if (ret || assoc_resp_pl.status != IEEE802154_ASSOCIATION_SUCCESSFUL) {
+ kfree(child);
+ goto unlock;
+ }
+
+ dev_dbg(&sdata->dev->dev,
+ "Successful association with new child %8phC\n", &ceaddr);
+
+ /* Ensure this child is not already associated (might happen due to
+ * retransmissions), in this case drop the ex structure.
+ */
+ tmp.mode = child->mode;
+ tmp.extended_addr = child->extended_addr;
+ exchild = cfg802154_device_is_child(wpan_dev, &tmp);
+ if (exchild) {
+ dev_dbg(&sdata->dev->dev,
+ "Child %8phC was already known\n", &ceaddr);
+ list_del(&exchild->node);
+ }
+
+ list_add(&child->node, &wpan_dev->children);
+ wpan_dev->nchildren++;
+
+unlock:
+ mutex_unlock(&wpan_dev->association_lock);
+ return ret;
+}
+
+int mac802154_process_disassociation_notif(struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee802154_addr *src = &mac_cb(skb)->source;
+ struct ieee802154_addr *dest = &mac_cb(skb)->dest;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct ieee802154_pan_device *child;
+ struct ieee802154_addr target;
+ bool parent;
+ u64 teaddr;
+
+ if (skb->len != sizeof(u8))
+ return -EINVAL;
+
+ if (unlikely(src->mode != IEEE802154_EXTENDED_ADDRESSING))
+ return -EINVAL;
+
+ if (dest->mode == IEEE802154_EXTENDED_ADDRESSING &&
+ unlikely(dest->extended_addr != wpan_dev->extended_addr))
+ return -ENODEV;
+ else if (dest->mode == IEEE802154_SHORT_ADDRESSING &&
+ unlikely(dest->short_addr != wpan_dev->short_addr))
+ return -ENODEV;
+
+ if (dest->pan_id != wpan_dev->pan_id)
+ return -ENODEV;
+
+ target.mode = IEEE802154_EXTENDED_ADDRESSING;
+ target.extended_addr = src->extended_addr;
+ teaddr = swab64((__force u64)target.extended_addr);
+ dev_dbg(&skb->dev->dev, "Processing DISASSOC NOTIF from %8phC\n", &teaddr);
+
+ mutex_lock(&wpan_dev->association_lock);
+ parent = cfg802154_device_is_parent(wpan_dev, &target);
+ if (!parent)
+ child = cfg802154_device_is_child(wpan_dev, &target);
+ if (!parent && !child) {
+ mutex_unlock(&wpan_dev->association_lock);
+ return -EINVAL;
+ }
+
+ if (parent) {
+ kfree(wpan_dev->parent);
+ wpan_dev->parent = NULL;
+ } else {
+ list_del(&child->node);
+ kfree(child);
+ wpan_dev->nchildren--;
+ }
+
+ mutex_unlock(&wpan_dev->association_lock);
+
+ return 0;
+}
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 7a47a58aa54b..ceee44ea09d9 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -663,7 +663,7 @@ struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk,
spin_unlock_irqrestore(&mns->keys_lock, flags);
if (!tagbits) {
- kfree(key);
+ mctp_key_unref(key);
return ERR_PTR(-EBUSY);
}
@@ -888,7 +888,7 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
dev = dev_get_by_index_rcu(sock_net(sk), cb->ifindex);
if (!dev) {
rcu_read_unlock();
- return rc;
+ goto out_free;
}
rt->dev = __mctp_dev_get(dev);
rcu_read_unlock();
@@ -903,7 +903,8 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rt->mtu = 0;
} else {
- return -EINVAL;
+ rc = -EINVAL;
+ goto out_free;
}
spin_lock_irqsave(&rt->dev->addrs_lock, flags);
@@ -966,12 +967,17 @@ int mctp_local_output(struct sock *sk, struct mctp_route *rt,
rc = mctp_do_fragment_route(rt, skb, mtu, tag);
}
+ /* route output functions consume the skb, even on error */
+ skb = NULL;
+
out_release:
if (!ext_rt)
mctp_route_release(rt);
mctp_dev_put(tmp_rt.dev);
+out_free:
+ kfree_skb(skb);
return rc;
}
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index a536586742f2..7017dd60659d 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -13,17 +13,22 @@
#include <uapi/linux/mptcp.h>
#include "protocol.h"
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
+ bool slow;
int err;
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return 0;
+
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
+ slow = lock_sock_fast(sk);
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
@@ -63,17 +68,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
err = -EMSGSIZE;
goto nla_failure;
}
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_cancel(skb, start);
return err;
}
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index 74698582a285..ad28da655f8b 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;
- mptcp_data_lock(sk);
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
}
pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
- mptcp_data_unlock(sk);
}
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index a0990c365a2e..c30405e76833 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
+ SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index cae71d947252..dd7fd1f246b5 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -65,6 +65,7 @@ enum linux_mptcp_mib_field {
* conflict with another subflow while updating msk rcv wnd
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
+ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
__MPTCP_MIB_MAX
};
@@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net,
__SNMP_INC_STATS(net->mib.mptcp_statistics, field);
}
+static inline void MPTCP_DEC_STATS(struct net *net,
+ enum linux_mptcp_mib_field field)
+{
+ if (likely(net->mib.mptcp_statistics))
+ SNMP_DEC_STATS(net->mib.mptcp_statistics, field);
+}
+
bool mptcp_mib_alloc(struct net *net);
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index a2325e70ddab..670da7822e6c 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
/* Do not edit directly, auto-generated from: */
-/* Documentation/netlink/specs/mptcp.yaml */
+/* Documentation/netlink/specs/mptcp_pm.yaml */
/* YNL-GEN kernel source */
#include <net/netlink.h>
diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h
index 10579d184587..ac9fc7225b6a 100644
--- a/net/mptcp/mptcp_pm_gen.h
+++ b/net/mptcp/mptcp_pm_gen.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/* Do not edit directly, auto-generated from: */
-/* Documentation/netlink/specs/mptcp.yaml */
+/* Documentation/netlink/specs/mptcp_pm.yaml */
/* YNL-GEN kernel header */
#ifndef _LINUX_MPTCP_PM_GEN_H
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c53914012d01..63fc0758c22d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -123,8 +123,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
case MPTCPOPT_MP_JOIN:
- mp_opt->suboptions |= OPTIONS_MPTCP_MPJ;
if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYN;
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->token = get_unaligned_be32(ptr);
@@ -135,6 +135,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->backup, mp_opt->join_id,
mp_opt->token, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYNACK;
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->thmac = get_unaligned_be64(ptr);
@@ -145,11 +146,10 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->backup, mp_opt->join_id,
mp_opt->thmac, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK;
ptr += 2;
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
- } else {
- mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ;
}
break;
@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
- subflow->fully_established = 1;
- WRITE_ONCE(msk->fully_established, true);
- goto check_notify;
+ goto set_fully_established;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -983,10 +981,13 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
-set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- mptcp_subflow_fully_established(subflow, mp_opt);
+
+set_fully_established:
+ mptcp_data_lock((struct sock *)msk);
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+ mptcp_data_unlock((struct sock *)msk);
check_notify:
/* if the subflow is not already linked into the conn_list, we can't
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index bf4d96f6f99a..58d17d9604e7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
@@ -440,18 +427,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- addrs[i].id = subflow->remote_id;
+ addrs[i].id = READ_ONCE(subflow->remote_id);
if (deny_id0 && !addrs[i].id)
continue;
+ if (test_bit(addrs[i].id, unavail_id))
+ continue;
+
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+ /* forbid creating multiple address towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
@@ -799,18 +802,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u8 remote_id = READ_ONCE(subflow->remote_id);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
- u8 id = subflow->local_id;
+ u8 id = subflow_get_local_id(subflow);
- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_id, subflow->local_id, subflow->remote_id,
- msk->mpc_endpoint_id);
+ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -901,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
}
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -949,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
}
}
- if (!entry->addr.id) {
+ if (!entry->addr.id && needs_id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
@@ -960,7 +964,7 @@ find_next:
}
}
- if (!entry->addr.id)
+ if (!entry->addr.id && needs_id)
goto out;
__set_bit(entry->addr.id, pernet->id_bitmap);
@@ -1048,6 +1052,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ /* We don't use mptcp_set_state() here because it needs to be called
+ * under the msk socket lock. For the moment, that will not bring
+ * anything more than only calling inet_sk_state_store(), because the
+ * old status is known (TCP_CLOSE).
+ */
inet_sk_state_store(newsk, TCP_LISTEN);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
@@ -1087,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
if (ret < 0)
kfree(entry);
@@ -1100,7 +1109,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
[MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, },
[MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME,
- .flags = GENL_UNS_ADMIN_PERM,
+ .flags = GENL_MCAST_CAP_NET_ADMIN,
},
};
@@ -1280,6 +1289,18 @@ next:
return 0;
}
+static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
+ struct genl_info *info)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
+ mptcp_pm_address_nl_policy, info->extack) &&
+ tb[MPTCP_PM_ADDR_ATTR_ID])
+ return true;
+ return false;
+}
+
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
@@ -1321,7 +1342,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
}
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
+ !mptcp_pm_has_addr_attr_id(attr, info));
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1975,7 +1997,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
if (WARN_ON_ONCE(!sf))
return -EINVAL;
- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
return -EMSGSIZE;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 5c01b9bc619a..bc97cc30f013 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
@@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
- if (addr_match && entry->addr.id == 0)
+ if (addr_match && entry->addr.id == 0 && needs_id)
entry->addr.id = e->addr.id;
id_match = (e->addr.id == entry->addr.id);
if (addr_match && id_match) {
@@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
}
*e = *entry;
- if (!e->addr.id)
+ if (!e->addr.id && needs_id)
e->addr.id = find_next_zero_bit(id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
@@ -130,10 +131,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
- struct mptcp_pm_addr_entry new_entry;
+ struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&e->addr, skc, false)) {
+ entry = e;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (entry)
+ return entry->addr.id;
+
memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
new_entry.addr = *skc;
new_entry.addr.id = 0;
@@ -142,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
if (new_entry.addr.port == msk_sport)
new_entry.addr.port = 0;
- return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
@@ -187,7 +199,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
goto announce_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto announce_err;
@@ -222,7 +234,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->local_id == 0) {
+ if (READ_ONCE(subflow->local_id) == 0) {
has_id_0 = true;
break;
}
@@ -276,12 +288,12 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto remove_err;
+ goto out;
}
if (id_val == 0) {
err = mptcp_userspace_pm_remove_id_zero_address(msk, info);
- goto remove_err;
+ goto out;
}
lock_sock(sk);
@@ -296,7 +308,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
release_sock(sk);
- goto remove_err;
+ goto out;
}
list_move(&match->list, &free_list);
@@ -310,7 +322,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
}
err = 0;
- remove_err:
+out:
sock_put(sk);
return err;
}
@@ -367,7 +379,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
}
local.addr = addr_l;
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
@@ -483,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
goto destroy_err;
}
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
+ addr_l.family = AF_INET6;
+ }
+ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
+ addr_r.family = AF_INET6;
+ }
+#endif
if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5cd5c3f535a8..7833a49f6214 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
return READ_ONCE(msk->wnd_end);
}
-static bool mptcp_is_tcpsk(struct sock *sk)
+static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
- struct socket *sock = sk->sk_socket;
-
- if (unlikely(sk->sk_prot == &tcp_prot)) {
- /* we are being invoked after mptcp_accept() has
- * accepted a non-mp-capable flow: sk is a tcp_sk,
- * not an mptcp one.
- *
- * Hand the socket over to tcp so all further socket ops
- * bypass mptcp.
- */
- WRITE_ONCE(sock->ops, &inet_stream_ops);
- return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- WRITE_ONCE(sock->ops, &inet6_stream_ops);
- return true;
+ if (sk->sk_prot == &tcpv6_prot)
+ return &inet6_stream_ops;
#endif
- }
-
- return false;
+ WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ return &inet_stream_ops;
}
static int __mptcp_socket_create(struct mptcp_sock *msk)
@@ -99,7 +85,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
- subflow->local_id_valid = 1;
+ WRITE_ONCE(subflow->local_id, 0);
mptcp_sock_graft(msk->first, sk->sk_socket);
iput(SOCK_INODE(ssock));
@@ -443,11 +429,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
switch (sk->sk_state) {
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
+ mptcp_set_state(sk, TCP_FIN_WAIT2);
break;
case TCP_CLOSING:
case TCP_LAST_ACK:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
}
@@ -608,13 +594,13 @@ static bool mptcp_check_data_fin(struct sock *sk)
switch (sk->sk_state) {
case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
+ mptcp_set_state(sk, TCP_CLOSE_WAIT);
break;
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
+ mptcp_set_state(sk, TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
default:
/* Other states not expected */
@@ -789,7 +775,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
*/
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
+ mptcp_set_state(sk, ssk_state);
WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -1274,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -1519,8 +1506,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk))
- mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+ if (mptcp_send_head(sk)) {
+ mptcp_data_lock(sk);
+ mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+ mptcp_data_unlock(sk);
+ }
}
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
@@ -1974,6 +1964,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (copied <= 0)
return;
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+
msk->rcvq_space.copied += copied;
mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
@@ -2328,9 +2321,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
if (__mptcp_check_fallback(msk))
return false;
- if (tcp_rtx_and_write_queues_empty(sk))
- return false;
-
/* the closing socket has some data untransmitted and/or unacked:
* some data in the mptcp rtx queue has not really xmitted yet.
* keep it simple and re-inject the whole mptcp level rtx queue
@@ -2477,7 +2467,7 @@ out:
inet_sk_state_load(msk->first) == TCP_CLOSE) {
if (sk->sk_state != TCP_ESTABLISHED ||
msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_close_wake_up(sk);
} else {
mptcp_start_tout_timer(sk);
@@ -2572,7 +2562,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
WRITE_ONCE(sk->sk_err, ECONNRESET);
}
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@@ -2707,7 +2697,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@@ -2885,6 +2875,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
release_sock(ssk);
}
+void mptcp_set_state(struct sock *sk, int state)
+{
+ int oldstate = sk->sk_state;
+
+ switch (state) {
+ case TCP_ESTABLISHED:
+ if (oldstate != TCP_ESTABLISHED)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ break;
+
+ default:
+ if (oldstate == TCP_ESTABLISHED)
+ MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ }
+
+ inet_sk_state_store(sk, state);
+}
+
static const unsigned char new_state[16] = {
/* current state: new state: action: */
[0 /* (Invalid) */] = TCP_CLOSE,
@@ -2907,7 +2915,7 @@ static int mptcp_close_state(struct sock *sk)
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
- inet_sk_state_store(sk, ns);
+ mptcp_set_state(sk, ns);
return next & TCP_ACTION_FIN;
}
@@ -3018,7 +3026,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
goto cleanup;
}
@@ -3061,7 +3069,7 @@ cleanup:
* state, let's not keep resources busy for no reasons
*/
if (subflows_alive == 0)
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
@@ -3127,7 +3135,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
return -EBUSY;
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
@@ -3141,7 +3149,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
- msk->push_pending = 0;
msk->recovery = false;
msk->can_ack = false;
msk->fully_established = false;
@@ -3157,6 +3164,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
+ msk->rcvspace_init = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3170,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ struct ipv6_pinfo *newnp;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
#endif
+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+ const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct sock *ssk,
@@ -3179,6 +3229,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
if (!nsk)
@@ -3191,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_init_sock(nsk);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ mptcp_copy_ip6_options(nsk, sk);
+ else
+#endif
+ mptcp_copy_ip_options(nsk, sk);
+
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
@@ -3202,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
@@ -3215,11 +3273,12 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
/* this can't race with mptcp_close(), as the msk is
* not yet exposted to user-space
*/
- inet_sk_state_store(nsk, TCP_ESTABLISHED);
+ mptcp_set_state(nsk, TCP_ESTABLISHED);
/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
- list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ subflow = mptcp_subflow_ctx(ssk);
+ list_add(&subflow->node, &msk->conn_list);
sock_hold(ssk);
/* new mpc subflow takes ownership of the newly
@@ -3234,6 +3293,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
+
+ if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
@@ -3244,6 +3306,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
{
const struct tcp_sock *tp = tcp_sk(ssk);
+ msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;
@@ -3254,46 +3317,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
-}
-
-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
- bool kern)
-{
- struct sock *newsk;
-
- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, err, kern);
- if (!newsk)
- return NULL;
-
- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
- if (sk_is_mptcp(newsk)) {
- struct mptcp_subflow_context *subflow;
- struct sock *new_mptcp_sock;
-
- subflow = mptcp_subflow_ctx(newsk);
- new_mptcp_sock = subflow->conn;
-
- /* is_mptcp should be false if subflow->conn is missing, see
- * subflow_syn_recv_sock()
- */
- if (WARN_ON_ONCE(!new_mptcp_sock)) {
- tcp_sk(newsk)->is_mptcp = 0;
- goto out;
- }
-
- newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- } else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
- }
-
-out:
- newsk->sk_kern_sock = kern;
- return newsk;
}
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
@@ -3367,8 +3390,7 @@ static void mptcp_release_cb(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
for (;;) {
- unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
- msk->push_pending;
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;
if (!flags)
@@ -3384,7 +3406,6 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
- msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
@@ -3512,13 +3533,8 @@ void mptcp_finish_connect(struct sock *ssk)
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
- WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, ssk, 0);
-
- mptcp_rcv_space_init(msk, ssk);
}
void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -3674,7 +3690,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (IS_ERR(ssk))
return PTR_ERR(ssk);
- inet_sk_state_store(sk, TCP_SYN_SENT);
+ mptcp_set_state(sk, TCP_SYN_SENT);
subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -3724,7 +3740,7 @@ out:
if (unlikely(err)) {
/* avoid leaving a dangling token in an unconnected socket */
mptcp_token_destroy(msk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
return err;
}
@@ -3739,7 +3755,6 @@ static struct proto mptcp_prot = {
.connect = mptcp_connect,
.disconnect = mptcp_disconnect,
.close = mptcp_close,
- .accept = mptcp_accept,
.setsockopt = mptcp_setsockopt,
.getsockopt = mptcp_getsockopt,
.shutdown = mptcp_shutdown,
@@ -3814,13 +3829,13 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock;
}
- inet_sk_state_store(sk, TCP_LISTEN);
+ mptcp_set_state(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
release_sock(ssk);
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ mptcp_set_state(sk, inet_sk_state_load(ssk));
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -3849,18 +3864,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk)
return -EINVAL;
- newsk = mptcp_accept(ssk, flags, &err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, &err, kern);
if (!newsk)
return err;
- lock_sock(newsk);
-
- __inet_accept(sock, newsock, newsk);
- if (!mptcp_is_tcpsk(newsock->sk)) {
- struct mptcp_sock *msk = mptcp_sk(newsk);
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
+ if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
+ struct sock *new_mptcp_sock;
+
+ subflow = mptcp_subflow_ctx(newsk);
+ new_mptcp_sock = subflow->conn;
+
+ /* is_mptcp should be false if subflow->conn is missing, see
+ * subflow_syn_recv_sock()
+ */
+ if (WARN_ON_ONCE(!new_mptcp_sock)) {
+ tcp_sk(newsk)->is_mptcp = 0;
+ goto tcpfallback;
+ }
+
+ newsk = new_mptcp_sock;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk = mptcp_sk(newsk);
msk->in_accept_queue = 0;
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
@@ -3880,8 +3913,23 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
if (unlikely(list_is_singular(&msk->conn_list)))
- inet_sk_state_store(newsk, TCP_CLOSE);
+ mptcp_set_state(newsk, TCP_CLOSE);
}
+ } else {
+ MPTCP_INC_STATS(sock_net(ssk),
+ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+tcpfallback:
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
+ /* we are being invoked after accepting a non-mp-capable
+ * flow: sk is a tcp_sk, not an mptcp one.
+ *
+ * Hand the socket over to tcp so all further socket ops
+ * bypass mptcp.
+ */
+ WRITE_ONCE(newsock->sk->sk_socket->ops,
+ mptcp_fallback_tcp_ops(newsock->sk));
}
release_sock(newsk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index aa1a93fe40ff..07f6242afc1a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -286,7 +286,6 @@ struct mptcp_sock {
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
- unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -305,7 +304,8 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1,
- free_first:1;
+ free_first:1,
+ rcvspace_init:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -491,10 +491,9 @@ struct mptcp_subflow_context {
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 9;
+ __unused : 10;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -505,7 +504,7 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
u64 iasn; /* initial ack sequence number, MPC subflows only */
};
- u8 local_id;
+ s16 local_id; /* if negative not initialized yet */
u8 remote_id;
u8 reset_seen:1;
u8 reset_transient:1;
@@ -556,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
+ WRITE_ONCE(subflow->local_id, -1);
}
static inline u64
@@ -622,8 +622,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -641,6 +642,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
+void mptcp_set_state(struct sock *sk, int state);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
@@ -788,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
+static inline void mptcp_write_space(struct sock *sk)
+{
+ if (sk_stream_is_writeable(sk)) {
+ /* pairs with memory barrier in mptcp_poll */
+ smp_mb();
+ if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+ sk_stream_write_space(sk);
+ }
+}
+
static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -806,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+ mptcp_write_space(sk);
}
/* The called held both the msk socket and the subflow socket locks,
@@ -836,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable();
}
-static inline void mptcp_write_space(struct sock *sk)
-{
- if (sk_stream_is_writeable(sk)) {
- /* pairs with memory barrier in mptcp_poll */
- smp_mb();
- if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
- sk_stream_write_space(sk);
- }
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
#define MPTCP_TOKEN_MAX_RETRIES 4
@@ -951,8 +954,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
@@ -1020,6 +1023,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+ int local_id = READ_ONCE(subflow->local_id);
+
+ if (local_id < 0)
+ return 0;
+ return local_id;
+}
+
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
@@ -1075,6 +1087,15 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
+static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
+{
+ struct sock *ssk = READ_ONCE(msk->first);
+
+ return ssk && ((1 << inet_sk_state_load(ssk)) &
+ (TCPF_ESTABLISHED | TCPF_SYN_SENT |
+ TCPF_SYN_RECV | TCPF_LISTEN));
+}
+
static inline void mptcp_do_fallback(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1118,7 +1139,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+ return (1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 353680733700..c40f1428e602 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* should work fine */
case IP_FREEBIND:
case IP_TRANSPARENT:
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:
/* the following are control cmsg related */
case IP_PKTINFO:
@@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* common stuff that need some love */
case IP_TOS:
case IP_TTL:
- case IP_BIND_ADDRESS_NO_PORT:
case IP_MTU_DISCOVER:
case IP_RECVERR:
@@ -683,8 +684,8 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
return 0;
}
-static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
- sockptr_t optval, unsigned int optlen)
+static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
struct sock *ssk;
@@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
inet_assign_bit(TRANSPARENT, ssk,
inet_test_bit(TRANSPARENT, sk));
break;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ break;
+ case IP_LOCAL_PORT_RANGE:
+ WRITE_ONCE(inet_sk(ssk)->local_port_range,
+ READ_ONCE(inet_sk(sk)->local_port_range));
+ break;
default:
release_sock(sk);
WARN_ON_ONCE(1);
@@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_FREEBIND:
case IP_TRANSPARENT:
- return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:
+ return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
case IP_TOS:
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
}
@@ -938,6 +949,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_bytes_sent = msk->bytes_sent;
info->mptcpi_bytes_received = msk->bytes_received;
info->mptcpi_bytes_retrans = msk->bytes_retrans;
+ info->mptcpi_subflows_total = info->mptcpi_subflows +
+ __mptcp_has_initial_subflow(msk);
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -1348,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_TOS:
return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
+ case IP_BIND_ADDRESS_NO_PORT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ case IP_LOCAL_PORT_RANGE:
+ return mptcp_put_int_option(msk, optval, optlen,
+ READ_ONCE(inet_sk(sk)->local_port_range));
}
return -EOPNOTSUPP;
@@ -1448,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
}
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 852b3f4af000..71ba86246ff8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -157,8 +157,8 @@ static int subflow_check_req(struct request_sock *req,
mptcp_get_options(skb, &mp_opt);
- opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
- opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYN);
+ opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYN);
if (opt_mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -254,8 +254,8 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
subflow_init_req(req, sk_listener);
mptcp_get_options(skb, &mp_opt);
- opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
- opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_ACK);
+ opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK);
if (opt_mp_capable && opt_mp_join)
return -EINVAL;
@@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sock *ssk = msk->first;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, ssk);
- __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
- inet_sk_state_store(sk, state);
+ /* subflow->idsn is always available is TCP_SYN_SENT state,
+ * even for the FASTOPEN scenarios
+ */
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk)) {
- __mptcp_sync_state(sk, ssk->sk_state);
- } else {
- msk->pending_state = ssk->sk_state;
- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
- }
- mptcp_data_unlock(sk);
-}
-
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
@@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_data_lock(sk);
+ if (mp_opt) {
+ /* Options are available only in the non fallback cases
+ * avoid updating rx path fields otherwise
+ */
+ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ }
+
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -486,7 +508,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
@@ -499,14 +521,13 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) {
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYNACK)) {
subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
@@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;
@@ -557,8 +577,8 @@ do_reset:
static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}
static int subflow_chk_local_id(struct sock *sk)
@@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;
- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;
err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -783,12 +802,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* options.
*/
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC))
+ if (!(mp_opt.suboptions &
+ (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_ACK)))
fallback = true;
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) ||
!subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
@@ -833,7 +853,6 @@ create_child:
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
- mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
@@ -1548,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
@@ -1712,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);
ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);
return ctx;
}
@@ -1743,10 +1763,9 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1948,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;
/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 03757e76bb6b..374412ed780b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -105,8 +105,11 @@ enum {
struct ncsi_channel_version {
- u32 version; /* Supported BCD encoded NCSI version */
- u32 alpha2; /* Supported BCD encoded NCSI version */
+ u8 major; /* NCSI version major */
+ u8 minor; /* NCSI version minor */
+ u8 update; /* NCSI version update */
+ char alpha1; /* NCSI version alpha1 */
+ char alpha2; /* NCSI version alpha2 */
u8 fw_name[12]; /* Firmware name string */
u32 fw_version; /* Firmware version */
u16 pci_ids[4]; /* PCI identification */
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index fd2236ee9a79..b3ff37a181d7 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -270,7 +270,8 @@ static struct ncsi_cmd_handler {
{ NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default },
{ NCSI_PKT_CMD_OEM, -1, ncsi_cmd_handler_oem },
{ NCSI_PKT_CMD_PLDM, 0, NULL },
- { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default }
+ { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default },
+ { NCSI_PKT_CMD_GMCMA, 0, ncsi_cmd_handler_default }
};
static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca)
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index d9da942ad53d..745c788f1d1d 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -689,8 +689,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc,
return 0;
}
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
-
static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
{
unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN];
@@ -716,10 +714,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca)
return ret;
}
-#endif
-
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
-
/* NCSI OEM Command APIs */
static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca)
{
@@ -856,8 +850,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
return nch->handler(nca);
}
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
/* Determine if a given channel from the channel_queue should be used for Tx */
static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp,
struct ncsi_channel *nc)
@@ -1039,20 +1031,23 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
goto error;
}
- nd->state = ncsi_dev_state_config_oem_gma;
+ nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+ ? ncsi_dev_state_config_oem_gma
+ : ncsi_dev_state_config_clear_vids;
break;
case ncsi_dev_state_config_oem_gma:
nd->state = ncsi_dev_state_config_clear_vids;
- ret = -1;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
- nca.type = NCSI_PKT_CMD_OEM;
nca.package = np->id;
nca.channel = nc->id;
ndp->pending_req_num = 1;
- ret = ncsi_gma_handler(&nca, nc->version.mf_id);
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
-
+ if (nc->version.major >= 1 && nc->version.minor >= 2) {
+ nca.type = NCSI_PKT_CMD_GMCMA;
+ ret = ncsi_xmit_cmd(&nca);
+ } else {
+ nca.type = NCSI_PKT_CMD_OEM;
+ ret = ncsi_gma_handler(&nca, nc->version.mf_id);
+ }
if (ret < 0)
schedule_work(&ndp->work);
@@ -1404,7 +1399,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
schedule_work(&ndp->work);
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
case ncsi_dev_state_probe_mlx_gma:
ndp->pending_req_num = 1;
@@ -1429,7 +1423,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_cis;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
case ncsi_dev_state_probe_cis:
ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1447,7 +1440,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY))
nd->state = ncsi_dev_state_probe_keep_phy;
break;
-#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)
case ncsi_dev_state_probe_keep_phy:
ndp->pending_req_num = 1;
@@ -1460,7 +1452,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
nd->state = ncsi_dev_state_probe_gvi;
break;
-#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */
case ncsi_dev_state_probe_gvi:
case ncsi_dev_state_probe_gc:
case ncsi_dev_state_probe_gls:
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index a3a6753a1db7..2f872d064396 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -71,8 +71,8 @@ static int ncsi_write_channel_info(struct sk_buff *skb,
if (nc == nc->package->preferred_channel)
nla_put_flag(skb, NCSI_CHANNEL_ATTR_FORCED);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.version);
- nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.alpha2);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MAJOR, nc->version.major);
+ nla_put_u32(skb, NCSI_CHANNEL_ATTR_VERSION_MINOR, nc->version.minor);
nla_put_string(skb, NCSI_CHANNEL_ATTR_VERSION_STR, nc->version.fw_name);
vid_nest = nla_nest_start_noflag(skb, NCSI_CHANNEL_ATTR_VLAN_LIST);
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index ba66c7dc3a21..f2f3b5c1b941 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -197,9 +197,12 @@ struct ncsi_rsp_gls_pkt {
/* Get Version ID */
struct ncsi_rsp_gvi_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
- __be32 ncsi_version; /* NCSI version */
+ unsigned char major; /* NCSI version major */
+ unsigned char minor; /* NCSI version minor */
+ unsigned char update; /* NCSI version update */
+ unsigned char alpha1; /* NCSI version alpha1 */
unsigned char reserved[3]; /* Reserved */
- unsigned char alpha2; /* NCSI version */
+ unsigned char alpha2; /* NCSI version alpha2 */
unsigned char fw_name[12]; /* f/w name string */
__be32 fw_version; /* f/w version */
__be16 pci_ids[4]; /* PCI IDs */
@@ -335,6 +338,14 @@ struct ncsi_rsp_gpuuid_pkt {
__be32 checksum;
};
+/* Get MC MAC Address */
+struct ncsi_rsp_gmcma_pkt {
+ struct ncsi_rsp_pkt_hdr rsp;
+ unsigned char address_count;
+ unsigned char reserved[3];
+ unsigned char addresses[][ETH_ALEN];
+};
+
/* AEN: Link State Change */
struct ncsi_aen_lsc_pkt {
struct ncsi_aen_pkt_hdr aen; /* AEN header */
@@ -395,6 +406,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */
#define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */
#define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */
+#define NCSI_PKT_CMD_GMCMA 0x58 /* Get MC MAC Address */
/* NCSI packet responses */
@@ -430,6 +442,7 @@ struct ncsi_aen_hncdsc_pkt {
#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80)
#define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80)
#define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80)
+#define NCSI_PKT_RSP_GMCMA (NCSI_PKT_CMD_GMCMA + 0x80)
/* NCSI response code/reason */
#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 069c2659074b..bee290d0f48b 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -19,6 +19,19 @@
#include "ncsi-pkt.h"
#include "ncsi-netlink.h"
+/* Nibbles within [0xA, 0xF] add zero "0" to the returned value.
+ * Optional fields (encoded as 0xFF) will default to zero.
+ */
+static u8 decode_bcd_u8(u8 x)
+{
+ int lo = x & 0xF;
+ int hi = x >> 4;
+
+ lo = lo < 0xA ? lo : 0;
+ hi = hi < 0xA ? hi : 0;
+ return lo + hi * 10;
+}
+
static int ncsi_validate_rsp_pkt(struct ncsi_request *nr,
unsigned short payload)
{
@@ -755,9 +768,18 @@ static int ncsi_rsp_handler_gvi(struct ncsi_request *nr)
if (!nc)
return -ENODEV;
- /* Update to channel's version info */
+ /* Update channel's version info
+ *
+ * Major, minor, and update fields are supposed to be
+ * unsigned integers encoded as packed BCD.
+ *
+ * Alpha1 and alpha2 are ISO/IEC 8859-1 characters.
+ */
ncv = &nc->version;
- ncv->version = ntohl(rsp->ncsi_version);
+ ncv->major = decode_bcd_u8(rsp->major);
+ ncv->minor = decode_bcd_u8(rsp->minor);
+ ncv->update = decode_bcd_u8(rsp->update);
+ ncv->alpha1 = rsp->alpha1;
ncv->alpha2 = rsp->alpha2;
memcpy(ncv->fw_name, rsp->fw_name, 12);
ncv->fw_version = ntohl(rsp->fw_version);
@@ -1069,6 +1091,44 @@ static int ncsi_rsp_handler_netlink(struct ncsi_request *nr)
return ret;
}
+static int ncsi_rsp_handler_gmcma(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+ struct ncsi_rsp_gmcma_pkt *rsp;
+ struct sockaddr saddr;
+ int ret = -1;
+ int i;
+
+ rsp = (struct ncsi_rsp_gmcma_pkt *)skb_network_header(nr->rsp);
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+ netdev_info(ndev, "NCSI: Received %d provisioned MAC addresses\n",
+ rsp->address_count);
+ for (i = 0; i < rsp->address_count; i++) {
+ netdev_info(ndev, "NCSI: MAC address %d: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ i, rsp->addresses[i][0], rsp->addresses[i][1],
+ rsp->addresses[i][2], rsp->addresses[i][3],
+ rsp->addresses[i][4], rsp->addresses[i][5]);
+ }
+
+ for (i = 0; i < rsp->address_count; i++) {
+ memcpy(saddr.sa_data, &rsp->addresses[i], ETH_ALEN);
+ ret = ndev->netdev_ops->ndo_set_mac_address(ndev, &saddr);
+ if (ret < 0) {
+ netdev_warn(ndev, "NCSI: Unable to assign %pM to device\n",
+ saddr.sa_data);
+ continue;
+ }
+ netdev_warn(ndev, "NCSI: Set MAC address to %pM\n", saddr.sa_data);
+ break;
+ }
+
+ ndp->gma_flag = ret == 0;
+ return ret;
+}
+
static struct ncsi_rsp_handler {
unsigned char type;
int payload;
@@ -1105,7 +1165,8 @@ static struct ncsi_rsp_handler {
{ NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm },
{ NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid },
{ NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm },
- { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm }
+ { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm },
+ { NCSI_PKT_RSP_GMCMA, -1, ncsi_rsp_handler_gmcma },
};
int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 26ab0e9612d8..cb48a2b9cb9f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -4,6 +4,8 @@
#ifndef __IP_SET_BITMAP_IP_GEN_H
#define __IP_SET_BITMAP_IP_GEN_H
+#include <linux/rcupdate_wait.h>
+
#define mtype_do_test IPSET_TOKEN(MTYPE, _do_test)
#define mtype_gc_test IPSET_TOKEN(MTYPE, _gc_test)
#define mtype_is_filled IPSET_TOKEN(MTYPE, _is_filled)
@@ -28,6 +30,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -57,9 +60,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -288,6 +288,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -301,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4c133e06be1d..3184cc6be4c9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1182,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
@@ -1193,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
@@ -1206,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1221,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1228,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1238,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1394,9 +1414,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
- /* Make sure all readers of the old set pointers are completed. */
- synchronize_rcu();
-
return 0;
}
@@ -2362,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
+ set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
@@ -2409,8 +2427,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
unregister_pernet_subsys(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 7c2399541771..cf3ce72c3de6 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -5,6 +5,7 @@
#define _IP_SET_HASH_GEN_H
#include <linux/rcupdate.h>
+#include <linux/rcupdate_wait.h>
#include <linux/jhash.h>
#include <linux/types.h>
#include <linux/netfilter/nfnetlink.h>
@@ -221,6 +222,7 @@ static const union nf_inet_addr zeromask = {};
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
@@ -265,6 +267,7 @@ static const union nf_inet_addr zeromask = {};
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
@@ -429,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference(hbucket(t, i));
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -449,10 +452,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
- if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
-
- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);
@@ -598,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
@@ -1440,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
.region_lock = true,
};
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 95aeb31c60e0..30a655e5c4fd 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -138,9 +138,9 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
#include "ip_set_hash_gen.h"
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-static const char *get_physindev_name(const struct sk_buff *skb)
+static const char *get_physindev_name(const struct sk_buff *skb, struct net *net)
{
- struct net_device *dev = nf_bridge_get_physindev(skb);
+ struct net_device *dev = nf_bridge_get_physindev(skb, net);
return dev ? dev->name : NULL;
}
@@ -177,7 +177,7 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) :
get_physoutdev_name(skb);
if (!eiface)
@@ -395,7 +395,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const char *eiface = SRCDIR ? get_physindev_name(skb) :
+ const char *eiface = SRCDIR ? get_physindev_name(skb, xt_net(par)) :
get_physoutdev_name(skb);
if (!eiface)
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e162636525cf..6c3f28bc59b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
- if (SET_WITH_TIMEOUT(set))
- timer_shutdown_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ timer_shutdown_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};
static void
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 9065da3cdd12..a743db073887 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -31,6 +31,7 @@
#include <linux/seq_file.h>
#include <linux/jhash.h>
#include <linux/random.h>
+#include <linux/rcupdate_wait.h>
#include <net/net_namespace.h>
#include <net/ip_vs.h>
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index c5970ba416ae..f821ad2e19b3 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -21,6 +21,7 @@
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/list.h>
+#include <linux/rcupdate_wait.h>
#include <net/ip_vs.h>
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index eaf9f2ed0067..be74c0906dda 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -1365,7 +1365,7 @@ static int set_mcast_if(struct sock *sk, struct net_device *dev)
struct ipv6_pinfo *np = inet6_sk(sk);
/* IPV6_MULTICAST_IF */
- np->mcast_oif = dev->ifindex;
+ WRITE_ONCE(np->mcast_oif, dev->ifindex);
}
#endif
release_sock(sk);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 9193e109e6b3..65e0259178da 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -271,7 +271,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_TIME_EXCEED,
ICMPV6_EXC_HOPLIMIT, 0);
- __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);
return false;
}
@@ -286,7 +286,7 @@ static inline bool decrement_ttl(struct netns_ipvs *ipvs,
{
if (ip_hdr(skb)->ttl <= 1) {
/* Tell the sender its packet died... */
- __IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
return false;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 2e5f3864d353..5b876fa7f9af 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2756,6 +2756,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
.set_closing = nf_conntrack_set_closing,
+ .confirm = __nf_conntrack_confirm,
};
void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index e697a824b001..540d97715bd2 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Get fields bitmap */
if (nf_h323_error_boundary(bs, 0, f->sz))
return H323_ERROR_BOUND;
+ if (f->sz > 32)
+ return H323_ERROR_RANGE;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
bmp2_len = get_bits(bs, 7) + 1;
if (nf_h323_error_boundary(bs, 0, bmp2_len))
return H323_ERROR_BOUND;
+ if (bmp2_len > 32)
+ return H323_ERROR_RANGE;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index fb0ae15e96df..3b846cbdc050 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -876,6 +876,7 @@ struct ctnetlink_filter_u32 {
struct ctnetlink_filter {
u8 family;
+ bool zone_filter;
u_int32_t orig_flags;
u_int32_t reply_flags;
@@ -992,13 +993,16 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
if (err)
goto err_filter;
+ if (cda[CTA_ZONE]) {
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+ if (err < 0)
+ goto err_filter;
+ filter->zone_filter = true;
+ }
+
if (!cda[CTA_FILTER])
return filter;
- err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
- if (err < 0)
- goto err_filter;
-
err = ctnetlink_parse_filter(cda[CTA_FILTER], filter);
if (err < 0)
goto err_filter;
@@ -1043,7 +1047,7 @@ err_filter:
static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
{
- return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS];
+ return family || cda[CTA_MARK] || cda[CTA_FILTER] || cda[CTA_STATUS] || cda[CTA_ZONE];
}
static int ctnetlink_start(struct netlink_callback *cb)
@@ -1148,6 +1152,10 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if (filter->family && nf_ct_l3num(ct) != filter->family)
goto ignore_entry;
+ if (filter->zone_filter &&
+ !nf_ct_zone_equal_any(ct, &filter->zone))
+ goto ignore_entry;
+
if (filter->orig_flags) {
tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL);
if (!ctnetlink_filter_match_tuple(&filter->orig, tuple,
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6bd533983c1..4cc97f971264 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
- } else {
+ } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index e573be5afde7..ae493599a3ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
- u32 end, u32 win)
+ u32 end, u32 win,
+ enum ip_conntrack_dir dir)
{
/* SYN-ACK in reply to a SYN
* or SYN from reply direction in simultaneous open.
@@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ if (dir == IP_CT_DIR_REPLY &&
+ !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
sender->td_scale = 0;
receiver->td_scale = 0;
@@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
if (tcph->syn) {
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (!tcph->ack)
/* Simultaneous open */
return NFCT_TCP_ACCEPT;
@@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
*/
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return NFCT_TCP_ACCEPT;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 920a5a29ae1d..a0571339239c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -87,12 +87,22 @@ static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
return 0;
}
+static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route,
+ enum flow_offload_tuple_dir dir)
+{
+ struct dst_entry *dst = route->tuple[dir].dst;
+
+ route->tuple[dir].dst = NULL;
+
+ return dst;
+}
+
static int flow_offload_fill_route(struct flow_offload *flow,
- const struct nf_flow_route *route,
+ struct nf_flow_route *route,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
- struct dst_entry *dst = route->tuple[dir].dst;
+ struct dst_entry *dst = nft_route_dst_fetch(route, dir);
int i, j = 0;
switch (flow_tuple->l3proto) {
@@ -122,6 +132,7 @@ static int flow_offload_fill_route(struct flow_offload *flow,
ETH_ALEN);
flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
+ dst_release(dst);
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
@@ -146,7 +157,7 @@ static void nft_flow_dst_release(struct flow_offload *flow,
}
void flow_offload_route_init(struct flow_offload *flow,
- const struct nf_flow_route *route)
+ struct nf_flow_route *route)
{
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8cc52d2bd31b..e16f158388bb 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
return;
}
- BUG_ON(loggers[pf][type] == NULL);
-
rcu_read_lock();
logger = rcu_dereference(loggers[pf][type]);
- module_put(logger->me);
+ if (!logger)
+ WARN_ON_ONCE(1);
+ else
+ module_put(logger->me);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_logger_put);
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index c66689ad2b49..58402226045e 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -111,7 +111,8 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf,
unsigned int hooknum, const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
- const struct nf_loginfo *loginfo, const char *prefix)
+ const struct nf_loginfo *loginfo, const char *prefix,
+ struct net *net)
{
const struct net_device *physoutdev __maybe_unused;
const struct net_device *physindev __maybe_unused;
@@ -121,7 +122,7 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u8 pf,
in ? in->name : "",
out ? out->name : "");
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- physindev = nf_bridge_get_physindev(skb);
+ physindev = nf_bridge_get_physindev(skb, net);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
physoutdev = nf_bridge_get_physoutdev(skb);
@@ -148,7 +149,7 @@ static void nf_log_arp_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
- prefix);
+ prefix, net);
dump_arp_packet(m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
@@ -845,7 +846,7 @@ static void nf_log_ip_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in,
- out, loginfo, prefix);
+ out, loginfo, prefix, net);
if (in)
dump_mac_header(m, loginfo, skb);
@@ -880,7 +881,7 @@ static void nf_log_ip6_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out,
- loginfo, prefix);
+ loginfo, prefix, net);
if (in)
dump_mac_header(m, loginfo, skb);
@@ -916,7 +917,7 @@ static void nf_log_unknown_packet(struct net *net, u_int8_t pf,
loginfo = &default_loginfo;
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
- prefix);
+ prefix, net);
dump_mac_header(m, loginfo, skb);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c3d7ecbc777c..016c816d91cb 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -551,8 +551,11 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
find_free_id:
if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
- else
+ else if ((range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL) ||
+ maniptype != NF_NAT_MANIP_DST)
off = get_random_u16();
+ else
+ off = 0;
attempts = range_size;
if (attempts > NF_NAT_MAX_ATTEMPTS)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 63d1516816b1..e2f334f70281 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -82,11 +82,9 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
{
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
const struct sk_buff *skb = entry->skb;
- struct nf_bridge_info *nf_bridge;
- nf_bridge = nf_bridge_info_get(skb);
- if (nf_bridge) {
- entry->physin = nf_bridge_get_physindev(skb);
+ if (nf_bridge_info_exists(skb)) {
+ entry->physin = nf_bridge_get_physindev(skb, entry->state.net);
entry->physout = nf_bridge_get_physoutdev(skb);
} else {
entry->physin = NULL;
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 467671f2d42f..fbbc4fd37349 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -617,7 +617,7 @@ synproxy_recv_client_ack(struct net *net,
struct synproxy_net *snet = synproxy_pernet(net);
int mss;
- mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ mss = __cookie_v4_check(ip_hdr(skb), th);
if (mss == 0) {
this_cpu_inc(snet->stats->cookie_invalid);
return false;
@@ -1034,7 +1034,7 @@ synproxy_recv_client_ack_ipv6(struct net *net,
struct synproxy_net *snet = synproxy_pernet(net);
int mss;
- mss = nf_cookie_v6_check(ipv6_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ mss = nf_cookie_v6_check(ipv6_hdr(skb), th);
if (mss == 0) {
this_cpu_inc(snet->stats->cookie_invalid);
return false;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index be04af433988..1683dc196b59 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -24,6 +24,7 @@
#include <net/sock.h>
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
+#define NFT_SET_MAX_ANONLEN 16
unsigned int nf_tables_net_id __read_mostly;
@@ -683,15 +684,16 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
}
-static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
- struct nft_flowtable *flowtable)
+static struct nft_trans *
+nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
+ struct nft_flowtable *flowtable)
{
struct nft_trans *trans;
trans = nft_trans_alloc(ctx, msg_type,
sizeof(struct nft_trans_flowtable));
if (trans == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
@@ -700,22 +702,22 @@ static int nft_trans_flowtable_add(struct nft_ctx *ctx, int msg_type,
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
- return 0;
+ return trans;
}
static int nft_delflowtable(struct nft_ctx *ctx,
struct nft_flowtable *flowtable)
{
- int err;
+ struct nft_trans *trans;
- err = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
- if (err < 0)
- return err;
+ trans = nft_trans_flowtable_add(ctx, NFT_MSG_DELFLOWTABLE, flowtable);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
nft_deactivate_next(ctx->net, flowtable);
nft_use_dec(&ctx->table->use);
- return err;
+ return 0;
}
static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg)
@@ -1250,6 +1252,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return 0;
err_register_hooks:
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
nft_trans_destroy(trans);
return ret;
}
@@ -2079,7 +2082,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
struct nft_hook *hook;
int err;
- hook = kmalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
+ hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
if (!hook) {
err = -ENOMEM;
goto err_hook_alloc;
@@ -2261,7 +2264,16 @@ static int nft_chain_parse_hook(struct net *net,
return -EOPNOTSUPP;
}
- type = basechain->type;
+ if (nla[NFTA_CHAIN_TYPE]) {
+ type = __nf_tables_chain_type_lookup(nla[NFTA_CHAIN_TYPE],
+ family);
+ if (!type) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_CHAIN_TYPE]);
+ return -ENOENT;
+ }
+ } else {
+ type = basechain->type;
+ }
}
if (!try_module_get(type->owner)) {
@@ -2493,19 +2505,15 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
RCU_INIT_POINTER(chain->blob_gen_0, blob);
RCU_INIT_POINTER(chain->blob_gen_1, blob);
- err = nf_tables_register_hook(net, table, chain);
- if (err < 0)
- goto err_destroy_chain;
-
if (!nft_use_inc(&table->use)) {
err = -EMFILE;
- goto err_use;
+ goto err_destroy_chain;
}
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
- goto err_unregister_hook;
+ goto err_trans;
}
nft_trans_chain_policy(trans) = NFT_CHAIN_POLICY_UNSET;
@@ -2513,17 +2521,22 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
nft_trans_chain_policy(trans) = policy;
err = nft_chain_add(table, chain);
- if (err < 0) {
- nft_trans_destroy(trans);
- goto err_unregister_hook;
- }
+ if (err < 0)
+ goto err_chain_add;
+
+ /* This must be LAST to ensure no packets are walking over this chain. */
+ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err_register_hook;
return 0;
-err_unregister_hook:
+err_register_hook:
+ nft_chain_del(chain);
+err_chain_add:
+ nft_trans_destroy(trans);
+err_trans:
nft_use_dec_restore(&table->use);
-err_use:
- nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -2968,6 +2981,9 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
*/
int nft_register_expr(struct nft_expr_type *type)
{
+ if (WARN_ON_ONCE(type->maxattr > NFT_EXPR_MAXATTR))
+ return -ENOMEM;
+
nfnl_lock(NFNL_SUBSYS_NFTABLES);
if (type->family == NFPROTO_UNSPEC)
list_add_tail_rcu(&type->list, &nf_tables_expressions);
@@ -3262,14 +3278,13 @@ int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
{
int err;
- if (src->ops->clone) {
- dst->ops = src->ops;
- err = src->ops->clone(dst, src);
- if (err < 0)
- return err;
- } else {
- memcpy(dst, src, src->ops->size);
- }
+ if (WARN_ON_ONCE(!src->ops->clone))
+ return -EINVAL;
+
+ dst->ops = src->ops;
+ err = src->ops->clone(dst, src);
+ if (err < 0)
+ return err;
__module_get(src->ops->type->owner);
@@ -4402,6 +4417,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
+ if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN)
+ return -EINVAL;
+
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
@@ -4802,8 +4820,8 @@ static int nft_set_desc_concat_parse(const struct nlattr *attr,
static int nft_set_desc_concat(struct nft_set_desc *desc,
const struct nlattr *nla)
{
+ u32 num_regs = 0, key_num_regs = 0;
struct nlattr *attr;
- u32 num_regs = 0;
int rem, err, i;
nla_for_each_nested(attr, nla, rem) {
@@ -4818,6 +4836,10 @@ static int nft_set_desc_concat(struct nft_set_desc *desc,
for (i = 0; i < desc->field_count; i++)
num_regs += DIV_ROUND_UP(desc->field_len[i], sizeof(u32));
+ key_num_regs = DIV_ROUND_UP(desc->klen, sizeof(u32));
+ if (key_num_regs != num_regs)
+ return -EINVAL;
+
if (num_regs > NFT_REG32_COUNT)
return -E2BIG;
@@ -4979,6 +5001,12 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
(NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) ==
+ (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) ==
+ (NFT_SET_CONSTANT | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
}
desc.dtype = 0;
@@ -5039,16 +5067,28 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
}
desc.policy = NFT_SET_POL_PERFORMANCE;
- if (nla[NFTA_SET_POLICY] != NULL)
+ if (nla[NFTA_SET_POLICY] != NULL) {
desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+ switch (desc.policy) {
+ case NFT_SET_POL_PERFORMANCE:
+ case NFT_SET_POL_MEMORY:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
if (err < 0)
return err;
- if (desc.field_count > 1 && !(flags & NFT_SET_CONCAT))
+ if (desc.field_count > 1) {
+ if (!(flags & NFT_SET_CONCAT))
+ return -EINVAL;
+ } else if (flags & NFT_SET_CONCAT) {
return -EINVAL;
+ }
} else if (flags & NFT_SET_CONCAT) {
return -EINVAL;
}
@@ -5390,6 +5430,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
list_del_rcu(&set->list);
+ set->dead = 1;
if (event)
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
GFP_KERNEL);
@@ -5695,7 +5736,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
struct nft_set_dump_args *args;
- if (nft_set_elem_expired(ext))
+ if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
return 0;
args = container_of(iter, struct nft_set_dump_args, iter);
@@ -5817,10 +5858,6 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
- if (dump_ctx->reset && args.iter.count > args.iter.skip)
- audit_log_nft_set_reset(table, cb->seq,
- args.iter.count - args.iter.skip);
-
rcu_read_unlock();
if (args.iter.err && args.iter.err != -EMSGSIZE)
@@ -5836,6 +5873,26 @@ nla_put_failure:
return -ENOSPC;
}
+static int nf_tables_dumpreset_set(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk));
+ struct nft_set_dump_ctx *dump_ctx = cb->data;
+ int ret, skip = cb->args[0];
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ ret = nf_tables_dump_set(skb, cb);
+
+ if (cb->args[0] > skip)
+ audit_log_nft_set_reset(dump_ctx->ctx.table, cb->seq,
+ cb->args[0] - skip);
+
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return ret;
+}
+
static int nf_tables_dump_set_start(struct netlink_callback *cb)
{
struct nft_set_dump_ctx *dump_ctx = cb->data;
@@ -5910,7 +5967,7 @@ static int nft_setelem_parse_flags(const struct nft_set *set,
return 0;
}
-static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
+static int nft_setelem_parse_key(struct nft_ctx *ctx, const struct nft_set *set,
struct nft_data *key, struct nlattr *attr)
{
struct nft_data_desc desc = {
@@ -5963,7 +6020,7 @@ static void *nft_setelem_catchall_get(const struct net *net,
return priv;
}
-static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set,
+static int nft_setelem_get(struct nft_ctx *ctx, const struct nft_set *set,
struct nft_set_elem *elem, u32 flags)
{
void *priv;
@@ -5982,7 +6039,7 @@ static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set,
return 0;
}
-static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
+static int nft_get_set_elem(struct nft_ctx *ctx, const struct nft_set *set,
const struct nlattr *attr, bool reset)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
@@ -6039,21 +6096,18 @@ err_fill_setelem:
return err;
}
-/* called with rcu_read_lock held */
-static int nf_tables_getsetelem(struct sk_buff *skb,
- const struct nfnl_info *info,
- const struct nlattr * const nla[])
+static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx,
+ const struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[],
+ bool reset)
{
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
- int rem, err = 0, nelems = 0;
struct net *net = info->net;
struct nft_table *table;
struct nft_set *set;
- struct nlattr *attr;
- struct nft_ctx ctx;
- bool reset = false;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
genmask, 0);
@@ -6068,10 +6122,22 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
return PTR_ERR(set);
}
- nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ nft_ctx_init(&dump_ctx->ctx, net, skb,
+ info->nlh, family, table, NULL, nla);
+ dump_ctx->set = set;
+ dump_ctx->reset = reset;
+ return 0;
+}
- if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
- reset = true;
+/* called with rcu_read_lock held */
+static int nf_tables_getsetelem(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct nft_set_dump_ctx dump_ctx;
+ struct nlattr *attr;
+ int rem, err = 0;
if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
@@ -6080,12 +6146,55 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
.done = nf_tables_dump_set_done,
.module = THIS_MODULE,
};
- struct nft_set_dump_ctx dump_ctx = {
- .set = set,
- .ctx = ctx,
- .reset = reset,
+
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false);
+ if (err)
+ return err;
+
+ c.data = &dump_ctx;
+ return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
+ }
+
+ if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
+ return -EINVAL;
+
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, false);
+ if (err)
+ return err;
+
+ nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+ err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, false);
+ if (err < 0) {
+ NL_SET_BAD_ATTR(extack, attr);
+ break;
+ }
+ }
+
+ return err;
+}
+
+static int nf_tables_getsetelem_reset(struct sk_buff *skb,
+ const struct nfnl_info *info,
+ const struct nlattr * const nla[])
+{
+ struct nftables_pernet *nft_net = nft_pernet(info->net);
+ struct netlink_ext_ack *extack = info->extack;
+ struct nft_set_dump_ctx dump_ctx;
+ int rem, err = 0, nelems = 0;
+ struct nlattr *attr;
+
+ if (info->nlh->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .start = nf_tables_dump_set_start,
+ .dump = nf_tables_dumpreset_set,
+ .done = nf_tables_dump_set_done,
+ .module = THIS_MODULE,
};
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true);
+ if (err)
+ return err;
+
c.data = &dump_ctx;
return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c);
}
@@ -6093,18 +6202,31 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
return -EINVAL;
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+ rcu_read_unlock();
+ mutex_lock(&nft_net->commit_mutex);
+ rcu_read_lock();
+
+ err = nft_set_dump_ctx_init(&dump_ctx, skb, info, nla, true);
+ if (err)
+ goto out_unlock;
+
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- err = nft_get_set_elem(&ctx, set, attr, reset);
+ err = nft_get_set_elem(&dump_ctx.ctx, dump_ctx.set, attr, true);
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
break;
}
nelems++;
}
+ audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems);
- if (reset)
- audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
- nelems);
+out_unlock:
+ rcu_read_unlock();
+ mutex_unlock(&nft_net->commit_mutex);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
return err;
}
@@ -6478,7 +6600,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_is_active(net, ext))
+ if (!nft_is_active_next(net, ext))
continue;
kfree(elem->priv);
@@ -7439,11 +7561,15 @@ nla_put_failure:
return -1;
}
-static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
list_for_each_entry(type, &nf_tables_objects, list) {
+ if (type->family != NFPROTO_UNSPEC &&
+ type->family != family)
+ continue;
+
if (objtype == type->type)
return type;
}
@@ -7451,11 +7577,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
}
static const struct nft_object_type *
-nft_obj_type_get(struct net *net, u32 objtype)
+nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (type != NULL && try_module_get(type->owner))
return type;
@@ -7548,7 +7674,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (WARN_ON_ONCE(!type))
return -ENOENT;
@@ -7562,7 +7688,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (!nft_use_inc(&table->use))
return -EMFILE;
- type = nft_obj_type_get(net, objtype);
+ type = nft_obj_type_get(net, objtype, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err_type;
@@ -8339,9 +8465,9 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
u8 family = info->nfmsg->nfgen_family;
const struct nf_flowtable_type *type;
struct nft_flowtable *flowtable;
- struct nft_hook *hook, *next;
struct net *net = info->net;
struct nft_table *table;
+ struct nft_trans *trans;
struct nft_ctx ctx;
int err;
@@ -8421,34 +8547,34 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
err = nft_flowtable_parse_hook(&ctx, nla, &flowtable_hook, flowtable,
extack, true);
if (err < 0)
- goto err4;
+ goto err_flowtable_parse_hooks;
list_splice(&flowtable_hook.list, &flowtable->hook_list);
flowtable->data.priority = flowtable_hook.priority;
flowtable->hooknum = flowtable_hook.num;
+ trans = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
+ if (IS_ERR(trans)) {
+ err = PTR_ERR(trans);
+ goto err_flowtable_trans;
+ }
+
+ /* This must be LAST to ensure no packets are walking over this flowtable. */
err = nft_register_flowtable_net_hooks(ctx.net, table,
&flowtable->hook_list,
flowtable);
- if (err < 0) {
- nft_hooks_destroy(&flowtable->hook_list);
- goto err4;
- }
-
- err = nft_trans_flowtable_add(&ctx, NFT_MSG_NEWFLOWTABLE, flowtable);
if (err < 0)
- goto err5;
+ goto err_flowtable_hooks;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
return 0;
-err5:
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- nft_unregister_flowtable_hook(net, flowtable, hook);
- list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
- }
-err4:
+
+err_flowtable_hooks:
+ nft_trans_destroy(trans);
+err_flowtable_trans:
+ nft_hooks_destroy(&flowtable->hook_list);
+err_flowtable_parse_hooks:
flowtable->data.type->free(&flowtable->data);
err3:
module_put(type->owner);
@@ -9078,7 +9204,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETSETELEM_RESET] = {
- .call = nf_tables_getsetelem,
+ .call = nf_tables_getsetelem_reset,
.type = NFNL_CB_RCU,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
@@ -9711,6 +9837,7 @@ dead_elem:
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
struct nft_set_elem_catchall *catchall, *next;
+ u64 tstamp = nft_net_tstamp(gc->net);
const struct nft_set *set = gc->set;
struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
@@ -9720,7 +9847,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_expired(ext))
+ if (!__nft_set_elem_expired(ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -10383,6 +10510,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
@@ -10505,6 +10633,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
+ nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)
@@ -10879,16 +11008,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
- default:
- switch (data->verdict.code & NF_VERDICT_MASK) {
- case NF_ACCEPT:
- case NF_DROP:
- case NF_QUEUE:
- break;
- default:
- return -EINVAL;
- }
- fallthrough;
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
@@ -10923,6 +11046,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.chain = chain;
break;
+ default:
+ return -EINVAL;
}
desc->len = sizeof(data->verdict);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index f03f4d4d7d88..134e05d31061 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -508,7 +508,7 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
- struct net_device *physindev;
+ int physinif;
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
@@ -516,10 +516,10 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(indev->ifindex)))
goto nla_put_failure;
- physindev = nf_bridge_get_physindev(skb);
- if (physindev &&
+ physinif = nf_bridge_get_physinif(skb);
+ if (physinif &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
- htonl(physindev->ifindex)))
+ htonl(physinif)))
goto nla_put_failure;
}
#endif
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 171d1f52d3dd..5cf38fc0a366 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -232,18 +232,25 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
+ unsigned int ct_verdict = verdict;
+
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook)
- verdict = ct_hook->update(entry->state.net, entry->skb);
+ ct_verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
- switch (verdict & NF_VERDICT_MASK) {
+ switch (ct_verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ /* follow userspace verdict, could be REPEAT */
+ break;
case NF_STOLEN:
nf_queue_entry_free(entry);
return;
+ default:
+ verdict = ct_verdict & NF_VERDICT_MASK;
+ break;
}
-
}
nf_reinject(entry, verdict);
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 680fe557686e..274b6f7e6bb5 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_base_chain *basechain;
struct nftables_pernet *nft_net;
- struct nft_table *table;
struct nft_chain *chain, *nr;
+ struct nft_table *table;
struct nft_ctx ctx = {
.net = dev_net(dev),
};
@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this,
nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
- if (table->family != NFPROTO_NETDEV)
+ if (table->family != NFPROTO_NETDEV &&
+ table->family != NFPROTO_INET)
continue;
ctx.family = table->family;
@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this,
if (!nft_is_base_chain(chain))
continue;
+ basechain = nft_base_chain(chain);
+ if (table->family == NFPROTO_INET &&
+ basechain->ops.hooknum != NF_INET_INGRESS)
+ continue;
+
ctx.chain = chain;
nft_netdev_event(event, dev, &ctx);
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5284cd2ad532..d3d11dede545 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
{
struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 l4proto;
u32 flags;
int err;
@@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return -EINVAL;
flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
- if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ if (flags & NFT_RULE_COMPAT_F_UNUSED ||
+ flags & ~NFT_RULE_COMPAT_F_MASK)
return -EINVAL;
if (flags & NFT_RULE_COMPAT_F_INV)
*inv = true;
- *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ if (l4proto > U16_MAX)
+ return -EINVAL;
+
+ *proto = l4proto;
+
return 0;
}
@@ -350,6 +357,22 @@ static int nft_target_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -413,7 +436,7 @@ static void nft_match_eval(const struct nft_expr *expr,
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@@ -595,6 +618,22 @@ static int nft_match_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -712,7 +751,7 @@ out_put:
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
[NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
.len = NFT_COMPAT_NAME_MAX-1 },
- [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 86bb9d7797d9..255640013ab8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
#endif
case NFT_CT_ID:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+
len = sizeof(u32);
break;
default:
@@ -1250,7 +1253,30 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_EXPECT_L3PROTO])
priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
+ switch (priv->l3num) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET)
+ break;
+
+ return -EINVAL;
+ case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */
+ default:
+ return -EAFNOSUPPORT;
+ }
+
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+ switch (priv->l4proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_DCCP:
+ case IPPROTO_SCTP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab3362c483b4..ab9576098701 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -361,6 +361,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
ret = flow_offload_add(flowtable, flow);
if (ret < 0)
goto err_flow_add;
@@ -384,6 +385,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx,
{
unsigned int hook_mask = (1 << NF_INET_FORWARD);
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, hook_mask);
}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 145dc62c6247..cefa25e0dbb0 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -58,16 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
static int nft_limit_init(struct nft_limit_priv *priv,
const struct nlattr * const tb[], bool pkts)
{
- u64 unit, tokens;
+ u64 unit, tokens, rate_with_burst;
+ bool invert = false;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ if (priv->rate == 0)
+ return -EINVAL;
+
unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
- priv->nsecs = unit * NSEC_PER_SEC;
- if (priv->rate == 0 || priv->nsecs < unit)
+ if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs))
return -EOVERFLOW;
if (tb[NFTA_LIMIT_BURST])
@@ -76,18 +79,35 @@ static int nft_limit_init(struct nft_limit_priv *priv,
if (pkts && priv->burst == 0)
priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
- if (priv->rate + priv->burst < priv->rate)
+ if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst))
return -EOVERFLOW;
if (pkts) {
- tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst;
+ u64 tmp = div64_u64(priv->nsecs, priv->rate);
+
+ if (check_mul_overflow(tmp, priv->burst, &tokens))
+ return -EOVERFLOW;
} else {
+ u64 tmp;
+
/* The token bucket size limits the number of tokens can be
* accumulated. tokens_max specifies the bucket size.
* tokens_max = unit * (rate + burst) / rate.
*/
- tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst),
- priv->rate);
+ if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp))
+ return -EOVERFLOW;
+
+ tokens = div64_u64(tmp, priv->rate);
+ }
+
+ if (tb[NFTA_LIMIT_FLAGS]) {
+ u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
+
+ if (flags & ~NFT_LIMIT_F_INV)
+ return -EOPNOTSUPP;
+
+ if (flags & NFT_LIMIT_F_INV)
+ invert = true;
}
priv->limit = kmalloc(sizeof(*priv->limit), GFP_KERNEL_ACCOUNT);
@@ -96,13 +116,7 @@ static int nft_limit_init(struct nft_limit_priv *priv,
priv->limit->tokens = tokens;
priv->tokens_max = priv->limit->tokens;
-
- if (tb[NFTA_LIMIT_FLAGS]) {
- u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
-
- if (flags & NFT_LIMIT_F_INV)
- priv->invert = true;
- }
+ priv->invert = invert;
priv->limit->last = ktime_get_ns();
spin_lock_init(&priv->limit->lock);
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 583885ce7232..808f5802c270 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -143,6 +143,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx,
struct nft_nat *priv = nft_expr_priv(expr);
int err;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 35a2c28caa60..24d977138572 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
const struct nft_rt *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->key) {
case NFT_RT_NEXTHOP4:
case NFT_RT_NEXTHOP6:
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6c2061bfdae6..6968a3b34236 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -36,6 +36,7 @@ struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
+ u64 tstamp;
};
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@@ -62,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
- if (nft_set_elem_expired(&he->ext))
+ if (__nft_set_elem_expired(&he->ext, x->tstamp))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
@@ -87,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -106,6 +108,7 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -131,6 +134,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
struct nft_rhash_elem *prev;
@@ -216,6 +221,7 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
rcu_read_lock();
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 7252fcdae349..aa1d9e93a9a0 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
local_bh_disable();
- map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+ scratch = *raw_cpu_ptr(m->scratch);
+
+ map_index = scratch->map_index;
+
+ res_map = scratch->map + (map_index ? m->bsize_max : 0);
+ fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@@ -460,7 +460,7 @@ next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return false;
@@ -477,7 +477,7 @@ next_match:
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return true;
@@ -504,6 +504,7 @@ out:
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,7 +514,8 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
- const u8 *data, u8 genmask)
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
@@ -566,7 +568,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext))
+ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -603,10 +605,10 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
- static struct nft_pipapo_elem *e;
+ struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ nft_genmask_cur(net), get_jiffies_64());
if (IS_ERR(e))
return ERR_CAST(e);
@@ -1109,6 +1111,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m: Matching data
+ * @cpu: CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+ struct nft_pipapo_scratch *s;
+ void *mem;
+
+ s = *per_cpu_ptr(m->scratch, cpu);
+ if (!s)
+ return;
+
+ mem = s;
+ mem -= s->align_off;
+ kfree(mem);
+}
+
+/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
* @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1121,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
- unsigned long *scratch;
+ struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
- unsigned long *scratch_aligned;
+ void *scratch_aligned;
+ u32 align_off;
#endif
-
- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ scratch = kzalloc_node(struct_size(scratch, map,
+ bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@@ -1140,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
- kfree(*per_cpu_ptr(clone->scratch, i));
-
- *per_cpu_ptr(clone->scratch, i) = scratch;
+ pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+ /* Align &scratch->map (not the struct itself): the extra
+ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+ * above guarantee we can waste up to those bytes in order
+ * to align the map field regardless of its offset within
+ * the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+ align_off = scratch_aligned - (void *)scratch;
+
+ scratch = scratch_aligned;
+ scratch->align_off = align_off;
#endif
+ *per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@@ -1173,6 +1206,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1182,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, start, genmask);
+ dup = pipapo_get(net, set, start, genmask, tstamp);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1204,7 +1238,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
}
if (PTR_ERR(dup) != -ENOENT) {
@@ -1301,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
-#ifdef NFT_PIPAPO_ALIGN
- new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
- if (!new->scratch_aligned)
- goto out_scratch;
-#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@@ -1357,10 +1386,7 @@ out_lt:
}
out_scratch_realloc:
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(new->scratch_aligned);
-#endif
+ pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@@ -1560,6 +1586,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1594,10 +1621,10 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
- if (nft_set_elem_expired(&e->ext)) {
+ if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
- gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
if (!gc)
return;
@@ -1640,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(m->scratch, i));
+ pipapo_free_scratch(m, i);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
-
pipapo_free_fields(m);
kfree(m);
@@ -1769,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net));
+ e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
if (IS_ERR(e))
return NULL;
@@ -2132,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
- m->scratch = alloc_percpu(unsigned long *);
+ m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@@ -2140,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
-#ifdef NFT_PIPAPO_ALIGN
- m->scratch_aligned = alloc_percpu(unsigned long *);
- if (!m->scratch_aligned) {
- err = -ENOMEM;
- goto out_free;
- }
- for_each_possible_cpu(i)
- *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
@@ -2180,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2236,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(m->scratch, cpu));
+ pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@@ -2253,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(priv->clone->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 1040223da5fa..3842c7341a9f 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -131,20 +131,28 @@ struct nft_pipapo_field {
};
/**
+ * struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address
+ * @map: store partial matching results during lookup
+ */
+struct nft_pipapo_scratch {
+ u8 map_index;
+ u32 align_off;
+ unsigned long map[];
+};
+
+/**
* struct nft_pipapo_match - Data used for lookup and matching
- * @field_count Amount of fields in set
+ * @field_count: Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
- * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
- * @rcu Matching data is swapped on commits
+ * @rcu: Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long * __percpu *scratch_aligned;
-#endif
- unsigned long * __percpu *scratch;
+ struct nft_pipapo_scratch * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[] __counted_by(field_count);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 52e0d026d30a..a3a8ddca9918 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@
/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
- asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
"je %l[" #label "]" : : : : label)
/* Store 256 bits from YMM register into memory. Contrary to bucket load
@@ -71,9 +71,6 @@
#define NFT_PIPAPO_AVX2_ZERO(reg) \
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
-
/**
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
*
@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
- unsigned long *res, *fill, *scratch;
+ struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
+ unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch_aligned);
+ scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
return false;
}
- map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
- res = scratch + (map_index ? m->bsize_max : 0);
- fill = scratch + (map_index ? 0 : m->bsize_max);
+ map_index = scratch->map_index;
+
+ res = scratch->map + (map_index ? m->bsize_max : 0);
+ fill = scratch->map + (map_index ? 0 : m->bsize_max);
/* Starting map doesn't need to be set for this implementation */
@@ -1221,7 +1220,7 @@ next_match:
out:
if (i % 2)
- raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+ scratch->map_index = !map_index;
kernel_fpu_end();
return ret >= 0;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index baa3fea4fe65..9944fe479e53 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -234,7 +234,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe, u8 genmask)
+ struct nft_rbtree_elem *rbe)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -253,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev) &&
- nft_set_elem_active(&rbe_prev->ext, genmask))
+ nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
break;
prev = rb_prev(prev);
@@ -313,6 +313,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
/* Descend the tree to search for an existing element greater than the
@@ -360,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports
* but skip new elements in this transaction.
*/
- if (nft_set_elem_expired(&rbe->ext) &&
+ if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
const struct nft_rbtree_elem *removed_end;
- removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);
@@ -551,6 +552,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
while (parent != NULL) {
@@ -571,7 +573,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
- } else if (nft_set_elem_expired(&rbe->ext)) {
+ } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
@@ -624,9 +626,10 @@ static void nft_rbtree_gc(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
struct rb_node *node, *next;
struct nft_trans_gc *gc;
- struct net *net;
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
@@ -648,7 +651,7 @@ static void nft_rbtree_gc(struct nft_set *set)
rbe_end = rbe;
continue;
}
- if (!nft_set_elem_expired(&rbe->ext))
+ if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 9ed85be79452..f30163e2ca62 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 13da882669a4..1d737f89dfc1 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
err = nf_synproxy_ipv4_init(snet, ctx->net);
if (err)
goto nf_ct_failure;
@@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
nf_synproxy_ipv4_fini(snet, ctx->net);
nf_synproxy_ipv6_fini(snet, ctx->net);
break;
@@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD));
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index ae15cd693f0e..71412adb73d4 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 9f21953c7433..f735d79d8be5 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = {
static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
.type = NFT_OBJECT_TUNNEL,
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_obj_ops,
.maxattr = NFTA_TUNNEL_KEY_MAX,
.policy = nft_tunnel_key_policy,
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 452f8587adda..1c866757db55 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->dir) {
case XFRM_POLICY_IN:
hooks = (1 << NF_INET_FORWARD) |
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index ec6ed6fda96c..343e65f377d4 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -59,7 +59,7 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
(!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
return false;
- physdev = nf_bridge_get_physindev(skb);
+ physdev = nf_bridge_get_physindev(skb, xt_net(par));
indev = physdev ? physdev->name : NULL;
if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c
index f1d5b8465217..a07c2216d28b 100644
--- a/net/netlabel/netlabel_calipso.c
+++ b/net/netlabel/netlabel_calipso.c
@@ -54,6 +54,28 @@ static const struct nla_policy calipso_genl_policy[NLBL_CALIPSO_A_MAX + 1] = {
[NLBL_CALIPSO_A_MTYPE] = { .type = NLA_U32 },
};
+static const struct netlbl_calipso_ops *calipso_ops;
+
+/**
+ * netlbl_calipso_ops_register - Register the CALIPSO operations
+ * @ops: ops to register
+ *
+ * Description:
+ * Register the CALIPSO packet engine operations.
+ *
+ */
+const struct netlbl_calipso_ops *
+netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops)
+{
+ return xchg(&calipso_ops, ops);
+}
+EXPORT_SYMBOL(netlbl_calipso_ops_register);
+
+static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
+{
+ return READ_ONCE(calipso_ops);
+}
+
/* NetLabel Command Handlers
*/
/**
@@ -96,15 +118,18 @@ static int netlbl_calipso_add_pass(struct genl_info *info,
*
*/
static int netlbl_calipso_add(struct sk_buff *skb, struct genl_info *info)
-
{
int ret_val = -EINVAL;
struct netlbl_audit audit_info;
+ const struct netlbl_calipso_ops *ops = netlbl_calipso_ops_get();
if (!info->attrs[NLBL_CALIPSO_A_DOI] ||
!info->attrs[NLBL_CALIPSO_A_MTYPE])
return -EINVAL;
+ if (!ops)
+ return -EOPNOTSUPP;
+
netlbl_netlink_auditinfo(&audit_info);
switch (nla_get_u32(info->attrs[NLBL_CALIPSO_A_MTYPE])) {
case CALIPSO_MAP_PASS:
@@ -363,28 +388,6 @@ int __init netlbl_calipso_genl_init(void)
return genl_register_family(&netlbl_calipso_gnl_family);
}
-static const struct netlbl_calipso_ops *calipso_ops;
-
-/**
- * netlbl_calipso_ops_register - Register the CALIPSO operations
- * @ops: ops to register
- *
- * Description:
- * Register the CALIPSO packet engine operations.
- *
- */
-const struct netlbl_calipso_ops *
-netlbl_calipso_ops_register(const struct netlbl_calipso_ops *ops)
-{
- return xchg(&calipso_ops, ops);
-}
-EXPORT_SYMBOL(netlbl_calipso_ops_register);
-
-static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void)
-{
- return READ_ONCE(calipso_ops);
-}
-
/**
* calipso_doi_add - Add a new DOI to the CALIPSO protocol engine
* @doi_def: the DOI structure
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index eb086b06d60d..ff315351269f 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -167,7 +167,7 @@ static inline u32 netlink_group_mask(u32 group)
static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
gfp_t gfp_mask)
{
- unsigned int len = skb_end_offset(skb);
+ unsigned int len = skb->len;
struct sk_buff *new;
new = alloc_skb(len, gfp_mask);
@@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
if (is_vmalloc_addr(skb->head)) {
if (!skb->cloned ||
!atomic_dec_return(&(skb_shinfo(skb)->dataref)))
- vfree(skb->head);
+ vfree_atomic(skb->head);
skb->head = NULL;
}
@@ -1204,8 +1204,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp)
return sock;
}
-static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
- int broadcast)
+struct sk_buff *netlink_alloc_large_skb(unsigned int size, int broadcast)
{
struct sk_buff *skb;
void *data;
@@ -1520,8 +1519,7 @@ out:
int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
u32 portid,
u32 group, gfp_t allocation,
- int (*filter)(struct sock *dsk,
- struct sk_buff *skb, void *data),
+ netlink_filter_fn filter,
void *filter_data)
{
struct net *net = sock_net(ssk);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 9c7ffd10df2a..8c7af02f8454 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -631,6 +631,138 @@ static int genl_validate_ops(const struct genl_family *family)
return 0;
}
+static void *genl_sk_priv_alloc(struct genl_family *family)
+{
+ void *priv;
+
+ priv = kzalloc(family->sock_priv_size, GFP_KERNEL);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ if (family->sock_priv_init)
+ family->sock_priv_init(priv);
+
+ return priv;
+}
+
+static void genl_sk_priv_free(const struct genl_family *family, void *priv)
+{
+ if (family->sock_priv_destroy)
+ family->sock_priv_destroy(priv);
+ kfree(priv);
+}
+
+static int genl_sk_privs_alloc(struct genl_family *family)
+{
+ if (!family->sock_priv_size)
+ return 0;
+
+ family->sock_privs = kzalloc(sizeof(*family->sock_privs), GFP_KERNEL);
+ if (!family->sock_privs)
+ return -ENOMEM;
+ xa_init(family->sock_privs);
+ return 0;
+}
+
+static void genl_sk_privs_free(const struct genl_family *family)
+{
+ unsigned long id;
+ void *priv;
+
+ if (!family->sock_priv_size)
+ return;
+
+ xa_for_each(family->sock_privs, id, priv)
+ genl_sk_priv_free(family, priv);
+
+ xa_destroy(family->sock_privs);
+ kfree(family->sock_privs);
+}
+
+static void genl_sk_priv_free_by_sock(struct genl_family *family,
+ struct sock *sk)
+{
+ void *priv;
+
+ if (!family->sock_priv_size)
+ return;
+ priv = xa_erase(family->sock_privs, (unsigned long) sk);
+ if (!priv)
+ return;
+ genl_sk_priv_free(family, priv);
+}
+
+static void genl_release(struct sock *sk, unsigned long *groups)
+{
+ struct genl_family *family;
+ unsigned int id;
+
+ down_read(&cb_lock);
+
+ idr_for_each_entry(&genl_fam_idr, family, id)
+ genl_sk_priv_free_by_sock(family, sk);
+
+ up_read(&cb_lock);
+}
+
+/**
+ * __genl_sk_priv_get - Get family private pointer for socket, if exists
+ *
+ * @family: family
+ * @sk: socket
+ *
+ * Lookup a private memory for a Generic netlink family and specified socket.
+ *
+ * Caller should make sure this is called in RCU read locked section.
+ *
+ * Return: valid pointer on success, otherwise negative error value
+ * encoded by ERR_PTR(), NULL in case priv does not exist.
+ */
+void *__genl_sk_priv_get(struct genl_family *family, struct sock *sk)
+{
+ if (WARN_ON_ONCE(!family->sock_privs))
+ return ERR_PTR(-EINVAL);
+ return xa_load(family->sock_privs, (unsigned long) sk);
+}
+
+/**
+ * genl_sk_priv_get - Get family private pointer for socket
+ *
+ * @family: family
+ * @sk: socket
+ *
+ * Lookup a private memory for a Generic netlink family and specified socket.
+ * Allocate the private memory in case it was not already done.
+ *
+ * Return: valid pointer on success, otherwise negative error value
+ * encoded by ERR_PTR().
+ */
+void *genl_sk_priv_get(struct genl_family *family, struct sock *sk)
+{
+ void *priv, *old_priv;
+
+ priv = __genl_sk_priv_get(family, sk);
+ if (priv)
+ return priv;
+
+ /* priv for the family does not exist so far, create it. */
+
+ priv = genl_sk_priv_alloc(family);
+ if (IS_ERR(priv))
+ return ERR_CAST(priv);
+
+ old_priv = xa_cmpxchg(family->sock_privs, (unsigned long) sk, NULL,
+ priv, GFP_KERNEL);
+ if (old_priv) {
+ genl_sk_priv_free(family, priv);
+ if (xa_is_err(old_priv))
+ return ERR_PTR(xa_err(old_priv));
+ /* Race happened, priv for the socket was already inserted. */
+ return old_priv;
+ }
+ return priv;
+}
+
/**
* genl_register_family - register a generic netlink family
* @family: generic netlink family
@@ -659,6 +791,10 @@ int genl_register_family(struct genl_family *family)
goto errout_locked;
}
+ err = genl_sk_privs_alloc(family);
+ if (err)
+ goto errout_locked;
+
/*
* Sadly, a few cases need to be special-cased
* due to them having previously abused the API
@@ -679,7 +815,7 @@ int genl_register_family(struct genl_family *family)
start, end + 1, GFP_KERNEL);
if (family->id < 0) {
err = family->id;
- goto errout_locked;
+ goto errout_sk_privs_free;
}
err = genl_validate_assign_mc_groups(family);
@@ -698,6 +834,8 @@ int genl_register_family(struct genl_family *family)
errout_remove:
idr_remove(&genl_fam_idr, family->id);
+errout_sk_privs_free:
+ genl_sk_privs_free(family);
errout_locked:
genl_unlock_all();
return err;
@@ -728,6 +866,9 @@ int genl_unregister_family(const struct genl_family *family)
up_write(&cb_lock);
wait_event(genl_sk_destructing_waitq,
atomic_read(&genl_sk_destructing_cnt) == 0);
+
+ genl_sk_privs_free(family);
+
genl_unlock();
genl_ctrl_event(CTRL_CMD_DELFAMILY, family, NULL, 0);
@@ -1688,10 +1829,10 @@ static int genl_bind(struct net *net, int group)
continue;
grp = &family->mcgrps[i];
- if ((grp->flags & GENL_UNS_ADMIN_PERM) &&
+ if ((grp->flags & GENL_MCAST_CAP_NET_ADMIN) &&
!ns_capable(net->user_ns, CAP_NET_ADMIN))
ret = -EPERM;
- if (grp->cap_sys_admin &&
+ if ((grp->flags & GENL_MCAST_CAP_SYS_ADMIN) &&
!ns_capable(net->user_ns, CAP_SYS_ADMIN))
ret = -EPERM;
@@ -1708,6 +1849,7 @@ static int __net_init genl_pernet_init(struct net *net)
.input = genl_rcv,
.flags = NL_CFG_F_NONROOT_RECV,
.bind = genl_bind,
+ .release = genl_release,
};
/* we'll bump the group number right afterwards */
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 0eed00184adf..104a80b75477 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
nr_init_timers(sk);
nr->t1 =
- msecs_to_jiffies(sysctl_netrom_transport_timeout);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout));
nr->t2 =
- msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay));
nr->n2 =
- msecs_to_jiffies(sysctl_netrom_transport_maximum_tries);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries));
nr->t4 =
- msecs_to_jiffies(sysctl_netrom_transport_busy_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay));
nr->idle =
- msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout);
- nr->window = sysctl_netrom_transport_requested_window_size;
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout));
+ nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size);
nr->bpqext = 1;
nr->state = NR_STATE_0;
@@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
* G8PZT's Xrouter which is sending packets with command type 7
* as an extension of the protocol.
*/
- if (sysctl_netrom_reset_circuit &&
+ if (READ_ONCE(sysctl_netrom_reset_circuit) &&
(frametype != NR_RESET || flags != 0))
nr_transmit_reset(skb, 1);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 3aaac4a22b38..2c34389c3ce6 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev,
buff[6] |= AX25_SSSID_SPARE;
buff += AX25_ADDR_LEN;
- *buff++ = sysctl_netrom_network_ttl_initialiser;
+ *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
*buff++ = NR_PROTO_IP;
*buff++ = NR_PROTO_IP;
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 2f084b6f69d7..97944db6b5ac 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -262,7 +262,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 44929657f5b7..5e531394a724 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (!nr_route_frame(skb, NULL)) {
kfree_skb(skb);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index baea3cbd76ca..70480869ad1c 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_neigh->digipeat = NULL;
nr_neigh->ax25 = NULL;
nr_neigh->dev = dev;
- nr_neigh->quality = sysctl_netrom_default_path_quality;
+ nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality);
nr_neigh->locked = 0;
nr_neigh->count = 0;
nr_neigh->number = nr_neigh_no++;
@@ -728,7 +728,7 @@ void nr_link_failed(ax25_cb *ax25, int reason)
nr_neigh->ax25 = NULL;
ax25_cb_put(ax25);
- if (++nr_neigh->failed < sysctl_netrom_link_fails_count) {
+ if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) {
nr_neigh_put(nr_neigh);
return;
}
@@ -766,7 +766,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
if (ax25 != NULL) {
ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat,
ax25->ax25_dev->dev, 0,
- sysctl_netrom_obsolescence_count_initialiser);
+ READ_ONCE(sysctl_netrom_obsolescence_count_initialiser));
if (ret)
return ret;
}
@@ -780,7 +780,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
return ret;
}
- if (!sysctl_netrom_routing_control && ax25 != NULL)
+ if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL)
return 0;
/* Its Time-To-Live has expired */
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index e2d2af924cff..c3bbd5880850 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype)
*dptr++ = nr->my_id;
*dptr++ = frametype;
*dptr++ = nr->window;
- if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ if (nr->bpqext)
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
break;
case NR_DISCREQ:
@@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (mine) {
*dptr++ = 0;
diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c
index d63d2e5dc60c..dae378f1d52b 100644
--- a/net/nfc/digital_core.c
+++ b/net/nfc/digital_core.c
@@ -858,4 +858,5 @@ void nfc_digital_unregister_device(struct nfc_digital_dev *ddev)
}
EXPORT_SYMBOL(nfc_digital_unregister_device);
+MODULE_DESCRIPTION("NFC Digital protocol stack");
MODULE_LICENSE("GPL");
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 6c9592d05120..cdad47b140fa 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
nci_hci_deallocate(ndev);
+
+ /* drop partial rx data packet if present */
+ if (ndev->rx_data_reassembly)
+ kfree_skb(ndev->rx_data_reassembly);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
@@ -1577,4 +1581,5 @@ static void nci_cmd_work(struct work_struct *work)
}
}
+MODULE_DESCRIPTION("NFC Controller Interface");
MODULE_LICENSE("GPL");
diff --git a/net/nfc/nci/spi.c b/net/nfc/nci/spi.c
index b68150c971d0..6a93533c480e 100644
--- a/net/nfc/nci/spi.c
+++ b/net/nfc/nci/spi.c
@@ -319,4 +319,5 @@ done:
}
EXPORT_SYMBOL_GPL(nci_spi_read);
+MODULE_DESCRIPTION("NFC Controller Interface (NCI) SPI link layer");
MODULE_LICENSE("GPL");
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 88965e2068ac..ebc5728aab4e 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -48,6 +48,7 @@ struct ovs_len_tbl {
#define OVS_ATTR_NESTED -1
#define OVS_ATTR_VARIABLE -2
+#define OVS_COPY_ACTIONS_MAX_DEPTH 16
static bool actions_may_change_flow(const struct nlattr *actions)
{
@@ -2545,13 +2546,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log);
+ u32 mpls_label_count, bool log,
+ u32 depth);
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
const struct nlattr *probability, *actions;
@@ -2602,7 +2605,8 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
return err;
err = __ovs_nla_copy_actions(net, actions, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2617,7 +2621,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
int start, action_start, err, rem;
@@ -2660,7 +2665,8 @@ static int validate_and_copy_dec_ttl(struct net *net,
return action_start;
err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
- vlan_tci, mpls_label_count, log);
+ vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -2674,7 +2680,8 @@ static int validate_and_copy_clone(struct net *net,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log, bool last)
+ u32 mpls_label_count, bool log, bool last,
+ u32 depth)
{
int start, err;
u32 exec;
@@ -2694,7 +2701,8 @@ static int validate_and_copy_clone(struct net *net,
return err;
err = __ovs_nla_copy_actions(net, attr, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3063,7 +3071,7 @@ static int validate_and_copy_check_pkt_len(struct net *net,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
u32 mpls_label_count,
- bool log, bool last)
+ bool log, bool last, u32 depth)
{
const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
@@ -3111,7 +3119,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3124,7 +3133,8 @@ static int validate_and_copy_check_pkt_len(struct net *net,
return nested_acts_start;
err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
- eth_type, vlan_tci, mpls_label_count, log);
+ eth_type, vlan_tci, mpls_label_count, log,
+ depth + 1);
if (err)
return err;
@@ -3152,12 +3162,16 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa,
__be16 eth_type, __be16 vlan_tci,
- u32 mpls_label_count, bool log)
+ u32 mpls_label_count, bool log,
+ u32 depth)
{
u8 mac_proto = ovs_key_mac_proto(key);
const struct nlattr *a;
int rem, err;
+ if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
+ return -EOVERFLOW;
+
nla_for_each_nested(a, attr, rem) {
/* Expected argument lengths, (u32)-1 for variable length. */
static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
@@ -3355,7 +3369,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_sample(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3426,7 +3440,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
err = validate_and_copy_clone(net, a, key, sfa,
eth_type, vlan_tci,
mpls_label_count,
- log, last);
+ log, last, depth);
if (err)
return err;
skip_copy = true;
@@ -3440,7 +3454,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
eth_type,
vlan_tci,
mpls_label_count,
- log, last);
+ log, last,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3450,7 +3465,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_DEC_TTL:
err = validate_and_copy_dec_ttl(net, a, key, sfa,
eth_type, vlan_tci,
- mpls_label_count, log);
+ mpls_label_count, log,
+ depth);
if (err)
return err;
skip_copy = true;
@@ -3495,7 +3511,8 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
(*sfa)->orig_len = nla_len(attr);
err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
- key->eth.vlan.tci, mpls_label_count, log);
+ key->eth.vlan.tci, mpls_label_count, log,
+ 0);
if (err)
ovs_nla_free_flow_actions(*sfa);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 7adf48549a3b..c9bbc2686690 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2121,13 +2121,13 @@ static int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ enum skb_drop_reason drop_reason = SKB_CONSUMED;
struct sock *sk;
struct sockaddr_ll *sll;
struct packet_sock *po;
u8 *skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
- bool is_drop_n_account = false;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@@ -2217,9 +2217,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
return 0;
drop_n_acct:
- is_drop_n_account = true;
atomic_inc(&po->tp_drops);
atomic_inc(&sk->sk_drops);
+ drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
drop_n_restore:
if (skb_head != skb->data && skb_shared(skb)) {
@@ -2227,16 +2227,14 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- if (!is_drop_n_account)
- consume_skb(skb);
- else
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;
}
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
+ enum skb_drop_reason drop_reason = SKB_CONSUMED;
struct sock *sk;
struct packet_sock *po;
struct sockaddr_ll *sll;
@@ -2250,7 +2248,6 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct sk_buff *copy_skb = NULL;
struct timespec64 ts;
__u32 ts_status;
- bool is_drop_n_account = false;
unsigned int slot_id = 0;
int vnet_hdr_sz = 0;
@@ -2498,19 +2495,16 @@ drop_n_restore:
skb->len = skb_len;
}
drop:
- if (!is_drop_n_account)
- consume_skb(skb);
- else
- kfree_skb(skb);
+ kfree_skb_reason(skb, drop_reason);
return 0;
drop_n_account:
spin_unlock(&sk->sk_receive_queue.lock);
atomic_inc(&po->tp_drops);
- is_drop_n_account = true;
+ drop_reason = SKB_DROP_REASON_PACKET_SOCK_ERROR;
sk->sk_data_ready(sk);
- kfree_skb(copy_skb);
+ kfree_skb_reason(copy_skb, drop_reason);
goto drop_n_restore;
}
@@ -4787,5 +4781,6 @@ out:
module_init(packet_init);
module_exit(packet_exit);
+MODULE_DESCRIPTION("Packet socket support (AF_PACKET)");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 3aa50dc7535b..976fe250b509 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -34,10 +34,10 @@ static int pn_ioctl(struct sock *sk, int cmd, int *karg)
switch (cmd) {
case SIOCINQ:
- lock_sock(sk);
+ spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
*karg = skb ? skb->len : 0;
- release_sock(sk);
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
return 0;
case SIOCPNADDRESOURCE:
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index faba31f2eff2..3dd5f52bc1b5 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -917,6 +917,37 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
return 0;
}
+static unsigned int pep_first_packet_length(struct sock *sk)
+{
+ struct pep_sock *pn = pep_sk(sk);
+ struct sk_buff_head *q;
+ struct sk_buff *skb;
+ unsigned int len = 0;
+ bool found = false;
+
+ if (sock_flag(sk, SOCK_URGINLINE)) {
+ q = &pn->ctrlreq_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb) {
+ len = skb->len;
+ found = true;
+ }
+ spin_unlock_bh(&q->lock);
+ }
+
+ if (likely(!found)) {
+ q = &sk->sk_receive_queue;
+ spin_lock_bh(&q->lock);
+ skb = skb_peek(q);
+ if (skb)
+ len = skb->len;
+ spin_unlock_bh(&q->lock);
+ }
+
+ return len;
+}
+
static int pep_ioctl(struct sock *sk, int cmd, int *karg)
{
struct pep_sock *pn = pep_sk(sk);
@@ -929,15 +960,7 @@ static int pep_ioctl(struct sock *sk, int cmd, int *karg)
break;
}
- lock_sock(sk);
- if (sock_flag(sk, SOCK_URGINLINE) &&
- !skb_queue_empty(&pn->ctrlreq_queue))
- *karg = skb_peek(&pn->ctrlreq_queue)->len;
- else if (!skb_queue_empty(&sk->sk_receive_queue))
- *karg = skb_peek(&sk->sk_receive_queue)->len;
- else
- *karg = 0;
- release_sock(sk);
+ *karg = pep_first_packet_length(sk);
ret = 0;
break;
diff --git a/net/psample/psample.c b/net/psample/psample.c
index c34e902855db..ddd211a151d0 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -32,7 +32,7 @@ enum psample_nl_multicast_groups {
static const struct genl_multicast_group psample_nl_mcgrps[] = {
[PSAMPLE_NL_MCGRP_CONFIG] = { .name = PSAMPLE_NL_MCGRP_CONFIG_NAME },
[PSAMPLE_NL_MCGRP_SAMPLE] = { .name = PSAMPLE_NL_MCGRP_SAMPLE_NAME,
- .flags = GENL_UNS_ADMIN_PERM },
+ .flags = GENL_MCAST_CAP_NET_ADMIN, },
};
static struct genl_family psample_nl_family __ro_after_init;
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 01c4cdfef45d..8435a20968ef 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval,
rs->rs_rx_traces = trace.rx_traces;
for (i = 0; i < rs->rs_rx_traces; i++) {
- if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
+ if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
rs->rs_rx_traces = 0;
return -EFAULT;
}
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index fba82d36593a..a4e3c5de998b 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
kfree(sg);
}
ret = PTR_ERR(trans_private);
+ /* Trigger connection so that its ready for the next retry */
+ if (ret == -ENODEV)
+ rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c71b923764fd..5627f80013f8 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -425,6 +425,7 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
struct sock *sk = rds_rs_to_sk(rs);
int ret = 0;
unsigned long flags;
+ struct rds_incoming *to_drop = NULL;
write_lock_irqsave(&rs->rs_recv_lock, flags);
if (!list_empty(&inc->i_item)) {
@@ -435,11 +436,14 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
-be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
list_del_init(&inc->i_item);
- rds_inc_put(inc);
+ to_drop = inc;
}
}
write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+ if (to_drop)
+ rds_inc_put(to_drop);
+
rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
return ret;
}
@@ -758,16 +762,21 @@ void rds_clear_recv_queue(struct rds_sock *rs)
struct sock *sk = rds_rs_to_sk(rs);
struct rds_incoming *inc, *tmp;
unsigned long flags;
+ LIST_HEAD(to_drop);
write_lock_irqsave(&rs->rs_recv_lock, flags);
list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
-be32_to_cpu(inc->i_hdr.h_len),
inc->i_hdr.h_dport);
+ list_move(&inc->i_item, &to_drop);
+ }
+ write_unlock_irqrestore(&rs->rs_recv_lock, flags);
+
+ list_for_each_entry_safe(inc, tmp, &to_drop, i_item) {
list_del_init(&inc->i_item);
rds_inc_put(inc);
}
- write_unlock_irqrestore(&rs->rs_recv_lock, flags);
}
/*
diff --git a/net/rds/send.c b/net/rds/send.c
index 5e57a1581dc6..2899def23865 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1313,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Parse any control messages the user may have included. */
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
- if (ret) {
- /* Trigger connection so that its ready for the next retry */
- if (ret == -EAGAIN)
- rds_conn_connect_if_down(conn);
+ if (ret)
goto out;
- }
if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 53b3535a1e4a..05008ce5c421 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -165,7 +165,7 @@ int rds_tcp_accept_one(struct socket *sock)
struct ipv6_pinfo *inet6;
inet6 = inet6_sk(new_sock->sk);
- dev_if = inet6->mcast_oif;
+ dev_if = READ_ONCE(inet6->mcast_oif);
} else {
dev_if = new_sock->sk->sk_bound_dev_if;
}
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 14cc8fe8584b..c3feb4f49d09 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1351,11 +1351,11 @@ static long rfkill_fop_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct rfkill_data *data = file->private_data;
- int ret = -ENOSYS;
+ int ret = -ENOTTY;
u32 size;
if (_IOC_TYPE(cmd) != RFKILL_IOC_MAGIC)
- return -ENOSYS;
+ return -ENOTTY;
mutex_lock(&data->mtx);
switch (_IOC_NR(cmd)) {
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index fa8aec78f63d..465bfe5eb061 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -259,15 +259,61 @@ static int rxrpc_listen(struct socket *sock, int backlog)
}
/**
+ * rxrpc_kernel_lookup_peer - Obtain remote transport endpoint for an address
+ * @sock: The socket through which it will be accessed
+ * @srx: The network address
+ * @gfp: Allocation flags
+ *
+ * Lookup or create a remote transport endpoint record for the specified
+ * address and return it with a ref held.
+ */
+struct rxrpc_peer *rxrpc_kernel_lookup_peer(struct socket *sock,
+ struct sockaddr_rxrpc *srx, gfp_t gfp)
+{
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ int ret;
+
+ ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ return rxrpc_lookup_peer(rx->local, srx, gfp);
+}
+EXPORT_SYMBOL(rxrpc_kernel_lookup_peer);
+
+/**
+ * rxrpc_kernel_get_peer - Get a reference on a peer
+ * @peer: The peer to get a reference on.
+ *
+ * Get a record for the remote peer in a call.
+ */
+struct rxrpc_peer *rxrpc_kernel_get_peer(struct rxrpc_peer *peer)
+{
+ return peer ? rxrpc_get_peer(peer, rxrpc_peer_get_application) : NULL;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+
+/**
+ * rxrpc_kernel_put_peer - Allow a kernel app to drop a peer reference
+ * @peer: The peer to drop a ref on
+ */
+void rxrpc_kernel_put_peer(struct rxrpc_peer *peer)
+{
+ rxrpc_put_peer(peer, rxrpc_peer_put_application);
+}
+EXPORT_SYMBOL(rxrpc_kernel_put_peer);
+
+/**
* rxrpc_kernel_begin_call - Allow a kernel service to begin a call
* @sock: The socket on which to make the call
- * @srx: The address of the peer to contact
+ * @peer: The peer to contact
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
* @tx_total_len: Total length of data to transmit during the call (or -1)
* @hard_timeout: The maximum lifespan of the call in sec
* @gfp: The allocation constraints
* @notify_rx: Where to send notifications instead of socket queue
+ * @service_id: The ID of the service to contact
* @upgrade: Request service upgrade for call
* @interruptibility: The call is interruptible, or can be canceled.
* @debug_id: The debug ID for tracing to be assigned to the call
@@ -280,13 +326,14 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* supplying @srx and @key.
*/
struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
- struct sockaddr_rxrpc *srx,
+ struct rxrpc_peer *peer,
struct key *key,
unsigned long user_call_ID,
s64 tx_total_len,
u32 hard_timeout,
gfp_t gfp,
rxrpc_notify_rx_t notify_rx,
+ u16 service_id,
bool upgrade,
enum rxrpc_interruptibility interruptibility,
unsigned int debug_id)
@@ -295,13 +342,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct rxrpc_call_params p;
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- int ret;
_enter(",,%x,%lx", key_serial(key), user_call_ID);
- ret = rxrpc_validate_address(rx, srx, sizeof(*srx));
- if (ret < 0)
- return ERR_PTR(ret);
+ if (WARN_ON_ONCE(peer->local != rx->local))
+ return ERR_PTR(-EIO);
lock_sock(&rx->sk);
@@ -319,12 +364,13 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
+ cp.peer = peer;
cp.key = key;
cp.security_level = rx->min_sec_level;
cp.exclusive = false;
cp.upgrade = upgrade;
- cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p, gfp, debug_id);
+ cp.service_id = service_id;
+ call = rxrpc_new_client_call(rx, &cp, &p, gfp, debug_id);
/* The socket has been unlocked. */
if (!IS_ERR(call)) {
call->notify_rx = notify_rx;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index e8e14c6f904d..7818aae1be8e 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -68,6 +68,7 @@ struct rxrpc_net {
atomic_t nr_calls; /* Count of allocated calls */
atomic_t nr_conns;
+ struct list_head bundle_proc_list; /* List of bundles for proc */
struct list_head conn_proc_list; /* List of conns in this namespace for proc */
struct list_head service_conns; /* Service conns in this namespace */
rwlock_t conn_lock; /* Lock for ->conn_proc_list, ->service_conns */
@@ -198,11 +199,19 @@ struct rxrpc_host_header {
*/
struct rxrpc_skb_priv {
struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
- u16 offset; /* Offset of data */
- u16 len; /* Length of data */
- u8 flags;
+ union {
+ struct {
+ u16 offset; /* Offset of data */
+ u16 len; /* Length of data */
+ u8 flags;
#define RXRPC_RX_VERIFIED 0x01
-
+ };
+ struct {
+ rxrpc_seq_t first_ack; /* First packet in acks table */
+ u8 nr_acks; /* Number of acks+nacks */
+ u8 nr_nacks; /* Number of nacks */
+ };
+ };
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -364,6 +373,7 @@ struct rxrpc_conn_proto {
struct rxrpc_conn_parameters {
struct rxrpc_local *local; /* Representation of local endpoint */
+ struct rxrpc_peer *peer; /* Representation of remote endpoint */
struct key *key; /* Security details */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
@@ -431,6 +441,7 @@ struct rxrpc_bundle {
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
+ struct list_head proc_link; /* Link in net->bundle_proc_list */
const struct rxrpc_security *security; /* applied security module */
refcount_t ref;
atomic_t active; /* Number of active users */
@@ -444,6 +455,7 @@ struct rxrpc_bundle {
struct rb_node local_node; /* Node in local->client_conns */
struct list_head waiting_calls; /* Calls waiting for channels */
unsigned long avail_chans; /* Mask of available channels */
+ unsigned int conn_ids[4]; /* Connection IDs. */
struct rxrpc_connection *conns[4]; /* The connections in the bundle (max 4) */
};
@@ -506,7 +518,7 @@ struct rxrpc_connection {
enum rxrpc_call_completion completion; /* Completion condition */
s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
- atomic_t serial; /* packet serial number counter */
+ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 service_id; /* Service ID, possibly upgraded */
u32 security_level; /* Security level selected */
@@ -688,11 +700,11 @@ struct rxrpc_call {
u8 cong_dup_acks; /* Count of ACKs showing missing packets */
u8 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
+ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
u16 ackr_sack_base; /* Starting slot in SACK table ring */
- rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_seq_t ackr_window; /* Base of SACK window */
rxrpc_seq_t ackr_wtop; /* Base of SACK window */
unsigned int ackr_nr_unacked; /* Number of unacked packets */
@@ -726,7 +738,8 @@ struct rxrpc_call {
struct rxrpc_ack_summary {
u16 nr_acks; /* Number of ACKs in packet */
u16 nr_new_acks; /* Number of new ACKs in packet */
- u16 nr_rot_new_acks; /* Number of rotated new ACKs */
+ u16 nr_new_nacks; /* Number of new nacks in packet */
+ u16 nr_retained_nacks; /* Number of nacks retained between ACKs */
u8 ack_reason;
bool saw_nacks; /* Saw NACKs in packet */
bool new_low_nack; /* T if new low NACK found */
@@ -819,6 +832,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
#include <trace/events/rxrpc.h>
/*
+ * Allocate the next serial number on a connection. 0 must be skipped.
+ */
+static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn)
+{
+ rxrpc_serial_t serial;
+
+ serial = conn->tx_serial;
+ if (serial == 0)
+ serial = 1;
+ conn->tx_serial = serial + 1;
+ return serial;
+}
+
+/*
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_rx_skbs;
@@ -867,7 +894,6 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long
struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
- struct sockaddr_rxrpc *,
struct rxrpc_call_params *, gfp_t,
unsigned int);
void rxrpc_start_call_timer(struct rxrpc_call *call);
@@ -1076,6 +1102,7 @@ void rxrpc_send_version_request(struct rxrpc_local *local,
/*
* local_object.c
*/
+void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set);
struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *);
struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace);
struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace);
@@ -1167,6 +1194,7 @@ void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
*/
extern const struct seq_operations rxrpc_call_seq_ops;
extern const struct seq_operations rxrpc_connection_seq_ops;
+extern const struct seq_operations rxrpc_bundle_seq_ops;
extern const struct seq_operations rxrpc_peer_seq_ops;
extern const struct seq_operations rxrpc_local_seq_ops;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index e363f21a2014..0f78544d043b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
unsigned long expiry = rxrpc_soft_ack_delay;
unsigned long now = jiffies, ack_at;
- call->ackr_serial = serial;
-
if (rxrpc_soft_ack_delay < expiry)
expiry = rxrpc_soft_ack_delay;
if (call->peer->srtt_us != 0)
@@ -114,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
struct rxrpc_ackpacket *ack = NULL;
+ struct rxrpc_skb_priv *sp;
struct rxrpc_txbuf *txb;
unsigned long resend_at;
rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
@@ -141,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* explicitly NAK'd packets.
*/
if (ack_skb) {
+ sp = rxrpc_skb(ack_skb);
ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- for (i = 0; i < ack->nAcks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
rxrpc_seq_t seq;
if (ack->acks[i] & 1)
continue;
- seq = ntohl(ack->firstPacket) + i;
+ seq = sp->first_ack + i;
if (after(txb->seq, transmitted))
break;
if (after(txb->seq, seq))
@@ -373,7 +373,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
unsigned long now, next, t;
- rxrpc_serial_t ackr_serial;
bool resend = false, expired = false;
s32 abort_code;
@@ -423,8 +422,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
- ackr_serial = xchg(&call->ackr_serial, 0);
- rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
+ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
rxrpc_propose_ack_ping_for_lost_ack);
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 773eecd1e979..9fc9a6c3f685 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -193,7 +193,6 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
* Allocate a new client call.
*/
static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
- struct sockaddr_rxrpc *srx,
struct rxrpc_conn_parameters *cp,
struct rxrpc_call_params *p,
gfp_t gfp,
@@ -211,10 +210,12 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;
- call->dest_srx = *srx;
+ call->dest_srx = cp->peer->srx;
+ call->dest_srx.srx_service = cp->service_id;
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
call->key = key_get(cp->key);
+ call->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_call);
call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call);
call->security_level = cp->security_level;
if (p->kernel)
@@ -306,10 +307,6 @@ static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
- call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
- if (!call->peer)
- goto error;
-
ret = rxrpc_look_up_bundle(call, gfp);
if (ret < 0)
goto error;
@@ -334,7 +331,6 @@ error:
*/
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
struct rxrpc_call_params *p,
gfp_t gfp,
unsigned int debug_id)
@@ -349,13 +345,18 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
_enter("%p,%lx", rx, p->user_call_ID);
+ if (WARN_ON_ONCE(!cp->peer)) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-EIO);
+ }
+
limiter = rxrpc_get_call_slot(p, gfp);
if (!limiter) {
release_sock(&rx->sk);
return ERR_PTR(-ERESTARTSYS);
}
- call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id);
+ call = rxrpc_alloc_client_call(rx, cp, p, gfp, debug_id);
if (IS_ERR(call)) {
release_sock(&rx->sk);
up(limiter);
@@ -545,8 +546,8 @@ void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
*/
static void rxrpc_cleanup_ring(struct rxrpc_call *call)
{
- skb_queue_purge(&call->recvmsg_queue);
- skb_queue_purge(&call->rx_oos_queue);
+ rxrpc_purge_queue(&call->recvmsg_queue);
+ rxrpc_purge_queue(&call->rx_oos_queue);
}
/*
@@ -685,6 +686,7 @@ static void rxrpc_destroy_call(struct work_struct *work)
del_timer_sync(&call->timer);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
rxrpc_cleanup_ring(call);
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 1d95f8bc769f..3b9b267a4431 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -91,6 +91,10 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
atomic_set(&bundle->active, 1);
INIT_LIST_HEAD(&bundle->waiting_calls);
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
+
+ write_lock(&bundle->local->rxnet->conn_lock);
+ list_add_tail(&bundle->proc_link, &bundle->local->rxnet->bundle_proc_list);
+ write_unlock(&bundle->local->rxnet->conn_lock);
}
return bundle;
}
@@ -109,6 +113,9 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
trace_rxrpc_bundle(bundle->debug_id, refcount_read(&bundle->ref),
rxrpc_bundle_free);
+ write_lock(&bundle->local->rxnet->conn_lock);
+ list_del(&bundle->proc_link);
+ write_unlock(&bundle->local->rxnet->conn_lock);
rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
key_put(bundle->key);
kfree(bundle);
@@ -338,6 +345,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
old = bundle->conns[slot];
if (old) {
bundle->conns[slot] = NULL;
+ bundle->conn_ids[slot] = 0;
trace_rxrpc_client(old, -1, rxrpc_client_replace);
rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
}
@@ -351,6 +359,7 @@ static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
rxrpc_activate_bundle(bundle);
conn->bundle_shift = shift;
bundle->conns[slot] = conn;
+ bundle->conn_ids[slot] = conn->debug_id;
for (i = 0; i < RXRPC_MAXCALLS; i++)
set_bit(shift + i, &bundle->avail_chans);
return true;
@@ -671,6 +680,7 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (bundle->conns[bindex] == conn) {
_debug("clear slot %u", bindex);
bundle->conns[bindex] = NULL;
+ bundle->conn_ids[bindex] = 0;
for (i = 0; i < RXRPC_MAXCALLS; i++)
clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
rxrpc_put_client_connection_id(bundle->local, conn);
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 95f4bc206b3d..1f251d758cb9 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
_enter("%d", conn->debug_id);
+ if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &pkt.ack, sizeof(pkt.ack)) < 0)
+ return;
+ if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE)
+ return;
+ }
+
chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
@@ -117,7 +125,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 89ac05a711a4..39c908a3ca6e 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -25,7 +25,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
struct rxrpc_conn_proto k;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rb_node *p;
- unsigned int seq = 0;
+ unsigned int seq = 1;
k.epoch = sp->hdr.epoch;
k.cid = sp->hdr.cid & RXRPC_CIDMASK;
@@ -35,6 +35,7 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer,
* under just the RCU read lock, so we have to check for
* changes.
*/
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&peer->service_conn_lock, &seq);
p = rcu_dereference_raw(peer->service_conns.rb_node);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 92495e73b869..9691de00ade7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
}
cumulative_acks += summary->nr_new_acks;
- cumulative_acks += summary->nr_rot_new_acks;
if (cumulative_acks > 255)
cumulative_acks = 255;
- summary->mode = call->cong_mode;
summary->cwnd = call->cong_cwnd;
summary->ssthresh = call->cong_ssthresh;
summary->cumulative_acks = cumulative_acks;
@@ -151,6 +149,7 @@ out_no_clear_ca:
cwnd = RXRPC_TX_MAX_WINDOW;
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
+ summary->mode = call->cong_mode;
trace_rxrpc_congest(call, summary, acked_serial, change);
if (resend)
rxrpc_resend(call, skb);
@@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
if (before_eq(txb->seq, call->acks_hard_ack))
continue;
- summary->nr_rot_new_acks++;
if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
@@ -254,6 +252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+ if (unlikely(call->cong_last_nack)) {
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ }
+
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
@@ -703,6 +706,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
}
/*
+ * Determine how many nacks from the previous ACK have now been satisfied.
+ */
+static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ rxrpc_seq_t seq)
+{
+ struct sk_buff *skb = call->cong_last_nack;
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, new_acks = 0, retained_nacks = 0;
+ rxrpc_seq_t old_seq = sp->first_ack;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack);
+
+ if (after_eq(seq, old_seq + sp->nr_acks)) {
+ summary->nr_new_acks += sp->nr_nacks;
+ summary->nr_new_acks += seq - (old_seq + sp->nr_acks);
+ summary->nr_retained_nacks = 0;
+ } else if (seq == old_seq) {
+ summary->nr_retained_nacks = sp->nr_nacks;
+ } else {
+ for (i = 0; i < sp->nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_NACK) {
+ if (before(old_seq + i, seq))
+ new_acks++;
+ else
+ retained_nacks++;
+ }
+ }
+
+ summary->nr_new_acks += new_acks;
+ summary->nr_retained_nacks = retained_nacks;
+ }
+
+ return old_seq + sp->nr_acks;
+}
+
+/*
* Process individual soft ACKs.
*
* Each ACK in the array corresponds to one packet and can be either an ACK or
@@ -711,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
* the timer on the basis that the peer might just not have processed them at
* the time the ACK was sent.
*/
-static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
- rxrpc_seq_t seq, int nr_acks,
- struct rxrpc_ack_summary *summary)
+static void rxrpc_input_soft_acks(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct sk_buff *skb,
+ rxrpc_seq_t seq,
+ rxrpc_seq_t since)
{
- unsigned int i;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, old_nacks = 0;
+ rxrpc_seq_t lowest_nak = seq + sp->nr_acks;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < nr_acks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
- summary->nr_new_acks++;
+ if (after_eq(seq, since))
+ summary->nr_new_acks++;
} else {
- if (!summary->saw_nacks &&
- call->acks_lowest_nak != seq + i) {
- call->acks_lowest_nak = seq + i;
- summary->new_low_nack = true;
- }
summary->saw_nacks = true;
+ if (before(seq, since)) {
+ /* Overlap with previous ACK */
+ old_nacks++;
+ } else {
+ summary->nr_new_nacks++;
+ sp->nr_nacks++;
+ }
+
+ if (before(seq, lowest_nak))
+ lowest_nak = seq;
}
+ seq++;
+ }
+
+ if (lowest_nak != call->acks_lowest_nak) {
+ call->acks_lowest_nak = lowest_nak;
+ summary->new_low_nack = true;
}
+
+ /* We *can* have more nacks than we did - the peer is permitted to drop
+ * packets it has soft-acked and re-request them. Further, it is
+ * possible for the nack distribution to change whilst the number of
+ * nacks stays the same or goes down.
+ */
+ if (old_nacks < summary->nr_retained_nacks)
+ summary->nr_new_acks += summary->nr_retained_nacks - old_nacks;
+ summary->nr_retained_nacks = old_nacks;
}
/*
@@ -773,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_ackinfo info;
rxrpc_serial_t ack_serial, acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
int nr_acks, offset, ioffset;
_enter("");
@@ -789,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
prev_pkt = ntohl(ack.previousPacket);
hard_ack = first_soft_ack - 1;
nr_acks = ack.nAcks;
+ sp->first_ack = first_soft_ack;
+ sp->nr_acks = nr_acks;
summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
ack.reason : RXRPC_ACK__INVALID);
@@ -858,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0)
skb_condense(skb);
+ if (call->cong_last_nack) {
+ since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ } else {
+ summary.nr_new_acks = first_soft_ack - call->acks_first_seq;
+ call->acks_lowest_nak = first_soft_ack + nr_acks;
+ since = first_soft_ack;
+ }
+
call->acks_latest_ts = skb->tstamp;
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
@@ -866,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_ACK_PING:
break;
default:
- if (after(acked_serial, call->acks_highest_serial))
+ if (acked_serial && after(acked_serial, call->acks_highest_serial))
call->acks_highest_serial = acked_serial;
break;
}
@@ -905,8 +983,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
- rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
- nr_acks, &summary);
+ rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since);
+ rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
+ call->cong_last_nack = skb;
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index c553a30e9c83..34d307368135 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -37,6 +37,17 @@ static void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, int err,
}
/*
+ * Set or clear the Don't Fragment flag on a socket.
+ */
+void rxrpc_local_dont_fragment(const struct rxrpc_local *local, bool set)
+{
+ if (set)
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
+ else
+ ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DONT);
+}
+
+/*
* Compare a local to an address. Return -ve, 0 or +ve to indicate less than,
* same or greater than.
*
@@ -203,7 +214,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
ip_sock_set_recverr(usk);
/* we want to set the don't fragment bit */
- ip_sock_set_mtu_discover(usk, IP_PMTUDISC_DO);
+ rxrpc_local_dont_fragment(local, true);
/* We want receive timestamps. */
sock_enable_timestamps(usk);
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index a0319c040c25..a4c135d0fbcc 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -45,6 +45,7 @@ static __net_init int rxrpc_init_net(struct net *net)
atomic_set(&rxnet->nr_calls, 1);
atomic_set(&rxnet->nr_conns, 1);
+ INIT_LIST_HEAD(&rxnet->bundle_proc_list);
INIT_LIST_HEAD(&rxnet->conn_proc_list);
INIT_LIST_HEAD(&rxnet->service_conns);
rwlock_init(&rxnet->conn_lock);
@@ -78,6 +79,9 @@ static __net_init int rxrpc_init_net(struct net *net)
proc_create_net("conns", 0444, rxnet->proc_net,
&rxrpc_connection_seq_ops,
sizeof(struct seq_net_private));
+ proc_create_net("bundles", 0444, rxnet->proc_net,
+ &rxrpc_bundle_seq_ops,
+ sizeof(struct seq_net_private));
proc_create_net("peers", 0444, rxnet->proc_net,
&rxrpc_peer_seq_ops,
sizeof(struct seq_net_private));
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 5e53429c6922..4a292f860ae3 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
len = iov[0].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
@@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
iov[0].iov_base = &pkt;
iov[0].iov_len = sizeof(pkt);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
@@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
_enter("%x,{%d}", txb->seq, txb->len);
/* Each transmission of a Tx packet needs a new serial number */
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
@@ -494,14 +494,12 @@ send_fragmentable:
switch (conn->local->srx.transport.family) {
case AF_INET6:
case AF_INET:
- ip_sock_set_mtu_discover(conn->local->socket->sk,
- IP_PMTUDISC_DONT);
+ rxrpc_local_dont_fragment(conn->local, false);
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag);
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- ip_sock_set_mtu_discover(conn->local->socket->sk,
- IP_PMTUDISC_DO);
+ rxrpc_local_dont_fragment(conn->local, true);
break;
default:
@@ -560,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 8d7a715a0bb1..49dcda67a0d5 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -22,6 +22,8 @@
#include <net/ip6_route.h>
#include "ar-internal.h"
+static const struct sockaddr_rxrpc rxrpc_null_addr;
+
/*
* Hash a peer key.
*/
@@ -457,39 +459,53 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *rxnet)
}
/**
- * rxrpc_kernel_get_peer - Get the peer address of a call
+ * rxrpc_kernel_get_call_peer - Get the peer address of a call
* @sock: The socket on which the call is in progress.
* @call: The call to query
- * @_srx: Where to place the result
*
- * Get the address of the remote peer in a call.
+ * Get a record for the remote peer in a call.
*/
-void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
- struct sockaddr_rxrpc *_srx)
+struct rxrpc_peer *rxrpc_kernel_get_call_peer(struct socket *sock, struct rxrpc_call *call)
{
- *_srx = call->peer->srx;
+ return call->peer;
}
-EXPORT_SYMBOL(rxrpc_kernel_get_peer);
+EXPORT_SYMBOL(rxrpc_kernel_get_call_peer);
/**
* rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT
- * @sock: The socket on which the call is in progress.
- * @call: The call to query
- * @_srtt: Where to store the SRTT value.
+ * @peer: The peer to query
*
- * Get the call's peer smoothed RTT in uS.
+ * Get the call's peer smoothed RTT in uS or UINT_MAX if we have no samples.
*/
-bool rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call,
- u32 *_srtt)
+unsigned int rxrpc_kernel_get_srtt(const struct rxrpc_peer *peer)
{
- struct rxrpc_peer *peer = call->peer;
+ return peer->rtt_count > 0 ? peer->srtt_us >> 3 : UINT_MAX;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
- if (peer->rtt_count == 0) {
- *_srtt = 1000000; /* 1S */
- return false;
- }
+/**
+ * rxrpc_kernel_remote_srx - Get the address of a peer
+ * @peer: The peer to query
+ *
+ * Get a pointer to the address from a peer record. The caller is responsible
+ * for making sure that the address is not deallocated.
+ */
+const struct sockaddr_rxrpc *rxrpc_kernel_remote_srx(const struct rxrpc_peer *peer)
+{
+ return peer ? &peer->srx : &rxrpc_null_addr;
+}
+EXPORT_SYMBOL(rxrpc_kernel_remote_srx);
- *_srtt = call->peer->srtt_us >> 3;
- return true;
+/**
+ * rxrpc_kernel_remote_addr - Get the peer transport address of a call
+ * @peer: The peer to query
+ *
+ * Get a pointer to the transport address from a peer record. The caller is
+ * responsible for making sure that the address is not deallocated.
+ */
+const struct sockaddr *rxrpc_kernel_remote_addr(const struct rxrpc_peer *peer)
+{
+ return (const struct sockaddr *)
+ (peer ? &peer->srx.transport : &rxrpc_null_addr.transport);
}
-EXPORT_SYMBOL(rxrpc_kernel_get_srtt);
+EXPORT_SYMBOL(rxrpc_kernel_remote_addr);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 682636d3b060..26dc2f26d92d 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -181,7 +181,7 @@ print:
atomic_read(&conn->active),
state,
key_serial(conn->key),
- atomic_read(&conn->serial),
+ conn->tx_serial,
conn->hi_serial,
conn->channels[0].call_id,
conn->channels[1].call_id,
@@ -199,6 +199,82 @@ const struct seq_operations rxrpc_connection_seq_ops = {
};
/*
+ * generate a list of extant virtual bundles in /proc/net/rxrpc/bundles
+ */
+static void *rxrpc_bundle_seq_start(struct seq_file *seq, loff_t *_pos)
+ __acquires(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_lock(&rxnet->conn_lock);
+ return seq_list_start_head(&rxnet->bundle_proc_list, *_pos);
+}
+
+static void *rxrpc_bundle_seq_next(struct seq_file *seq, void *v,
+ loff_t *pos)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ return seq_list_next(v, &rxnet->bundle_proc_list, pos);
+}
+
+static void rxrpc_bundle_seq_stop(struct seq_file *seq, void *v)
+ __releases(rxnet->conn_lock)
+{
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+
+ read_unlock(&rxnet->conn_lock);
+}
+
+static int rxrpc_bundle_seq_show(struct seq_file *seq, void *v)
+{
+ struct rxrpc_bundle *bundle;
+ struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ char lbuff[50], rbuff[50];
+
+ if (v == &rxnet->bundle_proc_list) {
+ seq_puts(seq,
+ "Proto Local "
+ " Remote "
+ " SvID Ref Act Flg Key |"
+ " Bundle Conn_0 Conn_1 Conn_2 Conn_3\n"
+ );
+ return 0;
+ }
+
+ bundle = list_entry(v, struct rxrpc_bundle, proc_link);
+
+ sprintf(lbuff, "%pISpc", &bundle->local->srx.transport);
+ sprintf(rbuff, "%pISpc", &bundle->peer->srx.transport);
+ seq_printf(seq,
+ "UDP %-47.47s %-47.47s %4x %3u %3d"
+ " %c%c%c %08x | %08x %08x %08x %08x %08x\n",
+ lbuff,
+ rbuff,
+ bundle->service_id,
+ refcount_read(&bundle->ref),
+ atomic_read(&bundle->active),
+ bundle->try_upgrade ? 'U' : '-',
+ bundle->exclusive ? 'e' : '-',
+ bundle->upgrade ? 'u' : '-',
+ key_serial(bundle->key),
+ bundle->debug_id,
+ bundle->conn_ids[0],
+ bundle->conn_ids[1],
+ bundle->conn_ids[2],
+ bundle->conn_ids[3]);
+
+ return 0;
+}
+
+const struct seq_operations rxrpc_bundle_seq_ops = {
+ .start = rxrpc_bundle_seq_start,
+ .next = rxrpc_bundle_seq_next,
+ .stop = rxrpc_bundle_seq_stop,
+ .show = rxrpc_bundle_seq_show,
+};
+
+/*
* generate a list of extant virtual peers in /proc/net/rxrpc/peers
*/
static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 1bf571a66e02..6b32d61d4cdc 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
@@ -721,10 +721,12 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
+ rxrpc_local_dont_fragment(conn->local, false);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len);
+ rxrpc_local_dont_fragment(conn->local, true);
if (ret < 0) {
trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
rxrpc_tx_point_rxkad_response);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 8e0b94714e84..5677d5690a02 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -572,6 +572,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
__acquires(&call->user_mutex)
{
struct rxrpc_conn_parameters cp;
+ struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct key *key;
@@ -584,21 +585,29 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
return ERR_PTR(-EDESTADDRREQ);
}
+ peer = rxrpc_lookup_peer(rx->local, srx, GFP_KERNEL);
+ if (!peer) {
+ release_sock(&rx->sk);
+ return ERR_PTR(-ENOMEM);
+ }
+
key = rx->key;
if (key && !rx->key->payload.data[0])
key = NULL;
memset(&cp, 0, sizeof(cp));
cp.local = rx->local;
+ cp.peer = peer;
cp.key = rx->key;
cp.security_level = rx->min_sec_level;
cp.exclusive = rx->exclusive | p->exclusive;
cp.upgrade = p->upgrade;
cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, &p->call, GFP_KERNEL,
+ call = rxrpc_new_client_call(rx, &cp, &p->call, GFP_KERNEL,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
+ rxrpc_put_peer(peer, rxrpc_peer_put_application);
_leave(" = %p\n", call);
return call;
}
diff --git a/net/sched/Makefile b/net/sched/Makefile
index b5fd49641d91..82c3f78ca486 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -13,7 +13,6 @@ obj-$(CONFIG_NET_ACT_POLICE) += act_police.o
obj-$(CONFIG_NET_ACT_GACT) += act_gact.o
obj-$(CONFIG_NET_ACT_MIRRED) += act_mirred.o
obj-$(CONFIG_NET_ACT_SAMPLE) += act_sample.o
-obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index c39252d61ebb..3e30d7260493 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -816,6 +816,9 @@ EXPORT_SYMBOL(tcf_idr_cleanup);
* its reference and bind counters, and return 1. Otherwise insert temporary
* error pointer (to prevent concurrent users from inserting actions with same
* index) and return 0.
+ *
+ * May return -EAGAIN for binding actions in case of a parallel add/delete on
+ * the requested index.
*/
int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
@@ -824,43 +827,61 @@ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index,
struct tcf_idrinfo *idrinfo = tn->idrinfo;
struct tc_action *p;
int ret;
+ u32 max;
-again:
- mutex_lock(&idrinfo->lock);
if (*index) {
+again:
+ rcu_read_lock();
p = idr_find(&idrinfo->action_idr, *index);
+
if (IS_ERR(p)) {
/* This means that another process allocated
* index but did not assign the pointer yet.
*/
- mutex_unlock(&idrinfo->lock);
+ rcu_read_unlock();
goto again;
}
- if (p) {
- refcount_inc(&p->tcfa_refcnt);
- if (bind)
- atomic_inc(&p->tcfa_bindcnt);
- *a = p;
- ret = 1;
- } else {
- *a = NULL;
- ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
- *index, GFP_KERNEL);
- if (!ret)
- idr_replace(&idrinfo->action_idr,
- ERR_PTR(-EBUSY), *index);
+ if (!p) {
+ /* Empty slot, try to allocate it */
+ max = *index;
+ rcu_read_unlock();
+ goto new;
+ }
+
+ if (!refcount_inc_not_zero(&p->tcfa_refcnt)) {
+ /* Action was deleted in parallel */
+ rcu_read_unlock();
+ return -EAGAIN;
}
+
+ if (bind)
+ atomic_inc(&p->tcfa_bindcnt);
+ *a = p;
+
+ rcu_read_unlock();
+
+ return 1;
} else {
+ /* Find a slot */
*index = 1;
- *a = NULL;
- ret = idr_alloc_u32(&idrinfo->action_idr, NULL, index,
- UINT_MAX, GFP_KERNEL);
- if (!ret)
- idr_replace(&idrinfo->action_idr, ERR_PTR(-EBUSY),
- *index);
+ max = UINT_MAX;
}
+
+new:
+ *a = NULL;
+
+ mutex_lock(&idrinfo->lock);
+ ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max,
+ GFP_KERNEL);
mutex_unlock(&idrinfo->lock);
+
+ /* N binds raced for action allocation,
+ * retry for all the ones that failed.
+ */
+ if (ret == -ENOSPC && *index == max)
+ ret = -EAGAIN;
+
return ret;
}
EXPORT_SYMBOL(tcf_idr_check_alloc);
@@ -1098,7 +1119,8 @@ repeat:
}
} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
if (unlikely(!rcu_access_pointer(a->goto_chain))) {
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND);
return TC_ACT_SHOT;
}
tcf_action_goto_chain_exec(a, res);
@@ -1118,8 +1140,7 @@ int tcf_action_destroy(struct tc_action *actions[], int bind)
struct tc_action *a;
int ret = 0, i;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
actions[i] = NULL;
ops = a->ops;
ret = __tcf_idr_release(a, bind, true);
@@ -1136,18 +1157,29 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
}
-/* Put all actions in this array, skip those NULL's. */
static void tcf_action_put_many(struct tc_action *actions[])
{
+ struct tc_action *a;
+ int i;
+
+ tcf_act_for_each_action(i, a, actions) {
+ const struct tc_action_ops *ops = a->ops;
+ if (tcf_action_put(a))
+ module_put(ops->owner);
+ }
+}
+
+static void tca_put_bound_many(struct tc_action *actions[], int init_res[])
+{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- struct tc_action *a = actions[i];
- const struct tc_action_ops *ops;
+ tcf_act_for_each_action(i, a, actions) {
+ const struct tc_action_ops *ops = a->ops;
- if (!a)
+ if (init_res[i] == ACT_P_CREATED)
continue;
- ops = a->ops;
+
if (tcf_action_put(a))
module_put(ops->owner);
}
@@ -1211,8 +1243,7 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
int err = -EINVAL, i;
struct nlattr *nest;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
nest = nla_nest_start_noflag(skb, i + 1);
if (nest == NULL)
goto nla_put_failure;
@@ -1274,30 +1305,29 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
[TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
};
-void tcf_idr_insert_many(struct tc_action *actions[])
+void tcf_idr_insert_many(struct tc_action *actions[], int init_res[])
{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- struct tc_action *a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
struct tcf_idrinfo *idrinfo;
- if (!a)
+ if (init_res[i] == ACT_P_BOUND)
continue;
+
idrinfo = a->idrinfo;
mutex_lock(&idrinfo->lock);
- /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc if
- * it is just created, otherwise this is just a nop.
- */
+ /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */
idr_replace(&idrinfo->action_idr, a, a->tcfa_index);
mutex_unlock(&idrinfo->lock);
}
}
-struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
- bool rtnl_held,
+struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags,
struct netlink_ext_ack *extack)
{
+ bool police = flags & TCA_ACT_FLAGS_POLICE;
struct nlattr *tb[TCA_ACT_MAX + 1];
struct tc_action_ops *a_o;
char act_name[IFNAMSIZ];
@@ -1329,6 +1359,8 @@ struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, bool police,
a_o = tc_lookup_action_n(act_name);
if (a_o == NULL) {
#ifdef CONFIG_MODULES
+ bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
+
if (rtnl_held)
rtnl_unlock();
request_module("act_%s", act_name);
@@ -1445,9 +1477,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops(tb[i], flags & TCA_ACT_FLAGS_POLICE,
- !(flags & TCA_ACT_FLAGS_NO_RTNL),
- extack);
+ a_o = tc_action_load_ops(tb[i], flags, extack);
if (IS_ERR(a_o)) {
err = PTR_ERR(a_o);
goto err_mod;
@@ -1488,7 +1518,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
/* We have to commit them all together, because if any error happened in
* between, we could not handle the failure gracefully.
*/
- tcf_idr_insert_many(actions);
+ tcf_idr_insert_many(actions, init_res);
*attr_size = tcf_action_full_attrs_size(sz);
err = i - 1;
@@ -1497,10 +1527,8 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
err:
tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND);
err_mod:
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++) {
- if (ops[i])
- module_put(ops[i]->owner);
- }
+ for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++)
+ module_put(ops[i]->owner);
return err;
}
@@ -1753,10 +1781,10 @@ err_out:
static int tcf_action_delete(struct net *net, struct tc_action *actions[])
{
+ struct tc_action *a;
int i;
- for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
- struct tc_action *a = actions[i];
+ tcf_act_for_each_action(i, a, actions) {
const struct tc_action_ops *ops = a->ops;
/* Actions can be deleted concurrently so we must save their
* type and id to search again after reference is released.
@@ -1768,7 +1796,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
if (tcf_action_put(a)) {
/* last reference, action was deleted concurrently */
module_put(ops->owner);
- } else {
+ } else {
int ret;
/* now do the delete */
@@ -1780,31 +1808,45 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
return 0;
}
-static int
-tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
+static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net,
+ struct tc_action *action)
{
size_t attr_size = tcf_action_fill_size(action);
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
[0] = action,
};
- const struct tc_action_ops *ops = action->ops;
struct sk_buff *skb;
- int ret;
- skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
- GFP_KERNEL);
+ skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) {
kfree_skb(skb);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ }
+
+ return skb;
+}
+
+static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
+{
+ const struct tc_action_ops *ops = action->ops;
+ struct sk_buff *skb;
+ int ret;
+
+ if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) {
+ skb = NULL;
+ } else {
+ skb = tcf_reoffload_del_notify_msg(net, action);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
}
ret = tcf_idr_release_unsafe(action);
if (ret == ACT_P_DELETED) {
module_put(ops->owner);
- ret = rtnetlink_send(skb, net, 0, RTNLGRP_TC, 0);
+ ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0);
} else {
kfree_skb(skb);
}
@@ -1870,23 +1912,41 @@ int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
return 0;
}
-static int
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
- u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[],
+ u32 portid, size_t attr_size,
+ struct netlink_ext_ack *extack)
{
- int ret;
struct sk_buff *skb;
- skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
- GFP_KERNEL);
+ skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
0, 2, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
+ }
+
+ return skb;
+}
+
+static int tcf_del_notify(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[], u32 portid,
+ size_t attr_size, struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ int ret;
+
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
+ skb = NULL;
+ } else {
+ skb = tcf_del_notify_msg(net, n, actions, portid, attr_size,
+ extack);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
}
/* now do the delete */
@@ -1897,9 +1957,8 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return ret;
}
- ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
- return ret;
+ return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int
@@ -1950,26 +2009,44 @@ err:
return ret;
}
-static int
-tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
- u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
+static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[],
+ u32 portid, size_t attr_size,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
- skb = alloc_skb(attr_size <= NLMSG_GOODSIZE ? NLMSG_GOODSIZE : attr_size,
- GFP_KERNEL);
+ skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
if (!skb)
- return -ENOBUFS;
+ return ERR_PTR(-ENOBUFS);
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
RTM_NEWACTION, 0, 0, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ return skb;
+}
+
+static int tcf_add_notify(struct net *net, struct nlmsghdr *n,
+ struct tc_action *actions[], u32 portid,
+ size_t attr_size, struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
+ skb = NULL;
+ } else {
+ skb = tcf_add_notify_msg(net, n, actions, portid, attr_size,
+ extack);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+ }
+
+ return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
}
static int tcf_action_add(struct net *net, struct nlattr *nla,
@@ -1977,7 +2054,7 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
size_t attr_size = 0;
- int loop, ret, i;
+ int loop, ret;
struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
int init_res[TCA_ACT_MAX_PRIO] = {};
@@ -1990,13 +2067,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
+
ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
- /* only put existing actions */
- for (i = 0; i < TCA_ACT_MAX_PRIO; i++)
- if (init_res[i] == ACT_P_CREATED)
- actions[i] = NULL;
- tcf_action_put_many(actions);
+ /* only put bound actions */
+ tca_put_bound_many(actions, init_res);
return ret;
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index b0455fda7d0b..6cfee6658103 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -318,7 +318,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
} else if (ret > 0) {
/* Don't override defaults. */
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*act, bind);
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 0d7aee8933c5..f8762756657d 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -146,7 +146,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
} else if (ret > 0) {
ci = to_connmark(*a);
if (bind) {
- err = 0;
+ err = ACT_P_BOUND;
goto out_free;
}
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 8ed285023a40..7f8b1f2f2ed9 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -77,8 +77,8 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
} else if (err > 0) {
- if (bind)/* dont override defaults */
- return 0;
+ if (bind) /* dont override defaults */
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f69c47945175..6124d8b128d1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -850,7 +850,6 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
if (err || !frag)
return err;
- skb_get(skb);
err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
if (err)
return err;
@@ -999,12 +998,8 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag);
- if (err == -EINPROGRESS) {
- retval = TC_ACT_STOLEN;
- goto out_clear;
- }
if (err)
- goto drop;
+ goto out_frag;
err = nf_ct_skb_network_trim(skb, family);
if (err)
@@ -1091,6 +1086,11 @@ out_clear:
qdisc_skb_cb(skb)->pkt_len = skb->len;
return retval;
+out_frag:
+ if (err != -EINPROGRESS)
+ tcf_action_inc_drop_qstats(&c->common);
+ return TC_ACT_CONSUMED;
+
drop:
tcf_action_inc_drop_qstats(&c->common);
return TC_ACT_SHOT;
@@ -1349,7 +1349,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla,
res = ACT_P_CREATED;
} else {
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 4d15b6a6169c..e620f9a84afe 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -221,7 +221,7 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind) /* don't override defaults */
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 904ab3d457ef..4af3b7ec249f 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -108,7 +108,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind)/* dont override defaults */
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 393b78729216..c681cd011afd 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -356,7 +356,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return err;
if (err && bind)
- return 0;
+ return ACT_P_BOUND;
if (!err) {
ret = tcf_idr_create_from_flags(tn, index, est, a,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index bc7611b0744c..0e867d13beb5 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -548,7 +548,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
exists = err;
if (exists && bind) {
kfree(p);
- return 0;
+ return ACT_P_BOUND;
}
if (!exists) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
deleted file mode 100644
index 598d6e299152..000000000000
--- a/net/sched/act_ipt.c
+++ /dev/null
@@ -1,464 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/act_ipt.c iptables target interface
- *
- *TODO: Add other tables. For now we only support the ipv4 table targets
- *
- * Copyright: Jamal Hadi Salim (2002-13)
- */
-
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <linux/tc_act/tc_ipt.h>
-#include <net/tc_act/tc_ipt.h>
-#include <net/tc_wrapper.h>
-#include <net/ip.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-
-static struct tc_action_ops act_ipt_ops;
-static struct tc_action_ops act_xt_ops;
-
-static int ipt_init_target(struct net *net, struct xt_entry_target *t,
- char *table, unsigned int hook)
-{
- struct xt_tgchk_param par;
- struct xt_target *target;
- struct ipt_entry e = {};
- int ret = 0;
-
- target = xt_request_find_target(AF_INET, t->u.user.name,
- t->u.user.revision);
- if (IS_ERR(target))
- return PTR_ERR(target);
-
- t->u.kernel.target = target;
- memset(&par, 0, sizeof(par));
- par.net = net;
- par.table = table;
- par.entryinfo = &e;
- par.target = target;
- par.targinfo = t->data;
- par.hook_mask = 1 << hook;
- par.family = NFPROTO_IPV4;
-
- ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
- if (ret < 0) {
- module_put(t->u.kernel.target->me);
- return ret;
- }
- return 0;
-}
-
-static void ipt_destroy_target(struct xt_entry_target *t, struct net *net)
-{
- struct xt_tgdtor_param par = {
- .target = t->u.kernel.target,
- .targinfo = t->data,
- .family = NFPROTO_IPV4,
- .net = net,
- };
- if (par.target->destroy != NULL)
- par.target->destroy(&par);
- module_put(par.target->me);
-}
-
-static void tcf_ipt_release(struct tc_action *a)
-{
- struct tcf_ipt *ipt = to_ipt(a);
-
- if (ipt->tcfi_t) {
- ipt_destroy_target(ipt->tcfi_t, a->idrinfo->net);
- kfree(ipt->tcfi_t);
- }
- kfree(ipt->tcfi_tname);
-}
-
-static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
- [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
- [TCA_IPT_HOOK] = NLA_POLICY_RANGE(NLA_U32, NF_INET_PRE_ROUTING,
- NF_INET_NUMHOOKS),
- [TCA_IPT_INDEX] = { .type = NLA_U32 },
- [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
-};
-
-static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops,
- struct tcf_proto *tp, u32 flags)
-{
- struct tc_action_net *tn = net_generic(net, id);
- bool bind = flags & TCA_ACT_FLAGS_BIND;
- struct nlattr *tb[TCA_IPT_MAX + 1];
- struct tcf_ipt *ipt;
- struct xt_entry_target *td, *t;
- char *tname;
- bool exists = false;
- int ret = 0, err;
- u32 hook = 0;
- u32 index = 0;
-
- if (nla == NULL)
- return -EINVAL;
-
- err = nla_parse_nested_deprecated(tb, TCA_IPT_MAX, nla, ipt_policy,
- NULL);
- if (err < 0)
- return err;
-
- if (tb[TCA_IPT_INDEX] != NULL)
- index = nla_get_u32(tb[TCA_IPT_INDEX]);
-
- err = tcf_idr_check_alloc(tn, &index, a, bind);
- if (err < 0)
- return err;
- exists = err;
- if (exists && bind)
- return 0;
-
- if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) {
- if (exists)
- tcf_idr_release(*a, bind);
- else
- tcf_idr_cleanup(tn, index);
- return -EINVAL;
- }
-
- td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
- if (nla_len(tb[TCA_IPT_TARG]) != td->u.target_size) {
- if (exists)
- tcf_idr_release(*a, bind);
- else
- tcf_idr_cleanup(tn, index);
- return -EINVAL;
- }
-
- if (!exists) {
- ret = tcf_idr_create(tn, index, est, a, ops, bind,
- false, flags);
- if (ret) {
- tcf_idr_cleanup(tn, index);
- return ret;
- }
- ret = ACT_P_CREATED;
- } else {
- if (bind)/* dont override defaults */
- return 0;
-
- if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
- }
- }
-
- err = -EINVAL;
- hook = nla_get_u32(tb[TCA_IPT_HOOK]);
- switch (hook) {
- case NF_INET_PRE_ROUTING:
- break;
- case NF_INET_POST_ROUTING:
- break;
- default:
- goto err1;
- }
-
- if (tb[TCA_IPT_TABLE]) {
- /* mangle only for now */
- if (nla_strcmp(tb[TCA_IPT_TABLE], "mangle"))
- goto err1;
- }
-
- tname = kstrdup("mangle", GFP_KERNEL);
- if (unlikely(!tname))
- goto err1;
-
- t = kmemdup(td, td->u.target_size, GFP_KERNEL);
- if (unlikely(!t))
- goto err2;
-
- err = ipt_init_target(net, t, tname, hook);
- if (err < 0)
- goto err3;
-
- ipt = to_ipt(*a);
-
- spin_lock_bh(&ipt->tcf_lock);
- if (ret != ACT_P_CREATED) {
- ipt_destroy_target(ipt->tcfi_t, net);
- kfree(ipt->tcfi_tname);
- kfree(ipt->tcfi_t);
- }
- ipt->tcfi_tname = tname;
- ipt->tcfi_t = t;
- ipt->tcfi_hook = hook;
- spin_unlock_bh(&ipt->tcf_lock);
- return ret;
-
-err3:
- kfree(t);
-err2:
- kfree(tname);
-err1:
- tcf_idr_release(*a, bind);
- return err;
-}
-
-static int tcf_ipt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a,
- struct tcf_proto *tp,
- u32 flags, struct netlink_ext_ack *extack)
-{
- return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est,
- a, &act_ipt_ops, tp, flags);
-}
-
-static int tcf_xt_init(struct net *net, struct nlattr *nla,
- struct nlattr *est, struct tc_action **a,
- struct tcf_proto *tp,
- u32 flags, struct netlink_ext_ack *extack)
-{
- return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est,
- a, &act_xt_ops, tp, flags);
-}
-
-static bool tcf_ipt_act_check(struct sk_buff *skb)
-{
- const struct iphdr *iph;
- unsigned int nhoff, len;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- return false;
-
- nhoff = skb_network_offset(skb);
- iph = ip_hdr(skb);
- if (iph->ihl < 5 || iph->version != 4)
- return false;
-
- len = skb_ip_totlen(skb);
- if (skb->len < nhoff + len || len < (iph->ihl * 4u))
- return false;
-
- return pskb_may_pull(skb, iph->ihl * 4u);
-}
-
-TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb,
- const struct tc_action *a,
- struct tcf_result *res)
-{
- char saved_cb[sizeof_field(struct sk_buff, cb)];
- int ret = 0, result = 0;
- struct tcf_ipt *ipt = to_ipt(a);
- struct xt_action_param par;
- struct nf_hook_state state = {
- .net = dev_net(skb->dev),
- .in = skb->dev,
- .hook = ipt->tcfi_hook,
- .pf = NFPROTO_IPV4,
- };
-
- if (skb_protocol(skb, false) != htons(ETH_P_IP))
- return TC_ACT_UNSPEC;
-
- if (skb_unclone(skb, GFP_ATOMIC))
- return TC_ACT_UNSPEC;
-
- if (!tcf_ipt_act_check(skb))
- return TC_ACT_UNSPEC;
-
- if (state.hook == NF_INET_POST_ROUTING) {
- if (!skb_dst(skb))
- return TC_ACT_UNSPEC;
-
- state.out = skb->dev;
- }
-
- memcpy(saved_cb, skb->cb, sizeof(saved_cb));
-
- spin_lock(&ipt->tcf_lock);
-
- tcf_lastuse_update(&ipt->tcf_tm);
- bstats_update(&ipt->tcf_bstats, skb);
-
- /* yes, we have to worry about both in and out dev
- * worry later - danger - this API seems to have changed
- * from earlier kernels
- */
- par.state = &state;
- par.target = ipt->tcfi_t->u.kernel.target;
- par.targinfo = ipt->tcfi_t->data;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
-
- ret = par.target->target(skb, &par);
-
- switch (ret) {
- case NF_ACCEPT:
- result = TC_ACT_OK;
- break;
- case NF_DROP:
- result = TC_ACT_SHOT;
- ipt->tcf_qstats.drops++;
- break;
- case XT_CONTINUE:
- result = TC_ACT_PIPE;
- break;
- default:
- net_notice_ratelimited("tc filter: Bogus netfilter code %d assume ACCEPT\n",
- ret);
- result = TC_ACT_OK;
- break;
- }
- spin_unlock(&ipt->tcf_lock);
-
- memcpy(skb->cb, saved_cb, sizeof(skb->cb));
-
- return result;
-
-}
-
-static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind,
- int ref)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tcf_ipt *ipt = to_ipt(a);
- struct xt_entry_target *t;
- struct tcf_t tm;
- struct tc_cnt c;
-
- /* for simple targets kernel size == user size
- * user name = target name
- * for foolproof you need to not assume this
- */
-
- spin_lock_bh(&ipt->tcf_lock);
- t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
- if (unlikely(!t))
- goto nla_put_failure;
-
- c.bindcnt = atomic_read(&ipt->tcf_bindcnt) - bind;
- c.refcnt = refcount_read(&ipt->tcf_refcnt) - ref;
- strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name);
-
- if (nla_put(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t) ||
- nla_put_u32(skb, TCA_IPT_INDEX, ipt->tcf_index) ||
- nla_put_u32(skb, TCA_IPT_HOOK, ipt->tcfi_hook) ||
- nla_put(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c) ||
- nla_put_string(skb, TCA_IPT_TABLE, ipt->tcfi_tname))
- goto nla_put_failure;
-
- tcf_tm_dump(&tm, &ipt->tcf_tm);
- if (nla_put_64bit(skb, TCA_IPT_TM, sizeof(tm), &tm, TCA_IPT_PAD))
- goto nla_put_failure;
-
- spin_unlock_bh(&ipt->tcf_lock);
- kfree(t);
- return skb->len;
-
-nla_put_failure:
- spin_unlock_bh(&ipt->tcf_lock);
- nlmsg_trim(skb, b);
- kfree(t);
- return -1;
-}
-
-static struct tc_action_ops act_ipt_ops = {
- .kind = "ipt",
- .id = TCA_ID_IPT,
- .owner = THIS_MODULE,
- .act = tcf_ipt_act,
- .dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_release,
- .init = tcf_ipt_init,
- .size = sizeof(struct tcf_ipt),
-};
-
-static __net_init int ipt_init_net(struct net *net)
-{
- struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id);
-
- return tc_action_net_init(net, tn, &act_ipt_ops);
-}
-
-static void __net_exit ipt_exit_net(struct list_head *net_list)
-{
- tc_action_net_exit(net_list, act_ipt_ops.net_id);
-}
-
-static struct pernet_operations ipt_net_ops = {
- .init = ipt_init_net,
- .exit_batch = ipt_exit_net,
- .id = &act_ipt_ops.net_id,
- .size = sizeof(struct tc_action_net),
-};
-
-static struct tc_action_ops act_xt_ops = {
- .kind = "xt",
- .id = TCA_ID_XT,
- .owner = THIS_MODULE,
- .act = tcf_ipt_act,
- .dump = tcf_ipt_dump,
- .cleanup = tcf_ipt_release,
- .init = tcf_xt_init,
- .size = sizeof(struct tcf_ipt),
-};
-
-static __net_init int xt_init_net(struct net *net)
-{
- struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id);
-
- return tc_action_net_init(net, tn, &act_xt_ops);
-}
-
-static void __net_exit xt_exit_net(struct list_head *net_list)
-{
- tc_action_net_exit(net_list, act_xt_ops.net_id);
-}
-
-static struct pernet_operations xt_net_ops = {
- .init = xt_init_net,
- .exit_batch = xt_exit_net,
- .id = &act_xt_ops.net_id,
- .size = sizeof(struct tc_action_net),
-};
-
-MODULE_AUTHOR("Jamal Hadi Salim(2002-13)");
-MODULE_DESCRIPTION("Iptables target actions");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("act_xt");
-
-static int __init ipt_init_module(void)
-{
- int ret1, ret2;
-
- ret1 = tcf_register_action(&act_xt_ops, &xt_net_ops);
- if (ret1 < 0)
- pr_err("Failed to load xt action\n");
-
- ret2 = tcf_register_action(&act_ipt_ops, &ipt_net_ops);
- if (ret2 < 0)
- pr_err("Failed to load ipt action\n");
-
- if (ret1 < 0 && ret2 < 0) {
- return ret1;
- } else
- return 0;
-}
-
-static void __exit ipt_cleanup_module(void)
-{
- tcf_unregister_action(&act_ipt_ops, &ipt_net_ops);
- tcf_unregister_action(&act_xt_ops, &xt_net_ops);
-}
-
-module_init(ipt_init_module);
-module_exit(ipt_cleanup_module);
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0a711c184c29..6faa7d00da09 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -85,10 +85,21 @@ static void tcf_mirred_release(struct tc_action *a)
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
[TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) },
+ [TCA_MIRRED_BLOCKID] = NLA_POLICY_MIN(NLA_U32, 1),
};
static struct tc_action_ops act_mirred_ops;
+static void tcf_mirred_replace_dev(struct tcf_mirred *m,
+ struct net_device *ndev)
+{
+ struct net_device *odev;
+
+ odev = rcu_replace_pointer(m->tcfm_dev, ndev,
+ lockdep_is_held(&m->tcf_lock));
+ netdev_put(odev, &m->tcfm_dev_tracker);
+}
+
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp,
@@ -124,7 +135,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
+
+ if (tb[TCA_MIRRED_BLOCKID] && parm->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Cannot specify Block ID and dev simultaneously");
+ if (exists)
+ tcf_idr_release(*a, bind);
+ else
+ tcf_idr_cleanup(tn, index);
+
+ return -EINVAL;
+ }
switch (parm->eaction) {
case TCA_EGRESS_MIRROR:
@@ -142,9 +164,10 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
if (!exists) {
- if (!parm->ifindex) {
+ if (!parm->ifindex && !tb[TCA_MIRRED_BLOCKID]) {
tcf_idr_cleanup(tn, index);
- NL_SET_ERR_MSG_MOD(extack, "Specified device does not exist");
+ NL_SET_ERR_MSG_MOD(extack,
+ "Must specify device or block");
return -EINVAL;
}
ret = tcf_idr_create_from_flags(tn, index, est, a,
@@ -170,7 +193,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
spin_lock_bh(&m->tcf_lock);
if (parm->ifindex) {
- struct net_device *odev, *ndev;
+ struct net_device *ndev;
ndev = dev_get_by_index(net, parm->ifindex);
if (!ndev) {
@@ -179,11 +202,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
goto put_chain;
}
mac_header_xmit = dev_is_mac_header_xmit(ndev);
- odev = rcu_replace_pointer(m->tcfm_dev, ndev,
- lockdep_is_held(&m->tcf_lock));
- netdev_put(odev, &m->tcfm_dev_tracker);
+ tcf_mirred_replace_dev(m, ndev);
netdev_tracker_alloc(ndev, &m->tcfm_dev_tracker, GFP_ATOMIC);
m->tcfm_mac_header_xmit = mac_header_xmit;
+ m->tcfm_blockid = 0;
+ } else if (tb[TCA_MIRRED_BLOCKID]) {
+ tcf_mirred_replace_dev(m, NULL);
+ m->tcfm_mac_header_xmit = false;
+ m->tcfm_blockid = nla_get_u32(tb[TCA_MIRRED_BLOCKID]);
}
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
m->tcfm_eaction = parm->eaction;
@@ -206,18 +232,14 @@ release_idr:
return err;
}
-static bool is_mirred_nested(void)
-{
- return unlikely(__this_cpu_read(mirred_nest_level) > 1);
-}
-
-static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
+static int
+tcf_mirred_forward(bool at_ingress, bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
- else if (is_mirred_nested())
+ else if (!at_ingress)
err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -225,110 +247,213 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
return err;
}
-TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
- const struct tc_action *a,
- struct tcf_result *res)
+static int tcf_mirred_to_dev(struct sk_buff *skb, struct tcf_mirred *m,
+ struct net_device *dev,
+ const bool m_mac_header_xmit, int m_eaction,
+ int retval)
{
- struct tcf_mirred *m = to_mirred(a);
- struct sk_buff *skb2 = skb;
- bool m_mac_header_xmit;
- struct net_device *dev;
- unsigned int nest_level;
- int retval, err = 0;
- bool use_reinsert;
+ struct sk_buff *skb_to_send = skb;
bool want_ingress;
bool is_redirect;
bool expects_nh;
bool at_ingress;
- int m_eaction;
+ bool dont_clone;
int mac_len;
bool at_nh;
+ int err;
- nest_level = __this_cpu_inc_return(mirred_nest_level);
- if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
- net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
- netdev_name(skb->dev));
- __this_cpu_dec(mirred_nest_level);
- return TC_ACT_SHOT;
- }
-
- tcf_lastuse_update(&m->tcf_tm);
- tcf_action_update_bstats(&m->common, skb);
-
- m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
- m_eaction = READ_ONCE(m->tcfm_eaction);
- retval = READ_ONCE(m->tcf_action);
- dev = rcu_dereference_bh(m->tcfm_dev);
- if (unlikely(!dev)) {
- pr_notice_once("tc mirred: target device is gone\n");
- goto out;
- }
-
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
if (unlikely(!(dev->flags & IFF_UP)) || !netif_carrier_ok(dev)) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
- goto out;
+ goto err_cant_do;
}
/* we could easily avoid the clone only if called by ingress and clsact;
* since we can't easily detect the clsact caller, skip clone only for
* ingress - that covers the TC S/W datapath.
*/
- is_redirect = tcf_mirred_is_act_redirect(m_eaction);
at_ingress = skb_at_tc_ingress(skb);
- use_reinsert = at_ingress && is_redirect &&
- tcf_mirred_can_reinsert(retval);
- if (!use_reinsert) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (!skb2)
- goto out;
+ dont_clone = skb_at_tc_ingress(skb) && is_redirect &&
+ tcf_mirred_can_reinsert(retval);
+ if (!dont_clone) {
+ skb_to_send = skb_clone(skb, GFP_ATOMIC);
+ if (!skb_to_send)
+ goto err_cant_do;
}
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
/* All mirred/redirected skbs should clear previous ct info */
- nf_reset_ct(skb2);
+ nf_reset_ct(skb_to_send);
if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */
- skb_dst_drop(skb2);
+ skb_dst_drop(skb_to_send);
expects_nh = want_ingress || !m_mac_header_xmit;
at_nh = skb->data == skb_network_header(skb);
if (at_nh != expects_nh) {
- mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
+ mac_len = at_ingress ? skb->mac_len :
skb_network_offset(skb);
if (expects_nh) {
/* target device/action expect data at nh */
- skb_pull_rcsum(skb2, mac_len);
+ skb_pull_rcsum(skb_to_send, mac_len);
} else {
/* target device/action expect data at mac */
- skb_push_rcsum(skb2, mac_len);
+ skb_push_rcsum(skb_to_send, mac_len);
}
}
- skb2->skb_iif = skb->dev->ifindex;
- skb2->dev = dev;
+ skb_to_send->skb_iif = skb->dev->ifindex;
+ skb_to_send->dev = dev;
- /* mirror is always swallowed */
if (is_redirect) {
- skb_set_redirected(skb2, skb2->tc_at_ingress);
-
- /* let's the caller reinsert the packet, if possible */
- if (use_reinsert) {
- err = tcf_mirred_forward(want_ingress, skb);
- if (err)
- tcf_action_inc_overlimit_qstats(&m->common);
- __this_cpu_dec(mirred_nest_level);
- return TC_ACT_CONSUMED;
- }
+ if (skb == skb_to_send)
+ retval = TC_ACT_CONSUMED;
+
+ skb_set_redirected(skb_to_send, skb_to_send->tc_at_ingress);
+
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
+ } else {
+ err = tcf_mirred_forward(at_ingress, want_ingress, skb_to_send);
+ }
+ if (err)
+ tcf_action_inc_overlimit_qstats(&m->common);
+
+ return retval;
+
+err_cant_do:
+ if (is_redirect)
+ retval = TC_ACT_SHOT;
+ tcf_action_inc_overlimit_qstats(&m->common);
+ return retval;
+}
+
+static int tcf_blockcast_redir(struct sk_buff *skb, struct tcf_mirred *m,
+ struct tcf_block *block, int m_eaction,
+ const u32 exception_ifindex, int retval)
+{
+ struct net_device *dev_prev = NULL;
+ struct net_device *dev = NULL;
+ unsigned long index;
+ int mirred_eaction;
+
+ mirred_eaction = tcf_mirred_act_wants_ingress(m_eaction) ?
+ TCA_INGRESS_MIRROR : TCA_EGRESS_MIRROR;
+
+ xa_for_each(&block->ports, index, dev) {
+ if (index == exception_ifindex)
+ continue;
+
+ if (!dev_prev)
+ goto assign_prev;
+
+ tcf_mirred_to_dev(skb, m, dev_prev,
+ dev_is_mac_header_xmit(dev),
+ mirred_eaction, retval);
+assign_prev:
+ dev_prev = dev;
}
- err = tcf_mirred_forward(want_ingress, skb2);
- if (err) {
-out:
+ if (dev_prev)
+ return tcf_mirred_to_dev(skb, m, dev_prev,
+ dev_is_mac_header_xmit(dev_prev),
+ m_eaction, retval);
+
+ return retval;
+}
+
+static int tcf_blockcast_mirror(struct sk_buff *skb, struct tcf_mirred *m,
+ struct tcf_block *block, int m_eaction,
+ const u32 exception_ifindex, int retval)
+{
+ struct net_device *dev = NULL;
+ unsigned long index;
+
+ xa_for_each(&block->ports, index, dev) {
+ if (index == exception_ifindex)
+ continue;
+
+ tcf_mirred_to_dev(skb, m, dev,
+ dev_is_mac_header_xmit(dev),
+ m_eaction, retval);
+ }
+
+ return retval;
+}
+
+static int tcf_blockcast(struct sk_buff *skb, struct tcf_mirred *m,
+ const u32 blockid, struct tcf_result *res,
+ int retval)
+{
+ const u32 exception_ifindex = skb->dev->ifindex;
+ struct tcf_block *block;
+ bool is_redirect;
+ int m_eaction;
+
+ m_eaction = READ_ONCE(m->tcfm_eaction);
+ is_redirect = tcf_mirred_is_act_redirect(m_eaction);
+
+ /* we are already under rcu protection, so can call block lookup
+ * directly.
+ */
+ block = tcf_block_lookup(dev_net(skb->dev), blockid);
+ if (!block || xa_empty(&block->ports)) {
+ tcf_action_inc_overlimit_qstats(&m->common);
+ return retval;
+ }
+
+ if (is_redirect)
+ return tcf_blockcast_redir(skb, m, block, m_eaction,
+ exception_ifindex, retval);
+
+ /* If it's not redirect, it is mirror */
+ return tcf_blockcast_mirror(skb, m, block, m_eaction, exception_ifindex,
+ retval);
+}
+
+TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
+ const struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_mirred *m = to_mirred(a);
+ int retval = READ_ONCE(m->tcf_action);
+ unsigned int nest_level;
+ bool m_mac_header_xmit;
+ struct net_device *dev;
+ int m_eaction;
+ u32 blockid;
+
+ nest_level = __this_cpu_inc_return(mirred_nest_level);
+ if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
+ net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
+ netdev_name(skb->dev));
+ retval = TC_ACT_SHOT;
+ goto dec_nest_level;
+ }
+
+ tcf_lastuse_update(&m->tcf_tm);
+ tcf_action_update_bstats(&m->common, skb);
+
+ blockid = READ_ONCE(m->tcfm_blockid);
+ if (blockid) {
+ retval = tcf_blockcast(skb, m, blockid, res, retval);
+ goto dec_nest_level;
+ }
+
+ dev = rcu_dereference_bh(m->tcfm_dev);
+ if (unlikely(!dev)) {
+ pr_notice_once("tc mirred: target device is gone\n");
tcf_action_inc_overlimit_qstats(&m->common);
- if (tcf_mirred_is_act_redirect(m_eaction))
- retval = TC_ACT_SHOT;
+ goto dec_nest_level;
}
+
+ m_mac_header_xmit = READ_ONCE(m->tcfm_mac_header_xmit);
+ m_eaction = READ_ONCE(m->tcfm_eaction);
+
+ retval = tcf_mirred_to_dev(skb, m, dev, m_mac_header_xmit, m_eaction,
+ retval);
+
+dec_nest_level:
__this_cpu_dec(mirred_nest_level);
return retval;
@@ -356,6 +481,7 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
};
struct net_device *dev;
struct tcf_t t;
+ u32 blockid;
spin_lock_bh(&m->tcf_lock);
opt.action = m->tcf_action;
@@ -367,6 +493,10 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind,
if (nla_put(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
+ blockid = m->tcfm_blockid;
+ if (blockid && nla_put_u32(skb, TCA_MIRRED_BLOCKID, blockid))
+ goto nla_put_failure;
+
tcf_tm_dump(&t, &m->tcf_tm);
if (nla_put_64bit(skb, TCA_MIRRED_TM, sizeof(t), &t, TCA_MIRRED_PAD))
goto nla_put_failure;
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 1010dc632874..34b8edb6cc77 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -195,7 +195,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!exists) {
ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 4184af5abbf3..a180e724634e 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 1ef8fcfa9997..2ef22969f274 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -202,7 +202,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind)
- return 0;
+ return ACT_P_BOUND;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
ret = -EEXIST;
goto out_release;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index f3121c5a85e9..e119b4a3db9f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -77,7 +77,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!exists) {
ret = tcf_idr_create(tn, index, NULL, a,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 4c670e7568dc..c5c61efe6db4 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -66,7 +66,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!exists) {
ret = tcf_idr_create(tn, index, est, a,
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 4b84514534f3..0a3e92888295 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -118,7 +118,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (tb[TCA_DEF_DATA] == NULL) {
if (exists)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index ce7008cf291c..754f78b35bb8 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -209,7 +209,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!flags) {
if (exists)
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index dffa990a9629..bcb673ab0008 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -157,7 +157,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
if (!lflags) {
if (exists)
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 0c8aa7e686ea..300b08aa8283 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -401,7 +401,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
switch (parm->t_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 0251442f5f29..836183011a7c 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -151,7 +151,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return err;
exists = err;
if (exists && bind)
- return 0;
+ return ACT_P_BOUND;
switch (parm->v_action) {
case TCA_VLAN_ACT_POP:
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1976bd163986..ff3d396a65aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -531,6 +531,7 @@ static void tcf_block_destroy(struct tcf_block *block)
{
mutex_destroy(&block->lock);
mutex_destroy(&block->proto_destroy_lock);
+ xa_destroy(&block->ports);
kfree_rcu(block, rcu);
}
@@ -650,7 +651,7 @@ static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct tcf_block *block, struct sk_buff *oskb,
- u32 seq, u16 flags, bool unicast);
+ u32 seq, u16 flags);
static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
bool explicitly_created)
@@ -685,8 +686,7 @@ static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
if (non_act_refcnt == chain->explicitly_created && !by_act) {
if (non_act_refcnt == 0)
tc_chain_notify_delete(tmplt_ops, tmplt_priv,
- chain->index, block, NULL, 0, 0,
- false);
+ chain->index, block, NULL, 0, 0);
/* Last reference to chain, no need to lock. */
chain->flushing = false;
}
@@ -1003,6 +1003,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
refcount_set(&block->refcnt, 1);
block->net = net;
block->index = block_index;
+ xa_init(&block->ports);
/* Don't store q pointer for blocks which are shared */
if (!tcf_block_shared(block))
@@ -1010,12 +1011,13 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
return block;
}
-static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
+struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
return idr_find(&tn->idr, block_index);
}
+EXPORT_SYMBOL(tcf_block_lookup);
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
@@ -1422,10 +1424,19 @@ static void tcf_block_owner_del(struct tcf_block *block,
WARN_ON(1);
}
+static bool tcf_block_tracks_dev(struct tcf_block *block,
+ struct tcf_block_ext_info *ei)
+{
+ return tcf_block_shared(block) &&
+ (ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS ||
+ ei->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS);
+}
+
int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
struct tcf_block_ext_info *ei,
struct netlink_ext_ack *extack)
{
+ struct net_device *dev = qdisc_dev(q);
struct net *net = qdisc_net(q);
struct tcf_block *block = NULL;
int err;
@@ -1459,9 +1470,18 @@ int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
if (err)
goto err_block_offload_bind;
+ if (tcf_block_tracks_dev(block, ei)) {
+ err = xa_insert(&block->ports, dev->ifindex, dev, GFP_KERNEL);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "block dev insert failed");
+ goto err_dev_insert;
+ }
+ }
+
*p_block = block;
return 0;
+err_dev_insert:
err_block_offload_bind:
tcf_chain0_head_change_cb_del(block, ei);
err_chain0_head_change_cb_add:
@@ -1500,8 +1520,12 @@ EXPORT_SYMBOL(tcf_block_get);
void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
struct tcf_block_ext_info *ei)
{
+ struct net_device *dev = qdisc_dev(q);
+
if (!block)
return;
+ if (tcf_block_tracks_dev(block, ei))
+ xa_erase(&block->ports, dev->ifindex);
tcf_chain0_head_change_cb_del(block, ei);
tcf_block_owner_del(block, q, ei->binder_type);
@@ -1536,6 +1560,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
chain_prev = chain,
chain = __tcf_get_next_chain(block, chain),
tcf_chain_put(chain_prev)) {
+ if (chain->tmplt_ops && add)
+ chain->tmplt_ops->tmplt_reoffload(chain, true, cb,
+ cb_priv);
for (tp = __tcf_get_next_proto(chain, NULL); tp;
tp_prev = tp,
tp = __tcf_get_next_proto(chain, tp),
@@ -1551,6 +1578,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
goto err_playback_remove;
}
}
+ if (chain->tmplt_ops && !add)
+ chain->tmplt_ops->tmplt_reoffload(chain, false, cb,
+ cb_priv);
}
return 0;
@@ -1658,7 +1688,6 @@ static inline int __tcf_classify(struct sk_buff *skb,
int act_index,
u32 *last_executed_chain)
{
- u32 orig_reason = res->drop_reason;
#ifdef CONFIG_NET_CLS_ACT
const int max_reclassify_loop = 16;
const struct tcf_proto *first_tp;
@@ -1683,13 +1712,15 @@ reclassify:
*/
if (unlikely(n->tp != tp || n->tp->chain != n->chain ||
!tp->ops->get_exts)) {
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
}
exts = tp->ops->get_exts(tp, n->handle);
if (unlikely(!exts || n->exts != exts)) {
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
}
@@ -1713,18 +1744,13 @@ reclassify:
goto reset;
}
#endif
- if (err >= 0) {
- /* Policy drop or drop reason is over-written by
- * classifiers with a bogus value(0) */
- if (err == TC_ACT_SHOT &&
- res->drop_reason == SKB_NOT_DROPPED_YET)
- tcf_set_drop_reason(res, orig_reason);
+ if (err >= 0)
return err;
- }
}
if (unlikely(n)) {
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
}
@@ -1736,7 +1762,8 @@ reset:
tp->chain->block->index,
tp->prio & 0xffff,
ntohs(tp->protocol));
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
return TC_ACT_SHOT;
}
@@ -1774,7 +1801,8 @@ int tcf_classify(struct sk_buff *skb,
n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie,
&act_index);
if (!n) {
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_COOKIE_ERROR);
return TC_ACT_SHOT;
}
@@ -1785,7 +1813,9 @@ int tcf_classify(struct sk_buff *skb,
fchain = tcf_chain_lookup_rcu(block, chain);
if (!fchain) {
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb,
+ SKB_DROP_REASON_TC_CHAIN_NOTFOUND);
+
return TC_ACT_SHOT;
}
@@ -1807,10 +1837,9 @@ int tcf_classify(struct sk_buff *skb,
ext = tc_skb_ext_alloc(skb);
if (WARN_ON_ONCE(!ext)) {
- tcf_set_drop_reason(res, SKB_DROP_REASON_TC_ERROR);
+ tcf_set_drop_reason(skb, SKB_DROP_REASON_NOMEM);
return TC_ACT_SHOT;
}
-
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
@@ -2053,6 +2082,9 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
int err = 0;
+ if (!unicast && !rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2075,13 +2107,16 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
- u32 parent, void *fh, bool unicast, bool *last,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ u32 parent, void *fh, bool *last, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
int err;
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return tp->ops->delete(tp, fh, last, rtnl_held, extack);
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2100,11 +2135,8 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
return err;
}
- if (unicast)
- err = rtnl_unicast(skb, net, portid);
- else
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
@@ -2499,9 +2531,8 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
} else {
bool last;
- err = tfilter_del_notify(net, skb, n, tp, block,
- q, parent, fh, false, &last,
- rtnl_held, extack);
+ err = tfilter_del_notify(net, skb, n, tp, block, q, parent, fh,
+ &last, rtnl_held, extack);
if (err)
goto errout;
@@ -2732,6 +2763,7 @@ errout:
}
static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
+ [TCA_CHAIN] = { .type = NLA_U32 },
[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
};
@@ -2906,6 +2938,9 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
struct sk_buff *skb;
int err = 0;
+ if (!unicast && !rtnl_notify_needed(net, flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2929,12 +2964,15 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct tcf_block *block, struct sk_buff *oskb,
- u32 seq, u16 flags, bool unicast)
+ u32 seq, u16 flags)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
struct net *net = block->net;
struct sk_buff *skb;
+ if (!rtnl_notify_needed(net, flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -2945,9 +2983,6 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
return -EINVAL;
}
- if (unicast)
- return rtnl_unicast(skb, net, portid);
-
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
}
@@ -2971,7 +3006,8 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
ops = tcf_proto_lookup_ops(name, true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
- if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+ if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump ||
+ !ops->tmplt_reoffload) {
NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
module_put(ops->owner);
return -EOPNOTSUPP;
@@ -3293,12 +3329,11 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **
if (exts->police && tb[exts->police]) {
struct tc_action_ops *a_o;
- a_o = tc_action_load_ops(tb[exts->police], true,
- !(flags & TCA_ACT_FLAGS_NO_RTNL),
+ flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
+ a_o = tc_action_load_ops(tb[exts->police], flags,
extack);
if (IS_ERR(a_o))
return PTR_ERR(a_o);
- flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, a_o, init_res, flags,
extack);
@@ -3309,7 +3344,7 @@ int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **
act->type = exts->type = TCA_OLD_COMPAT;
exts->actions[0] = act;
exts->nr_actions = 1;
- tcf_idr_insert_many(exts->actions);
+ tcf_idr_insert_many(exts->actions, init_res);
} else if (exts->action && tb[exts->action]) {
int err;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e5314a31f75a..6ee7064c82fc 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2460,8 +2460,11 @@ unbind_filter:
}
errout_idr:
- if (!fold)
+ if (!fold) {
+ spin_lock(&tp->lock);
idr_remove(&head->handle_idr, fnew->handle);
+ spin_unlock(&tp->lock);
+ }
__fl_put(fnew);
errout_tb:
kfree(tb);
@@ -2721,6 +2724,28 @@ static void fl_tmplt_destroy(void *tmplt_priv)
kfree(tmplt);
}
+static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct fl_flow_tmplt *tmplt = chain->tmplt_priv;
+ struct flow_cls_offload cls_flower = {};
+
+ cls_flower.rule = flow_rule_alloc(0);
+ if (!cls_flower.rule)
+ return;
+
+ cls_flower.common.chain_index = chain->index;
+ cls_flower.command = add ? FLOW_CLS_TMPLT_CREATE :
+ FLOW_CLS_TMPLT_DESTROY;
+ cls_flower.cookie = (unsigned long) tmplt;
+ cls_flower.rule->match.dissector = &tmplt->dissector;
+ cls_flower.rule->match.mask = &tmplt->mask;
+ cls_flower.rule->match.key = &tmplt->dummy_key;
+
+ cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
+}
+
static int fl_dump_key_val(struct sk_buff *skb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -3628,6 +3653,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
+ .tmplt_reoffload = fl_tmplt_reoffload,
.tmplt_dump = fl_tmplt_dump,
.get_exts = fl_get_exts,
.owner = THIS_MODULE,
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d5bdfd4a7655..289e1755c26b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -71,7 +71,7 @@ struct tc_u_hnode {
struct tc_u_hnode __rcu *next;
u32 handle;
u32 prio;
- int refcnt;
+ refcount_t refcnt;
unsigned int divisor;
struct idr handle_idr;
bool is_root;
@@ -86,7 +86,7 @@ struct tc_u_hnode {
struct tc_u_common {
struct tc_u_hnode __rcu *hlist;
void *ptr;
- int refcnt;
+ refcount_t refcnt;
struct idr handle_idr;
struct hlist_node hnode;
long knodes;
@@ -359,7 +359,7 @@ static int u32_init(struct tcf_proto *tp)
if (root_ht == NULL)
return -ENOBUFS;
- root_ht->refcnt++;
+ refcount_set(&root_ht->refcnt, 1);
root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
root_ht->prio = tp->prio;
root_ht->is_root = true;
@@ -371,18 +371,20 @@ static int u32_init(struct tcf_proto *tp)
kfree(root_ht);
return -ENOBUFS;
}
+ refcount_set(&tp_c->refcnt, 1);
tp_c->ptr = key;
INIT_HLIST_NODE(&tp_c->hnode);
idr_init(&tp_c->handle_idr);
hlist_add_head(&tp_c->hnode, tc_u_hash(key));
+ } else {
+ refcount_inc(&tp_c->refcnt);
}
- tp_c->refcnt++;
RCU_INIT_POINTER(root_ht->next, tp_c->hlist);
rcu_assign_pointer(tp_c->hlist, root_ht);
- root_ht->refcnt++;
+ /* root_ht must be destroyed when tcf_proto is destroyed */
rcu_assign_pointer(tp->root, root_ht);
tp->data = tp_c;
return 0;
@@ -393,7 +395,7 @@ static void __u32_destroy_key(struct tc_u_knode *n)
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
tcf_exts_destroy(&n->exts);
- if (ht && --ht->refcnt == 0)
+ if (ht && refcount_dec_and_test(&ht->refcnt))
kfree(ht);
kfree(n);
}
@@ -601,8 +603,6 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
struct tc_u_hnode __rcu **hn;
struct tc_u_hnode *phn;
- WARN_ON(--ht->refcnt);
-
u32_clear_hnode(tp, ht, extack);
hn = &tp_c->hlist;
@@ -630,10 +630,10 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
WARN_ON(root_ht == NULL);
- if (root_ht && --root_ht->refcnt == 1)
+ if (root_ht && refcount_dec_and_test(&root_ht->refcnt))
u32_destroy_hnode(tp, root_ht, extack);
- if (--tp_c->refcnt == 0) {
+ if (refcount_dec_and_test(&tp_c->refcnt)) {
struct tc_u_hnode *ht;
hlist_del(&tp_c->hnode);
@@ -645,7 +645,7 @@ static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
/* u32_destroy_key() will later free ht for us, if it's
* still referenced by some knode
*/
- if (--ht->refcnt == 0)
+ if (refcount_dec_and_test(&ht->refcnt))
kfree_rcu(ht, rcu);
}
@@ -674,7 +674,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
return -EINVAL;
}
- if (ht->refcnt == 1) {
+ if (refcount_dec_if_one(&ht->refcnt)) {
u32_destroy_hnode(tp, ht, extack);
} else {
NL_SET_ERR_MSG_MOD(extack, "Can not delete in-use filter");
@@ -682,7 +682,7 @@ static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
}
out:
- *last = tp_c->refcnt == 1 && tp_c->knodes == 0;
+ *last = refcount_read(&tp_c->refcnt) == 1 && tp_c->knodes == 0;
return ret;
}
@@ -766,14 +766,14 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
NL_SET_ERR_MSG_MOD(extack, "Not linking to root node");
return -EINVAL;
}
- ht_down->refcnt++;
+ refcount_inc(&ht_down->refcnt);
}
ht_old = rtnl_dereference(n->ht_down);
rcu_assign_pointer(n->ht_down, ht_down);
if (ht_old)
- ht_old->refcnt--;
+ refcount_dec(&ht_old->refcnt);
}
if (ifindex >= 0)
@@ -852,7 +852,7 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
/* bump reference count as long as we hold pointer to structure */
if (ht)
- ht->refcnt++;
+ refcount_inc(&ht->refcnt);
return new;
}
@@ -932,7 +932,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
ht_old = rtnl_dereference(n->ht_down);
if (ht_old)
- ht_old->refcnt++;
+ refcount_inc(&ht_old->refcnt);
}
__u32_destroy_key(new);
return err;
@@ -980,7 +980,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return err;
}
}
- ht->refcnt = 1;
+ refcount_set(&ht->refcnt, 1);
ht->divisor = divisor;
ht->handle = handle;
ht->prio = tp->prio;
diff --git a/net/sched/em_canid.c b/net/sched/em_canid.c
index 5ea84decec19..5337bc462755 100644
--- a/net/sched/em_canid.c
+++ b/net/sched/em_canid.c
@@ -222,6 +222,7 @@ static void __exit exit_em_canid(void)
tcf_em_unregister(&em_canid_ops);
}
+MODULE_DESCRIPTION("ematch classifier to match CAN IDs embedded in skb CAN frames");
MODULE_LICENSE("GPL");
module_init(init_em_canid);
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index f17b049ea530..c90ad7ea26b4 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -87,6 +87,7 @@ static void __exit exit_em_cmp(void)
tcf_em_unregister(&em_cmp_ops);
}
+MODULE_DESCRIPTION("ematch classifier for basic data types(8/16/32 bit) against skb data");
MODULE_LICENSE("GPL");
module_init(init_em_cmp);
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 09d8afd04a2a..8996c73c9779 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -1006,6 +1006,7 @@ static void __exit exit_em_meta(void)
tcf_em_unregister(&em_meta_ops);
}
+MODULE_DESCRIPTION("ematch classifier for various internal kernel metadata, skb metadata and sk metadata");
MODULE_LICENSE("GPL");
module_init(init_em_meta);
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index a83b237cbeb0..4f9f21a05d5e 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -68,6 +68,7 @@ static void __exit exit_em_nbyte(void)
tcf_em_unregister(&em_nbyte_ops);
}
+MODULE_DESCRIPTION("ematch classifier for arbitrary skb multi-bytes");
MODULE_LICENSE("GPL");
module_init(init_em_nbyte);
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index f176afb70559..420c66203b17 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -147,6 +147,7 @@ static void __exit exit_em_text(void)
tcf_em_unregister(&em_text_ops);
}
+MODULE_DESCRIPTION("ematch classifier for embedded text in skbs");
MODULE_LICENSE("GPL");
module_init(init_em_text);
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 71b070da0437..fdec4db5ec89 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -52,6 +52,7 @@ static void __exit exit_em_u32(void)
tcf_em_unregister(&em_u32_ops);
}
+MODULE_DESCRIPTION("ematch skb classifier using 32 bit chunks of data");
MODULE_LICENSE("GPL");
module_init(init_em_u32);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index e9eaf637220e..36b025cc4fd2 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1003,6 +1003,32 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
return false;
}
+static int qdisc_get_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, u32 clid, struct Qdisc *q,
+ struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (!tc_qdisc_dump_ignore(q, false)) {
+ if (tc_fill_qdisc(skb, q, clid, portid, n->nlmsg_seq, 0,
+ RTM_NEWQDISC, extack) < 0)
+ goto err_out;
+ }
+
+ if (skb->len)
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+
+err_out:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, u32 clid,
struct Qdisc *old, struct Qdisc *new,
@@ -1011,6 +1037,9 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -1542,7 +1571,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
if (err != 0)
return err;
} else {
- qdisc_notify(net, skb, n, clid, NULL, q, NULL);
+ qdisc_get_notify(net, skb, n, clid, q, NULL);
}
return 0;
}
@@ -1936,6 +1965,9 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC))
+ return 0;
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
@@ -1949,6 +1981,27 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
n->nlmsg_flags & NLM_F_ECHO);
}
+static int tclass_get_notify(struct net *net, struct sk_buff *oskb,
+ struct nlmsghdr *n, struct Qdisc *q,
+ unsigned long cl, struct netlink_ext_ack *extack)
+{
+ struct sk_buff *skb;
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, RTM_NEWTCLASS,
+ extack) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+}
+
static int tclass_del_notify(struct net *net,
const struct Qdisc_class_ops *cops,
struct sk_buff *oskb, struct nlmsghdr *n,
@@ -1962,14 +2015,18 @@ static int tclass_del_notify(struct net *net,
if (!cops->delete)
return -EOPNOTSUPP;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb)
- return -ENOBUFS;
+ if (rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
- RTM_DELTCLASS, extack) < 0) {
- kfree_skb(skb);
- return -EINVAL;
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
+ RTM_DELTCLASS, extack) < 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ } else {
+ skb = NULL;
}
err = cops->delete(q, cl, extack);
@@ -1978,8 +2035,8 @@ static int tclass_del_notify(struct net *net,
return err;
}
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
return err;
}
@@ -2174,7 +2231,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
tc_bind_tclass(q, portid, clid, 0);
goto out;
case RTM_GETTCLASS:
- err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
+ err = tclass_get_notify(net, skb, n, q, cl, extack);
goto out;
default:
err = -EINVAL;
diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c
index 9a0b85190a2c..beece8e82c23 100644
--- a/net/sched/sch_cbs.c
+++ b/net/sched/sch_cbs.c
@@ -57,6 +57,8 @@
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <linux/units.h>
+
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
@@ -65,8 +67,6 @@
static LIST_HEAD(cbs_list);
static DEFINE_SPINLOCK(cbs_list_lock);
-#define BYTES_PER_KBIT (1000LL / 8)
-
struct cbs_sched_data {
bool offload;
int queue;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4195a4bc26ca..9b3e9262040b 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -522,8 +522,9 @@ static void dev_watchdog(struct timer_list *t)
if (unlikely(timedout_ms)) {
trace_net_dev_xmit_timeout(dev, i);
- WARN_ONCE(1, "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out %u ms\n",
- dev->name, netdev_drivername(dev), i, timedout_ms);
+ netdev_crit(dev, "NETDEV WATCHDOG: CPU: %d: transmit queue %u timed out %u ms\n",
+ raw_smp_processor_id(),
+ i, timedout_ms);
netif_freeze_queues(dev);
dev->netdev_ops->ndo_tx_timeout(dev, i);
netif_unfreeze_queues(dev);
@@ -1050,6 +1051,7 @@ static void qdisc_free_cb(struct rcu_head *head)
static void __qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
+ struct net_device *dev = qdisc_dev(qdisc);
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -1060,11 +1062,12 @@ static void __qdisc_destroy(struct Qdisc *qdisc)
qdisc_reset(qdisc);
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
- netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
+ netdev_put(dev, &qdisc->dev_tracker);
trace_qdisc_destroy(qdisc);
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index 7182c5a450fb..5c1652181805 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -38,6 +38,14 @@ void sctp_inq_init(struct sctp_inq *queue)
INIT_WORK(&queue->immediate, NULL);
}
+/* Properly release the chunk which is being worked on. */
+static inline void sctp_inq_chunk_free(struct sctp_chunk *chunk)
+{
+ if (chunk->head_skb)
+ chunk->skb = chunk->head_skb;
+ sctp_chunk_free(chunk);
+}
+
/* Release the memory associated with an SCTP inqueue. */
void sctp_inq_free(struct sctp_inq *queue)
{
@@ -53,7 +61,7 @@ void sctp_inq_free(struct sctp_inq *queue)
* free it as well.
*/
if (queue->in_progress) {
- sctp_chunk_free(queue->in_progress);
+ sctp_inq_chunk_free(queue->in_progress);
queue->in_progress = NULL;
}
}
@@ -130,9 +138,7 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
goto new_skb;
}
- if (chunk->head_skb)
- chunk->skb = chunk->head_skb;
- sctp_chunk_free(chunk);
+ sctp_inq_chunk_free(chunk);
chunk = queue->in_progress = NULL;
} else {
/* Nothing to do. Next chunk in the packet, please. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 7f89e43154c0..6b9fcdb0952a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2099,6 +2099,13 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n",
__func__, sk, msg, len, flags, addr_len);
+ if (unlikely(flags & MSG_ERRQUEUE))
+ return inet_recv_error(sk, msg, len, addr_len);
+
+ if (sk_can_busy_loop(sk) &&
+ skb_queue_empty_lockless(&sk->sk_receive_queue))
+ sk_busy_loop(sk, flags & MSG_DONTWAIT);
+
lock_sock(sk);
if (sctp_style(sk, TCP) && !sctp_sstate(sk, ESTABLISHED) &&
@@ -9043,12 +9050,6 @@ struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, int *err)
if (sk->sk_shutdown & RCV_SHUTDOWN)
break;
- if (sk_can_busy_loop(sk)) {
- sk_busy_loop(sk, flags & MSG_DONTWAIT);
-
- if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
- continue;
- }
/* User doesn't want to wait. */
error = -EAGAIN;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 73eebddbbf41..0f53a5c6fd9d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -677,8 +677,6 @@ static bool smc_isascii(char *hostname)
static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *clc)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)clc;
struct smc_clc_first_contact_ext *fce;
int clc_v2_len;
@@ -687,17 +685,15 @@ static void smc_conn_save_peer_info_fce(struct smc_sock *smc,
return;
if (smc->conn.lgr->is_smcd) {
- memcpy(smc->conn.lgr->negotiated_eid, clc_v2->d1.eid,
+ memcpy(smc->conn.lgr->negotiated_eid, clc->d1.eid,
SMC_MAX_EID_LEN);
- clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
- d1);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, d1);
} else {
- memcpy(smc->conn.lgr->negotiated_eid, clc_v2->r1.eid,
+ memcpy(smc->conn.lgr->negotiated_eid, clc->r1.eid,
SMC_MAX_EID_LEN);
- clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm_v2,
- r1);
+ clc_v2_len = offsetofend(struct smc_clc_msg_accept_confirm, r1);
}
- fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) + clc_v2_len);
+ fce = (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len);
smc->conn.lgr->peer_os = fce->os_type;
smc->conn.lgr->peer_smc_release = fce->release;
if (smc_isascii(fce->hostname))
@@ -928,6 +924,7 @@ static int smc_switch_to_fallback(struct smc_sock *smc, int reason_code)
smc->clcsock->file->private_data = smc->clcsock;
smc->clcsock->wq.fasync_list =
smc->sk.sk_socket->wq.fasync_list;
+ smc->sk.sk_socket->wq.fasync_list = NULL;
/* There might be some wait entries remaining
* in smc sk->sk_wq and they should be woken up
@@ -1048,7 +1045,8 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
{
int rc = SMC_CLC_DECL_NOSMCDDEV;
struct smcd_dev *smcd;
- int i = 1;
+ int i = 1, entry = 1;
+ bool is_virtual;
u16 chid;
if (smcd_indicated(ini->smc_type_v1))
@@ -1060,14 +1058,23 @@ static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
chid = smc_ism_get_chid(smcd);
if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
continue;
+ is_virtual = __smc_ism_is_virtual(chid);
if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
+ if (is_virtual && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
+ /* It's the last GID-CHID entry left in CLC
+ * Proposal SMC-Dv2 extension, but a virtual
+ * ISM device will take two entries. So give
+ * up it and try the next potential ISM device.
+ */
+ continue;
ini->ism_dev[i] = smcd;
ini->ism_chid[i] = chid;
ini->is_smcd = true;
rc = 0;
i++;
- if (i > SMC_MAX_ISM_DEVS)
+ entry = is_virtual ? entry + 2 : entry + 1;
+ if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
break;
}
}
@@ -1149,13 +1156,13 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
}
#define SMC_CLC_MAX_ACCEPT_LEN \
- (sizeof(struct smc_clc_msg_accept_confirm_v2) + \
+ (sizeof(struct smc_clc_msg_accept_confirm) + \
sizeof(struct smc_clc_first_contact_ext_v2x) + \
sizeof(struct smc_clc_msg_trail))
/* CLC handshake during connect */
static int smc_connect_clc(struct smc_sock *smc,
- struct smc_clc_msg_accept_confirm_v2 *aclc2,
+ struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
int rc = 0;
@@ -1165,7 +1172,7 @@ static int smc_connect_clc(struct smc_sock *smc,
if (rc)
return rc;
/* receive SMC Accept CLC message */
- return smc_clc_wait_msg(smc, aclc2, SMC_CLC_MAX_ACCEPT_LEN,
+ return smc_clc_wait_msg(smc, aclc, SMC_CLC_MAX_ACCEPT_LEN,
SMC_CLC_ACCEPT, CLC_WAIT_TIME);
}
@@ -1201,10 +1208,8 @@ static int smc_connect_rdma_v2_prepare(struct smc_sock *smc,
struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
struct smc_clc_first_contact_ext *fce =
- smc_get_clc_first_contact_ext(clc_v2, false);
+ smc_get_clc_first_contact_ext(aclc, false);
struct net *net = sock_net(&smc->sk);
int rc;
@@ -1327,10 +1332,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
}
if (aclc->hdr.version > SMC_V1) {
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
-
- eid = clc_v2->r1.eid;
+ eid = aclc->r1.eid;
if (ini->first_contact_local)
smc_fill_gid_list(link->lgr, &ini->smcrv2.gidlist,
link->smcibdev, link->gid);
@@ -1371,7 +1373,7 @@ connect_abort:
* Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
*/
static int
-smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm_v2 *aclc,
+smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc,
struct smc_init_info *ini)
{
int i;
@@ -1398,12 +1400,9 @@ static int smc_connect_ism(struct smc_sock *smc,
ini->first_contact_peer = aclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK;
if (aclc->hdr.version == SMC_V2) {
- struct smc_clc_msg_accept_confirm_v2 *aclc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
-
if (ini->first_contact_peer) {
struct smc_clc_first_contact_ext *fce =
- smc_get_clc_first_contact_ext(aclc_v2, true);
+ smc_get_clc_first_contact_ext(aclc, true);
ini->release_nr = fce->release;
rc = smc_clc_clnt_v2x_features_validate(fce, ini);
@@ -1411,11 +1410,16 @@ static int smc_connect_ism(struct smc_sock *smc,
return rc;
}
- rc = smc_v2_determine_accepted_chid(aclc_v2, ini);
+ rc = smc_v2_determine_accepted_chid(aclc, ini);
if (rc)
return rc;
+
+ if (__smc_ism_is_virtual(ini->ism_chid[ini->ism_selected]))
+ ini->ism_peer_gid[ini->ism_selected].gid_ext =
+ ntohll(aclc->d1.gid_ext);
+ /* for non-virtual ISM devices, peer gid_ext remains 0. */
}
- ini->ism_peer_gid[ini->ism_selected] = ntohll(aclc->d0.gid);
+ ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
@@ -1437,12 +1441,8 @@ static int smc_connect_ism(struct smc_sock *smc,
smc_rx_init(smc);
smc_tx_init(smc);
- if (aclc->hdr.version > SMC_V1) {
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)aclc;
-
- eid = clc_v2->d1.eid;
- }
+ if (aclc->hdr.version > SMC_V1)
+ eid = aclc->d1.eid;
rc = smc_clc_send_confirm(smc, ini->first_contact_local,
aclc->hdr.version, eid, ini);
@@ -1493,7 +1493,6 @@ static int smc_connect_check_aclc(struct smc_init_info *ini,
static int __smc_connect(struct smc_sock *smc)
{
u8 version = smc_ism_is_v2_capable() ? SMC_V2 : SMC_V1;
- struct smc_clc_msg_accept_confirm_v2 *aclc2;
struct smc_clc_msg_accept_confirm *aclc;
struct smc_init_info *ini = NULL;
u8 *buf = NULL;
@@ -1541,11 +1540,10 @@ static int __smc_connect(struct smc_sock *smc)
rc = SMC_CLC_DECL_MEM;
goto fallback;
}
- aclc2 = (struct smc_clc_msg_accept_confirm_v2 *)buf;
- aclc = (struct smc_clc_msg_accept_confirm *)aclc2;
+ aclc = (struct smc_clc_msg_accept_confirm *)buf;
/* perform CLC handshake */
- rc = smc_connect_clc(smc, aclc2, ini);
+ rc = smc_connect_clc(smc, aclc, ini);
if (rc) {
/* -EAGAIN on timeout, see tcp_recvmsg() */
if (rc == -EAGAIN) {
@@ -2106,7 +2104,8 @@ static bool smc_is_already_selected(struct smcd_dev *smcd,
/* check for ISM devices matching proposed ISM devices */
static void smc_check_ism_v2_match(struct smc_init_info *ini,
- u16 proposed_chid, u64 proposed_gid,
+ u16 proposed_chid,
+ struct smcd_gid *proposed_gid,
unsigned int *matches)
{
struct smcd_dev *smcd;
@@ -2118,7 +2117,11 @@ static void smc_check_ism_v2_match(struct smc_init_info *ini,
continue;
if (smc_ism_get_chid(smcd) == proposed_chid &&
!smc_ism_cantalk(proposed_gid, ISM_RESERVED_VLANID, smcd)) {
- ini->ism_peer_gid[*matches] = proposed_gid;
+ ini->ism_peer_gid[*matches].gid = proposed_gid->gid;
+ if (__smc_ism_is_virtual(proposed_chid))
+ ini->ism_peer_gid[*matches].gid_ext =
+ proposed_gid->gid_ext;
+ /* non-virtual ISM's peer gid_ext remains 0. */
ini->ism_dev[*matches] = smcd;
(*matches)++;
break;
@@ -2140,9 +2143,11 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
struct smc_clc_v2_extension *smc_v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
unsigned int matches = 0;
+ struct smcd_gid smcd_gid;
u8 smcd_version;
u8 *eid = NULL;
int i, rc;
+ u16 chid;
if (!(ini->smcd_version & SMC_V2) || !smcd_indicated(ini->smc_type_v2))
goto not_found;
@@ -2152,18 +2157,35 @@ static void smc_find_ism_v2_device_serv(struct smc_sock *new_smc,
smcd_v2_ext = smc_get_clc_smcd_v2_ext(smc_v2_ext);
mutex_lock(&smcd_dev_list.mutex);
- if (pclc_smcd->ism.chid)
+ if (pclc_smcd->ism.chid) {
/* check for ISM device matching proposed native ISM device */
+ smcd_gid.gid = ntohll(pclc_smcd->ism.gid);
+ smcd_gid.gid_ext = 0;
smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
- ntohll(pclc_smcd->ism.gid), &matches);
- for (i = 1; i <= smc_v2_ext->hdr.ism_gid_cnt; i++) {
+ &smcd_gid, &matches);
+ }
+ for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) {
/* check for ISM devices matching proposed non-native ISM
* devices
*/
- smc_check_ism_v2_match(ini,
- ntohs(smcd_v2_ext->gidchid[i - 1].chid),
- ntohll(smcd_v2_ext->gidchid[i - 1].gid),
- &matches);
+ smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
+ smcd_gid.gid_ext = 0;
+ chid = ntohs(smcd_v2_ext->gidchid[i].chid);
+ if (__smc_ism_is_virtual(chid)) {
+ if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
+ chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid))
+ /* each virtual ISM device takes two GID-CHID
+ * entries and CHID of the second entry repeats
+ * that of the first entry.
+ *
+ * So check if the next GID-CHID entry exists
+ * and both two entries' CHIDs are the same.
+ */
+ continue;
+ smcd_gid.gid_ext =
+ ntohll(smcd_v2_ext->gidchid[++i].gid);
+ }
+ smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches);
}
mutex_unlock(&smcd_dev_list.mutex);
@@ -2212,7 +2234,8 @@ static void smc_find_ism_v1_device_serv(struct smc_sock *new_smc,
if (!(ini->smcd_version & SMC_V1) || !smcd_indicated(ini->smc_type_v1))
goto not_found;
ini->is_smcd = true; /* prepare ISM check */
- ini->ism_peer_gid[0] = ntohll(pclc_smcd->ism.gid);
+ ini->ism_peer_gid[0].gid = ntohll(pclc_smcd->ism.gid);
+ ini->ism_peer_gid[0].gid_ext = 0;
rc = smc_find_ism_device(new_smc, ini);
if (rc)
goto not_found;
@@ -2461,7 +2484,7 @@ static void smc_listen_work(struct work_struct *work)
if (rc)
goto out_decl;
- rc = smc_clc_srv_v2x_features_validate(pclc, ini);
+ rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini);
if (rc)
goto out_decl;
diff --git a/net/smc/smc.h b/net/smc/smc.h
index e377980b8414..df64efd2dee8 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -29,9 +29,6 @@
#define SMCPROTO_SMC 0 /* SMC protocol, IPv4 */
#define SMCPROTO_SMC6 1 /* SMC protocol, IPv6 */
-#define SMC_MAX_ISM_DEVS 8 /* max # of proposed non-native ISM
- * devices
- */
#define SMC_AUTOCORKING_DEFAULT_SIZE 0x10000 /* 64K by default */
extern struct proto smc_proto;
@@ -58,6 +55,13 @@ enum smc_state { /* possible states of an SMC socket */
SMC_PROCESSABORT = 27,
};
+enum smc_supplemental_features {
+ SMC_SPF_VIRT_ISM_DEV = 0,
+};
+
+#define SMC_FEATURE_MASK \
+ (BIT(SMC_SPF_VIRT_ISM_DEV))
+
struct smc_link_group;
struct smc_wr_rx_hdr { /* common prefix part of LLC and CDC to demultiplex */
@@ -196,7 +200,6 @@ struct smc_connection {
* - dec on polled tx cqe
*/
wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/
- atomic_t tx_pushing; /* nr_threads trying tx push */
struct delayed_work tx_work; /* retry of smc_cdc_msg_send */
u32 tx_off; /* base offset in peer rmb */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 72f4d81a3f41..9a13709bea1c 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -155,10 +155,12 @@ static int smc_clc_ueid_remove(char *ueid)
rc = 0;
}
}
+#if IS_ENABLED(CONFIG_S390)
if (!rc && !smc_clc_eid_table.ueid_cnt) {
smc_clc_eid_table.seid_enabled = 1;
rc = -EAGAIN; /* indicate success and enabling of seid */
}
+#endif
write_unlock(&smc_clc_eid_table.lock);
return rc;
}
@@ -273,22 +275,30 @@ err:
int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info)
{
+#if IS_ENABLED(CONFIG_S390)
write_lock(&smc_clc_eid_table.lock);
smc_clc_eid_table.seid_enabled = 1;
write_unlock(&smc_clc_eid_table.lock);
return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
}
int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info)
{
int rc = 0;
+#if IS_ENABLED(CONFIG_S390)
write_lock(&smc_clc_eid_table.lock);
if (!smc_clc_eid_table.ueid_cnt)
rc = -ENOENT;
else
smc_clc_eid_table.seid_enabled = 0;
write_unlock(&smc_clc_eid_table.lock);
+#else
+ rc = -EOPNOTSUPP;
+#endif
return rc;
}
@@ -377,9 +387,9 @@ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc)
/* check arriving CLC accept or confirm */
static bool
-smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm_v2 *clc_v2)
+smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm *clc)
{
- struct smc_clc_msg_hdr *hdr = &clc_v2->hdr;
+ struct smc_clc_msg_hdr *hdr = &clc->hdr;
if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D)
return false;
@@ -418,15 +428,16 @@ smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc)
return true;
}
-static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
- struct smc_init_info *ini)
+static int smc_clc_fill_fce_v2x(struct smc_clc_first_contact_ext_v2x *fce_v2x,
+ struct smc_init_info *ini)
{
- int ret = sizeof(*fce);
+ int ret = sizeof(*fce_v2x);
- memset(fce, 0, sizeof(*fce));
- fce->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
- fce->fce_v2_base.release = ini->release_nr;
- memcpy(fce->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname));
+ memset(fce_v2x, 0, sizeof(*fce_v2x));
+ fce_v2x->fce_v2_base.os_type = SMC_CLC_OS_LINUX;
+ fce_v2x->fce_v2_base.release = ini->release_nr;
+ memcpy(fce_v2x->fce_v2_base.hostname,
+ smc_hostname, sizeof(smc_hostname));
if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) {
ret = sizeof(struct smc_clc_first_contact_ext);
goto out;
@@ -434,9 +445,10 @@ static int smc_clc_fill_fce(struct smc_clc_first_contact_ext_v2x *fce,
if (ini->release_nr >= SMC_RELEASE_1) {
if (!ini->is_smcd) {
- fce->max_conns = ini->max_conns;
- fce->max_links = ini->max_links;
+ fce_v2x->max_conns = ini->max_conns;
+ fce_v2x->max_links = ini->max_links;
}
+ fce_v2x->feature_mask = htons(ini->feature_mask);
}
out:
@@ -448,7 +460,7 @@ out:
*/
static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2;
+ struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_msg_proposal *pclc;
struct smc_clc_msg_decline *dclc;
struct smc_clc_msg_trail *trl;
@@ -466,12 +478,11 @@ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl)
break;
case SMC_CLC_ACCEPT:
case SMC_CLC_CONFIRM:
- clc_v2 = (struct smc_clc_msg_accept_confirm_v2 *)clcm;
- if (!smc_clc_msg_acc_conf_valid(clc_v2))
+ clc = (struct smc_clc_msg_accept_confirm *)clcm;
+ if (!smc_clc_msg_acc_conf_valid(clc))
return false;
trl = (struct smc_clc_msg_trail *)
- ((u8 *)clc_v2 + ntohs(clc_v2->hdr.length) -
- sizeof(*trl));
+ ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl));
break;
case SMC_CLC_DECLINE:
dclc = (struct smc_clc_msg_decline *)clcm;
@@ -824,6 +835,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
struct smc_clc_smcd_gid_chid *gidchids;
struct smc_clc_msg_proposal_area *pclc;
struct smc_clc_ipv6_prefix *ipv6_prfx;
+ struct net *net = sock_net(&smc->sk);
struct smc_clc_v2_extension *v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
@@ -881,11 +893,13 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
ETH_ALEN);
}
if (smcd_indicated(ini->smc_type_v1)) {
+ struct smcd_gid smcd_gid;
+
/* add SMC-D specifics */
if (ini->ism_dev[0]) {
smcd = ini->ism_dev[0];
- pclc_smcd->ism.gid =
- htonll(smcd->ops->get_local_gid(smcd));
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ pclc_smcd->ism.gid = htonll(smcd_gid.gid);
pclc_smcd->ism.chid =
htons(smc_ism_get_chid(ini->ism_dev[0]));
}
@@ -906,6 +920,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
pclc_smcd->v2_ext_offset = htons(v2_ext_offset);
plen += sizeof(*v2_ext);
+ v2_ext->feature_mask = htons(SMC_FEATURE_MASK);
read_lock(&smc_clc_eid_table.lock);
v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt;
plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN;
@@ -917,10 +932,11 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
read_unlock(&smc_clc_eid_table.lock);
}
if (smcd_indicated(ini->smc_type_v2)) {
+ struct smcd_gid smcd_gid;
u8 *eid = NULL;
+ int entry = 0;
v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled;
- v2_ext->hdr.ism_gid_cnt = ini->ism_offered_cnt;
v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) -
offsetofend(struct smc_clnt_opts_area_hdr,
smcd_v2_ext_offset) +
@@ -932,19 +948,31 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) {
smcd = ini->ism_dev[i];
- gidchids[i - 1].gid =
- htonll(smcd->ops->get_local_gid(smcd));
- gidchids[i - 1].chid =
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ gidchids[entry].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
+ gidchids[entry].gid = htonll(smcd_gid.gid);
+ if (smc_ism_is_virtual(smcd)) {
+ /* a virtual ISM device takes two
+ * entries. CHID of the second entry
+ * repeats that of the first entry.
+ */
+ gidchids[entry + 1].chid =
+ gidchids[entry].chid;
+ gidchids[entry + 1].gid =
+ htonll(smcd_gid.gid_ext);
+ entry++;
+ }
+ entry++;
}
- plen += ini->ism_offered_cnt *
- sizeof(struct smc_clc_smcd_gid_chid);
+ plen += entry * sizeof(struct smc_clc_smcd_gid_chid);
}
+ v2_ext->hdr.ism_gid_cnt = entry;
}
if (smcr_indicated(ini->smc_type_v2)) {
memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE);
- v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER;
- v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER;
+ v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr;
+ v2_ext->max_links = net->smc.sysctl_max_links_per_lgr;
}
pclc_base->hdr.length = htons(plen);
@@ -975,7 +1003,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
vec[i++].iov_len = sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
vec[i].iov_base = gidchids;
- vec[i++].iov_len = ini->ism_offered_cnt *
+ vec[i++].iov_len = v2_ext->hdr.ism_gid_cnt *
sizeof(struct smc_clc_smcd_gid_chid);
}
}
@@ -996,109 +1024,143 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
return reason_code;
}
+static void
+smcd_clc_prep_confirm_accept(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc,
+ int first_contact, u8 version,
+ u8 *eid, struct smc_init_info *ini,
+ int *fce_len,
+ struct smc_clc_first_contact_ext_v2x *fce_v2x,
+ struct smc_clc_msg_trail *trl)
+{
+ struct smcd_dev *smcd = conn->lgr->smcd;
+ struct smcd_gid smcd_gid;
+ u16 chid;
+ int len;
+
+ /* SMC-D specific settings */
+ memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
+ sizeof(SMCD_EYECATCHER));
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ clc->hdr.typev1 = SMC_TYPE_D;
+ clc->d0.gid = htonll(smcd_gid.gid);
+ clc->d0.token = htonll(conn->rmb_desc->token);
+ clc->d0.dmbe_size = conn->rmbe_size_comp;
+ clc->d0.dmbe_idx = 0;
+ memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
+ if (version == SMC_V1) {
+ clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
+ } else {
+ chid = smc_ism_get_chid(smcd);
+ clc->d1.chid = htons(chid);
+ if (eid && eid[0])
+ memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN);
+ if (__smc_ism_is_virtual(chid))
+ clc->d1.gid_ext = htonll(smcd_gid.gid_ext);
+ len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
+ if (first_contact) {
+ *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini);
+ len += *fce_len;
+ }
+ clc->hdr.length = htons(len);
+ }
+ memcpy(trl->eyecatcher, SMCD_EYECATCHER,
+ sizeof(SMCD_EYECATCHER));
+}
+
+static void
+smcr_clc_prep_confirm_accept(struct smc_connection *conn,
+ struct smc_clc_msg_accept_confirm *clc,
+ int first_contact, u8 version,
+ u8 *eid, struct smc_init_info *ini,
+ int *fce_len,
+ struct smc_clc_first_contact_ext_v2x *fce_v2x,
+ struct smc_clc_fce_gid_ext *gle,
+ struct smc_clc_msg_trail *trl)
+{
+ struct smc_link *link = conn->lnk;
+ int len;
+
+ /* SMC-R specific settings */
+ memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
+ sizeof(SMC_EYECATCHER));
+ clc->hdr.typev1 = SMC_TYPE_R;
+ memcpy(clc->r0.lcl.id_for_peer, local_systemid,
+ sizeof(local_systemid));
+ memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
+ memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
+ ETH_ALEN);
+ hton24(clc->r0.qpn, link->roce_qp->qp_num);
+ clc->r0.rmb_rkey =
+ htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
+ clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
+ clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
+ switch (clc->hdr.type) {
+ case SMC_CLC_ACCEPT:
+ clc->r0.qp_mtu = link->path_mtu;
+ break;
+ case SMC_CLC_CONFIRM:
+ clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
+ break;
+ }
+ clc->r0.rmbe_size = conn->rmbe_size_comp;
+ clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
+ cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
+ cpu_to_be64((u64)sg_dma_address
+ (conn->rmb_desc->sgt[link->link_idx].sgl));
+ hton24(clc->r0.psn, link->psn_initial);
+ if (version == SMC_V1) {
+ clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
+ } else {
+ if (eid && eid[0])
+ memcpy(clc->r1.eid, eid, SMC_MAX_EID_LEN);
+ len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
+ if (first_contact) {
+ *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini);
+ len += *fce_len;
+ fce_v2x->fce_v2_base.v2_direct =
+ !link->lgr->uses_gateway;
+ if (clc->hdr.type == SMC_CLC_CONFIRM) {
+ memset(gle, 0, sizeof(*gle));
+ gle->gid_cnt = ini->smcrv2.gidlist.len;
+ len += sizeof(*gle);
+ len += gle->gid_cnt * sizeof(gle->gid[0]);
+ }
+ }
+ clc->hdr.length = htons(len);
+ }
+ memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+}
+
/* build and send CLC CONFIRM / ACCEPT message */
static int smc_clc_send_confirm_accept(struct smc_sock *smc,
- struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+ struct smc_clc_msg_accept_confirm *clc,
int first_contact, u8 version,
u8 *eid, struct smc_init_info *ini)
{
+ struct smc_clc_first_contact_ext_v2x fce_v2x;
struct smc_connection *conn = &smc->conn;
- struct smc_clc_first_contact_ext_v2x fce;
- struct smcd_dev *smcd = conn->lgr->smcd;
- struct smc_clc_msg_accept_confirm *clc;
struct smc_clc_fce_gid_ext gle;
struct smc_clc_msg_trail trl;
- int i, len, fce_len;
+ int i, fce_len;
struct kvec vec[5];
struct msghdr msg;
/* send SMC Confirm CLC msg */
- clc = (struct smc_clc_msg_accept_confirm *)clc_v2;
clc->hdr.version = version; /* SMC version */
if (first_contact)
clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK;
- if (conn->lgr->is_smcd) {
- /* SMC-D specific settings */
- memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
- sizeof(SMCD_EYECATCHER));
- clc->hdr.typev1 = SMC_TYPE_D;
- clc->d0.gid = htonll(smcd->ops->get_local_gid(smcd));
- clc->d0.token = htonll(conn->rmb_desc->token);
- clc->d0.dmbe_size = conn->rmbe_size_comp;
- clc->d0.dmbe_idx = 0;
- memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
- if (version == SMC_V1) {
- clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN);
- } else {
- clc_v2->d1.chid = htons(smc_ism_get_chid(smcd));
- if (eid && eid[0])
- memcpy(clc_v2->d1.eid, eid, SMC_MAX_EID_LEN);
- len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact) {
- fce_len = smc_clc_fill_fce(&fce, ini);
- len += fce_len;
- }
- clc_v2->hdr.length = htons(len);
- }
- memcpy(trl.eyecatcher, SMCD_EYECATCHER,
- sizeof(SMCD_EYECATCHER));
- } else {
- struct smc_link *link = conn->lnk;
-
- /* SMC-R specific settings */
- memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER,
- sizeof(SMC_EYECATCHER));
- clc->hdr.typev1 = SMC_TYPE_R;
- clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
- memcpy(clc->r0.lcl.id_for_peer, local_systemid,
- sizeof(local_systemid));
- memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE);
- memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
- ETH_ALEN);
- hton24(clc->r0.qpn, link->roce_qp->qp_num);
- clc->r0.rmb_rkey =
- htonl(conn->rmb_desc->mr[link->link_idx]->rkey);
- clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
- clc->r0.rmbe_alert_token = htonl(conn->alert_token_local);
- switch (clc->hdr.type) {
- case SMC_CLC_ACCEPT:
- clc->r0.qp_mtu = link->path_mtu;
- break;
- case SMC_CLC_CONFIRM:
- clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
- break;
- }
- clc->r0.rmbe_size = conn->rmbe_size_comp;
- clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
- cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
- cpu_to_be64((u64)sg_dma_address
- (conn->rmb_desc->sgt[link->link_idx].sgl));
- hton24(clc->r0.psn, link->psn_initial);
- if (version == SMC_V1) {
- clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
- } else {
- if (eid && eid[0])
- memcpy(clc_v2->r1.eid, eid, SMC_MAX_EID_LEN);
- len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2;
- if (first_contact) {
- fce_len = smc_clc_fill_fce(&fce, ini);
- len += fce_len;
- fce.fce_v2_base.v2_direct = !link->lgr->uses_gateway;
- if (clc->hdr.type == SMC_CLC_CONFIRM) {
- memset(&gle, 0, sizeof(gle));
- gle.gid_cnt = ini->smcrv2.gidlist.len;
- len += sizeof(gle);
- len += gle.gid_cnt * sizeof(gle.gid[0]);
- }
- }
- clc_v2->hdr.length = htons(len);
- }
- memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
- }
-
+ if (conn->lgr->is_smcd)
+ smcd_clc_prep_confirm_accept(conn, clc, first_contact,
+ version, eid, ini, &fce_len,
+ &fce_v2x, &trl);
+ else
+ smcr_clc_prep_confirm_accept(conn, clc, first_contact,
+ version, eid, ini, &fce_len,
+ &fce_v2x, &gle, &trl);
memset(&msg, 0, sizeof(msg));
i = 0;
- vec[i].iov_base = clc_v2;
+ vec[i].iov_base = clc;
if (version > SMC_V1)
vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ?
SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 :
@@ -1110,7 +1172,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
SMCR_CLC_ACCEPT_CONFIRM_LEN) -
sizeof(trl);
if (version > SMC_V1 && first_contact) {
- vec[i].iov_base = &fce;
+ vec[i].iov_base = &fce_v2x;
vec[i++].iov_len = fce_len;
if (!conn->lgr->is_smcd) {
if (clc->hdr.type == SMC_CLC_CONFIRM) {
@@ -1132,16 +1194,16 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 cclc_v2;
+ struct smc_clc_msg_accept_confirm cclc;
int reason_code = 0;
int len;
/* send SMC Confirm CLC msg */
- memset(&cclc_v2, 0, sizeof(cclc_v2));
- cclc_v2.hdr.type = SMC_CLC_CONFIRM;
- len = smc_clc_send_confirm_accept(smc, &cclc_v2, clnt_first_contact,
+ memset(&cclc, 0, sizeof(cclc));
+ cclc.hdr.type = SMC_CLC_CONFIRM;
+ len = smc_clc_send_confirm_accept(smc, &cclc, clnt_first_contact,
version, eid, ini);
- if (len < ntohs(cclc_v2.hdr.length)) {
+ if (len < ntohs(cclc.hdr.length)) {
if (len >= 0) {
reason_code = -ENETUNREACH;
smc->sk.sk_err = -reason_code;
@@ -1157,26 +1219,29 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid, struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 aclc_v2;
+ struct smc_clc_msg_accept_confirm aclc;
int len;
- memset(&aclc_v2, 0, sizeof(aclc_v2));
- aclc_v2.hdr.type = SMC_CLC_ACCEPT;
- len = smc_clc_send_confirm_accept(new_smc, &aclc_v2, srv_first_contact,
+ memset(&aclc, 0, sizeof(aclc));
+ aclc.hdr.type = SMC_CLC_ACCEPT;
+ len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact,
version, negotiated_eid, ini);
- if (len < ntohs(aclc_v2.hdr.length))
+ if (len < ntohs(aclc.hdr.length))
len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err;
return len > 0 ? 0 : len;
}
-int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+int smc_clc_srv_v2x_features_validate(struct smc_sock *smc,
+ struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini)
{
struct smc_clc_v2_extension *pclc_v2_ext;
+ struct net *net = sock_net(&smc->sk);
ini->max_conns = SMC_CONN_PER_LGR_MAX;
ini->max_links = SMC_LINKS_ADD_LNK_MAX;
+ ini->feature_mask = SMC_FEATURE_MASK;
if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) ||
ini->release_nr < SMC_RELEASE_1)
@@ -1187,11 +1252,13 @@ int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
return SMC_CLC_DECL_NOV2EXT;
if (ini->smcr_version & SMC_V2) {
- ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER);
+ ini->max_conns = min_t(u8, pclc_v2_ext->max_conns,
+ net->smc.sysctl_max_conns_per_lgr);
if (ini->max_conns < SMC_CONN_PER_LGR_MIN)
return SMC_CLC_DECL_MAXCONNERR;
- ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER);
+ ini->max_links = min_t(u8, pclc_v2_ext->max_links,
+ net->smc.sysctl_max_links_per_lgr);
if (ini->max_links < SMC_LINKS_ADD_LNK_MIN)
return SMC_CLC_DECL_MAXLINKERR;
}
@@ -1218,6 +1285,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
return SMC_CLC_DECL_MAXLINKERR;
ini->max_links = fce_v2x->max_links;
}
+ /* common supplemental features of server and client */
+ ini->feature_mask = ntohs(fce_v2x->feature_mask) & SMC_FEATURE_MASK;
return 0;
}
@@ -1225,10 +1294,8 @@ int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
struct smc_init_info *ini)
{
- struct smc_clc_msg_accept_confirm_v2 *clc_v2 =
- (struct smc_clc_msg_accept_confirm_v2 *)cclc;
struct smc_clc_first_contact_ext *fce =
- smc_get_clc_first_contact_ext(clc_v2, ini->is_smcd);
+ smc_get_clc_first_contact_ext(cclc, ini->is_smcd);
struct smc_clc_first_contact_ext_v2x *fce_v2x =
(struct smc_clc_first_contact_ext_v2x *)fce;
@@ -1248,6 +1315,8 @@ int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc,
if (fce_v2x->max_links != ini->max_links)
return SMC_CLC_DECL_MAXLINKERR;
}
+ /* common supplemental features returned by client */
+ ini->feature_mask = ntohs(fce_v2x->feature_mask);
return 0;
}
@@ -1269,7 +1338,11 @@ void __init smc_clc_init(void)
INIT_LIST_HEAD(&smc_clc_eid_table.list);
rwlock_init(&smc_clc_eid_table.lock);
smc_clc_eid_table.ueid_cnt = 0;
+#if IS_ENABLED(CONFIG_S390)
smc_clc_eid_table.seid_enabled = 1;
+#else
+ smc_clc_eid_table.seid_enabled = 0;
+#endif
}
void smc_clc_exit(void)
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 08155a96a02a..a9f9bdd26dcd 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -138,7 +138,8 @@ struct smc_clc_v2_extension {
u8 roce[16]; /* RoCEv2 GID */
u8 max_conns;
u8 max_links;
- u8 reserved[14];
+ __be16 feature_mask;
+ u8 reserved[12];
u8 user_eids[][SMC_MAX_EID_LEN];
};
@@ -171,6 +172,11 @@ struct smc_clc_msg_proposal { /* clc proposal message sent by Linux */
#define SMC_CLC_MAX_V6_PREFIX 8
#define SMC_CLC_MAX_UEID 8
+#define SMCD_CLC_MAX_V2_GID_ENTRIES 8 /* max # of CHID-GID entries in CLC
+ * proposal SMC-Dv2 extension.
+ * each ISM device takes one entry and
+ * each virtual ISM takes two entries.
+ */
struct smc_clc_msg_proposal_area {
struct smc_clc_msg_proposal pclc_base;
@@ -180,7 +186,8 @@ struct smc_clc_msg_proposal_area {
struct smc_clc_v2_extension pclc_v2_ext;
u8 user_eids[SMC_CLC_MAX_UEID][SMC_MAX_EID_LEN];
struct smc_clc_smcd_v2_extension pclc_smcd_v2_ext;
- struct smc_clc_smcd_gid_chid pclc_gidchids[SMC_MAX_ISM_DEVS];
+ struct smc_clc_smcd_gid_chid
+ pclc_gidchids[SMCD_CLC_MAX_V2_GID_ENTRIES];
struct smc_clc_msg_trail pclc_trl;
};
@@ -240,9 +247,14 @@ struct smc_clc_first_contact_ext {
struct smc_clc_first_contact_ext_v2x {
struct smc_clc_first_contact_ext fce_v2_base;
- u8 max_conns; /* for SMC-R only */
- u8 max_links; /* for SMC-R only */
- u8 reserved3[2];
+ union {
+ struct {
+ u8 max_conns; /* for SMC-R only */
+ u8 max_links; /* for SMC-R only */
+ };
+ u8 reserved3[2]; /* for SMC-D only */
+ };
+ __be16 feature_mask;
__be32 vendor_exp_options;
u8 reserved4[8];
} __packed; /* format defined in
@@ -259,28 +271,21 @@ struct smc_clc_fce_gid_ext {
struct smc_clc_msg_accept_confirm { /* clc accept / confirm message */
struct smc_clc_msg_hdr hdr;
union {
- struct smcr_clc_msg_accept_confirm r0; /* SMC-R */
- struct { /* SMC-D */
- struct smcd_clc_msg_accept_confirm_common d0;
- u32 reserved5[3];
- };
- };
-} __packed; /* format defined in RFC7609 */
-
-struct smc_clc_msg_accept_confirm_v2 { /* clc accept / confirm message */
- struct smc_clc_msg_hdr hdr;
- union {
struct { /* SMC-R */
struct smcr_clc_msg_accept_confirm r0;
- u8 eid[SMC_MAX_EID_LEN];
- u8 reserved6[8];
- } r1;
+ struct { /* v2 only */
+ u8 eid[SMC_MAX_EID_LEN];
+ u8 reserved6[8];
+ } __packed r1;
+ };
struct { /* SMC-D */
struct smcd_clc_msg_accept_confirm_common d0;
- __be16 chid;
- u8 eid[SMC_MAX_EID_LEN];
- u8 reserved5[8];
- } d1;
+ struct { /* v2 only, but 12 bytes reserved in v1 */
+ __be16 chid;
+ u8 eid[SMC_MAX_EID_LEN];
+ __be64 gid_ext;
+ } __packed d1;
+ };
};
};
@@ -389,24 +394,23 @@ smc_get_clc_smcd_v2_ext(struct smc_clc_v2_extension *prop_v2ext)
}
static inline struct smc_clc_first_contact_ext *
-smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm_v2 *clc_v2,
+smc_get_clc_first_contact_ext(struct smc_clc_msg_accept_confirm *clc,
bool is_smcd)
{
int clc_v2_len;
- if (clc_v2->hdr.version == SMC_V1 ||
- !(clc_v2->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
+ if (clc->hdr.version == SMC_V1 ||
+ !(clc->hdr.typev2 & SMC_FIRST_CONTACT_MASK))
return NULL;
if (is_smcd)
clc_v2_len =
- offsetofend(struct smc_clc_msg_accept_confirm_v2, d1);
+ offsetofend(struct smc_clc_msg_accept_confirm, d1);
else
clc_v2_len =
- offsetofend(struct smc_clc_msg_accept_confirm_v2, r1);
+ offsetofend(struct smc_clc_msg_accept_confirm, r1);
- return (struct smc_clc_first_contact_ext *)(((u8 *)clc_v2) +
- clc_v2_len);
+ return (struct smc_clc_first_contact_ext *)(((u8 *)clc) + clc_v2_len);
}
struct smcd_dev;
@@ -422,7 +426,8 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact,
u8 version, u8 *eid, struct smc_init_info *ini);
int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact,
u8 version, u8 *negotiated_eid, struct smc_init_info *ini);
-int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc,
+int smc_clc_srv_v2x_features_validate(struct smc_sock *smc,
+ struct smc_clc_msg_proposal *pclc,
struct smc_init_info *ini);
int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce,
struct smc_init_info *ini);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d520ee62c8ec..e4c858411207 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -506,6 +506,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
{
char smc_pnet[SMC_MAX_PNETID_LEN + 1];
struct smcd_dev *smcd = lgr->smcd;
+ struct smcd_gid smcd_gid;
struct nlattr *attrs;
void *nlh;
@@ -521,13 +522,19 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
goto errattr;
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
- smcd->ops->get_local_gid(smcd),
- SMC_NLA_LGR_D_PAD))
+ smcd_gid.gid, SMC_NLA_LGR_D_PAD))
goto errattr;
- if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_EXT_GID,
+ smcd_gid.gid_ext, SMC_NLA_LGR_D_PAD))
+ goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid.gid,
SMC_NLA_LGR_D_PAD))
goto errattr;
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_EXT_GID,
+ lgr->peer_gid.gid_ext, SMC_NLA_LGR_D_PAD))
+ goto errattr;
if (nla_put_u8(skb, SMC_NLA_LGR_D_VLAN_ID, lgr->vlan_id))
goto errattr;
if (nla_put_u32(skb, SMC_NLA_LGR_D_CONNS_NUM, lgr->conns_num))
@@ -876,7 +883,10 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
/* SMC-D specific settings */
smcd = ini->ism_dev[ini->ism_selected];
get_device(smcd->ops->get_dev(smcd));
- lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
+ lgr->peer_gid.gid =
+ ini->ism_peer_gid[ini->ism_selected].gid;
+ lgr->peer_gid.gid_ext =
+ ini->ism_peer_gid[ini->ism_selected].gid_ext;
lgr->smcd = ini->ism_dev[ini->ism_selected];
lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
lgr_lock = &lgr->smcd->lgr_lock;
@@ -1514,7 +1524,8 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
}
/* Called when peer lgr shutdown (regularly or abnormally) is received */
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
+void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
+ unsigned short vlan)
{
struct smc_link_group *lgr, *l;
LIST_HEAD(lgr_free_list);
@@ -1522,9 +1533,12 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
/* run common cleanup function and build free list */
spin_lock_bh(&dev->lgr_lock);
list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) {
- if ((!peer_gid || lgr->peer_gid == peer_gid) &&
+ if ((!peer_gid->gid ||
+ (lgr->peer_gid.gid == peer_gid->gid &&
+ !smc_ism_is_virtual(dev) ? 1 :
+ lgr->peer_gid.gid_ext == peer_gid->gid_ext)) &&
(vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) {
- if (peer_gid) /* peer triggered termination */
+ if (peer_gid->gid) /* peer triggered termination */
lgr->peer_shutdown = 1;
list_move(&lgr->list, &lgr_free_list);
lgr->freeing = 1;
@@ -1860,9 +1874,18 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version,
}
static bool smcd_lgr_match(struct smc_link_group *lgr,
- struct smcd_dev *smcismdev, u64 peer_gid)
+ struct smcd_dev *smcismdev,
+ struct smcd_gid *peer_gid)
{
- return lgr->peer_gid == peer_gid && lgr->smcd == smcismdev;
+ if (lgr->peer_gid.gid != peer_gid->gid ||
+ lgr->smcd != smcismdev)
+ return false;
+
+ if (smc_ism_is_virtual(smcismdev) &&
+ lgr->peer_gid.gid_ext != peer_gid->gid_ext)
+ return false;
+
+ return true;
}
/* create a new SMC connection (and a new link group if necessary) */
@@ -1892,7 +1915,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
write_lock_bh(&lgr->conns_lock);
if ((ini->is_smcd ?
smcd_lgr_match(lgr, ini->ism_dev[ini->ism_selected],
- ini->ism_peer_gid[ini->ism_selected]) :
+ &ini->ism_peer_gid[ini->ism_selected]) :
smcr_lgr_match(lgr, ini->smcr_version,
ini->peer_systemid,
ini->peer_gid, ini->peer_mac, role,
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 120027d40469..1f175376037b 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -17,9 +17,11 @@
#include <linux/pci.h>
#include <rdma/ib_verbs.h>
#include <net/genetlink.h>
+#include <net/smc.h>
#include "smc.h"
#include "smc_ib.h"
+#include "smc_clc.h"
#define SMC_RMBS_PER_LGR_MAX 255 /* max. # of RMBs per link group */
#define SMC_CONN_PER_LGR_MIN 16 /* min. # of connections per link group */
@@ -355,7 +357,7 @@ struct smc_link_group {
/* max links can be added in lgr */
};
struct { /* SMC-D */
- u64 peer_gid;
+ struct smcd_gid peer_gid;
/* Peer GID (remote) */
struct smcd_dev *smcd;
/* ISM device for VLAN reg. */
@@ -392,6 +394,11 @@ struct smc_init_info_smcrv2 {
struct smc_gidlist gidlist;
};
+#define SMC_MAX_V2_ISM_DEVS SMCD_CLC_MAX_V2_GID_ENTRIES
+ /* max # of proposed non-native ISM devices,
+ * which can't exceed the max # of CHID-GID
+ * entries in CLC proposal SMC-Dv2 extension.
+ */
struct smc_init_info {
u8 is_smcd;
u8 smc_type_v1;
@@ -401,6 +408,7 @@ struct smc_init_info {
u8 max_links;
u8 first_contact_peer;
u8 first_contact_local;
+ u16 feature_mask;
unsigned short vlan_id;
u32 rc;
u8 negotiated_eid[SMC_MAX_EID_LEN];
@@ -416,9 +424,9 @@ struct smc_init_info {
u32 ib_clcqpn;
struct smc_init_info_smcrv2 smcrv2;
/* SMC-D */
- u64 ism_peer_gid[SMC_MAX_ISM_DEVS + 1];
- struct smcd_dev *ism_dev[SMC_MAX_ISM_DEVS + 1];
- u16 ism_chid[SMC_MAX_ISM_DEVS + 1];
+ struct smcd_gid ism_peer_gid[SMC_MAX_V2_ISM_DEVS + 1];
+ struct smcd_dev *ism_dev[SMC_MAX_V2_ISM_DEVS + 1];
+ u16 ism_chid[SMC_MAX_V2_ISM_DEVS + 1];
u8 ism_offered_cnt; /* # of ISM devices offered */
u8 ism_selected; /* index of selected ISM dev*/
u8 smcd_version;
@@ -544,7 +552,7 @@ void smc_lgr_hold(struct smc_link_group *lgr);
void smc_lgr_put(struct smc_link_group *lgr);
void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
-void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
+void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid,
unsigned short vlan);
void smc_smcd_terminate_all(struct smcd_dev *dev);
void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 5cc376834c57..5a33908015f3 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -21,6 +21,7 @@
#include "smc.h"
#include "smc_core.h"
+#include "smc_ism.h"
struct smc_diag_dump_ctx {
int pos[2];
@@ -163,16 +164,20 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
}
if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
- !list_empty(&smc->conn.lgr->list)) {
+ !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo;
struct smcd_dev *smcd = conn->lgr->smcd;
+ struct smcd_gid smcd_gid;
memset(&dinfo, 0, sizeof(dinfo));
dinfo.linkid = *((u32 *)conn->lgr->id);
- dinfo.peer_gid = conn->lgr->peer_gid;
- dinfo.my_gid = smcd->ops->get_local_gid(smcd);
+ dinfo.peer_gid = conn->lgr->peer_gid.gid;
+ dinfo.peer_gid_ext = conn->lgr->peer_gid.gid_ext;
+ smcd->ops->get_local_gid(smcd, &smcd_gid);
+ dinfo.my_gid = smcd_gid.gid;
+ dinfo.my_gid_ext = smcd_gid.gid_ext;
dinfo.token = conn->rmb_desc->token;
dinfo.peer_token = conn->peer_token;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 89981dbe46c9..97704a9e84c7 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -844,7 +844,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
goto out;
/* the calculated number of cq entries fits to mlx5 cq allocation */
cqe_size_order = cache_line_size() == 128 ? 7 : 6;
- smc_order = MAX_ORDER - cqe_size_order;
+ smc_order = MAX_PAGE_ORDER - cqe_size_order;
if (SMC_MAX_CQE + 2 > (0x00000001 << smc_order) * PAGE_SIZE)
cqattr.cqe = (0x00000001 << smc_order) * PAGE_SIZE - 2;
smcibdev->roce_cq_send = ib_create_cq(smcibdev->ibdev,
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index fbee2493091f..ac88de2a06a0 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -43,8 +43,30 @@ static struct ism_client smc_ism_client = {
};
#endif
+static void smc_ism_create_system_eid(void)
+{
+ struct smc_ism_seid *seid =
+ (struct smc_ism_seid *)smc_ism_v2_system_eid;
+#if IS_ENABLED(CONFIG_S390)
+ struct cpuid id;
+ u16 ident_tail;
+ char tmp[5];
+
+ memcpy(seid->seid_string, "IBM-SYSZ-ISMSEID00000000", 24);
+ get_cpu_id(&id);
+ ident_tail = (u16)(id.ident & SMC_ISM_IDENT_MASK);
+ snprintf(tmp, 5, "%04X", ident_tail);
+ memcpy(seid->serial_number, tmp, 4);
+ snprintf(tmp, 5, "%04X", id.machine);
+ memcpy(seid->type, tmp, 4);
+#else
+ memset(seid, 0, SMC_MAX_EID_LEN);
+#endif
+}
+
/* Test if an ISM communication is possible - same CPC */
-int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
+int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id,
+ struct smcd_dev *smcd)
{
return smcd->ops->query_remote_gid(smcd, peer_gid, vlan_id ? 1 : 0,
vlan_id);
@@ -208,7 +230,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb.dmb_len = dmb_len;
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
- dmb.rgid = lgr->peer_gid;
+ dmb.rgid = lgr->peer_gid.gid;
rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
@@ -340,18 +362,20 @@ union smcd_sw_event_info {
static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
{
+ struct smcd_gid peer_gid = { .gid = wrk->event.tok,
+ .gid_ext = 0 };
union smcd_sw_event_info ev_info;
ev_info.info = wrk->event.info;
switch (wrk->event.code) {
case ISM_EVENT_CODE_SHUTDOWN: /* Peer shut down DMBs */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok, ev_info.vlan_id);
+ smc_smcd_terminate(wrk->smcd, &peer_gid, ev_info.vlan_id);
break;
case ISM_EVENT_CODE_TESTLINK: /* Activity timer */
if (ev_info.code == ISM_EVENT_REQUEST) {
ev_info.code = ISM_EVENT_RESPONSE;
wrk->smcd->ops->signal_event(wrk->smcd,
- wrk->event.tok,
+ &peer_gid,
ISM_EVENT_REQUEST_IR,
ISM_EVENT_CODE_TESTLINK,
ev_info.info);
@@ -365,10 +389,12 @@ static void smc_ism_event_work(struct work_struct *work)
{
struct smc_ism_event_work *wrk =
container_of(work, struct smc_ism_event_work, work);
+ struct smcd_gid smcd_gid = { .gid = wrk->event.tok,
+ .gid_ext = 0 };
switch (wrk->event.type) {
case ISM_EVENT_GID: /* GID event, token is peer GID */
- smc_smcd_terminate(wrk->smcd, wrk->event.tok, VLAN_VID_MASK);
+ smc_smcd_terminate(wrk->smcd, &smcd_gid, VLAN_VID_MASK);
break;
case ISM_EVENT_DMB:
break;
@@ -426,14 +452,8 @@ static void smcd_register_dev(struct ism_dev *ism)
mutex_lock(&smcd_dev_list.mutex);
if (list_empty(&smcd_dev_list.list)) {
- u8 *system_eid = NULL;
-
- system_eid = smcd->ops->get_system_eid();
- if (smcd->ops->supports_v2()) {
+ if (smcd->ops->supports_v2())
smc_ism_v2_capable = true;
- memcpy(smc_ism_v2_system_eid, system_eid,
- SMC_MAX_EID_LEN);
- }
}
/* sort list: devices without pnetid before devices with pnetid */
if (smcd->pnetid[0])
@@ -525,7 +545,7 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr)
memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
ev_info.vlan_id = lgr->vlan_id;
ev_info.code = ISM_EVENT_REQUEST;
- rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, &lgr->peer_gid,
ISM_EVENT_REQUEST_IR,
ISM_EVENT_CODE_SHUTDOWN,
ev_info.info);
@@ -537,10 +557,10 @@ int smc_ism_init(void)
{
int rc = 0;
-#if IS_ENABLED(CONFIG_ISM)
smc_ism_v2_capable = false;
- memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN);
+ smc_ism_create_system_eid();
+#if IS_ENABLED(CONFIG_ISM)
rc = ism_register_client(&smc_ism_client);
#endif
return rc;
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index 832b2f42d79f..ffff40c30a06 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -15,6 +15,9 @@
#include "smc.h"
+#define SMC_VIRTUAL_ISM_CHID_MASK 0xFF00
+#define SMC_ISM_IDENT_MASK 0x00FFFF
+
struct smcd_dev_list { /* List of SMCD devices */
struct list_head list;
struct mutex mutex; /* Protects list of devices */
@@ -28,9 +31,16 @@ struct smc_ism_vlanid { /* VLAN id set on ISM device */
refcount_t refcnt; /* Reference count */
};
+struct smc_ism_seid {
+ u8 seid_string[24];
+ u8 serial_number[4];
+ u8 type[4];
+};
+
struct smcd_dev;
-int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *dev);
+int smc_ism_cantalk(struct smcd_gid *peer_gid, unsigned short vlan_id,
+ struct smcd_dev *dev);
void smc_ism_set_conn(struct smc_connection *conn);
void smc_ism_unset_conn(struct smc_connection *conn);
int smc_ism_get_vlan(struct smcd_dev *dev, unsigned short vlan_id);
@@ -56,4 +66,22 @@ static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
return rc < 0 ? rc : 0;
}
+static inline bool __smc_ism_is_virtual(u16 chid)
+{
+ /* CHIDs in range of 0xFF00 to 0xFFFF are reserved
+ * for virtual ISM device.
+ *
+ * loopback-ism: 0xFFFF
+ * virtio-ism: 0xFF00 ~ 0xFFFE
+ */
+ return ((chid & 0xFF00) == 0xFF00);
+}
+
+static inline bool smc_ism_is_virtual(struct smcd_dev *smcd)
+{
+ u16 chid = smcd->ops->get_chid(smcd);
+
+ return __smc_ism_is_virtual(chid);
+}
+
#endif
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 11775401df68..9f2c58c5a86b 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -1103,8 +1103,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
list_for_each_entry(ismdev, &smcd_dev_list.list, list) {
if (smc_pnet_match(ismdev->pnetid, ndev_pnetid) &&
!ismdev->going_away &&
- (!ini->ism_peer_gid[0] ||
- !smc_ism_cantalk(ini->ism_peer_gid[0], ini->vlan_id,
+ (!ini->ism_peer_gid[0].gid ||
+ !smc_ism_cantalk(&ini->ism_peer_gid[0], ini->vlan_id,
ismdev))) {
ini->ism_dev[0] = ismdev;
break;
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 5cbc18c6e62b..a5946d1b9d60 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -25,6 +25,10 @@ static int max_sndbuf = INT_MAX / 2;
static int max_rcvbuf = INT_MAX / 2;
static const int net_smc_wmem_init = (64 * 1024);
static const int net_smc_rmem_init = (64 * 1024);
+static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN;
+static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
+static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
+static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
static struct ctl_table smc_table[] = {
{
@@ -68,6 +72,24 @@ static struct ctl_table smc_table[] = {
.extra1 = &min_rcvbuf,
.extra2 = &max_rcvbuf,
},
+ {
+ .procname = "smcr_max_links_per_lgr",
+ .data = &init_net.smc.sysctl_max_links_per_lgr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &links_per_lgr_min,
+ .extra2 = &links_per_lgr_max,
+ },
+ {
+ .procname = "smcr_max_conns_per_lgr",
+ .data = &init_net.smc.sysctl_max_conns_per_lgr,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &conns_per_lgr_min,
+ .extra2 = &conns_per_lgr_max,
+ },
{ }
};
@@ -97,6 +119,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
+ net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
+ net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
return 0;
diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h
index 0becc11bd2f4..eb2465ae1e15 100644
--- a/net/smc/smc_sysctl.h
+++ b/net/smc/smc_sysctl.h
@@ -23,6 +23,8 @@ void __net_exit smc_sysctl_net_exit(struct net *net);
static inline int smc_sysctl_net_init(struct net *net)
{
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
+ net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER;
+ net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER;
return 0;
}
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 3b0ff3b589c7..214ac3cbcf9a 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -621,7 +621,7 @@ static int smcd_tx_sndbuf_nonempty(struct smc_connection *conn)
return rc;
}
-static int __smc_tx_sndbuf_nonempty(struct smc_connection *conn)
+int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
{
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
int rc = 0;
@@ -655,34 +655,6 @@ out:
return rc;
}
-int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
-{
- int rc;
-
- /* This make sure only one can send simultaneously to prevent wasting
- * of CPU and CDC slot.
- * Record whether someone has tried to push while we are pushing.
- */
- if (atomic_inc_return(&conn->tx_pushing) > 1)
- return 0;
-
-again:
- atomic_set(&conn->tx_pushing, 1);
- smp_wmb(); /* Make sure tx_pushing is 1 before real send */
- rc = __smc_tx_sndbuf_nonempty(conn);
-
- /* We need to check whether someone else have added some data into
- * the send queue and tried to push but failed after the atomic_set()
- * when we are pushing.
- * If so, we need to push again to prevent those data hang in the send
- * queue.
- */
- if (unlikely(!atomic_dec_and_test(&conn->tx_pushing)))
- goto again;
-
- return rc;
-}
-
/* Wakeup sndbuf consumers from process context
* since there is more data to transmit. The caller
* must hold sock lock.
diff --git a/net/socket.c b/net/socket.c
index 89d79205bf50..ed3df2f749bf 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2163,10 +2163,9 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
struct sockaddr_storage address;
int err;
struct msghdr msg;
- struct iovec iov;
int fput_needed;
- err = import_single_range(ITER_SOURCE, buff, len, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_SOURCE, buff, len, &msg.msg_iter);
if (unlikely(err))
return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
@@ -2228,11 +2227,10 @@ int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
.msg_name = addr ? (struct sockaddr *)&address : NULL,
};
struct socket *sock;
- struct iovec iov;
int err, err2;
int fput_needed;
- err = import_single_range(ITER_DEST, ubuf, size, &iov, &msg.msg_iter);
+ err = import_ubuf(ITER_DEST, ubuf, size, &msg.msg_iter);
if (unlikely(err))
return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 1af71fbb0d80..c7af0220f82f 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -2280,6 +2280,7 @@ static void __exit exit_rpcsec_gss(void)
}
MODULE_ALIAS("rpc-auth-6");
+MODULE_DESCRIPTION("Sun RPC Kerberos RPCSEC_GSS client authentication");
MODULE_LICENSE("GPL");
module_param_named(expired_cred_retry_delay,
gss_expired_cred_retry_delay,
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index e31cfdf7eadc..64cff717c3d9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -650,6 +650,7 @@ static void __exit cleanup_kerberos_module(void)
gss_mech_unregister(&gss_kerberos_mech);
}
+MODULE_DESCRIPTION("Sun RPC Kerberos 5 module");
MODULE_LICENSE("GPL");
module_init(init_kerberos_module);
module_exit(cleanup_kerberos_module);
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 18734e70c5dd..24de94184700 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -866,14 +866,6 @@ svcauth_gss_unwrap_integ(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
struct xdr_buf databody_integ;
struct xdr_netobj checksum;
- /* NFS READ normally uses splice to send data in-place. However
- * the data in cache can change after the reply's MIC is computed
- * but before the RPC reply is sent. To prevent the client from
- * rejecting the server-computed MIC in this somewhat rare case,
- * do not use splice with the GSS integrity service.
- */
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
-
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
return 0;
@@ -948,8 +940,6 @@ svcauth_gss_unwrap_priv(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
struct xdr_buf *buf = xdr->buf;
unsigned int saved_len;
- clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
-
if (xdr_stream_decode_u32(xdr, &len) < 0)
goto unwrap_failed;
if (rqstp->rq_deferred) {
@@ -2014,6 +2004,11 @@ svcauth_gss_domain_release(struct auth_domain *dom)
call_rcu(&dom->rcu_head, svcauth_gss_domain_release_rcu);
}
+static rpc_authflavor_t svcauth_gss_pseudoflavor(struct svc_rqst *rqstp)
+{
+ return svcauth_gss_flavor(rqstp->rq_gssclient);
+}
+
static struct auth_ops svcauthops_gss = {
.name = "rpcsec_gss",
.owner = THIS_MODULE,
@@ -2022,6 +2017,7 @@ static struct auth_ops svcauthops_gss = {
.release = svcauth_gss_release,
.domain_release = svcauth_gss_domain_release,
.set_client = svcauth_gss_set_client,
+ .pseudoflavor = svcauth_gss_pseudoflavor,
};
static int rsi_cache_create_net(struct net *net)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index daa9582ec861..cda0935a68c9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -287,8 +287,14 @@ static struct rpc_xprt *rpc_clnt_set_transport(struct rpc_clnt *clnt,
static void rpc_clnt_set_nodename(struct rpc_clnt *clnt, const char *nodename)
{
- clnt->cl_nodelen = strlcpy(clnt->cl_nodename,
- nodename, sizeof(clnt->cl_nodename));
+ ssize_t copied;
+
+ copied = strscpy(clnt->cl_nodename,
+ nodename, sizeof(clnt->cl_nodename));
+
+ clnt->cl_nodelen = copied < 0
+ ? sizeof(clnt->cl_nodename) - 1
+ : copied;
}
static int rpc_client_register(struct rpc_clnt *clnt,
@@ -797,15 +803,24 @@ out_revert:
}
EXPORT_SYMBOL_GPL(rpc_switch_client_transport);
-static
-int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
- void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
+static struct rpc_xprt_switch *rpc_clnt_xprt_switch_get(struct rpc_clnt *clnt)
{
struct rpc_xprt_switch *xps;
rcu_read_lock();
xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
rcu_read_unlock();
+
+ return xps;
+}
+
+static
+int _rpc_clnt_xprt_iter_init(struct rpc_clnt *clnt, struct rpc_xprt_iter *xpi,
+ void func(struct rpc_xprt_iter *xpi, struct rpc_xprt_switch *xps))
+{
+ struct rpc_xprt_switch *xps;
+
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps == NULL)
return -EAGAIN;
func(xpi, xps);
@@ -1302,8 +1317,10 @@ static void call_bc_encode(struct rpc_task *task);
* rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
* rpc_execute against it
* @req: RPC request
+ * @timeout: timeout values to use for this task
*/
-struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
+struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req,
+ struct rpc_timeout *timeout)
{
struct rpc_task *task;
struct rpc_task_setup task_setup_data = {
@@ -1322,7 +1339,7 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
return task;
}
- xprt_init_bc_request(req, task);
+ xprt_init_bc_request(req, task, timeout);
task->tk_action = call_bc_encode;
atomic_inc(&task->tk_count);
@@ -2206,9 +2223,7 @@ call_connect_status(struct rpc_task *task)
struct rpc_xprt *saved = task->tk_xprt;
struct rpc_xprt_switch *xps;
- rcu_read_lock();
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
if (xps->xps_nxprts > 1) {
long value;
@@ -3116,7 +3131,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
struct rpc_add_xprt_test *data)
{
- struct rpc_xprt_switch *xps;
struct rpc_xprt *main_xprt;
int status = 0;
@@ -3124,7 +3138,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
rcu_read_lock();
main_xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
- xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
status = rpc_cmp_addr_port((struct sockaddr *)&xprt->addr,
(struct sockaddr *)&main_xprt->addr);
rcu_read_unlock();
@@ -3135,7 +3148,6 @@ static int rpc_xprt_probe_trunked(struct rpc_clnt *clnt,
status = rpc_clnt_add_xprt_helper(clnt, xprt, data);
out:
xprt_put(xprt);
- xprt_switch_put(xps);
return status;
}
@@ -3250,34 +3262,27 @@ rpc_set_connect_timeout(struct rpc_clnt *clnt,
}
EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
-void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
-{
- rcu_read_lock();
- xprt_switch_put(rcu_dereference(clnt->cl_xpi.xpi_xpswitch));
- rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_put);
-
void rpc_clnt_xprt_set_online(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
struct rpc_xprt_switch *xps;
- rcu_read_lock();
- xps = rcu_dereference(clnt->cl_xpi.xpi_xpswitch);
- rcu_read_unlock();
+ xps = rpc_clnt_xprt_switch_get(clnt);
xprt_set_online_locked(xprt, xps);
+ xprt_switch_put(xps);
}
void rpc_clnt_xprt_switch_add_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt)
{
+ struct rpc_xprt_switch *xps;
+
if (rpc_clnt_xprt_switch_has_addr(clnt,
(const struct sockaddr *)&xprt->addr)) {
return rpc_clnt_xprt_set_online(clnt, xprt);
}
- rcu_read_lock();
- rpc_xprt_switch_add_xprt(rcu_dereference(clnt->cl_xpi.xpi_xpswitch),
- xprt);
- rcu_read_unlock();
+
+ xps = rpc_clnt_xprt_switch_get(clnt);
+ rpc_xprt_switch_add_xprt(xps, xprt);
+ xprt_switch_put(xps);
}
EXPORT_SYMBOL_GPL(rpc_clnt_xprt_switch_add_xprt);
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 691c0000e9ea..bab6cab29405 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -148,6 +148,7 @@ cleanup_sunrpc(void)
#endif
rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
+MODULE_DESCRIPTION("Sun RPC core");
MODULE_LICENSE("GPL");
fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */
module_exit(cleanup_sunrpc);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 3f2ea7a0496f..b969e505c7b7 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -463,7 +463,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- kref_init(&serv->sv_refcnt);
serv->sv_stats = prog->pg_stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
@@ -564,11 +563,13 @@ EXPORT_SYMBOL_GPL(svc_create_pooled);
* protect sv_permsocks and sv_tempsocks.
*/
void
-svc_destroy(struct kref *ref)
+svc_destroy(struct svc_serv **servp)
{
- struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+ struct svc_serv *serv = *servp;
unsigned int i;
+ *servp = NULL;
+
dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
timer_shutdown_sync(&serv->sv_temptimer);
@@ -675,7 +676,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
return ERR_PTR(-ENOMEM);
- svc_get(serv);
spin_lock_bh(&serv->sv_lock);
serv->sv_nrthreads += 1;
spin_unlock_bh(&serv->sv_lock);
@@ -935,11 +935,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
svc_rqst_free(rqstp);
- svc_put(serv);
- /* That svc_put() cannot be the last, because the thread
- * waiting for SP_VICTIM_REMAINS to clear must hold
- * a reference. So it is still safe to access pool.
- */
clear_and_wake_up_bit(SP_VICTIM_REMAINS, &pool->sp_flags);
}
EXPORT_SYMBOL_GPL(svc_exit_thread);
@@ -1305,8 +1300,6 @@ svc_process_common(struct svc_rqst *rqstp)
int rc;
__be32 *p;
- /* Will be turned off by GSS integrity and privacy services */
- set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);
@@ -1557,6 +1550,7 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
struct rpc_task *task;
int proc_error;
+ struct rpc_timeout timeout;
/* Build the svc_rqst used by the common processing routine */
rqstp->rq_xid = req->rq_xid;
@@ -1602,8 +1596,16 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
return;
}
/* Finally, send the reply synchronously */
+ if (rqstp->bc_to_initval > 0) {
+ timeout.to_initval = rqstp->bc_to_initval;
+ timeout.to_retries = rqstp->bc_to_retries;
+ } else {
+ timeout.to_initval = req->rq_xprt->timeout->to_initval;
+ timeout.to_retries = req->rq_xprt->timeout->to_retries;
+ }
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
- task = rpc_run_bc_task(req);
+ task = rpc_run_bc_task(req, &timeout);
+
if (IS_ERR(task))
return;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 1b71055fc391..b4a85a227bd7 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -1362,29 +1362,36 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen)
}
EXPORT_SYMBOL_GPL(svc_xprt_names);
-
/*----------------------------------------------------------------------------*/
static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
{
unsigned int pidx = (unsigned int)*pos;
- struct svc_serv *serv = m->private;
+ struct svc_info *si = m->private;
dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
+ mutex_lock(si->mutex);
+
if (!pidx)
return SEQ_START_TOKEN;
- return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
+ if (!si->serv)
+ return NULL;
+ return pidx > si->serv->sv_nrpools ? NULL
+ : &si->serv->sv_pools[pidx - 1];
}
static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
{
struct svc_pool *pool = p;
- struct svc_serv *serv = m->private;
+ struct svc_info *si = m->private;
+ struct svc_serv *serv = si->serv;
dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
- if (p == SEQ_START_TOKEN) {
+ if (!serv) {
+ pool = NULL;
+ } else if (p == SEQ_START_TOKEN) {
pool = &serv->sv_pools[0];
} else {
unsigned int pidx = (pool - &serv->sv_pools[0]);
@@ -1399,6 +1406,9 @@ static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
static void svc_pool_stats_stop(struct seq_file *m, void *p)
{
+ struct svc_info *si = m->private;
+
+ mutex_unlock(si->mutex);
}
static int svc_pool_stats_show(struct seq_file *m, void *p)
@@ -1426,14 +1436,18 @@ static const struct seq_operations svc_pool_stats_seq_ops = {
.show = svc_pool_stats_show,
};
-int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
+int svc_pool_stats_open(struct svc_info *info, struct file *file)
{
+ struct seq_file *seq;
int err;
err = seq_open(file, &svc_pool_stats_seq_ops);
- if (!err)
- ((struct seq_file *) file->private_data)->private = serv;
- return err;
+ if (err)
+ return err;
+ seq = file->private_data;
+ seq->private = info;
+
+ return 0;
}
EXPORT_SYMBOL(svc_pool_stats_open);
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index aa4429d0b810..1619211f0960 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -160,6 +160,22 @@ svc_auth_unregister(rpc_authflavor_t flavor)
}
EXPORT_SYMBOL_GPL(svc_auth_unregister);
+/**
+ * svc_auth_flavor - return RPC transaction's RPC_AUTH flavor
+ * @rqstp: RPC transaction context
+ *
+ * Returns an RPC flavor or GSS pseudoflavor.
+ */
+rpc_authflavor_t svc_auth_flavor(struct svc_rqst *rqstp)
+{
+ struct auth_ops *aops = rqstp->rq_authop;
+
+ if (!aops->pseudoflavor)
+ return aops->flavour;
+ return aops->pseudoflavor(rqstp);
+}
+EXPORT_SYMBOL_GPL(svc_auth_flavor);
+
/**************************************************
* 'auth_domains' are stored in a hash table indexed by name.
* When the last reference to an 'auth_domain' is dropped,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 998687421fa6..545017a3daa4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -717,12 +717,12 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
ARRAY_SIZE(rqstp->rq_bvec), xdr);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
}
@@ -1049,18 +1049,14 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
struct rpc_rqst *req = NULL;
struct kvec *src, *dst;
__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
- __be32 xid;
- __be32 calldir;
-
- xid = *p++;
- calldir = *p;
+ __be32 xid = *p;
if (!bc_xprt)
return -EAGAIN;
spin_lock(&bc_xprt->queue_lock);
req = xprt_lookup_rqst(bc_xprt, xid);
if (!req)
- goto unlock_notfound;
+ goto unlock_eagain;
memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
/*
@@ -1077,12 +1073,6 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
rqstp->rq_arg.len = 0;
spin_unlock(&bc_xprt->queue_lock);
return 0;
-unlock_notfound:
- printk(KERN_NOTICE
- "%s: Got unrecognized reply: "
- "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
- __func__, ntohl(calldir),
- bc_xprt, ntohl(xid));
unlock_eagain:
spin_unlock(&bc_xprt->queue_lock);
return -EAGAIN;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2364c485540c..af13fdfa6672 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -651,9 +651,9 @@ static unsigned long xprt_abs_ktime_to_jiffies(ktime_t abstime)
jiffies + nsecs_to_jiffies(-delta);
}
-static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
+static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
- const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
unsigned long majortimeo = req->rq_timeout;
if (to->to_exponential)
@@ -665,9 +665,10 @@ static unsigned long xprt_calc_majortimeo(struct rpc_rqst *req)
return majortimeo;
}
-static void xprt_reset_majortimeo(struct rpc_rqst *req)
+static void xprt_reset_majortimeo(struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
- req->rq_majortimeo += xprt_calc_majortimeo(req);
+ req->rq_majortimeo += xprt_calc_majortimeo(req, to);
}
static void xprt_reset_minortimeo(struct rpc_rqst *req)
@@ -675,7 +676,8 @@ static void xprt_reset_minortimeo(struct rpc_rqst *req)
req->rq_minortimeo += req->rq_timeout;
}
-static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
+static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req,
+ const struct rpc_timeout *to)
{
unsigned long time_init;
struct rpc_xprt *xprt = req->rq_xprt;
@@ -684,8 +686,9 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
time_init = jiffies;
else
time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
- req->rq_timeout = task->tk_client->cl_timeout->to_initval;
- req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
+
+ req->rq_timeout = to->to_initval;
+ req->rq_majortimeo = time_init + xprt_calc_majortimeo(req, to);
req->rq_minortimeo = time_init + req->rq_timeout;
}
@@ -713,7 +716,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
} else {
req->rq_timeout = to->to_initval;
req->rq_retries = 0;
- xprt_reset_majortimeo(req);
+ xprt_reset_majortimeo(req, to);
/* Reset the RTT counters == "slow start" */
spin_lock(&xprt->transport_lock);
rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
@@ -1886,7 +1889,7 @@ xprt_request_init(struct rpc_task *task)
req->rq_snd_buf.bvec = NULL;
req->rq_rcv_buf.bvec = NULL;
req->rq_release_snd_buf = NULL;
- xprt_init_majortimeo(task, req);
+ xprt_init_majortimeo(task, req, task->tk_client->cl_timeout);
trace_xprt_reserve(req);
}
@@ -1983,7 +1986,8 @@ void xprt_release(struct rpc_task *task)
#ifdef CONFIG_SUNRPC_BACKCHANNEL
void
-xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task,
+ const struct rpc_timeout *to)
{
struct xdr_buf *xbufp = &req->rq_snd_buf;
@@ -1996,6 +2000,13 @@ xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
*/
xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
xbufp->tail[0].iov_len;
+ /*
+ * Backchannel Replies are sent with !RPC_TASK_SOFT and
+ * RPC_TASK_NO_RETRANS_TIMEOUT. The major timeout setting
+ * affects only how long each Reply waits to be sent when
+ * a transport connection cannot be established.
+ */
+ xprt_init_majortimeo(task, req, to);
}
#endif
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index 701250b305db..720d3ba742ec 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -284,7 +284,7 @@ struct rpc_xprt *_xprt_switch_find_current_entry(struct list_head *head,
if (cur == pos)
found = true;
if (found && ((find_active && xprt_is_active(pos)) ||
- (!find_active && xprt_is_active(pos))))
+ (!find_active && !xprt_is_active(pos))))
return pos;
}
return NULL;
@@ -336,8 +336,9 @@ struct rpc_xprt *xprt_iter_current_entry_offline(struct rpc_xprt_iter *xpi)
xprt_switch_find_current_entry_offline);
}
-bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
- const struct sockaddr *sap)
+static
+bool __rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
{
struct list_head *head;
struct rpc_xprt *pos;
@@ -356,6 +357,18 @@ bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
return false;
}
+bool rpc_xprt_switch_has_addr(struct rpc_xprt_switch *xps,
+ const struct sockaddr *sap)
+{
+ bool res;
+
+ rcu_read_lock();
+ res = __rpc_xprt_switch_has_addr(xps, sap);
+ rcu_read_unlock();
+
+ return res;
+}
+
static
struct rpc_xprt *xprt_switch_find_next_entry(struct list_head *head,
const struct rpc_xprt *cur, bool check_active)
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index f0d5eeed4c88..f86970733eb0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -256,28 +256,44 @@ out_err:
return rc;
}
+struct workqueue_struct *svcrdma_wq;
+
void svc_rdma_cleanup(void)
{
- dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
svc_unreg_xprt_class(&svc_rdma_class);
svc_rdma_proc_cleanup();
+ if (svcrdma_wq) {
+ struct workqueue_struct *wq = svcrdma_wq;
+
+ svcrdma_wq = NULL;
+ destroy_workqueue(wq);
+ }
+
+ dprintk("SVCRDMA Module Removed, deregister RPC RDMA transport\n");
}
int svc_rdma_init(void)
{
+ struct workqueue_struct *wq;
int rc;
- dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
- dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
- dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
- dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
- dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
+ wq = alloc_workqueue("svcrdma", WQ_UNBOUND, 0);
+ if (!wq)
+ return -ENOMEM;
rc = svc_rdma_proc_init();
- if (rc)
+ if (rc) {
+ destroy_workqueue(wq);
return rc;
+ }
- /* Register RDMA with the SVC transport switch */
+ svcrdma_wq = wq;
svc_reg_xprt_class(&svc_rdma_class);
+
+ dprintk("SVCRDMA Module Init, register RPC RDMA transport\n");
+ dprintk("\tsvcrdma_ord : %d\n", svcrdma_ord);
+ dprintk("\tmax_requests : %u\n", svcrdma_max_requests);
+ dprintk("\tmax_bc_requests : %u\n", svcrdma_max_bc_requests);
+ dprintk("\tmax_inline : %d\n", svcrdma_max_req_size);
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 7420a2c990c7..c9be6778643b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -76,15 +76,12 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
struct rpc_rqst *rqst,
struct svc_rdma_send_ctxt *sctxt)
{
- struct svc_rdma_recv_ctxt *rctxt;
+ struct svc_rdma_pcl empty_pcl;
int ret;
- rctxt = svc_rdma_recv_ctxt_get(rdma);
- if (!rctxt)
- return -EIO;
-
- ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqst->rq_snd_buf);
- svc_rdma_recv_ctxt_put(rdma, rctxt);
+ pcl_init(&empty_pcl);
+ ret = svc_rdma_map_reply_msg(rdma, sctxt, &empty_pcl, &empty_pcl,
+ &rqst->rq_snd_buf);
if (ret < 0)
return -EIO;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 3b05f90a3e50..d72953f29258 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -115,13 +115,6 @@ svc_rdma_next_recv_ctxt(struct list_head *list)
rc_list);
}
-static void svc_rdma_recv_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
-{
- cid->ci_queue_id = rdma->sc_rq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
-
static struct svc_rdma_recv_ctxt *
svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -130,7 +123,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
dma_addr_t addr;
void *buffer;
- ctxt = kmalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
+ ctxt = kzalloc_node(sizeof(*ctxt), GFP_KERNEL, node);
if (!ctxt)
goto fail0;
buffer = kmalloc_node(rdma->sc_max_req_size, GFP_KERNEL, node);
@@ -156,6 +149,7 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->rc_recv_sge.length = rdma->sc_max_req_size;
ctxt->rc_recv_sge.lkey = rdma->sc_pd->local_dma_lkey;
ctxt->rc_recv_buf = buffer;
+ svc_rdma_cc_init(rdma, &ctxt->rc_cc);
return ctxt;
fail2:
@@ -204,18 +198,11 @@ struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
node = llist_del_first(&rdma->sc_recv_ctxts);
if (!node)
- goto out_empty;
- ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
+ return NULL;
-out:
+ ctxt = llist_entry(node, struct svc_rdma_recv_ctxt, rc_node);
ctxt->rc_page_count = 0;
return ctxt;
-
-out_empty:
- ctxt = svc_rdma_recv_ctxt_alloc(rdma);
- if (!ctxt)
- return NULL;
- goto out;
}
/**
@@ -227,6 +214,13 @@ out_empty:
void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
+ svc_rdma_cc_release(rdma, &ctxt->rc_cc, DMA_FROM_DEVICE);
+
+ /* @rc_page_count is normally zero here, but error flows
+ * can leave pages in @rc_pages.
+ */
+ release_pages(ctxt->rc_pages, ctxt->rc_page_count);
+
pcl_free(&ctxt->rc_call_pcl);
pcl_free(&ctxt->rc_read_pcl);
pcl_free(&ctxt->rc_write_pcl);
@@ -271,13 +265,13 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
if (!ctxt)
break;
- trace_svcrdma_post_recv(ctxt);
+ trace_svcrdma_post_recv(&ctxt->rc_cid);
ctxt->rc_recv_wr.next = recv_chain;
recv_chain = &ctxt->rc_recv_wr;
rdma->sc_pending_recvs++;
}
if (!recv_chain)
- return false;
+ return true;
ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
if (ret)
@@ -301,10 +295,27 @@ err_free:
* svc_rdma_post_recvs - Post initial set of Recv WRs
* @rdma: fresh svcxprt_rdma
*
- * Returns true if successful, otherwise false.
+ * Return values:
+ * %true: Receive Queue initialization successful
+ * %false: memory allocation or DMA error
*/
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
{
+ unsigned int total;
+
+ /* For each credit, allocate enough recv_ctxts for one
+ * posted Receive and one RPC in process.
+ */
+ total = (rdma->sc_max_requests * 2) + rdma->sc_recv_batch;
+ while (total--) {
+ struct svc_rdma_recv_ctxt *ctxt;
+
+ ctxt = svc_rdma_recv_ctxt_alloc(rdma);
+ if (!ctxt)
+ return false;
+ llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
+ }
+
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests);
}
@@ -373,6 +384,10 @@ void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
+ while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_read_complete_q))) {
+ list_del(&ctxt->rc_list);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ }
while ((ctxt = svc_rdma_next_recv_ctxt(&rdma->sc_rq_dto_q))) {
list_del(&ctxt->rc_list);
svc_rdma_recv_ctxt_put(rdma, ctxt);
@@ -754,6 +769,122 @@ static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
return true;
}
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_MSG type message
+ * with a single Read chunk (only the upper layer data payload
+ * was conveyed via RDMA Read).
+ */
+static void svc_rdma_read_complete_one(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct svc_rdma_chunk *chunk = pcl_first_chunk(&ctxt->rc_read_pcl);
+ struct xdr_buf *buf = &rqstp->rq_arg;
+ unsigned int length;
+
+ /* Split the Receive buffer between the head and tail
+ * buffers at Read chunk's position. XDR roundup of the
+ * chunk is not included in either the pagelist or in
+ * the tail.
+ */
+ buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position;
+ buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position;
+ buf->head[0].iov_len = chunk->ch_position;
+
+ /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
+ *
+ * If the client already rounded up the chunk length, the
+ * length does not change. Otherwise, the length of the page
+ * list is increased to include XDR round-up.
+ *
+ * Currently these chunks always start at page offset 0,
+ * thus the rounded-up length never crosses a page boundary.
+ */
+ buf->pages = &rqstp->rq_pages[0];
+ length = xdr_align_size(chunk->ch_length);
+ buf->page_len = length;
+ buf->len += length;
+ buf->buflen += length;
+}
+
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_MSG type message
+ * with payload in multiple Read chunks and no PZRC.
+ */
+static void svc_rdma_read_complete_multiple(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct xdr_buf *buf = &rqstp->rq_arg;
+
+ buf->len += ctxt->rc_readbytes;
+ buf->buflen += ctxt->rc_readbytes;
+
+ buf->head[0].iov_base = page_address(rqstp->rq_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes);
+ buf->pages = &rqstp->rq_pages[1];
+ buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len;
+}
+
+/* Finish constructing the RPC Call message in rqstp::rq_arg.
+ *
+ * The incoming RPC/RDMA message is an RDMA_NOMSG type message
+ * (the RPC message body was conveyed via RDMA Read).
+ */
+static void svc_rdma_read_complete_pzrc(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ struct xdr_buf *buf = &rqstp->rq_arg;
+
+ buf->len += ctxt->rc_readbytes;
+ buf->buflen += ctxt->rc_readbytes;
+
+ buf->head[0].iov_base = page_address(rqstp->rq_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, ctxt->rc_readbytes);
+ buf->pages = &rqstp->rq_pages[1];
+ buf->page_len = ctxt->rc_readbytes - buf->head[0].iov_len;
+}
+
+static noinline void svc_rdma_read_complete(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *ctxt)
+{
+ unsigned int i;
+
+ /* Transfer the Read chunk pages into @rqstp.rq_pages, replacing
+ * the rq_pages that were already allocated for this rqstp.
+ */
+ release_pages(rqstp->rq_respages, ctxt->rc_page_count);
+ for (i = 0; i < ctxt->rc_page_count; i++)
+ rqstp->rq_pages[i] = ctxt->rc_pages[i];
+
+ /* Update @rqstp's result send buffer to start after the
+ * last page in the RDMA Read payload.
+ */
+ rqstp->rq_respages = &rqstp->rq_pages[ctxt->rc_page_count];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+
+ /* Prevent svc_rdma_recv_ctxt_put() from releasing the
+ * pages in ctxt::rc_pages a second time.
+ */
+ ctxt->rc_page_count = 0;
+
+ /* Finish constructing the RPC Call message. The exact
+ * procedure for that depends on what kind of RPC/RDMA
+ * chunks were provided by the client.
+ */
+ rqstp->rq_arg = ctxt->rc_saved_arg;
+ if (pcl_is_empty(&ctxt->rc_call_pcl)) {
+ if (ctxt->rc_read_pcl.cl_count == 1)
+ svc_rdma_read_complete_one(rqstp, ctxt);
+ else
+ svc_rdma_read_complete_multiple(rqstp, ctxt);
+ } else {
+ svc_rdma_read_complete_pzrc(rqstp, ctxt);
+ }
+
+ trace_svcrdma_read_finished(&ctxt->rc_cid);
+}
+
/**
* svc_rdma_recvfrom - Receive an RPC call
* @rqstp: request structure into which to receive an RPC Call
@@ -798,8 +929,15 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
- ctxt = NULL;
spin_lock(&rdma_xprt->sc_rq_dto_lock);
+ ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_read_complete_q);
+ if (ctxt) {
+ list_del(&ctxt->rc_list);
+ spin_unlock(&rdma_xprt->sc_rq_dto_lock);
+ svc_xprt_received(xprt);
+ svc_rdma_read_complete(rqstp, ctxt);
+ goto complete;
+ }
ctxt = svc_rdma_next_recv_ctxt(&rdma_xprt->sc_rq_dto_q);
if (ctxt)
list_del(&ctxt->rc_list);
@@ -831,12 +969,10 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
if (!pcl_is_empty(&ctxt->rc_read_pcl) ||
- !pcl_is_empty(&ctxt->rc_call_pcl)) {
- ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
- if (ret < 0)
- goto out_readfail;
- }
+ !pcl_is_empty(&ctxt->rc_call_pcl))
+ goto out_readlist;
+complete:
rqstp->rq_xprt_ctxt = ctxt;
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
@@ -848,12 +984,23 @@ out_err:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
-out_readfail:
- if (ret == -EINVAL)
- svc_rdma_send_error(rdma_xprt, ctxt, ret);
- svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
- svc_xprt_deferred_close(xprt);
- return -ENOTCONN;
+out_readlist:
+ /* This @rqstp is about to be recycled. Save the work
+ * already done constructing the Call message in rq_arg
+ * so it can be restored when the RDMA Reads have
+ * completed.
+ */
+ ctxt->rc_saved_arg = rqstp->rq_arg;
+
+ ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
+ if (ret < 0) {
+ if (ret == -EINVAL)
+ svc_rdma_send_error(rdma_xprt, ctxt, ret);
+ svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
+ svc_xprt_deferred_close(xprt);
+ return ret;
+ }
+ return 0;
out_backchannel:
svc_rdma_handle_bc_reply(rqstp, ctxt);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index e460e25a1d6d..c00fcce61d1e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -39,6 +39,7 @@ struct svc_rdma_rw_ctxt {
struct list_head rw_list;
struct rdma_rw_ctx rw_ctx;
unsigned int rw_nents;
+ unsigned int rw_first_sgl_nents;
struct sg_table rw_sg_table;
struct scatterlist rw_first_sgl[];
};
@@ -53,6 +54,8 @@ svc_rdma_next_ctxt(struct list_head *list)
static struct svc_rdma_rw_ctxt *
svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
{
+ struct ib_device *dev = rdma->sc_cm_id->device;
+ unsigned int first_sgl_nents = dev->attrs.max_send_sge;
struct svc_rdma_rw_ctxt *ctxt;
struct llist_node *node;
@@ -62,32 +65,33 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges)
if (node) {
ctxt = llist_entry(node, struct svc_rdma_rw_ctxt, rw_node);
} else {
- ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE),
- GFP_KERNEL, ibdev_to_node(rdma->sc_cm_id->device));
+ ctxt = kmalloc_node(struct_size(ctxt, rw_first_sgl, first_sgl_nents),
+ GFP_KERNEL, ibdev_to_node(dev));
if (!ctxt)
goto out_noctx;
INIT_LIST_HEAD(&ctxt->rw_list);
+ ctxt->rw_first_sgl_nents = first_sgl_nents;
}
ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl;
if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges,
ctxt->rw_sg_table.sgl,
- SG_CHUNK_SIZE))
+ first_sgl_nents))
goto out_free;
return ctxt;
out_free:
kfree(ctxt);
out_noctx:
- trace_svcrdma_no_rwctx_err(rdma, sges);
+ trace_svcrdma_rwctx_empty(rdma, sges);
return NULL;
}
static void __svc_rdma_put_rw_ctxt(struct svc_rdma_rw_ctxt *ctxt,
struct llist_head *list)
{
- sg_free_table_chained(&ctxt->rw_sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&ctxt->rw_sg_table, ctxt->rw_first_sgl_nents);
llist_add(&ctxt->rw_node, list);
}
@@ -135,57 +139,40 @@ static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma,
ctxt->rw_sg_table.sgl, ctxt->rw_nents,
0, offset, handle, direction);
if (unlikely(ret < 0)) {
+ trace_svcrdma_dma_map_rw_err(rdma, offset, handle,
+ ctxt->rw_nents, ret);
svc_rdma_put_rw_ctxt(rdma, ctxt);
- trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret);
}
return ret;
}
-/* A chunk context tracks all I/O for moving one Read or Write
- * chunk. This is a set of rdma_rw's that handle data movement
- * for all segments of one chunk.
- *
- * These are small, acquired with a single allocator call, and
- * no more than one is needed per chunk. They are allocated on
- * demand, and not cached.
+/**
+ * svc_rdma_cc_init - Initialize an svc_rdma_chunk_ctxt
+ * @rdma: controlling transport instance
+ * @cc: svc_rdma_chunk_ctxt to be initialized
*/
-struct svc_rdma_chunk_ctxt {
- struct rpc_rdma_cid cc_cid;
- struct ib_cqe cc_cqe;
- struct svcxprt_rdma *cc_rdma;
- struct list_head cc_rwctxts;
- ktime_t cc_posttime;
- int cc_sqecount;
- enum ib_wc_status cc_status;
- struct completion cc_done;
-};
-
-static void svc_rdma_cc_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
+void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc)
{
- cid->ci_queue_id = rdma->sc_sq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
+ struct rpc_rdma_cid *cid = &cc->cc_cid;
-static void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
- struct svc_rdma_chunk_ctxt *cc)
-{
- svc_rdma_cc_cid_init(rdma, &cc->cc_cid);
- cc->cc_rdma = rdma;
+ if (unlikely(!cid->ci_completion_id))
+ svc_rdma_send_cid_init(rdma, cid);
INIT_LIST_HEAD(&cc->cc_rwctxts);
cc->cc_sqecount = 0;
}
-/*
- * The consumed rw_ctx's are cleaned and placed on a local llist so
- * that only one atomic llist operation is needed to put them all
- * back on the free list.
+/**
+ * svc_rdma_cc_release - Release resources held by a svc_rdma_chunk_ctxt
+ * @rdma: controlling transport instance
+ * @cc: svc_rdma_chunk_ctxt to be released
+ * @dir: DMA direction
*/
-static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
- enum dma_data_direction dir)
+void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc,
+ enum dma_data_direction dir)
{
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct llist_node *first, *last;
struct svc_rdma_rw_ctxt *ctxt;
LLIST_HEAD(free);
@@ -215,6 +202,8 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
* - Stores arguments for the SGL constructor functions
*/
struct svc_rdma_write_info {
+ struct svcxprt_rdma *wi_rdma;
+
const struct svc_rdma_chunk *wi_chunk;
/* write state of this chunk */
@@ -227,6 +216,7 @@ struct svc_rdma_write_info {
unsigned int wi_next_off;
struct svc_rdma_chunk_ctxt wi_cc;
+ struct work_struct wi_work;
};
static struct svc_rdma_write_info *
@@ -235,25 +225,33 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
{
struct svc_rdma_write_info *info;
- info = kmalloc_node(sizeof(*info), GFP_KERNEL,
+ info = kzalloc_node(sizeof(*info), GFP_KERNEL,
ibdev_to_node(rdma->sc_cm_id->device));
if (!info)
return info;
+ info->wi_rdma = rdma;
info->wi_chunk = chunk;
- info->wi_seg_off = 0;
- info->wi_seg_no = 0;
svc_rdma_cc_init(rdma, &info->wi_cc);
info->wi_cc.cc_cqe.done = svc_rdma_write_done;
return info;
}
-static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+static void svc_rdma_write_info_free_async(struct work_struct *work)
{
- svc_rdma_cc_release(&info->wi_cc, DMA_TO_DEVICE);
+ struct svc_rdma_write_info *info;
+
+ info = container_of(work, struct svc_rdma_write_info, wi_work);
+ svc_rdma_cc_release(info->wi_rdma, &info->wi_cc, DMA_TO_DEVICE);
kfree(info);
}
+static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
+{
+ INIT_WORK(&info->wi_work, svc_rdma_write_info_free_async);
+ queue_work(svcrdma_wq, &info->wi_work);
+}
+
/**
* svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue
@@ -263,16 +261,16 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
*/
static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct svc_rdma_write_info *info =
container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
- trace_svcrdma_wc_write(wc, &cc->cc_cid);
+ trace_svcrdma_wc_write(&cc->cc_cid);
break;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
@@ -289,39 +287,6 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
svc_rdma_write_info_free(info);
}
-/* State for pulling a Read chunk.
- */
-struct svc_rdma_read_info {
- struct svc_rqst *ri_rqst;
- struct svc_rdma_recv_ctxt *ri_readctxt;
- unsigned int ri_pageno;
- unsigned int ri_pageoff;
- unsigned int ri_totalbytes;
-
- struct svc_rdma_chunk_ctxt ri_cc;
-};
-
-static struct svc_rdma_read_info *
-svc_rdma_read_info_alloc(struct svcxprt_rdma *rdma)
-{
- struct svc_rdma_read_info *info;
-
- info = kmalloc_node(sizeof(*info), GFP_KERNEL,
- ibdev_to_node(rdma->sc_cm_id->device));
- if (!info)
- return info;
-
- svc_rdma_cc_init(rdma, &info->ri_cc);
- info->ri_cc.cc_cqe.done = svc_rdma_wc_read_done;
- return info;
-}
-
-static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
-{
- svc_rdma_cc_release(&info->ri_cc, DMA_FROM_DEVICE);
- kfree(info);
-}
-
/**
* svc_rdma_wc_read_done - Handle completion of an RDMA Read ctx
* @cq: controlling Completion Queue
@@ -330,17 +295,27 @@ static void svc_rdma_read_info_free(struct svc_rdma_read_info *info)
*/
static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
{
+ struct svcxprt_rdma *rdma = cq->cq_context;
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svc_rdma_read_info *info;
+ struct svc_rdma_recv_ctxt *ctxt;
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+
+ ctxt = container_of(cc, struct svc_rdma_recv_ctxt, rc_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
- info = container_of(cc, struct svc_rdma_read_info, ri_cc);
- trace_svcrdma_wc_read(wc, &cc->cc_cid, info->ri_totalbytes,
+ trace_svcrdma_wc_read(wc, &cc->cc_cid, ctxt->rc_readbytes,
cc->cc_posttime);
- break;
+
+ spin_lock(&rdma->sc_rq_dto_lock);
+ list_add_tail(&ctxt->rc_list, &rdma->sc_read_complete_q);
+ /* the unlock pairs with the smp_rmb in svc_xprt_ready */
+ set_bit(XPT_DATA, &rdma->sc_xprt.xpt_flags);
+ spin_unlock(&rdma->sc_rq_dto_lock);
+ svc_xprt_enqueue(&rdma->sc_xprt);
+ return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_read_flush(wc, &cc->cc_cid);
break;
@@ -348,10 +323,13 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_read_err(wc, &cc->cc_cid);
}
- svc_rdma_wake_send_waiters(cc->cc_rdma, cc->cc_sqecount);
- cc->cc_status = wc->status;
- complete(&cc->cc_done);
- return;
+ /* The RDMA Read has flushed, so the incoming RPC message
+ * cannot be constructed and must be dropped. Signal the
+ * loss to the client by closing the connection.
+ */
+ svc_rdma_cc_release(rdma, cc, DMA_FROM_DEVICE);
+ svc_rdma_recv_ctxt_put(rdma, ctxt);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/*
@@ -360,9 +338,9 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc)
* even if one or more WRs are flushed. This is true when posting
* an rdma_rw_ctx or when posting a single signaled WR.
*/
-static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
+static int svc_rdma_post_chunk_ctxt(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc)
{
- struct svcxprt_rdma *rdma = cc->cc_rdma;
struct ib_send_wr *first_wr;
const struct ib_send_wr *bad_wr;
struct list_head *tmp;
@@ -396,14 +374,14 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
}
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma);
+ trace_svcrdma_sq_full(rdma, &cc->cc_cid);
atomic_add(cc->cc_sqecount, &rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > cc->cc_sqecount);
- trace_svcrdma_sq_retry(rdma);
+ trace_svcrdma_sq_retry(rdma, &cc->cc_cid);
} while (1);
- trace_svcrdma_sq_post_err(rdma, ret);
+ trace_svcrdma_sq_post_err(rdma, &cc->cc_cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
/* If even one was posted, there will be a completion. */
@@ -473,7 +451,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
unsigned int remaining)
{
struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
- struct svcxprt_rdma *rdma = cc->cc_rdma;
+ struct svcxprt_rdma *rdma = info->wi_rdma;
const struct svc_rdma_segment *seg;
struct svc_rdma_rw_ctxt *ctxt;
int ret;
@@ -516,7 +494,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
return 0;
out_overflow:
- trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no,
+ trace_svcrdma_small_wrch_err(&cc->cc_cid, remaining, info->wi_seg_no,
info->wi_chunk->ch_segcount);
return -E2BIG;
}
@@ -633,7 +611,7 @@ int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
goto out_err;
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(cc);
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
if (ret < 0)
goto out_err;
return xdr->len;
@@ -680,7 +658,7 @@ int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
goto out_err;
trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(cc);
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
if (ret < 0)
goto out_err;
@@ -693,7 +671,8 @@ out_err:
/**
* svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment
- * @info: context for ongoing I/O
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @segment: co-ordinates of remote memory to be read
*
* Returns:
@@ -702,20 +681,20 @@ out_err:
* %-ENOMEM: allocating a local resources failed
* %-EIO: a DMA mapping error occurred
*/
-static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
+static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_segment *segment)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
- struct svc_rqst *rqstp = info->ri_rqst;
+ struct svcxprt_rdma *rdma = svc_rdma_rqst_rdma(rqstp);
+ struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
unsigned int sge_no, seg_len, len;
struct svc_rdma_rw_ctxt *ctxt;
struct scatterlist *sg;
int ret;
len = segment->rs_length;
- sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
- ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
+ sge_no = PAGE_ALIGN(head->rc_pageoff + len) >> PAGE_SHIFT;
+ ctxt = svc_rdma_get_rw_ctxt(rdma, sge_no);
if (!ctxt)
return -ENOMEM;
ctxt->rw_nents = sge_no;
@@ -723,29 +702,27 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
sg = ctxt->rw_sg_table.sgl;
for (sge_no = 0; sge_no < ctxt->rw_nents; sge_no++) {
seg_len = min_t(unsigned int, len,
- PAGE_SIZE - info->ri_pageoff);
+ PAGE_SIZE - head->rc_pageoff);
- if (!info->ri_pageoff)
+ if (!head->rc_pageoff)
head->rc_page_count++;
- sg_set_page(sg, rqstp->rq_pages[info->ri_pageno],
- seg_len, info->ri_pageoff);
+ sg_set_page(sg, rqstp->rq_pages[head->rc_curpage],
+ seg_len, head->rc_pageoff);
sg = sg_next(sg);
- info->ri_pageoff += seg_len;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
+ head->rc_pageoff += seg_len;
+ if (head->rc_pageoff == PAGE_SIZE) {
+ head->rc_curpage++;
+ head->rc_pageoff = 0;
}
len -= seg_len;
- /* Safety check */
- if (len &&
- &rqstp->rq_pages[info->ri_pageno + 1] > rqstp->rq_page_end)
+ if (len && ((head->rc_curpage + 1) > ARRAY_SIZE(rqstp->rq_pages)))
goto out_overrun;
}
- ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, segment->rs_offset,
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset,
segment->rs_handle, DMA_FROM_DEVICE);
if (ret < 0)
return -EIO;
@@ -756,13 +733,14 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
return 0;
out_overrun:
- trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno);
+ trace_svcrdma_page_overrun_err(&cc->cc_cid, head->rc_curpage);
return -EINVAL;
}
/**
* svc_rdma_build_read_chunk - Build RDMA Read WQEs to pull one RDMA chunk
- * @info: context for ongoing I/O
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @chunk: Read chunk to pull
*
* Return values:
@@ -771,7 +749,8 @@ out_overrun:
* %-ENOMEM: allocating a local resources failed
* %-EIO: a DMA mapping error occurred
*/
-static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info,
+static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_chunk *chunk)
{
const struct svc_rdma_segment *segment;
@@ -779,56 +758,56 @@ static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info,
ret = -EINVAL;
pcl_for_each_segment(segment, chunk) {
- ret = svc_rdma_build_read_segment(info, segment);
+ ret = svc_rdma_build_read_segment(rqstp, head, segment);
if (ret < 0)
break;
- info->ri_totalbytes += segment->rs_length;
+ head->rc_readbytes += segment->rs_length;
}
return ret;
}
/**
* svc_rdma_copy_inline_range - Copy part of the inline content into pages
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @offset: offset into the Receive buffer of region to copy
* @remaining: length of region to copy
*
* Take a page at a time from rqstp->rq_pages and copy the inline
* content from the Receive buffer into that page. Update
- * info->ri_pageno and info->ri_pageoff so that the next RDMA Read
+ * head->rc_curpage and head->rc_pageoff so that the next RDMA Read
* result will land contiguously with the copied content.
*
* Return values:
* %0: Inline content was successfully copied
* %-EINVAL: offset or length was incorrect
*/
-static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
+static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
unsigned int offset,
unsigned int remaining)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
unsigned char *dst, *src = head->rc_recv_buf;
- struct svc_rqst *rqstp = info->ri_rqst;
unsigned int page_no, numpages;
- numpages = PAGE_ALIGN(info->ri_pageoff + remaining) >> PAGE_SHIFT;
+ numpages = PAGE_ALIGN(head->rc_pageoff + remaining) >> PAGE_SHIFT;
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
page_len = min_t(unsigned int, remaining,
- PAGE_SIZE - info->ri_pageoff);
+ PAGE_SIZE - head->rc_pageoff);
- if (!info->ri_pageoff)
+ if (!head->rc_pageoff)
head->rc_page_count++;
- dst = page_address(rqstp->rq_pages[info->ri_pageno]);
- memcpy(dst + info->ri_pageno, src + offset, page_len);
+ dst = page_address(rqstp->rq_pages[head->rc_curpage]);
+ memcpy(dst + head->rc_curpage, src + offset, page_len);
- info->ri_totalbytes += page_len;
- info->ri_pageoff += page_len;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
+ head->rc_readbytes += page_len;
+ head->rc_pageoff += page_len;
+ if (head->rc_pageoff == PAGE_SIZE) {
+ head->rc_curpage++;
+ head->rc_pageoff = 0;
}
remaining -= page_len;
offset += page_len;
@@ -839,7 +818,8 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
/**
* svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The chunk data lands in rqstp->rq_arg as a series of contiguous pages,
* like an incoming TCP call.
@@ -851,11 +831,11 @@ static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *info)
+static noinline int
+svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
struct svc_rdma_chunk *chunk, *next;
unsigned int start, length;
int ret;
@@ -863,12 +843,12 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
start = 0;
chunk = pcl_first_chunk(pcl);
length = chunk->ch_position;
- ret = svc_rdma_copy_inline_range(info, start, length);
+ ret = svc_rdma_copy_inline_range(rqstp, head, start, length);
if (ret < 0)
return ret;
pcl_for_each_chunk(chunk, pcl) {
- ret = svc_rdma_build_read_chunk(info, chunk);
+ ret = svc_rdma_build_read_chunk(rqstp, head, chunk);
if (ret < 0)
return ret;
@@ -877,31 +857,21 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
break;
start += length;
- length = next->ch_position - info->ri_totalbytes;
- ret = svc_rdma_copy_inline_range(info, start, length);
+ length = next->ch_position - head->rc_readbytes;
+ ret = svc_rdma_copy_inline_range(rqstp, head, start, length);
if (ret < 0)
return ret;
}
start += length;
length = head->rc_byte_len - start;
- ret = svc_rdma_copy_inline_range(info, start, length);
- if (ret < 0)
- return ret;
-
- buf->len += info->ri_totalbytes;
- buf->buflen += info->ri_totalbytes;
-
- buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
- buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
- buf->pages = &info->ri_rqst->rq_pages[1];
- buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
- return 0;
+ return svc_rdma_copy_inline_range(rqstp, head, start, length);
}
/**
* svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The chunk data lands in the page list of rqstp->rq_arg.pages.
*
@@ -916,50 +886,17 @@ static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *inf
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
+static int svc_rdma_read_data_item(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
- struct svc_rdma_chunk *chunk;
- unsigned int length;
- int ret;
-
- chunk = pcl_first_chunk(&head->rc_read_pcl);
- ret = svc_rdma_build_read_chunk(info, chunk);
- if (ret < 0)
- goto out;
-
- /* Split the Receive buffer between the head and tail
- * buffers at Read chunk's position. XDR roundup of the
- * chunk is not included in either the pagelist or in
- * the tail.
- */
- buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position;
- buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position;
- buf->head[0].iov_len = chunk->ch_position;
-
- /* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
- *
- * If the client already rounded up the chunk length, the
- * length does not change. Otherwise, the length of the page
- * list is increased to include XDR round-up.
- *
- * Currently these chunks always start at page offset 0,
- * thus the rounded-up length never crosses a page boundary.
- */
- buf->pages = &info->ri_rqst->rq_pages[0];
- length = xdr_align_size(chunk->ch_length);
- buf->page_len = length;
- buf->len += length;
- buf->buflen += length;
-
-out:
- return ret;
+ return svc_rdma_build_read_chunk(rqstp, head,
+ pcl_first_chunk(&head->rc_read_pcl));
}
/**
- * svc_rdma_read_chunk_range - Build RDMA Read WQEs for portion of a chunk
- * @info: context for RDMA Reads
+ * svc_rdma_read_chunk_range - Build RDMA Read WRs for portion of a chunk
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
* @chunk: parsed Call chunk to pull
* @offset: offset of region to pull
* @length: length of region to pull
@@ -971,7 +908,8 @@ out:
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
+static int svc_rdma_read_chunk_range(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head,
const struct svc_rdma_chunk *chunk,
unsigned int offset, unsigned int length)
{
@@ -991,11 +929,11 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
dummy.rs_length = min_t(u32, length, segment->rs_length) - offset;
dummy.rs_offset = segment->rs_offset + offset;
- ret = svc_rdma_build_read_segment(info, &dummy);
+ ret = svc_rdma_build_read_segment(rqstp, head, &dummy);
if (ret < 0)
break;
- info->ri_totalbytes += dummy.rs_length;
+ head->rc_readbytes += dummy.rs_length;
length -= dummy.rs_length;
offset = 0;
}
@@ -1004,7 +942,8 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
/**
* svc_rdma_read_call_chunk - Build RDMA Read WQEs to pull a Long Message
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* Return values:
* %0: RDMA Read WQEs were successfully built
@@ -1013,9 +952,9 @@ static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
+static int svc_rdma_read_call_chunk(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
const struct svc_rdma_chunk *call_chunk =
pcl_first_chunk(&head->rc_call_pcl);
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
@@ -1024,17 +963,18 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
int ret;
if (pcl_is_empty(pcl))
- return svc_rdma_build_read_chunk(info, call_chunk);
+ return svc_rdma_build_read_chunk(rqstp, head, call_chunk);
start = 0;
chunk = pcl_first_chunk(pcl);
length = chunk->ch_position;
- ret = svc_rdma_read_chunk_range(info, call_chunk, start, length);
+ ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
+ start, length);
if (ret < 0)
return ret;
pcl_for_each_chunk(chunk, pcl) {
- ret = svc_rdma_build_read_chunk(info, chunk);
+ ret = svc_rdma_build_read_chunk(rqstp, head, chunk);
if (ret < 0)
return ret;
@@ -1043,8 +983,8 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
break;
start += length;
- length = next->ch_position - info->ri_totalbytes;
- ret = svc_rdma_read_chunk_range(info, call_chunk,
+ length = next->ch_position - head->rc_readbytes;
+ ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
start, length);
if (ret < 0)
return ret;
@@ -1052,12 +992,14 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
start += length;
length = call_chunk->ch_length - start;
- return svc_rdma_read_chunk_range(info, call_chunk, start, length);
+ return svc_rdma_read_chunk_range(rqstp, head, call_chunk,
+ start, length);
}
/**
* svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message
- * @info: context for RDMA Reads
+ * @rqstp: RPC transaction context
+ * @head: context for ongoing I/O
*
* The start of the data lands in the first page just after the
* Transport header, and the rest lands in rqstp->rq_arg.pages.
@@ -1073,25 +1015,31 @@ static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
* %-ENOTCONN: posting failed (connection is lost),
* %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
+static noinline int svc_rdma_read_special(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
- struct xdr_buf *buf = &info->ri_rqst->rq_arg;
- int ret;
-
- ret = svc_rdma_read_call_chunk(info);
- if (ret < 0)
- goto out;
-
- buf->len += info->ri_totalbytes;
- buf->buflen += info->ri_totalbytes;
+ return svc_rdma_read_call_chunk(rqstp, head);
+}
- buf->head[0].iov_base = page_address(info->ri_rqst->rq_pages[0]);
- buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
- buf->pages = &info->ri_rqst->rq_pages[1];
- buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
+/* Pages under I/O have been copied to head->rc_pages. Ensure that
+ * svc_xprt_release() does not put them when svc_rdma_recvfrom()
+ * returns. This has to be done after all Read WRs are constructed
+ * to properly handle a page that happens to be part of I/O on behalf
+ * of two different RDMA segments.
+ *
+ * Note: if the subsequent post_send fails, these pages have already
+ * been moved to head->rc_pages and thus will be cleaned up by
+ * svc_rdma_recv_ctxt_put().
+ */
+static void svc_rdma_clear_rqst_pages(struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
+{
+ unsigned int i;
-out:
- return ret;
+ for (i = 0; i < head->rc_page_count; i++) {
+ head->rc_pages[i] = rqstp->rq_pages[i];
+ rqstp->rq_pages[i] = NULL;
+ }
}
/**
@@ -1121,49 +1069,27 @@ int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head)
{
- struct svc_rdma_read_info *info;
- struct svc_rdma_chunk_ctxt *cc;
+ struct svc_rdma_chunk_ctxt *cc = &head->rc_cc;
int ret;
- info = svc_rdma_read_info_alloc(rdma);
- if (!info)
- return -ENOMEM;
- cc = &info->ri_cc;
- info->ri_rqst = rqstp;
- info->ri_readctxt = head;
- info->ri_pageno = 0;
- info->ri_pageoff = 0;
- info->ri_totalbytes = 0;
+ cc->cc_cqe.done = svc_rdma_wc_read_done;
+ cc->cc_sqecount = 0;
+ head->rc_pageoff = 0;
+ head->rc_curpage = 0;
+ head->rc_readbytes = 0;
if (pcl_is_empty(&head->rc_call_pcl)) {
if (head->rc_read_pcl.cl_count == 1)
- ret = svc_rdma_read_data_item(info);
+ ret = svc_rdma_read_data_item(rqstp, head);
else
- ret = svc_rdma_read_multiple_chunks(info);
+ ret = svc_rdma_read_multiple_chunks(rqstp, head);
} else
- ret = svc_rdma_read_special(info);
+ ret = svc_rdma_read_special(rqstp, head);
+ svc_rdma_clear_rqst_pages(rqstp, head);
if (ret < 0)
- goto out_err;
+ return ret;
trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount);
- init_completion(&cc->cc_done);
- ret = svc_rdma_post_chunk_ctxt(cc);
- if (ret < 0)
- goto out_err;
-
- ret = 1;
- wait_for_completion(&cc->cc_done);
- if (cc->cc_status != IB_WC_SUCCESS)
- ret = -EIO;
-
- /* rq_respages starts after the last arg page */
- rqstp->rq_respages = &rqstp->rq_pages[head->rc_page_count];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
-
- /* Ensure svc_rdma_recv_ctxt_put() does not try to release pages */
- head->rc_page_count = 0;
-
-out_err:
- svc_rdma_read_info_free(info);
- return ret;
+ ret = svc_rdma_post_chunk_ctxt(rdma, cc);
+ return ret < 0 ? ret : 1;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index c6644cca52c5..1a49b7f02041 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -113,13 +113,6 @@
static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc);
-static void svc_rdma_send_cid_init(struct svcxprt_rdma *rdma,
- struct rpc_rdma_cid *cid)
-{
- cid->ci_queue_id = rdma->sc_sq_cq->res.id;
- cid->ci_completion_id = atomic_inc_return(&rdma->sc_completion_ids);
-}
-
static struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
{
@@ -129,7 +122,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
void *buffer;
int i;
- ctxt = kmalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
+ ctxt = kzalloc_node(struct_size(ctxt, sc_sges, rdma->sc_max_send_sges),
GFP_KERNEL, node);
if (!ctxt)
goto fail0;
@@ -143,6 +136,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
svc_rdma_send_cid_init(rdma, &ctxt->sc_cid);
+ ctxt->sc_rdma = rdma;
ctxt->sc_send_wr.next = NULL;
ctxt->sc_send_wr.wr_cqe = &ctxt->sc_cqe;
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
@@ -200,10 +194,11 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
spin_lock(&rdma->sc_send_lock);
node = llist_del_first(&rdma->sc_send_ctxts);
+ spin_unlock(&rdma->sc_send_lock);
if (!node)
goto out_empty;
+
ctxt = llist_entry(node, struct svc_rdma_send_ctxt, sc_node);
- spin_unlock(&rdma->sc_send_lock);
out:
rpcrdma_set_xdrlen(&ctxt->sc_hdrbuf, 0);
@@ -216,22 +211,14 @@ out:
return ctxt;
out_empty:
- spin_unlock(&rdma->sc_send_lock);
ctxt = svc_rdma_send_ctxt_alloc(rdma);
if (!ctxt)
return NULL;
goto out;
}
-/**
- * svc_rdma_send_ctxt_put - Return send_ctxt to free list
- * @rdma: controlling svcxprt_rdma
- * @ctxt: object to return to the free list
- *
- * Pages left in sc_pages are DMA unmapped and released.
- */
-void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt)
+static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
{
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
@@ -243,18 +230,40 @@ void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
* remains mapped until @ctxt is destroyed.
*/
for (i = 1; i < ctxt->sc_send_wr.num_sge; i++) {
+ trace_svcrdma_dma_unmap_page(&ctxt->sc_cid,
+ ctxt->sc_sges[i].addr,
+ ctxt->sc_sges[i].length);
ib_dma_unmap_page(device,
ctxt->sc_sges[i].addr,
ctxt->sc_sges[i].length,
DMA_TO_DEVICE);
- trace_svcrdma_dma_unmap_page(rdma,
- ctxt->sc_sges[i].addr,
- ctxt->sc_sges[i].length);
}
llist_add(&ctxt->sc_node, &rdma->sc_send_ctxts);
}
+static void svc_rdma_send_ctxt_put_async(struct work_struct *work)
+{
+ struct svc_rdma_send_ctxt *ctxt;
+
+ ctxt = container_of(work, struct svc_rdma_send_ctxt, sc_work);
+ svc_rdma_send_ctxt_release(ctxt->sc_rdma, ctxt);
+}
+
+/**
+ * svc_rdma_send_ctxt_put - Return send_ctxt to free list
+ * @rdma: controlling svcxprt_rdma
+ * @ctxt: object to return to the free list
+ *
+ * Pages left in sc_pages are DMA unmapped and released.
+ */
+void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ INIT_WORK(&ctxt->sc_work, svc_rdma_send_ctxt_put_async);
+ queue_work(svcrdma_wq, &ctxt->sc_work);
+}
+
/**
* svc_rdma_wake_send_waiters - manage Send Queue accounting
* @rdma: controlling transport
@@ -289,7 +298,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
- trace_svcrdma_wc_send(wc, &ctxt->sc_cid);
+ trace_svcrdma_wc_send(&ctxt->sc_cid);
svc_rdma_send_ctxt_put(rdma, ctxt);
return;
@@ -327,13 +336,13 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
while (1) {
if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma);
+ trace_svcrdma_sq_full(rdma, &ctxt->sc_cid);
atomic_inc(&rdma->sc_sq_avail);
wait_event(rdma->sc_send_wait,
atomic_read(&rdma->sc_sq_avail) > 1);
if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
return -ENOTCONN;
- trace_svcrdma_sq_retry(rdma);
+ trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid);
continue;
}
@@ -344,7 +353,7 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
return 0;
}
- trace_svcrdma_sq_post_err(rdma, ret);
+ trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
wake_up(&rdma->sc_send_wait);
return ret;
@@ -534,14 +543,14 @@ static int svc_rdma_page_dma_map(void *data, struct page *page,
if (ib_dma_mapping_error(dev, dma_addr))
goto out_maperr;
- trace_svcrdma_dma_map_page(rdma, dma_addr, len);
+ trace_svcrdma_dma_map_page(&ctxt->sc_cid, dma_addr, len);
ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
ctxt->sc_send_wr.num_sge++;
return 0;
out_maperr:
- trace_svcrdma_dma_map_err(rdma, dma_addr, len);
+ trace_svcrdma_dma_map_err(&ctxt->sc_cid, dma_addr, len);
return -EIO;
}
@@ -653,7 +662,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
* svc_rdma_pull_up_needed - Determine whether to use pull-up
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
* @xdr: xdr_buf containing RPC message to transmit
*
* Returns:
@@ -662,7 +671,7 @@ static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
*/
static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
const struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
const struct xdr_buf *xdr)
{
/* Resources needed for the transport header */
@@ -672,7 +681,7 @@ static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
};
int ret;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_count_sges, &args);
if (ret < 0)
return false;
@@ -728,7 +737,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
* svc_rdma_pull_up_reply_msg - Copy Reply into a single buffer
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR; xprt hdr is already prepared
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
* @xdr: prepared xdr_buf containing RPC message
*
* The device is not capable of sending the reply directly.
@@ -743,7 +752,7 @@ static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
*/
static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
const struct xdr_buf *xdr)
{
struct svc_rdma_pullup_data args = {
@@ -751,7 +760,7 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
};
int ret;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_linearize, &args);
if (ret < 0)
return ret;
@@ -764,7 +773,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
/* svc_rdma_map_reply_msg - DMA map the buffer holding RPC message
* @rdma: controlling transport
* @sctxt: send_ctxt for the Send WR
- * @rctxt: Write and Reply chunks provided by client
+ * @write_pcl: Write chunk list provided by client
+ * @reply_pcl: Reply chunk provided by client
* @xdr: prepared xdr_buf containing RPC message
*
* Returns:
@@ -776,7 +786,8 @@ static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
*/
int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
- const struct svc_rdma_recv_ctxt *rctxt,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
const struct xdr_buf *xdr)
{
struct svc_rdma_map_data args = {
@@ -789,18 +800,18 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
/* If there is a Reply chunk, nothing follows the transport
- * header, and we're done here.
+ * header, so there is nothing to map.
*/
- if (!pcl_is_empty(&rctxt->rc_reply_pcl))
+ if (!pcl_is_empty(reply_pcl))
return 0;
/* For pull-up, svc_rdma_send() will sync the transport header.
* No additional DMA mapping is necessary.
*/
- if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
- return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
+ if (svc_rdma_pull_up_needed(rdma, sctxt, write_pcl, xdr))
+ return svc_rdma_pull_up_reply_msg(rdma, sctxt, write_pcl, xdr);
- return pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ return pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_dma_map, &args);
}
@@ -848,7 +859,8 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
{
int ret;
- ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
+ &rctxt->rc_reply_pcl, &rqstp->rq_res);
if (ret < 0)
return ret;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 2abd895046ee..4f27325ace4a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -125,6 +125,9 @@ static void qp_event_handler(struct ib_event *event, void *context)
static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
struct net *net, int node)
{
+ static struct lock_class_key svcrdma_rwctx_lock;
+ static struct lock_class_key svcrdma_sctx_lock;
+ static struct lock_class_key svcrdma_dto_lock;
struct svcxprt_rdma *cma_xprt;
cma_xprt = kzalloc_node(sizeof(*cma_xprt), GFP_KERNEL, node);
@@ -134,6 +137,7 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
svc_xprt_init(net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
+ INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
init_llist_head(&cma_xprt->sc_send_ctxts);
init_llist_head(&cma_xprt->sc_recv_ctxts);
init_llist_head(&cma_xprt->sc_rw_ctxts);
@@ -141,8 +145,11 @@ static struct svcxprt_rdma *svc_rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_lock);
spin_lock_init(&cma_xprt->sc_rq_dto_lock);
+ lockdep_set_class(&cma_xprt->sc_rq_dto_lock, &svcrdma_dto_lock);
spin_lock_init(&cma_xprt->sc_send_lock);
+ lockdep_set_class(&cma_xprt->sc_send_lock, &svcrdma_sctx_lock);
spin_lock_init(&cma_xprt->sc_rw_ctxt_lock);
+ lockdep_set_class(&cma_xprt->sc_rw_ctxt_lock, &svcrdma_rwctx_lock);
/*
* Note that this implies that the underlying transport support
@@ -391,37 +398,35 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dev = newxprt->sc_cm_id->device;
newxprt->sc_port_num = newxprt->sc_cm_id->port_num;
- /* Qualify the transport resource defaults with the
- * capabilities of this particular device */
+ newxprt->sc_max_req_size = svcrdma_max_req_size;
+ newxprt->sc_max_requests = svcrdma_max_requests;
+ newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
+ newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
+ newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
+
+ /* Qualify the transport's resource defaults with the
+ * capabilities of this particular device.
+ */
+
/* Transport header, head iovec, tail iovec */
newxprt->sc_max_send_sges = 3;
/* Add one SGE per page list entry */
newxprt->sc_max_send_sges += (svcrdma_max_req_size / PAGE_SIZE) + 1;
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
- newxprt->sc_max_req_size = svcrdma_max_req_size;
- newxprt->sc_max_requests = svcrdma_max_requests;
- newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
- newxprt->sc_recv_batch = RPCRDMA_MAX_RECV_BATCH;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
newxprt->sc_recv_batch;
if (rq_depth > dev->attrs.max_qp_wr) {
- pr_warn("svcrdma: reducing receive depth to %d\n",
- dev->attrs.max_qp_wr);
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
- newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
ctxts *= newxprt->sc_max_requests;
newxprt->sc_sq_depth = rq_depth + ctxts;
- if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
- pr_warn("svcrdma: reducing send depth to %d\n",
- dev->attrs.max_qp_wr);
+ if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
- }
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
newxprt->sc_pd = ib_alloc_pd(dev, 0);
@@ -451,8 +456,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = newxprt->sc_sq_cq;
qp_attr.recv_cq = newxprt->sc_rq_cq;
- dprintk("svcrdma: newxprt->sc_cm_id=%p, newxprt->sc_pd=%p\n",
- newxprt->sc_cm_id, newxprt->sc_pd);
dprintk(" cap.max_send_wr = %d, cap.max_recv_wr = %d\n",
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
@@ -506,7 +509,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
}
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- dprintk("svcrdma: new connection %p accepted:\n", newxprt);
+ dprintk("svcrdma: new connection accepted on device %s:\n", dev->name);
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
dprintk(" local address : %pIS:%u\n", sap, rpc_get_port(sap));
sap = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
@@ -547,6 +550,7 @@ static void __svc_rdma_free(struct work_struct *work)
/* This blocks until the Completion Queues are empty */
if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
ib_drain_qp(rdma->sc_qp);
+ flush_workqueue(svcrdma_wq);
svc_rdma_flush_recv_queues(rdma);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 28c0771c4e8c..4f8d7efa469f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1364,7 +1364,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
}
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
- trace_xprtrdma_post_recv(rep);
+ trace_xprtrdma_post_recv(&rep->rr_cid);
rep->rr_recv_wr.next = wr;
wr = &rep->rr_recv_wr;
--needed;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 5b045284849e..c9189a970eec 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -19,6 +19,35 @@
#include <linux/rtnetlink.h>
#include <net/switchdev.h>
+static bool switchdev_obj_eq(const struct switchdev_obj *a,
+ const struct switchdev_obj *b)
+{
+ const struct switchdev_obj_port_vlan *va, *vb;
+ const struct switchdev_obj_port_mdb *ma, *mb;
+
+ if (a->id != b->id || a->orig_dev != b->orig_dev)
+ return false;
+
+ switch (a->id) {
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ va = SWITCHDEV_OBJ_PORT_VLAN(a);
+ vb = SWITCHDEV_OBJ_PORT_VLAN(b);
+ return va->flags == vb->flags &&
+ va->vid == vb->vid &&
+ va->changed == vb->changed;
+ case SWITCHDEV_OBJ_ID_PORT_MDB:
+ case SWITCHDEV_OBJ_ID_HOST_MDB:
+ ma = SWITCHDEV_OBJ_PORT_MDB(a);
+ mb = SWITCHDEV_OBJ_PORT_MDB(b);
+ return ma->vid == mb->vid &&
+ ether_addr_equal(ma->addr, mb->addr);
+ default:
+ break;
+ }
+
+ BUG();
+}
+
static LIST_HEAD(deferred);
static DEFINE_SPINLOCK(deferred_lock);
@@ -307,6 +336,50 @@ int switchdev_port_obj_del(struct net_device *dev,
}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
+/**
+ * switchdev_port_obj_act_is_deferred - Is object action pending?
+ *
+ * @dev: port device
+ * @nt: type of action; add or delete
+ * @obj: object to test
+ *
+ * Returns true if a deferred item is pending, which is
+ * equivalent to the action @nt on an object @obj.
+ *
+ * rtnl_lock must be held.
+ */
+bool switchdev_port_obj_act_is_deferred(struct net_device *dev,
+ enum switchdev_notifier_type nt,
+ const struct switchdev_obj *obj)
+{
+ struct switchdev_deferred_item *dfitem;
+ bool found = false;
+
+ ASSERT_RTNL();
+
+ spin_lock_bh(&deferred_lock);
+
+ list_for_each_entry(dfitem, &deferred, list) {
+ if (dfitem->dev != dev)
+ continue;
+
+ if ((dfitem->func == switchdev_port_obj_add_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_ADD) ||
+ (dfitem->func == switchdev_port_obj_del_deferred &&
+ nt == SWITCHDEV_PORT_OBJ_DEL)) {
+ if (switchdev_obj_eq((const void *)dfitem->data, obj)) {
+ found = true;
+ break;
+ }
+ }
+ }
+
+ spin_unlock_bh(&deferred_lock);
+
+ return found;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_act_is_deferred);
+
static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain);
static BLOCKING_NOTIFIER_HEAD(switchdev_blocking_notif_chain);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2cde375477e3..878415c43527 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
#ifdef CONFIG_TIPC_MEDIA_UDP
if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ rtnl_unlock();
+ NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
+ return -EINVAL;
+ }
+
err = tipc_udp_nl_bearer_add(b,
attrs[TIPC_NLA_BEARER_UDP_OPTS]);
if (err) {
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d0143823658d..0716eb5c8a31 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -82,10 +82,7 @@ struct tipc_stats {
* struct tipc_link - TIPC link data structure
* @addr: network address of link's peer node
* @name: link name character string
- * @media_addr: media address to use when sending messages over link
- * @timer: link timer
* @net: pointer to namespace struct
- * @refcnt: reference counter for permanent references (owner node & timer)
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
* @bearer_id: local bearer id used by link
@@ -94,31 +91,19 @@ struct tipc_stats {
* @state: current state of link FSM
* @peer_caps: bitmap describing capabilities of peer node
* @silent_intv_cnt: # of timer intervals without any reception from peer
- * @proto_msg: template for control messages generated by link
- * @pmsg: convenience pointer to "proto_msg" field
* @priority: current link priority
* @net_plane: current link network plane ('A' through 'H')
* @mon_state: cookie with information needed by link monitor
- * @backlog_limit: backlog queue congestion thresholds (indexed by importance)
- * @exp_msg_count: # of tunnelled messages expected during link changeover
- * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset
* @mtu: current maximum packet size for this link
* @advertised_mtu: advertised own mtu when link is being established
- * @transmitq: queue for sent, non-acked messages
* @backlogq: queue for messages waiting to be sent
- * @snt_nxt: next sequence number to use for outbound messages
* @ackers: # of peers that needs to ack each packet before it can be released
* @acked: # last packet acked by a certain peer. Used for broadcast.
* @rcv_nxt: next sequence number to expect for inbound messages
- * @deferred_queue: deferred queue saved OOS b'cast message received from node
- * @unacked_window: # of inbound messages rx'd without ack'ing back to peer
* @inputq: buffer queue for messages to be delivered upwards
* @namedq: buffer queue for name table messages to be delivered upwards
- * @next_out: ptr to first unsent outbound message in queue
* @wakeupq: linked list of wakeup msgs waiting for link congestion to abate
- * @long_msg_seq_no: next identifier to use for outbound fragmented messages
* @reasm_buf: head of partially reassembled inbound message fragments
- * @bc_rcvr: marks that this is a broadcast receiver link
* @stats: collects statistics regarding link activity
* @session: session to be used by link
* @snd_nxt_state: next send seq number
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index c763008a8adb..079aebb16ed8 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -168,7 +168,7 @@ static struct sk_buff *tipc_get_err_tlv(char *str)
int str_len = strlen(str) + 1;
struct sk_buff *buf;
- buf = tipc_tlv_alloc(TLV_SPACE(str_len));
+ buf = tipc_tlv_alloc(str_len);
if (buf)
tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 1c2c6800949d..b4674f03d71a 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -1003,7 +1003,7 @@ static u16 tls_user_config(struct tls_context *ctx, bool tx)
return 0;
}
-static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+static int tls_get_info(struct sock *sk, struct sk_buff *skb)
{
u16 version, cipher_type;
struct tls_context *ctx;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index e37b4d2e2acd..211f57164cb6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -52,6 +52,7 @@ struct tls_decrypt_arg {
struct_group(inargs,
bool zc;
bool async;
+ bool async_done;
u8 tail;
);
@@ -63,6 +64,7 @@ struct tls_decrypt_ctx {
u8 iv[TLS_MAX_IV_SIZE];
u8 aad[TLS_MAX_AAD_SIZE];
u8 tail;
+ bool free_sgout;
struct scatterlist sg[];
};
@@ -187,7 +189,6 @@ static void tls_decrypt_done(void *data, int err)
struct aead_request *aead_req = data;
struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
struct scatterlist *sgout = aead_req->dst;
- struct scatterlist *sgin = aead_req->src;
struct tls_sw_context_rx *ctx;
struct tls_decrypt_ctx *dctx;
struct tls_context *tls_ctx;
@@ -196,6 +197,17 @@ static void tls_decrypt_done(void *data, int err)
struct sock *sk;
int aead_size;
+ /* If requests get too backlogged crypto API returns -EBUSY and calls
+ * ->complete(-EINPROGRESS) immediately followed by ->complete(0)
+ * to make waiting for backlog to flush with crypto_wait_req() easier.
+ * First wait converts -EBUSY -> -EINPROGRESS, and the second one
+ * -EINPROGRESS -> 0.
+ * We have a single struct crypto_async_request per direction, this
+ * scheme doesn't help us, so just ignore the first ->complete().
+ */
+ if (err == -EINPROGRESS)
+ return;
+
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);
aead_size = ALIGN(aead_size, __alignof__(*dctx));
dctx = (void *)((u8 *)aead_req + aead_size);
@@ -213,7 +225,7 @@ static void tls_decrypt_done(void *data, int err)
}
/* Free the destination pages if skb was not decrypted inplace */
- if (sgout != sgin) {
+ if (dctx->free_sgout) {
/* Skip the first S/G entry as it points to AAD */
for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
if (!sg)
@@ -224,10 +236,17 @@ static void tls_decrypt_done(void *data, int err)
kfree(aead_req);
- spin_lock_bh(&ctx->decrypt_compl_lock);
- if (!atomic_dec_return(&ctx->decrypt_pending))
+ if (atomic_dec_and_test(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
+}
+
+static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->decrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->decrypt_pending);
+
+ return ctx->async_wait.err;
}
static int tls_do_decryption(struct sock *sk,
@@ -253,20 +272,33 @@ static int tls_do_decryption(struct sock *sk,
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
tls_decrypt_done, aead_req);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->decrypt_pending) < 1);
atomic_inc(&ctx->decrypt_pending);
} else {
+ DECLARE_CRYPTO_WAIT(wait);
+
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
- crypto_req_done, &ctx->async_wait);
+ crypto_req_done, &wait);
+ ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EINPROGRESS || ret == -EBUSY)
+ ret = crypto_wait_req(ret, &wait);
+ return ret;
}
ret = crypto_aead_decrypt(aead_req);
- if (ret == -EINPROGRESS) {
- if (darg->async)
- return 0;
+ if (ret == -EINPROGRESS)
+ return 0;
- ret = crypto_wait_req(ret, &ctx->async_wait);
+ if (ret == -EBUSY) {
+ ret = tls_decrypt_async_wait(ctx);
+ darg->async_done = true;
+ /* all completions have run, we're not doing async anymore */
+ darg->async = false;
+ return ret;
}
+
+ atomic_dec(&ctx->decrypt_pending);
darg->async = false;
return ret;
@@ -439,9 +471,10 @@ static void tls_encrypt_done(void *data, int err)
struct tls_rec *rec = data;
struct scatterlist *sge;
struct sk_msg *msg_en;
- bool ready = false;
struct sock *sk;
- int pending;
+
+ if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */
+ return;
msg_en = &rec->msg_encrypted;
@@ -476,23 +509,25 @@ static void tls_encrypt_done(void *data, int err)
/* If received record is at head of tx_list, schedule tx */
first_rec = list_first_entry(&ctx->tx_list,
struct tls_rec, list);
- if (rec == first_rec)
- ready = true;
+ if (rec == first_rec) {
+ /* Schedule the transmission */
+ if (!test_and_set_bit(BIT_TX_SCHEDULED,
+ &ctx->tx_bitmask))
+ schedule_delayed_work(&ctx->tx_work.work, 1);
+ }
}
- spin_lock_bh(&ctx->encrypt_compl_lock);
- pending = atomic_dec_return(&ctx->encrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (atomic_dec_and_test(&ctx->encrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
+}
- if (!ready)
- return;
+static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->encrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->encrypt_pending);
- /* Schedule the transmission */
- if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
- schedule_delayed_work(&ctx->tx_work.work, 1);
+ return ctx->async_wait.err;
}
static int tls_do_encryption(struct sock *sk,
@@ -541,9 +576,14 @@ static int tls_do_encryption(struct sock *sk,
/* Add the record in tx_list */
list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+ DEBUG_NET_WARN_ON_ONCE(atomic_read(&ctx->encrypt_pending) < 1);
atomic_inc(&ctx->encrypt_pending);
rc = crypto_aead_encrypt(aead_req);
+ if (rc == -EBUSY) {
+ rc = tls_encrypt_async_wait(ctx);
+ rc = rc ?: -EINPROGRESS;
+ }
if (!rc || rc != -EINPROGRESS) {
atomic_dec(&ctx->encrypt_pending);
sge->offset -= prot->prepend_size;
@@ -984,7 +1024,6 @@ static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
int num_zc = 0;
int orig_size;
int ret = 0;
- int pending;
if (!eor && (msg->msg_flags & MSG_EOR))
return -EINVAL;
@@ -1052,7 +1091,11 @@ alloc_encrypted:
if (ret < 0)
goto send_end;
tls_ctx->pending_open_record_frags = true;
- if (full_record || eor || sk_msg_full(msg_pl))
+
+ if (sk_msg_full(msg_pl))
+ full_record = true;
+
+ if (full_record || eor)
goto copied;
continue;
}
@@ -1159,24 +1202,12 @@ trim_sgl:
if (!num_async) {
goto send_end;
} else if (num_zc) {
- /* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no
- * pending encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
+ int err;
- if (ctx->async_wait.err) {
- ret = ctx->async_wait.err;
+ /* Wait for pending encryptions to get completed */
+ err = tls_encrypt_async_wait(ctx);
+ if (err) {
+ ret = err;
copied = 0;
}
}
@@ -1225,7 +1256,6 @@ void tls_sw_splice_eof(struct socket *sock)
ssize_t copied = 0;
bool retrying = false;
int ret = 0;
- int pending;
if (!ctx->open_rec)
return;
@@ -1260,22 +1290,7 @@ retry:
}
/* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no pending
- * encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
-
- if (ctx->async_wait.err)
+ if (tls_encrypt_async_wait(ctx))
goto unlock;
/* Transmit if any encryptions have completed */
@@ -1577,12 +1592,16 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
} else if (out_sg) {
memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
}
+ dctx->free_sgout = !!pages;
/* Prepare and submit AEAD request */
err = tls_do_decryption(sk, sgin, sgout, dctx->iv,
data_len + prot->tail_size, aead_req, darg);
- if (err)
+ if (err) {
+ if (darg->async_done)
+ goto exit_free_skb;
goto exit_free_pages;
+ }
darg->skb = clear_skb ?: tls_strp_msg(ctx);
clear_skb = NULL;
@@ -1594,6 +1613,9 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
return err;
}
+ if (unlikely(darg->async_done))
+ return 0;
+
if (prot->tail_size)
darg->tail = dctx->tail;
@@ -1765,7 +1787,8 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
u8 *control,
size_t skip,
size_t len,
- bool is_peek)
+ bool is_peek,
+ bool *more)
{
struct sk_buff *skb = skb_peek(&ctx->rx_list);
struct tls_msg *tlm;
@@ -1778,7 +1801,7 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
if (skip < rxm->full_len)
break;
@@ -1796,12 +1819,12 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
err = tls_record_content_type(msg, tlm, control);
if (err <= 0)
- goto out;
+ goto more;
err = skb_copy_datagram_msg(skb, rxm->offset + skip,
msg, chunk);
if (err < 0)
- goto out;
+ goto more;
len = len - chunk;
copied = copied + chunk;
@@ -1837,6 +1860,10 @@ static int process_rx_list(struct tls_sw_context_rx *ctx,
out:
return copied ? : err;
+more:
+ if (more)
+ *more = true;
+ goto out;
}
static bool
@@ -1936,10 +1963,12 @@ int tls_sw_recvmsg(struct sock *sk,
struct strp_msg *rxm;
struct tls_msg *tlm;
ssize_t copied = 0;
+ ssize_t peeked = 0;
bool async = false;
int target, err;
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
+ bool rx_more = false;
bool released = true;
bool bpf_strp_enabled;
bool zc_capable;
@@ -1959,12 +1988,12 @@ int tls_sw_recvmsg(struct sock *sk,
goto end;
/* Process pending decrypted records. It must be non-zero-copy */
- err = process_rx_list(ctx, msg, &control, 0, len, is_peek);
+ err = process_rx_list(ctx, msg, &control, 0, len, is_peek, &rx_more);
if (err < 0)
goto end;
copied = err;
- if (len <= copied)
+ if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA) || rx_more)
goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
@@ -2057,6 +2086,8 @@ put_on_rx_list:
decrypted += chunk;
len -= chunk;
__skb_queue_tail(&ctx->rx_list, skb);
+ if (unlikely(control != TLS_RECORD_TYPE_DATA))
+ break;
continue;
}
@@ -2080,8 +2111,10 @@ put_on_rx_list:
if (err < 0)
goto put_on_rx_list_err;
- if (is_peek)
+ if (is_peek) {
+ peeked += chunk;
goto put_on_rx_list;
+ }
if (partially_consumed) {
rxm->offset += chunk;
@@ -2105,16 +2138,10 @@ put_on_rx_list:
recv_end:
if (async) {
- int ret, pending;
+ int ret;
/* Wait for all previously submitted records to be decrypted */
- spin_lock_bh(&ctx->decrypt_compl_lock);
- reinit_completion(&ctx->async_wait.completion);
- pending = atomic_read(&ctx->decrypt_pending);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
- ret = 0;
- if (pending)
- ret = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ ret = tls_decrypt_async_wait(ctx);
__skb_queue_purge(&ctx->async_hold);
if (ret) {
@@ -2126,12 +2153,11 @@ recv_end:
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
- err = process_rx_list(ctx, msg, &control, copied,
- decrypted, is_peek);
+ err = process_rx_list(ctx, msg, &control, copied + peeked,
+ decrypted - peeked, is_peek, NULL);
else
err = process_rx_list(ctx, msg, &control, 0,
- async_copy_bytes, is_peek);
- decrypted += max(err, 0);
+ async_copy_bytes, is_peek, NULL);
}
copied += decrypted;
@@ -2431,16 +2457,9 @@ void tls_sw_release_resources_tx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec, *tmp;
- int pending;
/* Wait for any pending async encryptions to complete */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ tls_encrypt_async_wait(ctx);
tls_tx_records(sk, -1);
@@ -2603,7 +2622,7 @@ static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct soc
}
crypto_init_wait(&sw_ctx_tx->async_wait);
- spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+ atomic_set(&sw_ctx_tx->encrypt_pending, 1);
INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
sw_ctx_tx->tx_work.sk = sk;
@@ -2624,7 +2643,7 @@ static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
}
crypto_init_wait(&sw_ctx_rx->async_wait);
- spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+ atomic_set(&sw_ctx_rx->decrypt_pending, 1);
init_waitqueue_head(&sw_ctx_rx->wq);
skb_queue_head_init(&sw_ctx_rx->rx_list);
skb_queue_head_init(&sw_ctx_rx->async_hold);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ac1f2bc18fc9..0748e7ea5210 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -782,19 +782,6 @@ static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
int);
-static int unix_set_peek_off(struct sock *sk, int val)
-{
- struct unix_sock *u = unix_sk(sk);
-
- if (mutex_lock_interruptible(&u->iolock))
- return -EINTR;
-
- WRITE_ONCE(sk->sk_peek_off, val);
- mutex_unlock(&u->iolock);
-
- return 0;
-}
-
#ifdef CONFIG_PROC_FS
static int unix_count_nr_fds(struct sock *sk)
{
@@ -862,7 +849,7 @@ static const struct proto_ops unix_stream_ops = {
.read_skb = unix_stream_read_skb,
.mmap = sock_no_mmap,
.splice_read = unix_stream_splice_read,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -886,7 +873,7 @@ static const struct proto_ops unix_dgram_ops = {
.read_skb = unix_read_skb,
.recvmsg = unix_dgram_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -909,7 +896,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.sendmsg = unix_seqpacket_sendmsg,
.recvmsg = unix_seqpacket_recvmsg,
.mmap = sock_no_mmap,
- .set_peek_off = unix_set_peek_off,
+ .set_peek_off = sk_set_peek_off,
.show_fdinfo = unix_show_fdinfo,
};
@@ -1344,13 +1331,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
unix_state_lock(sk1);
return;
}
- if (sk1 < sk2) {
- unix_state_lock(sk1);
- unix_state_lock_nested(sk2);
- } else {
- unix_state_lock(sk2);
- unix_state_lock_nested(sk1);
- }
+ if (sk1 > sk2)
+ swap(sk1, sk2);
+
+ unix_state_lock(sk1);
+ unix_state_lock_nested(sk2, U_LOCK_SECOND);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1576,7 @@ restart:
goto out_unlock;
}
- unix_state_lock_nested(sk);
+ unix_state_lock_nested(sk, U_LOCK_SECOND);
if (sk->sk_state != st) {
unix_state_unlock(sk);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..be19827eca36 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
* queue lock. With the other's queue locked it's
* OK to lock the state.
*/
- unix_state_lock_nested(req);
+ unix_state_lock_nested(req, U_LOCK_DIAG);
peer = unix_sk(req)->peer;
buf[i++] = (peer ? sock_i_ino(peer) : 0);
unix_state_unlock(req);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2405f0f9af31..2a81880dac7b 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -284,9 +284,17 @@ void unix_gc(void)
* which are creating the cycle(s).
*/
skb_queue_head_init(&hitlist);
- list_for_each_entry(u, &gc_candidates, link)
+ list_for_each_entry(u, &gc_candidates, link) {
scan_children(&u->sk, inc_inflight, &hitlist);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ if (u->oob_skb) {
+ kfree_skb(u->oob_skb);
+ u->oob_skb = NULL;
+ }
+#endif
+ }
+
/* not_cycle_list contains those sockets which do not make up a
* cycle. Restore these to the inflight list.
*/
diff --git a/net/unix/scm.c b/net/unix/scm.c
index 6ff628f2349f..822ce0d0d791 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -35,10 +35,8 @@ struct sock *unix_get_socket(struct file *filp)
/* PF_UNIX ? */
if (s && ops && ops->family == PF_UNIX)
u_sock = s;
- } else {
- /* Could be an io_uring instance */
- u_sock = io_uring_get_socket(filp);
}
+
return u_sock;
}
EXPORT_SYMBOL(unix_get_socket);
diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c
index 7ea7c3a0d0d0..bd84785bf8d6 100644
--- a/net/unix/unix_bpf.c
+++ b/net/unix/unix_bpf.c
@@ -161,15 +161,30 @@ int unix_stream_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool r
{
struct sock *sk_pair;
+ /* Restore does not decrement the sk_pair reference yet because we must
+ * keep the a reference to the socket until after an RCU grace period
+ * and any pending sends have completed.
+ */
if (restore) {
sk->sk_write_space = psock->saved_write_space;
sock_replace_proto(sk, psock->sk_proto);
return 0;
}
- sk_pair = unix_peer(sk);
- sock_hold(sk_pair);
- psock->sk_pair = sk_pair;
+ /* psock_update_sk_prot can be called multiple times if psock is
+ * added to multiple maps and/or slots in the same map. There is
+ * also an edge case where replacing a psock with itself can trigger
+ * an extra psock_update_sk_prot during the insert process. So it
+ * must be safe to do multiple calls. Here we need to ensure we don't
+ * increment the refcnt through sock_hold many times. There will only
+ * be a single matching destroy operation.
+ */
+ if (!psock->sk_pair) {
+ sk_pair = unix_peer(sk);
+ sock_hold(sk_pair);
+ psock->sk_pair = sk_pair;
+ }
+
unix_stream_bpf_check_needs_rebuild(psock->sk_proto);
sock_replace_proto(sk, &unix_stream_bpf_prot);
return 0;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 816725af281f..54ba7316f808 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -2264,8 +2264,13 @@ static int vsock_set_rcvlowat(struct sock *sk, int val)
transport = vsk->transport;
- if (transport && transport->set_rcvlowat)
- return transport->set_rcvlowat(vsk, val);
+ if (transport && transport->notify_set_rcvlowat) {
+ int err;
+
+ err = transport->notify_set_rcvlowat(vsk, val);
+ if (err)
+ return err;
+ }
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
return 0;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 7cb1a9d2cdb4..e2157e387217 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -816,7 +816,7 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
}
static
-int hvs_set_rcvlowat(struct vsock_sock *vsk, int val)
+int hvs_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
{
return -EOPNOTSUPP;
}
@@ -856,7 +856,7 @@ static struct vsock_transport hvs_transport = {
.notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
- .set_rcvlowat = hvs_set_rcvlowat
+ .notify_set_rcvlowat = hvs_notify_set_rcvlowat
};
static bool hvs_check_transport(struct vsock_sock *vsk)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index af5bab1acee1..1748268e0694 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -153,10 +153,10 @@ virtio_transport_send_pkt_work(struct work_struct *work)
* 'virt_to_phys()' later to fill the buffer descriptor.
* We don't touch memory at "virtual" address of this page.
*/
- va = page_to_virt(skb_frag->bv_page);
+ va = page_to_virt(skb_frag_page(skb_frag));
sg_init_one(sgs[out_sg],
- va + skb_frag->bv_offset,
- skb_frag->bv_len);
+ va + skb_frag_off(skb_frag),
+ skb_frag_size(skb_frag));
out_sg++;
}
}
@@ -537,6 +537,7 @@ static struct virtio_transport virtio_transport = {
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
.notify_buffer_size = virtio_transport_notify_buffer_size,
+ .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
.read_skb = virtio_transport_read_skb,
},
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 6df246b53260..16ff976a86e3 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -557,6 +557,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
struct virtio_vsock_sock *vvs = vsk->trans;
size_t bytes, total = 0;
struct sk_buff *skb;
+ u32 fwd_cnt_delta;
+ bool low_rx_bytes;
int err = -EFAULT;
u32 free_space;
@@ -600,7 +602,10 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
}
}
- free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt);
+ fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
+ free_space = vvs->buf_alloc - fwd_cnt_delta;
+ low_rx_bytes = (vvs->rx_bytes <
+ sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));
spin_unlock_bh(&vvs->rx_lock);
@@ -610,9 +615,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
* too high causes extra messages. Too low causes transmitter
* stalls. As stalls are in theory more expensive than extra
* messages, we set the limit to a high value. TODO: experiment
- * with different values.
+ * with different values. Also send credit update message when
+ * number of bytes in rx queue is not enough to wake up reader.
*/
- if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
+ if (fwd_cnt_delta &&
+ (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE || low_rx_bytes))
virtio_transport_send_credit_update(vsk);
return total;
@@ -1683,6 +1690,36 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
}
EXPORT_SYMBOL_GPL(virtio_transport_read_skb);
+int virtio_transport_notify_set_rcvlowat(struct vsock_sock *vsk, int val)
+{
+ struct virtio_vsock_sock *vvs = vsk->trans;
+ bool send_update;
+
+ spin_lock_bh(&vvs->rx_lock);
+
+ /* If number of available bytes is less than new SO_RCVLOWAT value,
+ * kick sender to send more data, because sender may sleep in its
+ * 'send()' syscall waiting for enough space at our side. Also
+ * don't send credit update when peer already knows actual value -
+ * such transmission will be useless.
+ */
+ send_update = (vvs->rx_bytes < val) &&
+ (vvs->fwd_cnt != vvs->last_fwd_cnt);
+
+ spin_unlock_bh(&vvs->rx_lock);
+
+ if (send_update) {
+ int err;
+
+ err = virtio_transport_send_credit_update(vsk);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_notify_set_rcvlowat);
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
MODULE_DESCRIPTION("common code for virtio vsock");
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 048640167411..6dea6119f5b2 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -96,6 +96,7 @@ static struct virtio_transport loopback_transport = {
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
.notify_buffer_size = virtio_transport_notify_buffer_size,
+ .notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
.read_skb = virtio_transport_read_skb,
},
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index a9ac85e09af3..10345388ad13 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -206,7 +206,6 @@ config CFG80211_KUNIT_TEST
depends on KUNIT
depends on CFG80211
default KUNIT_ALL_TESTS
- depends on !KERNEL_6_2
help
Enable this option to test cfg80211 functions with kunit.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 089c841528c8..72074fd36df4 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -25,7 +25,7 @@ ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),)
cfg80211-y += extra-certs.o
endif
-$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
+$(obj)/shipped-certs.c: $(sort $(wildcard $(srctree)/$(src)/certs/*.hex))
@$(kecho) " GEN $@"
$(Q)(echo '#include "reg.h"'; \
echo 'const u8 shipped_regdb_certs[] = {'; \
@@ -35,7 +35,7 @@ $(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.hex)
) > $@
$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR) \
- $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509)
+ $(sort $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR)/*.x509))
@$(kecho) " GEN $@"
$(Q)(set -e; \
allf=""; \
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 2d21e423abdb..ceb9174c5c3d 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -141,7 +141,7 @@ static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef)
return true;
}
-static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
+int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
{
int mhz;
@@ -190,6 +190,7 @@ static int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width)
}
return mhz;
}
+EXPORT_SYMBOL(nl80211_chan_width_to_mhz);
static int cfg80211_chandef_get_width(const struct cfg80211_chan_def *c)
{
@@ -514,9 +515,83 @@ static u32 cfg80211_get_end_freq(u32 center_freq,
return end_freq;
}
+static bool
+cfg80211_dfs_permissive_check_wdev(struct cfg80211_registered_device *rdev,
+ enum nl80211_iftype iftype,
+ struct wireless_dev *wdev,
+ struct ieee80211_channel *chan)
+{
+ unsigned int link_id;
+
+ for_each_valid_link(wdev, link_id) {
+ struct ieee80211_channel *other_chan = NULL;
+ struct cfg80211_chan_def chandef = {};
+ int ret;
+
+ /* In order to avoid daisy chaining only allow BSS STA */
+ if (wdev->iftype != NL80211_IFTYPE_STATION ||
+ !wdev->links[link_id].client.current_bss)
+ continue;
+
+ other_chan =
+ wdev->links[link_id].client.current_bss->pub.channel;
+
+ if (!other_chan)
+ continue;
+
+ if (chan == other_chan)
+ return true;
+
+ /* continue if we can't get the channel */
+ ret = rdev_get_channel(rdev, wdev, link_id, &chandef);
+ if (ret)
+ continue;
+
+ if (cfg80211_is_sub_chan(&chandef, chan, false))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Check if P2P GO is allowed to operate on a DFS channel
+ */
+static bool cfg80211_dfs_permissive_chan(struct wiphy *wiphy,
+ enum nl80211_iftype iftype,
+ struct ieee80211_channel *chan)
+{
+ struct wireless_dev *wdev;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+
+ lockdep_assert_held(&rdev->wiphy.mtx);
+
+ if (!wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_DFS_CONCURRENT) ||
+ !(chan->flags & IEEE80211_CHAN_DFS_CONCURRENT))
+ return false;
+
+ /* only valid for P2P GO */
+ if (iftype != NL80211_IFTYPE_P2P_GO)
+ return false;
+
+ /*
+ * Allow only if there's a concurrent BSS
+ */
+ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
+ bool ret = cfg80211_dfs_permissive_check_wdev(rdev, iftype,
+ wdev, chan);
+ if (ret)
+ return ret;
+ }
+
+ return false;
+}
+
static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
u32 center_freq,
- u32 bandwidth)
+ u32 bandwidth,
+ enum nl80211_iftype iftype)
{
struct ieee80211_channel *c;
u32 freq, start_freq, end_freq;
@@ -529,9 +604,11 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
if (!c)
return -EINVAL;
- if (c->flags & IEEE80211_CHAN_RADAR)
+ if (c->flags & IEEE80211_CHAN_RADAR &&
+ !cfg80211_dfs_permissive_chan(wiphy, iftype, c))
return 1;
}
+
return 0;
}
@@ -557,7 +634,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
ret = cfg80211_get_chans_dfs_required(wiphy,
ieee80211_chandef_to_khz(chandef),
- width);
+ width, iftype);
if (ret < 0)
return ret;
else if (ret > 0)
@@ -568,7 +645,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
ret = cfg80211_get_chans_dfs_required(wiphy,
MHZ_TO_KHZ(chandef->center_freq2),
- width);
+ width, iftype);
if (ret < 0)
return ret;
else if (ret > 0)
@@ -1336,15 +1413,19 @@ static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy,
bool check_no_ir)
{
bool res;
- u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
- IEEE80211_CHAN_RADAR;
+ u32 prohibited_flags = IEEE80211_CHAN_DISABLED;
+ int dfs_required;
trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype, check_no_ir);
if (check_no_ir)
prohibited_flags |= IEEE80211_CHAN_NO_IR;
- if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
+ dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype);
+ if (dfs_required != 0)
+ prohibited_flags |= IEEE80211_CHAN_RADAR;
+
+ if (dfs_required > 0 &&
cfg80211_chandef_dfs_available(wiphy, chandef)) {
/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
prohibited_flags = IEEE80211_CHAN_DISABLED;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 409d74c57ca0..3fb1b637352a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
unsigned long delay)
{
if (!delay) {
+ del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
return;
}
diff --git a/net/wireless/core.h b/net/wireless/core.h
index cb61d33d4f1e..13657a85cf61 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -3,7 +3,7 @@
* Wireless configuration interface internals.
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#ifndef __NET_WIRELESS_CORE_H
#define __NET_WIRELESS_CORE_H
@@ -458,6 +458,9 @@ int cfg80211_scan(struct cfg80211_registered_device *rdev);
extern struct work_struct cfg80211_disconnect_work;
+#define NL80211_BSS_USE_FOR_ALL (NL80211_BSS_USE_FOR_NORMAL | \
+ NL80211_BSS_USE_FOR_MLD_LINK)
+
void cfg80211_set_dfs_state(struct wiphy *wiphy,
const struct cfg80211_chan_def *chandef,
enum nl80211_dfs_state dfs_state);
@@ -546,4 +549,15 @@ int cfg80211_remove_virtual_intf(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev);
void cfg80211_wdev_release_link_bsses(struct wireless_dev *wdev, u16 link_mask);
+#if IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST)
+#define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym) EXPORT_SYMBOL_IF_KUNIT(sym)
+#define VISIBLE_IF_CFG80211_KUNIT
+size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
+ const u8 *subie, size_t subie_len,
+ u8 *new_ie, size_t new_ie_len);
+#else
+#define EXPORT_SYMBOL_IF_CFG80211_KUNIT(sym)
+#define VISIBLE_IF_CFG80211_KUNIT static
+#endif /* IS_ENABLED(CONFIG_CFG80211_KUNIT_TEST) */
+
#endif /* __NET_WIRELESS_CORE_H */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index bad9e4fd842f..f635a8b6ca2e 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -22,7 +22,7 @@
void cfg80211_rx_assoc_resp(struct net_device *dev,
- struct cfg80211_rx_assoc_resp_data *data)
+ const struct cfg80211_rx_assoc_resp_data *data)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1cbbb11ea503..bd54a928bab4 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -818,6 +818,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG },
[NL80211_ATTR_EMA_RNR_ELEMS] = { .type = NLA_NESTED },
[NL80211_ATTR_MLO_LINK_DISABLED] = { .type = NLA_FLAG },
+ [NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA] = { .type = NLA_FLAG },
+ [NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
+ [NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
};
/* policy for the key attributes */
@@ -1198,6 +1201,15 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
if ((chan->flags & IEEE80211_CHAN_NO_EHT) &&
nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_EHT))
goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_DFS_CONCURRENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_VLP_CLIENT))
+ goto nla_put_failure;
+ if ((chan->flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT) &&
+ nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHB_AFC_CLIENT))
+ goto nla_put_failure;
}
if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
@@ -4008,6 +4020,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
}
wiphy_unlock(&rdev->wiphy);
+ if_start = 0;
wp_idx++;
}
out:
@@ -4184,6 +4197,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
+ if (otype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
if (netif_running(dev))
return -EBUSY;
@@ -4855,7 +4870,7 @@ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy,
return ERR_PTR(n_entries);
if (n_entries > wiphy->max_acl_mac_addrs)
- return ERR_PTR(-ENOTSUPP);
+ return ERR_PTR(-EOPNOTSUPP);
acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL);
if (!acl)
@@ -9342,6 +9357,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
else
eth_broadcast_addr(request->bssid);
+ request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs);
request->wdev = wdev;
request->wiphy = &rdev->wiphy;
request->scan_start = jiffies;
@@ -10409,6 +10425,15 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
break;
}
+ if (nla_put_u32(msg, NL80211_BSS_USE_FOR, res->use_for))
+ goto nla_put_failure;
+
+ if (res->cannot_use_reasons &&
+ nla_put_u64_64bit(msg, NL80211_BSS_CANNOT_USE_REASONS,
+ res->cannot_use_reasons,
+ NL80211_BSS_PAD))
+ goto nla_put_failure;
+
nla_nest_end(msg, bss);
genlmsg_end(msg, hdr);
@@ -10426,15 +10451,27 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
struct cfg80211_registered_device *rdev;
struct cfg80211_internal_bss *scan;
struct wireless_dev *wdev;
+ struct nlattr **attrbuf;
int start = cb->args[2], idx = 0;
+ bool dump_include_use_data;
int err;
- err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
- if (err)
+ attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
+ if (!attrbuf)
+ return -ENOMEM;
+
+ err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
+ if (err) {
+ kfree(attrbuf);
return err;
+ }
/* nl80211_prepare_wdev_dump acquired it in the successful case */
__acquire(&rdev->wiphy.mtx);
+ dump_include_use_data =
+ attrbuf[NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA];
+ kfree(attrbuf);
+
spin_lock_bh(&rdev->bss_lock);
/*
@@ -10451,6 +10488,9 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
list_for_each_entry(scan, &rdev->bss_list, list) {
if (++idx <= start)
continue;
+ if (!dump_include_use_data &&
+ !(scan->pub.use_for & NL80211_BSS_USE_FOR_NORMAL))
+ continue;
if (nl80211_send_bss(skb, cb,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
rdev, wdev, scan) < 0) {
@@ -10902,12 +10942,13 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev,
const u8 *ssid, int ssid_len,
- struct nlattr **attrs)
+ struct nlattr **attrs,
+ int assoc_link_id, int link_id)
{
struct ieee80211_channel *chan;
struct cfg80211_bss *bss;
const u8 *bssid;
- u32 freq;
+ u32 freq, use_for = 0;
if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_WIPHY_FREQ])
return ERR_PTR(-EINVAL);
@@ -10922,10 +10963,16 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device
if (!chan)
return ERR_PTR(-EINVAL);
- bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid,
- ssid, ssid_len,
- IEEE80211_BSS_TYPE_ESS,
- IEEE80211_PRIVACY_ANY);
+ if (assoc_link_id >= 0)
+ use_for = NL80211_BSS_USE_FOR_MLD_LINK;
+ if (assoc_link_id == link_id)
+ use_for |= NL80211_BSS_USE_FOR_NORMAL;
+
+ bss = __cfg80211_get_bss(&rdev->wiphy, chan, bssid,
+ ssid, ssid_len,
+ IEEE80211_BSS_TYPE_ESS,
+ IEEE80211_PRIVACY_ANY,
+ use_for);
if (!bss)
return ERR_PTR(-ENOENT);
@@ -11104,7 +11151,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
goto free;
}
req.links[link_id].bss =
- nl80211_assoc_bss(rdev, ssid, ssid_len, attrs);
+ nl80211_assoc_bss(rdev, ssid, ssid_len, attrs,
+ req.link_id, link_id);
if (IS_ERR(req.links[link_id].bss)) {
err = PTR_ERR(req.links[link_id].bss);
req.links[link_id].bss = NULL;
@@ -11169,7 +11217,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
if (req.link_id >= 0)
return -EINVAL;
- req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs);
+ req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs,
+ -1, -1);
if (IS_ERR(req.bss))
return PTR_ERR(req.bss);
ap_addr = req.bss->bssid;
@@ -12178,16 +12227,18 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
return err;
}
-static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
+static int nl80211_set_pmksa(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
- int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev,
- struct cfg80211_pmksa *pmksa) = NULL;
struct net_device *dev = info->user_ptr[1];
struct cfg80211_pmksa pmksa;
+ bool ap_pmksa_caching_support = false;
memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
+ ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_AP_PMKSA_CACHING);
+
if (!info->attrs[NL80211_ATTR_PMKID])
return -EINVAL;
@@ -12196,16 +12247,15 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_MAC]) {
pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
} else if (info->attrs[NL80211_ATTR_SSID] &&
- info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
- (info->genlhdr->cmd == NL80211_CMD_DEL_PMKSA ||
- info->attrs[NL80211_ATTR_PMK])) {
+ info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
+ info->attrs[NL80211_ATTR_PMK]) {
pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
- pmksa.cache_id =
- nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
+ pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
} else {
return -EINVAL;
}
+
if (info->attrs[NL80211_ATTR_PMK]) {
pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
@@ -12217,32 +12267,71 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD])
pmksa.pmk_reauth_threshold =
- nla_get_u8(
- info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]);
+ nla_get_u8(info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]);
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
- !(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP &&
- wiphy_ext_feature_isset(&rdev->wiphy,
- NL80211_EXT_FEATURE_AP_PMKSA_CACHING)))
+ !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP ||
+ dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) &&
+ ap_pmksa_caching_support))
return -EOPNOTSUPP;
- switch (info->genlhdr->cmd) {
- case NL80211_CMD_SET_PMKSA:
- rdev_ops = rdev->ops->set_pmksa;
- break;
- case NL80211_CMD_DEL_PMKSA:
- rdev_ops = rdev->ops->del_pmksa;
- break;
- default:
- WARN_ON(1);
- break;
+ if (!rdev->ops->set_pmksa)
+ return -EOPNOTSUPP;
+
+ return rdev_set_pmksa(rdev, dev, &pmksa);
+}
+
+static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct cfg80211_pmksa pmksa;
+ bool sae_offload_support = false;
+ bool owe_offload_support = false;
+ bool ap_pmksa_caching_support = false;
+
+ memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
+
+ sae_offload_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_SAE_OFFLOAD);
+ owe_offload_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_OWE_OFFLOAD);
+ ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy,
+ NL80211_EXT_FEATURE_AP_PMKSA_CACHING);
+
+ if (info->attrs[NL80211_ATTR_PMKID])
+ pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
+
+ if (info->attrs[NL80211_ATTR_MAC]) {
+ pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
+ } else if (info->attrs[NL80211_ATTR_SSID]) {
+ /* SSID based pmksa flush suppported only for FILS,
+ * OWE/SAE OFFLOAD cases
+ */
+ if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
+ info->attrs[NL80211_ATTR_PMK]) {
+ pmksa.cache_id = nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
+ } else if (!sae_offload_support && !owe_offload_support) {
+ return -EINVAL;
+ }
+ pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
+ pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
+ } else {
+ return -EINVAL;
}
- if (!rdev_ops)
+ if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+ dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+ !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP ||
+ dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) &&
+ ap_pmksa_caching_support))
+ return -EOPNOTSUPP;
+
+ if (!rdev->ops->del_pmksa)
return -EOPNOTSUPP;
- return rdev_ops(&rdev->wiphy, dev, &pmksa);
+ return rdev_del_pmksa(rdev, dev, &pmksa);
}
static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
@@ -15846,7 +15935,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
if (tid_conf->mask & ~mask) {
NL_SET_ERR_MSG(extack, "unsupported TID configuration");
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
}
return 0;
@@ -16239,6 +16328,35 @@ static int nl80211_set_hw_timestamp(struct sk_buff *skb,
return rdev_set_hw_timestamp(rdev, dev, &hwts);
}
+static int
+nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg80211_ttlm_params params = {};
+ struct cfg80211_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+ if (wdev->iftype != NL80211_IFTYPE_STATION &&
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+ return -EOPNOTSUPP;
+
+ if (!wdev->connected)
+ return -ENOLINK;
+
+ if (!info->attrs[NL80211_ATTR_MLO_TTLM_DLINK] ||
+ !info->attrs[NL80211_ATTR_MLO_TTLM_ULINK])
+ return -EINVAL;
+
+ nla_memcpy(params.dlink,
+ info->attrs[NL80211_ATTR_MLO_TTLM_DLINK],
+ sizeof(params.dlink));
+ nla_memcpy(params.ulink,
+ info->attrs[NL80211_ATTR_MLO_TTLM_ULINK],
+ sizeof(params.ulink));
+
+ return rdev_set_ttlm(rdev, dev, &params);
+}
+
#define NL80211_FLAG_NEED_WIPHY 0x01
#define NL80211_FLAG_NEED_NETDEV 0x02
#define NL80211_FLAG_NEED_RTNL 0x04
@@ -16927,7 +17045,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
{
.cmd = NL80211_CMD_SET_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = nl80211_setdel_pmksa,
+ .doit = nl80211_set_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP |
NL80211_FLAG_CLEAR_SKB),
@@ -16935,7 +17053,7 @@ static const struct genl_small_ops nl80211_small_ops[] = {
{
.cmd = NL80211_CMD_DEL_PMKSA,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = nl80211_setdel_pmksa,
+ .doit = nl80211_del_pmksa,
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
@@ -17420,6 +17538,12 @@ static const struct genl_small_ops nl80211_small_ops[] = {
.flags = GENL_UNS_ADMIN_PERM,
.internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
},
+ {
+ .cmd = NL80211_CMD_SET_TID_TO_LINK_MAPPING,
+ .doit = nl80211_set_ttlm,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP),
+ },
};
static struct genl_family nl80211_fam __ro_after_init = {
@@ -17751,21 +17875,29 @@ nla_put_failure:
nlmsg_free(msg);
}
+struct nl80211_mlme_event {
+ enum nl80211_commands cmd;
+ const u8 *buf;
+ size_t buf_len;
+ int uapsd_queues;
+ const u8 *req_ies;
+ size_t req_ies_len;
+ bool reconnect;
+};
+
static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- const u8 *buf, size_t len,
- enum nl80211_commands cmd, gfp_t gfp,
- int uapsd_queues, const u8 *req_ies,
- size_t req_ies_len, bool reconnect)
+ const struct nl80211_mlme_event *event,
+ gfp_t gfp)
{
struct sk_buff *msg;
void *hdr;
- msg = nlmsg_new(100 + len + req_ies_len, gfp);
+ msg = nlmsg_new(100 + event->buf_len + event->req_ies_len, gfp);
if (!msg)
return;
- hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
+ hdr = nl80211hdr_put(msg, 0, 0, 0, event->cmd);
if (!hdr) {
nlmsg_free(msg);
return;
@@ -17773,22 +17905,24 @@ static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
- nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
- (req_ies &&
- nla_put(msg, NL80211_ATTR_REQ_IE, req_ies_len, req_ies)))
+ nla_put(msg, NL80211_ATTR_FRAME, event->buf_len, event->buf) ||
+ (event->req_ies &&
+ nla_put(msg, NL80211_ATTR_REQ_IE, event->req_ies_len,
+ event->req_ies)))
goto nla_put_failure;
- if (reconnect && nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED))
+ if (event->reconnect &&
+ nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED))
goto nla_put_failure;
- if (uapsd_queues >= 0) {
+ if (event->uapsd_queues >= 0) {
struct nlattr *nla_wmm =
nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME);
if (!nla_wmm)
goto nla_put_failure;
if (nla_put_u8(msg, NL80211_STA_WME_UAPSD_QUEUES,
- uapsd_queues))
+ event->uapsd_queues))
goto nla_put_failure;
nla_nest_end(msg, nla_wmm);
@@ -17808,37 +17942,60 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *buf,
size_t len, gfp_t gfp)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_AUTHENTICATE, gfp, -1, NULL, 0,
- false);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_AUTHENTICATE,
+ .buf = buf,
+ .buf_len = len,
+ .uapsd_queues = -1,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, gfp);
}
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- struct cfg80211_rx_assoc_resp_data *data)
+ const struct cfg80211_rx_assoc_resp_data *data)
{
- nl80211_send_mlme_event(rdev, netdev, data->buf, data->len,
- NL80211_CMD_ASSOCIATE, GFP_KERNEL,
- data->uapsd_queues,
- data->req_ies, data->req_ies_len, false);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_ASSOCIATE,
+ .buf = data->buf,
+ .buf_len = data->len,
+ .uapsd_queues = data->uapsd_queues,
+ .req_ies = data->req_ies,
+ .req_ies_len = data->req_ies_len,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, GFP_KERNEL);
}
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *buf,
size_t len, bool reconnect, gfp_t gfp)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_DEAUTHENTICATE, gfp, -1, NULL, 0,
- reconnect);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_DEAUTHENTICATE,
+ .buf = buf,
+ .buf_len = len,
+ .reconnect = reconnect,
+ .uapsd_queues = -1,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, gfp);
}
void nl80211_send_disassoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev, const u8 *buf,
size_t len, bool reconnect, gfp_t gfp)
{
- nl80211_send_mlme_event(rdev, netdev, buf, len,
- NL80211_CMD_DISASSOCIATE, gfp, -1, NULL, 0,
- reconnect);
+ struct nl80211_mlme_event event = {
+ .cmd = NL80211_CMD_DISASSOCIATE,
+ .buf = buf,
+ .buf_len = len,
+ .reconnect = reconnect,
+ .uapsd_queues = -1,
+ };
+
+ nl80211_send_mlme_event(rdev, netdev, &event, gfp);
}
void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
@@ -17848,28 +18005,31 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
const struct ieee80211_mgmt *mgmt = (void *)buf;
- u32 cmd;
+ struct nl80211_mlme_event event = {
+ .buf = buf,
+ .buf_len = len,
+ .uapsd_queues = -1,
+ };
if (WARN_ON(len < 2))
return;
if (ieee80211_is_deauth(mgmt->frame_control)) {
- cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
+ event.cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
} else if (ieee80211_is_disassoc(mgmt->frame_control)) {
- cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
+ event.cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
} else if (ieee80211_is_beacon(mgmt->frame_control)) {
if (wdev->unprot_beacon_reported &&
elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000)
return;
- cmd = NL80211_CMD_UNPROT_BEACON;
+ event.cmd = NL80211_CMD_UNPROT_BEACON;
wdev->unprot_beacon_reported = jiffies;
} else {
return;
}
trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
- nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1,
- NULL, 0, false);
+ nl80211_send_mlme_event(rdev, dev, &event, GFP_ATOMIC);
}
EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt);
@@ -19323,6 +19483,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
break;
}
+ cfg80211_schedule_channels_check(wdev);
cfg80211_sched_dfs_chan_update(rdev);
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
@@ -20080,6 +20241,20 @@ nla_put_failure:
}
EXPORT_SYMBOL(cfg80211_update_owe_info_event);
+void cfg80211_schedule_channels_check(struct wireless_dev *wdev)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+
+ /* Schedule channels check if NO_IR or DFS relaxations are supported */
+ if (wdev->iftype == NL80211_IFTYPE_STATION &&
+ (wiphy_ext_feature_isset(wiphy,
+ NL80211_EXT_FEATURE_DFS_CONCURRENT) ||
+ (IS_ENABLED(CONFIG_CFG80211_REG_RELAX_NO_IR) &&
+ wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR)))
+ reg_check_channels();
+}
+EXPORT_SYMBOL(cfg80211_schedule_channels_check);
+
/* initialisation/exit functions */
int __init nl80211_init(void)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index aad40240d9cb..6376f3a87f8a 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -60,7 +60,7 @@ void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
const u8 *buf, size_t len, gfp_t gfp);
void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
- struct cfg80211_rx_assoc_resp_data *data);
+ const struct cfg80211_rx_assoc_resp_data *data);
void nl80211_send_deauth(struct cfg80211_registered_device *rdev,
struct net_device *netdev,
const u8 *buf, size_t len,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 2214a90cf101..43897a5269b6 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -1046,7 +1046,7 @@ rdev_nan_change_conf(struct cfg80211_registered_device *rdev,
ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf,
changes);
else
- ret = -ENOTSUPP;
+ ret = -EOPNOTSUPP;
trace_rdev_return_int(&rdev->wiphy, ret);
return ret;
}
@@ -1200,7 +1200,7 @@ rdev_start_radar_detection(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef,
u32 cac_time_ms)
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_start_radar_detection(&rdev->wiphy, dev, chandef,
cac_time_ms);
@@ -1226,7 +1226,7 @@ rdev_set_mcast_rate(struct cfg80211_registered_device *rdev,
struct net_device *dev,
int mcast_rate[NUM_NL80211_BANDS])
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_mcast_rate(&rdev->wiphy, dev, mcast_rate);
if (rdev->ops->set_mcast_rate)
@@ -1239,7 +1239,7 @@ static inline int
rdev_set_coalesce(struct cfg80211_registered_device *rdev,
struct cfg80211_coalesce *coalesce)
{
- int ret = -ENOTSUPP;
+ int ret = -EOPNOTSUPP;
trace_rdev_set_coalesce(&rdev->wiphy, coalesce);
if (rdev->ops->set_coalesce)
@@ -1524,4 +1524,22 @@ rdev_set_hw_timestamp(struct cfg80211_registered_device *rdev,
return ret;
}
+
+static inline int
+rdev_set_ttlm(struct cfg80211_registered_device *rdev,
+ struct net_device *dev,
+ struct cfg80211_ttlm_params *params)
+{
+ struct wiphy *wiphy = &rdev->wiphy;
+ int ret;
+
+ if (!rdev->ops->set_ttlm)
+ return -EOPNOTSUPP;
+
+ trace_rdev_set_ttlm(wiphy, dev, params);
+ ret = rdev->ops->set_ttlm(wiphy, dev, params);
+ trace_rdev_return_int(wiphy, ret);
+
+ return ret;
+}
#endif /* __CFG80211_RDEV_OPS */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2ef4f6cc7a32..2741b626919a 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1593,6 +1593,12 @@ static u32 map_regdom_flags(u32 rd_flags)
channel_flags |= IEEE80211_CHAN_NO_320MHZ;
if (rd_flags & NL80211_RRF_NO_EHT)
channel_flags |= IEEE80211_CHAN_NO_EHT;
+ if (rd_flags & NL80211_RRF_DFS_CONCURRENT)
+ channel_flags |= IEEE80211_CHAN_DFS_CONCURRENT;
+ if (rd_flags & NL80211_RRF_NO_UHB_VLP_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_UHB_VLP_CLIENT;
+ if (rd_flags & NL80211_RRF_NO_UHB_AFC_CLIENT)
+ channel_flags |= IEEE80211_CHAN_NO_UHB_AFC_CLIENT;
if (rd_flags & NL80211_RRF_PSD)
channel_flags |= IEEE80211_CHAN_PSD;
return channel_flags;
@@ -2478,7 +2484,7 @@ static void reg_check_chans_work(struct work_struct *work)
rtnl_unlock();
}
-static void reg_check_channels(void)
+void reg_check_channels(void)
{
/*
* Give usermode a chance to do something nicer (move to another
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index a703e53c23ee..a02ef5609f52 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -181,6 +181,11 @@ bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2);
*/
int reg_reload_regdb(void);
+/**
+ * reg_check_channels - schedule regulatory enforcement
+ */
+void reg_check_channels(void);
+
extern const u8 shipped_regdb_certs[];
extern unsigned int shipped_regdb_certs_len;
extern const u8 extra_regdb_certs[];
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 9e5ccffd6868..389a52c29bfc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -20,6 +20,7 @@
#include <net/cfg80211.h>
#include <net/cfg80211-wext.h>
#include <net/iw_handler.h>
+#include <kunit/visibility.h>
#include "core.h"
#include "nl80211.h"
#include "wext-compat.h"
@@ -303,9 +304,10 @@ static size_t cfg80211_copy_elem_with_frags(const struct element *elem,
return *pos - buf;
}
-static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
- const u8 *subie, size_t subie_len,
- u8 *new_ie, size_t new_ie_len)
+VISIBLE_IF_CFG80211_KUNIT size_t
+cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
+ const u8 *subie, size_t subie_len,
+ u8 *new_ie, size_t new_ie_len)
{
const struct element *non_inherit_elem, *parent, *sub;
u8 *pos = new_ie;
@@ -413,6 +415,7 @@ static size_t cfg80211_gen_new_ie(const u8 *ie, size_t ielen,
return pos - new_ie;
}
+EXPORT_SYMBOL_IF_CFG80211_KUNIT(cfg80211_gen_new_ie);
static bool is_bss(struct cfg80211_bss *a, const u8 *bssid,
const u8 *ssid, size_t ssid_len)
@@ -1535,12 +1538,13 @@ static bool cfg80211_bss_type_match(u16 capability,
}
/* Returned bss is reference counted and must be cleaned up appropriately. */
-struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
- struct ieee80211_channel *channel,
- const u8 *bssid,
- const u8 *ssid, size_t ssid_len,
- enum ieee80211_bss_type bss_type,
- enum ieee80211_privacy privacy)
+struct cfg80211_bss *__cfg80211_get_bss(struct wiphy *wiphy,
+ struct ieee80211_channel *channel,
+ const u8 *bssid,
+ const u8 *ssid, size_t ssid_len,
+ enum ieee80211_bss_type bss_type,
+ enum ieee80211_privacy privacy,
+ u32 use_for)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
struct cfg80211_internal_bss *bss, *res = NULL;
@@ -1565,6 +1569,8 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
continue;
if (!is_valid_ether_addr(bss->pub.bssid))
continue;
+ if ((bss->pub.use_for & use_for) != use_for)
+ continue;
/* Don't get expired BSS structs */
if (time_after(now, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE) &&
!atomic_read(&bss->hold))
@@ -1582,7 +1588,7 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
trace_cfg80211_return_bss(&res->pub);
return &res->pub;
}
-EXPORT_SYMBOL(cfg80211_get_bss);
+EXPORT_SYMBOL(__cfg80211_get_bss);
static void rb_insert_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *bss)
@@ -1725,6 +1731,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
}
}
+static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *known,
+ const struct cfg80211_bss_ies *old)
+{
+ const struct ieee80211_ext_chansw_ie *ecsa;
+ const struct element *elem_new, *elem_old;
+ const struct cfg80211_bss_ies *new, *bcn;
+
+ if (known->pub.proberesp_ecsa_stuck)
+ return;
+
+ new = rcu_dereference_protected(known->pub.proberesp_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (WARN_ON(!new))
+ return;
+
+ if (new->tsf - old->tsf < USEC_PER_SEC)
+ return;
+
+ elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ old->data, old->len);
+ if (!elem_old)
+ return;
+
+ elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ new->data, new->len);
+ if (!elem_new)
+ return;
+
+ bcn = rcu_dereference_protected(known->pub.beacon_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (bcn &&
+ cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ bcn->data, bcn->len))
+ return;
+
+ if (elem_new->datalen != elem_old->datalen)
+ return;
+ if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie))
+ return;
+ if (memcmp(elem_new->data, elem_old->data, elem_new->datalen))
+ return;
+
+ ecsa = (void *)elem_new->data;
+
+ if (!ecsa->mode)
+ return;
+
+ if (ecsa->new_ch_num !=
+ ieee80211_frequency_to_channel(known->pub.channel->center_freq))
+ return;
+
+ known->pub.proberesp_ecsa_stuck = 1;
+}
+
static bool
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *known,
@@ -1744,9 +1805,13 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
/* Override possible earlier Beacon frame IEs */
rcu_assign_pointer(known->pub.ies,
new->pub.proberesp_ies);
- if (old)
+ if (old) {
+ cfg80211_check_stuck_ecsa(rdev, known, old);
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
- } else if (rcu_access_pointer(new->pub.beacon_ies)) {
+ }
+ }
+
+ if (rcu_access_pointer(new->pub.beacon_ies)) {
const struct cfg80211_bss_ies *old;
if (known->pub.hidden_beacon_bss &&
@@ -1800,6 +1865,8 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
ether_addr_copy(known->parent_bssid, new->parent_bssid);
known->pub.max_bssid_indicator = new->pub.max_bssid_indicator;
known->pub.bssid_index = new->pub.bssid_index;
+ known->pub.use_for &= new->pub.use_for;
+ known->pub.cannot_use_reasons = new->pub.cannot_use_reasons;
return true;
}
@@ -1811,15 +1878,15 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
bool signal_valid, unsigned long ts)
{
struct cfg80211_internal_bss *found = NULL;
+ struct cfg80211_bss_ies *ies;
if (WARN_ON(!tmp->pub.channel))
- return NULL;
+ goto free_ies;
tmp->ts = ts;
- if (WARN_ON(!rcu_access_pointer(tmp->pub.ies))) {
- return NULL;
- }
+ if (WARN_ON(!rcu_access_pointer(tmp->pub.ies)))
+ goto free_ies;
found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR);
@@ -1829,7 +1896,6 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
} else {
struct cfg80211_internal_bss *new;
struct cfg80211_internal_bss *hidden;
- struct cfg80211_bss_ies *ies;
/*
* create a copy -- the "res" variable that is passed in
@@ -1838,15 +1904,8 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
*/
new = kzalloc(sizeof(*new) + rdev->wiphy.bss_priv_size,
GFP_ATOMIC);
- if (!new) {
- ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
- if (ies)
- kfree_rcu(ies, rcu_head);
- ies = (void *)rcu_dereference(tmp->pub.proberesp_ies);
- if (ies)
- kfree_rcu(ies, rcu_head);
- return NULL;
- }
+ if (!new)
+ goto free_ies;
memcpy(new, tmp, sizeof(*new));
new->refcount = 1;
INIT_LIST_HEAD(&new->hidden_list);
@@ -1864,8 +1923,12 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
list_add(&new->hidden_list,
&hidden->hidden_list);
hidden->refcount++;
+
+ ies = (void *)rcu_access_pointer(new->pub.beacon_ies);
rcu_assign_pointer(new->pub.beacon_ies,
hidden->pub.beacon_ies);
+ if (ies)
+ kfree_rcu(ies, rcu_head);
}
} else {
/*
@@ -1902,6 +1965,16 @@ __cfg80211_bss_update(struct cfg80211_registered_device *rdev,
bss_ref_get(rdev, found);
return found;
+
+free_ies:
+ ies = (void *)rcu_dereference(tmp->pub.beacon_ies);
+ if (ies)
+ kfree_rcu(ies, rcu_head);
+ ies = (void *)rcu_dereference(tmp->pub.proberesp_ies);
+ if (ies)
+ kfree_rcu(ies, rcu_head);
+
+ return NULL;
}
struct cfg80211_internal_bss *
@@ -2044,6 +2117,9 @@ struct cfg80211_inform_single_bss_data {
struct cfg80211_bss *source_bss;
u8 max_bssid_indicator;
u8 bssid_index;
+
+ u8 use_for;
+ u64 cannot_use_reasons;
};
/* Returned bss is reference counted and must be cleaned up appropriately. */
@@ -2089,6 +2165,8 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy,
tmp.ts_boottime = drv_data->boottime_ns;
tmp.parent_tsf = drv_data->parent_tsf;
ether_addr_copy(tmp.parent_bssid, drv_data->parent_bssid);
+ tmp.pub.use_for = data->use_for;
+ tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
if (data->bss_source != BSS_SOURCE_DIRECT) {
tmp.pub.transmitted_bss = data->source_bss;
@@ -2259,6 +2337,8 @@ cfg80211_parse_mbssid_data(struct wiphy *wiphy,
.beacon_interval = tx_data->beacon_interval,
.source_bss = source_bss,
.bss_source = BSS_SOURCE_MBSSID,
+ .use_for = tx_data->use_for,
+ .cannot_use_reasons = tx_data->cannot_use_reasons,
};
const u8 *mbssid_index_ie;
const struct element *elem, *sub;
@@ -2521,7 +2601,7 @@ error:
return NULL;
}
-static bool
+static u8
cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
const struct ieee80211_neighbor_ap_info **ap_info,
const u8 **tbtt_info)
@@ -2540,6 +2620,7 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
u16 params;
u8 length, i, count, mld_params_offset;
u8 type, lid;
+ u32 use_for;
info = (void *)pos;
count = u8_get_bits(info->tbtt_info_hdr,
@@ -2549,20 +2630,22 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
pos += sizeof(*info);
if (count * length > end - pos)
- return false;
+ return 0;
type = u8_get_bits(info->tbtt_info_hdr,
IEEE80211_AP_INFO_TBTT_HDR_TYPE);
- /* Only accept full TBTT information. NSTR mobile APs
- * use the shortened version, but we ignore them here.
- */
if (type == IEEE80211_TBTT_INFO_TYPE_TBTT &&
length >=
offsetofend(struct ieee80211_tbtt_info_ge_11,
mld_params)) {
mld_params_offset =
offsetof(struct ieee80211_tbtt_info_ge_11, mld_params);
+ use_for = NL80211_BSS_USE_FOR_ALL;
+ } else if (type == IEEE80211_TBTT_INFO_TYPE_MLD &&
+ length >= sizeof(struct ieee80211_rnr_mld_params)) {
+ mld_params_offset = 0;
+ use_for = NL80211_BSS_USE_FOR_MLD_LINK;
} else {
pos += count * length;
continue;
@@ -2580,7 +2663,7 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
*ap_info = info;
*tbtt_info = pos;
- return true;
+ return use_for;
}
pos += length;
@@ -2588,13 +2671,15 @@ cfg80211_tbtt_info_for_mld_ap(const u8 *ie, size_t ielen, u8 mld_id, u8 link_id,
}
}
- return false;
+ return 0;
}
-static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
- struct cfg80211_inform_single_bss_data *tx_data,
- struct cfg80211_bss *source_bss,
- gfp_t gfp)
+static void
+cfg80211_parse_ml_elem_sta_data(struct wiphy *wiphy,
+ struct cfg80211_inform_single_bss_data *tx_data,
+ struct cfg80211_bss *source_bss,
+ const struct element *elem,
+ gfp_t gfp)
{
struct cfg80211_inform_single_bss_data data = {
.drv_data = tx_data->drv_data,
@@ -2603,9 +2688,9 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
.bss_source = BSS_SOURCE_STA_PROFILE,
};
struct ieee80211_multi_link_elem *ml_elem;
- const struct element *elem;
struct cfg80211_mle *mle;
u16 control;
+ u8 ml_common_len;
u8 *new_ie;
struct cfg80211_bss *bss;
int mld_id;
@@ -2613,15 +2698,7 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
const u8 *pos;
u8 i;
- if (!source_bss)
- return;
-
- if (tx_data->ftype != CFG80211_BSS_FTYPE_PRESP)
- return;
-
- elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK,
- tx_data->ie, tx_data->ielen);
- if (!elem || !ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1))
+ if (!ieee80211_mle_size_ok(elem->data + 1, elem->datalen - 1))
return;
ml_elem = (void *)elem->data + 1;
@@ -2636,6 +2713,8 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP))
return;
+ ml_common_len = ml_elem->variable[0];
+
/* length + MLD MAC address + link ID info + BSS Params Change Count */
pos = ml_elem->variable + 1 + 6 + 1 + 1;
@@ -2647,8 +2726,11 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
/* MLD capabilities and operations */
pos += 2;
- /* Not included when the (nontransmitted) AP is responding itself,
- * but defined to zero then (Draft P802.11be_D3.0, 9.4.2.170.2)
+ /*
+ * The MLD ID of the reporting AP is always zero. It is set if the AP
+ * is part of an MBSSID set and will be non-zero for ML Elements
+ * relating to a nontransmitted BSS (matching the Multi-BSSID Index,
+ * Draft P802.11be_D3.2, 35.3.4.2)
*/
if (u16_get_bits(control, IEEE80211_MLC_BASIC_PRES_MLD_ID)) {
mld_id = *pos;
@@ -2676,7 +2758,7 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
const u8 *profile;
const u8 *tbtt_info;
ssize_t profile_len;
- u8 link_id;
+ u8 link_id, use_for;
if (!ieee80211_mle_basic_sta_prof_size_ok((u8 *)mle->sta_prof[i],
mle->sta_prof_len[i]))
@@ -2718,9 +2800,11 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
profile_len -= 2;
/* Find in RNR to look up channel information */
- if (!cfg80211_tbtt_info_for_mld_ap(tx_data->ie, tx_data->ielen,
- mld_id, link_id,
- &ap_info, &tbtt_info))
+ use_for = cfg80211_tbtt_info_for_mld_ap(tx_data->ie,
+ tx_data->ielen,
+ mld_id, link_id,
+ &ap_info, &tbtt_info);
+ if (!use_for)
continue;
/* We could sanity check the BSSID is included */
@@ -2732,6 +2816,14 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
freq = ieee80211_channel_to_freq_khz(ap_info->channel, band);
data.channel = ieee80211_get_channel_khz(wiphy, freq);
+ if (use_for == NL80211_BSS_USE_FOR_MLD_LINK &&
+ !(wiphy->flags & WIPHY_FLAG_SUPPORTS_NSTR_NONPRIMARY)) {
+ use_for = 0;
+ data.cannot_use_reasons =
+ NL80211_BSS_CANNOT_USE_NSTR_NONPRIMARY;
+ }
+ data.use_for = use_for;
+
/* Generate new elements */
memset(new_ie, 0, IEEE80211_MAX_DATA_LEN);
data.ie = new_ie;
@@ -2742,6 +2834,34 @@ static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
if (!data.ielen)
continue;
+ /* The generated elements do not contain:
+ * - Basic ML element
+ * - A TBTT entry in the RNR for the transmitting AP
+ *
+ * This information is needed both internally and in userspace
+ * as such, we should append it here.
+ */
+ if (data.ielen + 3 + sizeof(*ml_elem) + ml_common_len >
+ IEEE80211_MAX_DATA_LEN)
+ continue;
+
+ /* Copy the Basic Multi-Link element including the common
+ * information, and then fix up the link ID.
+ * Note that the ML element length has been verified and we
+ * also checked that it contains the link ID.
+ */
+ new_ie[data.ielen++] = WLAN_EID_EXTENSION;
+ new_ie[data.ielen++] = 1 + sizeof(*ml_elem) + ml_common_len;
+ new_ie[data.ielen++] = WLAN_EID_EXT_EHT_MULTI_LINK;
+ memcpy(new_ie + data.ielen, ml_elem,
+ sizeof(*ml_elem) + ml_common_len);
+
+ new_ie[data.ielen + sizeof(*ml_elem) + 1 + ETH_ALEN] = link_id;
+
+ data.ielen += sizeof(*ml_elem) + ml_common_len;
+
+ /* TODO: Add an RNR containing only the reporting AP */
+
bss = cfg80211_inform_single_bss_data(wiphy, &data, gfp);
if (!bss)
break;
@@ -2753,6 +2873,25 @@ out:
kfree(mle);
}
+static void cfg80211_parse_ml_sta_data(struct wiphy *wiphy,
+ struct cfg80211_inform_single_bss_data *tx_data,
+ struct cfg80211_bss *source_bss,
+ gfp_t gfp)
+{
+ const struct element *elem;
+
+ if (!source_bss)
+ return;
+
+ if (tx_data->ftype != CFG80211_BSS_FTYPE_PRESP)
+ return;
+
+ for_each_element_extid(elem, WLAN_EID_EXT_EHT_MULTI_LINK,
+ tx_data->ie, tx_data->ielen)
+ cfg80211_parse_ml_elem_sta_data(wiphy, tx_data, source_bss,
+ elem, gfp);
+}
+
struct cfg80211_bss *
cfg80211_inform_bss_data(struct wiphy *wiphy,
struct cfg80211_inform_bss *data,
@@ -2769,6 +2908,10 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
.beacon_interval = beacon_interval,
.ie = ie,
.ielen = ielen,
+ .use_for = data->restrict_use ?
+ data->use_for :
+ NL80211_BSS_USE_FOR_ALL,
+ .cannot_use_reasons = data->cannot_use_reasons,
};
struct cfg80211_bss *res;
@@ -2786,6 +2929,36 @@ cfg80211_inform_bss_data(struct wiphy *wiphy,
}
EXPORT_SYMBOL(cfg80211_inform_bss_data);
+static bool cfg80211_uhb_power_type_valid(const u8 *ie,
+ size_t ielen,
+ const u32 flags)
+{
+ const struct element *tmp;
+ struct ieee80211_he_operation *he_oper;
+
+ tmp = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ie, ielen);
+ if (tmp && tmp->datalen >= sizeof(*he_oper) + 1) {
+ const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+
+ he_oper = (void *)&tmp->data[1];
+ he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
+
+ if (!he_6ghz_oper)
+ return false;
+
+ switch (u8_get_bits(he_6ghz_oper->control,
+ IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO)) {
+ case IEEE80211_6GHZ_CTRL_REG_LPI_AP:
+ return true;
+ case IEEE80211_6GHZ_CTRL_REG_SP_AP:
+ return !(flags & IEEE80211_CHAN_NO_UHB_AFC_CLIENT);
+ case IEEE80211_6GHZ_CTRL_REG_VLP_AP:
+ return !(flags & IEEE80211_CHAN_NO_UHB_VLP_CLIENT);
+ }
+ }
+ return false;
+}
+
/* cfg80211_inform_bss_width_frame helper */
static struct cfg80211_bss *
cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
@@ -2844,6 +3017,14 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
if (!channel)
return NULL;
+ if (channel->band == NL80211_BAND_6GHZ &&
+ !cfg80211_uhb_power_type_valid(variable, ielen, channel->flags)) {
+ data->restrict_use = 1;
+ data->use_for = 0;
+ data->cannot_use_reasons =
+ NL80211_BSS_CANNOT_USE_UHB_PWR_MISMATCH;
+ }
+
if (ext) {
const struct ieee80211_s1g_bcn_compat_ie *compat;
const struct element *elem;
@@ -2899,6 +3080,10 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy,
tmp.pub.chains = data->chains;
memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS);
ether_addr_copy(tmp.parent_bssid, data->parent_bssid);
+ tmp.pub.use_for = data->restrict_use ?
+ data->use_for :
+ NL80211_BSS_USE_FOR_ALL;
+ tmp.pub.cannot_use_reasons = data->cannot_use_reasons;
signal_valid = data->chan == channel;
spin_lock_bh(&rdev->bss_lock);
@@ -2930,6 +3115,10 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy,
.ie = mgmt->u.probe_resp.variable,
.ielen = len - offsetof(struct ieee80211_mgmt,
u.probe_resp.variable),
+ .use_for = data->restrict_use ?
+ data->use_for :
+ NL80211_BSS_USE_FOR_ALL,
+ .cannot_use_reasons = data->cannot_use_reasons,
};
struct cfg80211_bss *res;
@@ -3080,10 +3269,9 @@ void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev,
if (new) {
/* to save time, update IEs for transmitting bss only */
- if (cfg80211_update_known_bss(rdev, cbss, new, false)) {
- new->pub.proberesp_ies = NULL;
- new->pub.beacon_ies = NULL;
- }
+ cfg80211_update_known_bss(rdev, cbss, new, false);
+ new->pub.proberesp_ies = NULL;
+ new->pub.beacon_ies = NULL;
list_for_each_entry_safe(nontrans_bss, tmp,
&new->pub.nontrans_list,
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index acfe66da7109..195c8532734b 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -1394,6 +1394,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
#endif
schedule_work(&cfg80211_disconnect_work);
+
+ cfg80211_schedule_channels_check(wdev);
}
void cfg80211_disconnected(struct net_device *dev, u16 reason,
diff --git a/net/wireless/tests/Makefile b/net/wireless/tests/Makefile
index fa8e297bbc5e..1f6622fcb758 100644
--- a/net/wireless/tests/Makefile
+++ b/net/wireless/tests/Makefile
@@ -1,3 +1,3 @@
-cfg80211-tests-y += module.o fragmentation.o
+cfg80211-tests-y += module.o fragmentation.o scan.o util.o
obj-$(CONFIG_CFG80211_KUNIT_TEST) += cfg80211-tests.o
diff --git a/net/wireless/tests/scan.c b/net/wireless/tests/scan.c
new file mode 100644
index 000000000000..77854161cd22
--- /dev/null
+++ b/net/wireless/tests/scan.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit tests for inform_bss functions
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/ieee80211.h>
+#include <net/cfg80211.h>
+#include <kunit/test.h>
+#include <kunit/skbuff.h>
+#include "../core.h"
+#include "util.h"
+
+/* mac80211 helpers for element building */
+#include "../../mac80211/ieee80211_i.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+struct test_elem {
+ u8 id;
+ u8 len;
+ union {
+ u8 data[255];
+ struct {
+ u8 eid;
+ u8 edata[254];
+ };
+ };
+};
+
+static struct gen_new_ie_case {
+ const char *desc;
+ struct test_elem parent_ies[16];
+ struct test_elem child_ies[16];
+ struct test_elem result_ies[16];
+} gen_new_ie_cases[] = {
+ {
+ .desc = "ML not inherited",
+ .parent_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 255,
+ .eid = WLAN_EID_EXT_EHT_MULTI_LINK },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "fragments are ignored if previous len not 255",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 254, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 254, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "fragments inherited",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "fragments copied",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "multiple elements inherit",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 123, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 123, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "one child element overrides",
+ .parent_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 255, },
+ { .id = WLAN_EID_FRAGMENT, .len = 125, },
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 123, },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 127, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_REDUCED_NEIGHBOR_REPORT, .len = 127, },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+ {
+ .desc = "empty elements from parent",
+ .parent_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ .child_ies = {
+ },
+ .result_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ },
+ {
+ .desc = "empty elements from child",
+ .parent_ies = {
+ },
+ .child_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ .result_ies = {
+ { .id = 0x1, .len = 0, },
+ { .id = WLAN_EID_EXTENSION, .len = 1, .eid = 0x10 },
+ },
+ },
+ {
+ .desc = "invalid extended elements ignored",
+ .parent_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 0 },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 0 },
+ },
+ .result_ies = {
+ },
+ },
+ {
+ .desc = "multiple extended elements",
+ .parent_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 3,
+ .eid = WLAN_EID_EXT_HE_CAPABILITY },
+ { .id = WLAN_EID_EXTENSION, .len = 5,
+ .eid = WLAN_EID_EXT_ASSOC_DELAY_INFO },
+ { .id = WLAN_EID_EXTENSION, .len = 7,
+ .eid = WLAN_EID_EXT_HE_OPERATION },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_REQ_PARAMS },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_SSID, .len = 13 },
+ { .id = WLAN_EID_EXTENSION, .len = 17,
+ .eid = WLAN_EID_EXT_HE_CAPABILITY },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_KEY_CONFIRM },
+ { .id = WLAN_EID_EXTENSION, .len = 19,
+ .eid = WLAN_EID_EXT_HE_OPERATION },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 17,
+ .eid = WLAN_EID_EXT_HE_CAPABILITY },
+ { .id = WLAN_EID_EXTENSION, .len = 5,
+ .eid = WLAN_EID_EXT_ASSOC_DELAY_INFO },
+ { .id = WLAN_EID_EXTENSION, .len = 19,
+ .eid = WLAN_EID_EXT_HE_OPERATION },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_REQ_PARAMS },
+ { .id = WLAN_EID_SSID, .len = 13 },
+ { .id = WLAN_EID_EXTENSION, .len = 11,
+ .eid = WLAN_EID_EXT_FILS_KEY_CONFIRM },
+ },
+ },
+ {
+ .desc = "non-inherit element",
+ .parent_ies = {
+ { .id = 0x1, .len = 7, },
+ { .id = 0x2, .len = 11, },
+ { .id = 0x3, .len = 13, },
+ { .id = WLAN_EID_EXTENSION, .len = 17, .eid = 0x10 },
+ { .id = WLAN_EID_EXTENSION, .len = 19, .eid = 0x11 },
+ { .id = WLAN_EID_EXTENSION, .len = 23, .eid = 0x12 },
+ { .id = WLAN_EID_EXTENSION, .len = 29, .eid = 0x14 },
+ },
+ .child_ies = {
+ { .id = WLAN_EID_EXTENSION,
+ .eid = WLAN_EID_EXT_NON_INHERITANCE,
+ .len = 10,
+ .edata = { 0x3, 0x1, 0x2, 0x3,
+ 0x4, 0x10, 0x11, 0x13, 0x14 } },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ .result_ies = {
+ { .id = WLAN_EID_EXTENSION, .len = 23, .eid = 0x12 },
+ { .id = WLAN_EID_SSID, .len = 2 },
+ },
+ },
+};
+KUNIT_ARRAY_PARAM_DESC(gen_new_ie, gen_new_ie_cases, desc)
+
+static void test_gen_new_ie(struct kunit *test)
+{
+ const struct gen_new_ie_case *params = test->param_value;
+ struct sk_buff *parent = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ struct sk_buff *child = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ struct sk_buff *reference = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ u8 *out = kunit_kzalloc(test, IEEE80211_MAX_DATA_LEN, GFP_KERNEL);
+ size_t len;
+ int i;
+
+ KUNIT_ASSERT_NOT_NULL(test, parent);
+ KUNIT_ASSERT_NOT_NULL(test, child);
+ KUNIT_ASSERT_NOT_NULL(test, reference);
+ KUNIT_ASSERT_NOT_NULL(test, out);
+
+ for (i = 0; i < ARRAY_SIZE(params->parent_ies); i++) {
+ if (params->parent_ies[i].len != 0) {
+ skb_put_u8(parent, params->parent_ies[i].id);
+ skb_put_u8(parent, params->parent_ies[i].len);
+ skb_put_data(parent, params->parent_ies[i].data,
+ params->parent_ies[i].len);
+ }
+
+ if (params->child_ies[i].len != 0) {
+ skb_put_u8(child, params->child_ies[i].id);
+ skb_put_u8(child, params->child_ies[i].len);
+ skb_put_data(child, params->child_ies[i].data,
+ params->child_ies[i].len);
+ }
+
+ if (params->result_ies[i].len != 0) {
+ skb_put_u8(reference, params->result_ies[i].id);
+ skb_put_u8(reference, params->result_ies[i].len);
+ skb_put_data(reference, params->result_ies[i].data,
+ params->result_ies[i].len);
+ }
+ }
+
+ len = cfg80211_gen_new_ie(parent->data, parent->len,
+ child->data, child->len,
+ out, IEEE80211_MAX_DATA_LEN);
+ KUNIT_EXPECT_EQ(test, len, reference->len);
+ KUNIT_EXPECT_MEMEQ(test, out, reference->data, reference->len);
+ memset(out, 0, IEEE80211_MAX_DATA_LEN);
+
+ /* Exactly enough space */
+ len = cfg80211_gen_new_ie(parent->data, parent->len,
+ child->data, child->len,
+ out, reference->len);
+ KUNIT_EXPECT_EQ(test, len, reference->len);
+ KUNIT_EXPECT_MEMEQ(test, out, reference->data, reference->len);
+ memset(out, 0, IEEE80211_MAX_DATA_LEN);
+
+ /* Not enough space (or expected zero length) */
+ len = cfg80211_gen_new_ie(parent->data, parent->len,
+ child->data, child->len,
+ out, reference->len - 1);
+ KUNIT_EXPECT_EQ(test, len, 0);
+}
+
+static void test_gen_new_ie_malformed(struct kunit *test)
+{
+ struct sk_buff *malformed = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ u8 *out = kunit_kzalloc(test, IEEE80211_MAX_DATA_LEN, GFP_KERNEL);
+ size_t len;
+
+ KUNIT_ASSERT_NOT_NULL(test, malformed);
+ KUNIT_ASSERT_NOT_NULL(test, out);
+
+ skb_put_u8(malformed, WLAN_EID_SSID);
+ skb_put_u8(malformed, 3);
+ skb_put(malformed, 3);
+ skb_put_u8(malformed, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
+ skb_put_u8(malformed, 10);
+ skb_put(malformed, 9);
+
+ len = cfg80211_gen_new_ie(malformed->data, malformed->len,
+ out, 0,
+ out, IEEE80211_MAX_DATA_LEN);
+ KUNIT_EXPECT_EQ(test, len, 5);
+
+ len = cfg80211_gen_new_ie(out, 0,
+ malformed->data, malformed->len,
+ out, IEEE80211_MAX_DATA_LEN);
+ KUNIT_EXPECT_EQ(test, len, 5);
+}
+
+struct inform_bss {
+ struct kunit *test;
+
+ int inform_bss_count;
+};
+
+static void inform_bss_inc_counter(struct wiphy *wiphy,
+ struct cfg80211_bss *bss,
+ const struct cfg80211_bss_ies *ies,
+ void *drv_data)
+{
+ struct inform_bss *ctx = t_wiphy_ctx(wiphy);
+
+ ctx->inform_bss_count++;
+
+ rcu_read_lock();
+ KUNIT_EXPECT_PTR_EQ(ctx->test, drv_data, ctx);
+ KUNIT_EXPECT_PTR_EQ(ctx->test, ies, rcu_dereference(bss->ies));
+ rcu_read_unlock();
+}
+
+static void test_inform_bss_ssid_only(struct kunit *test)
+{
+ struct inform_bss ctx = {
+ .test = test,
+ };
+ struct wiphy *wiphy = T_WIPHY(test, ctx);
+ struct t_wiphy_priv *w_priv = wiphy_priv(wiphy);
+ struct cfg80211_inform_bss inform_bss = {
+ .signal = 50,
+ .drv_data = &ctx,
+ };
+ const u8 bssid[ETH_ALEN] = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x66 };
+ u64 tsf = 0x1000000000000000ULL;
+ int beacon_int = 100;
+ u16 capability = 0x1234;
+ static const u8 input[] = {
+ [0] = WLAN_EID_SSID,
+ [1] = 4,
+ [2] = 'T', 'E', 'S', 'T'
+ };
+ struct cfg80211_bss *bss, *other;
+ const struct cfg80211_bss_ies *ies;
+
+ w_priv->ops->inform_bss = inform_bss_inc_counter;
+
+ inform_bss.chan = ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2412));
+ KUNIT_ASSERT_NOT_NULL(test, inform_bss.chan);
+
+ bss = cfg80211_inform_bss_data(wiphy, &inform_bss,
+ CFG80211_BSS_FTYPE_PRESP, bssid, tsf,
+ capability, beacon_int,
+ input, sizeof(input),
+ GFP_KERNEL);
+ KUNIT_EXPECT_NOT_NULL(test, bss);
+ KUNIT_EXPECT_EQ(test, ctx.inform_bss_count, 1);
+
+ /* Check values in returned bss are correct */
+ KUNIT_EXPECT_EQ(test, bss->signal, inform_bss.signal);
+ KUNIT_EXPECT_EQ(test, bss->beacon_interval, beacon_int);
+ KUNIT_EXPECT_EQ(test, bss->capability, capability);
+ KUNIT_EXPECT_EQ(test, bss->bssid_index, 0);
+ KUNIT_EXPECT_PTR_EQ(test, bss->channel, inform_bss.chan);
+ KUNIT_EXPECT_MEMEQ(test, bssid, bss->bssid, sizeof(bssid));
+
+ /* Check the IEs have the expected value */
+ rcu_read_lock();
+ ies = rcu_dereference(bss->ies);
+ KUNIT_EXPECT_NOT_NULL(test, ies);
+ KUNIT_EXPECT_EQ(test, ies->tsf, tsf);
+ KUNIT_EXPECT_EQ(test, ies->len, sizeof(input));
+ KUNIT_EXPECT_MEMEQ(test, ies->data, input, sizeof(input));
+ rcu_read_unlock();
+
+ /* Check we can look up the BSS - by SSID */
+ other = cfg80211_get_bss(wiphy, NULL, NULL, "TEST", 4,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ KUNIT_EXPECT_PTR_EQ(test, bss, other);
+ cfg80211_put_bss(wiphy, other);
+
+ /* Check we can look up the BSS - by BSSID */
+ other = cfg80211_get_bss(wiphy, NULL, bssid, NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ KUNIT_EXPECT_PTR_EQ(test, bss, other);
+ cfg80211_put_bss(wiphy, other);
+
+ cfg80211_put_bss(wiphy, bss);
+}
+
+static struct inform_bss_ml_sta_case {
+ const char *desc;
+ int mld_id;
+ bool sta_prof_vendor_elems;
+} inform_bss_ml_sta_cases[] = {
+ { .desc = "no_mld_id", .mld_id = 0, .sta_prof_vendor_elems = false },
+ { .desc = "mld_id_eq_1", .mld_id = 1, .sta_prof_vendor_elems = true },
+};
+KUNIT_ARRAY_PARAM_DESC(inform_bss_ml_sta, inform_bss_ml_sta_cases, desc)
+
+static void test_inform_bss_ml_sta(struct kunit *test)
+{
+ const struct inform_bss_ml_sta_case *params = test->param_value;
+ struct inform_bss ctx = {
+ .test = test,
+ };
+ struct wiphy *wiphy = T_WIPHY(test, ctx);
+ struct t_wiphy_priv *w_priv = wiphy_priv(wiphy);
+ struct cfg80211_inform_bss inform_bss = {
+ .signal = 50,
+ .drv_data = &ctx,
+ };
+ struct cfg80211_bss *bss, *link_bss;
+ const struct cfg80211_bss_ies *ies;
+
+ /* sending station */
+ const u8 bssid[ETH_ALEN] = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x66 };
+ u64 tsf = 0x1000000000000000ULL;
+ int beacon_int = 100;
+ u16 capability = 0x1234;
+
+ /* Building the frame *************************************************/
+ struct sk_buff *input = kunit_zalloc_skb(test, 1024, GFP_KERNEL);
+ u8 *len_mle, *len_prof;
+ u8 link_id = 2;
+ struct {
+ struct ieee80211_neighbor_ap_info info;
+ struct ieee80211_tbtt_info_ge_11 ap;
+ } __packed rnr = {
+ .info = {
+ .tbtt_info_hdr = u8_encode_bits(0, IEEE80211_AP_INFO_TBTT_HDR_COUNT),
+ .tbtt_info_len = sizeof(struct ieee80211_tbtt_info_ge_11),
+ .op_class = 81,
+ .channel = 11,
+ },
+ .ap = {
+ .tbtt_offset = 0xff,
+ .bssid = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x67 },
+ .short_ssid = 0, /* unused */
+ .bss_params = 0,
+ .psd_20 = 0,
+ .mld_params.mld_id = params->mld_id,
+ .mld_params.params =
+ le16_encode_bits(link_id,
+ IEEE80211_RNR_MLD_PARAMS_LINK_ID),
+ }
+ };
+ struct {
+ __le16 control;
+ u8 var_len;
+ u8 mld_mac_addr[ETH_ALEN];
+ u8 link_id_info;
+ u8 params_change_count;
+ __le16 mld_caps_and_ops;
+ u8 mld_id;
+ __le16 ext_mld_caps_and_ops;
+ } __packed mle_basic_common_info = {
+ .control =
+ cpu_to_le16(IEEE80211_ML_CONTROL_TYPE_BASIC |
+ IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT |
+ IEEE80211_MLC_BASIC_PRES_LINK_ID |
+ (params->mld_id ? IEEE80211_MLC_BASIC_PRES_MLD_ID : 0) |
+ IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP),
+ .mld_id = params->mld_id,
+ .mld_caps_and_ops = cpu_to_le16(0x0102),
+ .ext_mld_caps_and_ops = cpu_to_le16(0x0304),
+ .var_len = sizeof(mle_basic_common_info) - 2 -
+ (params->mld_id ? 0 : 1),
+ .mld_mac_addr = { 0x10, 0x22, 0x33, 0x44, 0x55, 0x60 },
+ };
+ struct {
+ __le16 control;
+ u8 var_len;
+ u8 bssid[ETH_ALEN];
+ __le16 beacon_int;
+ __le64 tsf_offset;
+ __le16 capabilities; /* already part of payload */
+ } __packed sta_prof = {
+ .control =
+ cpu_to_le16(IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE |
+ IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT |
+ IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT |
+ IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT |
+ u16_encode_bits(link_id,
+ IEEE80211_MLE_STA_CONTROL_LINK_ID)),
+ .var_len = sizeof(sta_prof) - 2 - 2,
+ .bssid = { *rnr.ap.bssid },
+ .beacon_int = cpu_to_le16(101),
+ .tsf_offset = cpu_to_le64(-123ll),
+ .capabilities = cpu_to_le16(0xdead),
+ };
+
+ KUNIT_ASSERT_NOT_NULL(test, input);
+
+ w_priv->ops->inform_bss = inform_bss_inc_counter;
+
+ inform_bss.chan = ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2412));
+ KUNIT_ASSERT_NOT_NULL(test, inform_bss.chan);
+
+ skb_put_u8(input, WLAN_EID_SSID);
+ skb_put_u8(input, 4);
+ skb_put_data(input, "TEST", 4);
+
+ skb_put_u8(input, WLAN_EID_REDUCED_NEIGHBOR_REPORT);
+ skb_put_u8(input, sizeof(rnr));
+ skb_put_data(input, &rnr, sizeof(rnr));
+
+ /* build a multi-link element */
+ skb_put_u8(input, WLAN_EID_EXTENSION);
+ len_mle = skb_put(input, 1);
+ skb_put_u8(input, WLAN_EID_EXT_EHT_MULTI_LINK);
+ skb_put_data(input, &mle_basic_common_info, sizeof(mle_basic_common_info));
+ if (!params->mld_id)
+ t_skb_remove_member(input, typeof(mle_basic_common_info), mld_id);
+ /* with a STA profile inside */
+ skb_put_u8(input, IEEE80211_MLE_SUBELEM_PER_STA_PROFILE);
+ len_prof = skb_put(input, 1);
+ skb_put_data(input, &sta_prof, sizeof(sta_prof));
+
+ if (params->sta_prof_vendor_elems) {
+ /* Put two (vendor) element into sta_prof */
+ skb_put_u8(input, WLAN_EID_VENDOR_SPECIFIC);
+ skb_put_u8(input, 160);
+ skb_put(input, 160);
+
+ skb_put_u8(input, WLAN_EID_VENDOR_SPECIFIC);
+ skb_put_u8(input, 165);
+ skb_put(input, 165);
+ }
+
+ /* fragment STA profile */
+ ieee80211_fragment_element(input, len_prof,
+ IEEE80211_MLE_SUBELEM_FRAGMENT);
+ /* fragment MLE */
+ ieee80211_fragment_element(input, len_mle, WLAN_EID_FRAGMENT);
+
+ /* Put a (vendor) element after the ML element */
+ skb_put_u8(input, WLAN_EID_VENDOR_SPECIFIC);
+ skb_put_u8(input, 155);
+ skb_put(input, 155);
+
+ /* Submit *************************************************************/
+ bss = cfg80211_inform_bss_data(wiphy, &inform_bss,
+ CFG80211_BSS_FTYPE_PRESP, bssid, tsf,
+ capability, beacon_int,
+ input->data, input->len,
+ GFP_KERNEL);
+ KUNIT_EXPECT_NOT_NULL(test, bss);
+ KUNIT_EXPECT_EQ(test, ctx.inform_bss_count, 2);
+
+ /* Check link_bss *****************************************************/
+ link_bss = cfg80211_get_bss(wiphy, NULL, sta_prof.bssid, NULL, 0,
+ IEEE80211_BSS_TYPE_ANY,
+ IEEE80211_PRIVACY_ANY);
+ KUNIT_ASSERT_NOT_NULL(test, link_bss);
+ KUNIT_EXPECT_EQ(test, link_bss->signal, 0);
+ KUNIT_EXPECT_EQ(test, link_bss->beacon_interval,
+ le16_to_cpu(sta_prof.beacon_int));
+ KUNIT_EXPECT_EQ(test, link_bss->capability,
+ le16_to_cpu(sta_prof.capabilities));
+ KUNIT_EXPECT_EQ(test, link_bss->bssid_index, 0);
+ KUNIT_EXPECT_PTR_EQ(test, link_bss->channel,
+ ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(2462)));
+
+ rcu_read_lock();
+ ies = rcu_dereference(link_bss->ies);
+ KUNIT_EXPECT_NOT_NULL(test, ies);
+ KUNIT_EXPECT_EQ(test, ies->tsf, tsf + le64_to_cpu(sta_prof.tsf_offset));
+ /* Resulting length should be:
+ * SSID (inherited) + RNR (inherited) + vendor element(s) +
+ * MLE common info + MLE header and control
+ */
+ if (params->sta_prof_vendor_elems)
+ KUNIT_EXPECT_EQ(test, ies->len,
+ 6 + 2 + sizeof(rnr) + 2 + 160 + 2 + 165 +
+ mle_basic_common_info.var_len + 5);
+ else
+ KUNIT_EXPECT_EQ(test, ies->len,
+ 6 + 2 + sizeof(rnr) + 2 + 155 +
+ mle_basic_common_info.var_len + 5);
+ rcu_read_unlock();
+
+ cfg80211_put_bss(wiphy, bss);
+ cfg80211_put_bss(wiphy, link_bss);
+}
+
+static struct kunit_case gen_new_ie_test_cases[] = {
+ KUNIT_CASE_PARAM(test_gen_new_ie, gen_new_ie_gen_params),
+ KUNIT_CASE(test_gen_new_ie_malformed),
+ {}
+};
+
+static struct kunit_suite gen_new_ie = {
+ .name = "cfg80211-ie-generation",
+ .test_cases = gen_new_ie_test_cases,
+};
+
+kunit_test_suite(gen_new_ie);
+
+static struct kunit_case inform_bss_test_cases[] = {
+ KUNIT_CASE(test_inform_bss_ssid_only),
+ KUNIT_CASE_PARAM(test_inform_bss_ml_sta, inform_bss_ml_sta_gen_params),
+ {}
+};
+
+static struct kunit_suite inform_bss = {
+ .name = "cfg80211-inform-bss",
+ .test_cases = inform_bss_test_cases,
+};
+
+kunit_test_suite(inform_bss);
diff --git a/net/wireless/tests/util.c b/net/wireless/tests/util.c
new file mode 100644
index 000000000000..8abdaeb820ce
--- /dev/null
+++ b/net/wireless/tests/util.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KUnit fixture to have a (configurable) wiphy
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#include <linux/ieee80211.h>
+#include <net/cfg80211.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
+#include "util.h"
+
+int t_wiphy_init(struct kunit_resource *resource, void *ctx)
+{
+ struct kunit *test = kunit_get_current_test();
+ struct cfg80211_ops *ops;
+ struct wiphy *wiphy;
+ struct t_wiphy_priv *priv;
+
+ ops = kzalloc(sizeof(*ops), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_NULL(test, ops);
+
+ wiphy = wiphy_new_nm(ops, sizeof(*priv), "kunit");
+ KUNIT_ASSERT_NOT_NULL(test, wiphy);
+
+ priv = wiphy_priv(wiphy);
+ priv->ctx = ctx;
+ priv->ops = ops;
+
+ /* Initialize channels, feel free to add more here channels/bands */
+ memcpy(priv->channels_2ghz, channels_2ghz, sizeof(channels_2ghz));
+ wiphy->bands[NL80211_BAND_2GHZ] = &priv->band_2ghz;
+ priv->band_2ghz.channels = priv->channels_2ghz;
+ priv->band_2ghz.n_channels = ARRAY_SIZE(channels_2ghz);
+
+ resource->data = wiphy;
+ resource->name = "wiphy";
+
+ return 0;
+}
+
+void t_wiphy_exit(struct kunit_resource *resource)
+{
+ struct t_wiphy_priv *priv;
+ struct cfg80211_ops *ops;
+
+ priv = wiphy_priv(resource->data);
+ ops = priv->ops;
+
+ /* Should we ensure anything about the state here?
+ * e.g. full destruction or no calls to any ops on destruction?
+ */
+
+ wiphy_free(resource->data);
+ kfree(ops);
+}
diff --git a/net/wireless/tests/util.h b/net/wireless/tests/util.h
new file mode 100644
index 000000000000..6de712e0d432
--- /dev/null
+++ b/net/wireless/tests/util.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Utilities for cfg80211 unit testing
+ *
+ * Copyright (C) 2023 Intel Corporation
+ */
+#ifndef __CFG80211_UTILS_H
+#define __CFG80211_UTILS_H
+
+#define CHAN2G(_freq) { \
+ .band = NL80211_BAND_2GHZ, \
+ .center_freq = (_freq), \
+ .hw_value = (_freq), \
+}
+
+static const struct ieee80211_channel channels_2ghz[] = {
+ CHAN2G(2412), /* Channel 1 */
+ CHAN2G(2417), /* Channel 2 */
+ CHAN2G(2422), /* Channel 3 */
+ CHAN2G(2427), /* Channel 4 */
+ CHAN2G(2432), /* Channel 5 */
+ CHAN2G(2437), /* Channel 6 */
+ CHAN2G(2442), /* Channel 7 */
+ CHAN2G(2447), /* Channel 8 */
+ CHAN2G(2452), /* Channel 9 */
+ CHAN2G(2457), /* Channel 10 */
+ CHAN2G(2462), /* Channel 11 */
+ CHAN2G(2467), /* Channel 12 */
+ CHAN2G(2472), /* Channel 13 */
+ CHAN2G(2484), /* Channel 14 */
+};
+
+struct t_wiphy_priv {
+ struct kunit *test;
+ struct cfg80211_ops *ops;
+
+ void *ctx;
+
+ struct ieee80211_supported_band band_2ghz;
+ struct ieee80211_channel channels_2ghz[ARRAY_SIZE(channels_2ghz)];
+};
+
+#define T_WIPHY(test, ctx) ({ \
+ struct wiphy *__wiphy = \
+ kunit_alloc_resource(test, t_wiphy_init, \
+ t_wiphy_exit, \
+ GFP_KERNEL, &(ctx)); \
+ \
+ KUNIT_ASSERT_NOT_NULL(test, __wiphy); \
+ __wiphy; \
+ })
+#define t_wiphy_ctx(wiphy) (((struct t_wiphy_priv *)wiphy_priv(wiphy))->ctx)
+
+int t_wiphy_init(struct kunit_resource *resource, void *data);
+void t_wiphy_exit(struct kunit_resource *resource);
+
+#define t_skb_remove_member(skb, type, member) do { \
+ memmove((skb)->data + (skb)->len - sizeof(type) + \
+ offsetof(type, member), \
+ (skb)->data + (skb)->len - sizeof(type) + \
+ offsetofend(type, member), \
+ offsetofend(type, member)); \
+ skb_trim(skb, (skb)->len - sizeof_field(type, member)); \
+ } while (0)
+
+#endif /* __CFG80211_UTILS_H */
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 30cd1bd58aac..1f374c8a17a5 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2928,7 +2928,7 @@ DEFINE_EVENT(netdev_evt_only, cfg80211_send_rx_auth,
TRACE_EVENT(cfg80211_send_rx_assoc,
TP_PROTO(struct net_device *netdev,
- struct cfg80211_rx_assoc_resp_data *data),
+ const struct cfg80211_rx_assoc_resp_data *data),
TP_ARGS(netdev, data),
TP_STRUCT__entry(
NETDEV_ENTRY
@@ -3979,6 +3979,26 @@ TRACE_EVENT(cfg80211_links_removed,
__entry->link_mask)
);
+TRACE_EVENT(rdev_set_ttlm,
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_ttlm_params *params),
+ TP_ARGS(wiphy, netdev, params),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ __array(u8, dlink, sizeof(u16) * 8)
+ __array(u8, ulink, sizeof(u16) * 8)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ memcpy(__entry->dlink, params->dlink, sizeof(params->dlink));
+ memcpy(__entry->ulink, params->ulink, sizeof(params->ulink));
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT,
+ WIPHY_PR_ARG, NETDEV_PR_ARG)
+);
+
#endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */
#undef TRACE_INCLUDE_PATH
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 626b858b4b35..d1ce3bee2797 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -980,7 +980,63 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb,
}
}
+ /* The default mapping as defined Section 2.3 in RFC8325: The three
+ * Most Significant Bits (MSBs) of the DSCP are used as the
+ * corresponding L2 markings.
+ */
ret = dscp >> 5;
+
+ /* Handle specific DSCP values for which the default mapping (as
+ * described above) doesn't adhere to the intended usage of the DSCP
+ * value. See section 4 in RFC8325. Specifically, for the following
+ * Diffserv Service Classes no update is needed:
+ * - Standard: DF
+ * - Low Priority Data: CS1
+ * - Multimedia Streaming: AF31, AF32, AF33
+ * - Multimedia Conferencing: AF41, AF42, AF43
+ * - Network Control Traffic: CS7
+ * - Real-Time Interactive: CS4
+ */
+ switch (dscp >> 2) {
+ case 10:
+ case 12:
+ case 14:
+ /* High throughput data: AF11, AF12, AF13 */
+ ret = 0;
+ break;
+ case 16:
+ /* Operations, Administration, and Maintenance and Provisioning:
+ * CS2
+ */
+ ret = 0;
+ break;
+ case 18:
+ case 20:
+ case 22:
+ /* Low latency data: AF21, AF22, AF23 */
+ ret = 3;
+ break;
+ case 24:
+ /* Broadcasting video: CS3 */
+ ret = 4;
+ break;
+ case 40:
+ /* Signaling: CS5 */
+ ret = 5;
+ break;
+ case 44:
+ /* Voice Admit: VA */
+ ret = 6;
+ break;
+ case 46:
+ /* Telephony traffic: EF */
+ ret = 6;
+ break;
+ case 48:
+ /* Network Control Traffic: CS6 */
+ ret = 7;
+ break;
+ }
out:
return array_index_nospec(ret, IEEE80211_NUM_TIDS);
}
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index aad8ffeaee04..f7a7c7798c3b 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -704,7 +704,7 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
rc = -EINVAL;
}
release_sock(sk);
- SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
+ net_dbg_ratelimited("x25_bind: socket is bound\n");
out:
return rc;
}
@@ -1165,10 +1165,10 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
}
- SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n");
+ net_dbg_ratelimited("x25_sendmsg: sendto: Addresses built.\n");
/* Build a packet */
- SOCK_DEBUG(sk, "x25_sendmsg: sendto: building packet.\n");
+ net_dbg_ratelimited("x25_sendmsg: sendto: building packet.\n");
if ((msg->msg_flags & MSG_OOB) && len > 32)
len = 32;
@@ -1187,7 +1187,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/*
* Put the data on the end
*/
- SOCK_DEBUG(sk, "x25_sendmsg: Copying user data\n");
+ net_dbg_ratelimited("x25_sendmsg: Copying user data\n");
skb_reset_transport_header(skb);
skb_put(skb, len);
@@ -1211,7 +1211,7 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
/*
* Push down the X.25 header
*/
- SOCK_DEBUG(sk, "x25_sendmsg: Building X.25 Header.\n");
+ net_dbg_ratelimited("x25_sendmsg: Building X.25 Header.\n");
if (msg->msg_flags & MSG_OOB) {
if (x25->neighbour->extended) {
@@ -1245,8 +1245,8 @@ static int x25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
skb->data[0] |= X25_Q_BIT;
}
- SOCK_DEBUG(sk, "x25_sendmsg: Built header.\n");
- SOCK_DEBUG(sk, "x25_sendmsg: Transmitting buffer\n");
+ net_dbg_ratelimited("x25_sendmsg: Built header.\n");
+ net_dbg_ratelimited("x25_sendmsg: Transmitting buffer\n");
rc = -ENOTCONN;
if (sk->sk_state != TCP_ESTABLISHED)
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 8e1a49b0c0dc..6dadb217e101 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -282,7 +282,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
* They want reverse charging, we won't accept it.
*/
if ((theirs.reverse & 0x01 ) && (ours->reverse & 0x01)) {
- SOCK_DEBUG(sk, "X.25: rejecting reverse charging request\n");
+ net_dbg_ratelimited("X.25: rejecting reverse charging request\n");
return -1;
}
@@ -294,11 +294,11 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
int ours_in = ours->throughput & 0x0f;
int ours_out = ours->throughput & 0xf0;
if (!ours_in || theirs_in < ours_in) {
- SOCK_DEBUG(sk, "X.25: inbound throughput negotiated\n");
+ net_dbg_ratelimited("X.25: inbound throughput negotiated\n");
new->throughput = (new->throughput & 0xf0) | theirs_in;
}
if (!ours_out || theirs_out < ours_out) {
- SOCK_DEBUG(sk,
+ net_dbg_ratelimited(
"X.25: outbound throughput negotiated\n");
new->throughput = (new->throughput & 0x0f) | theirs_out;
}
@@ -306,22 +306,22 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
if (theirs.pacsize_in && theirs.pacsize_out) {
if (theirs.pacsize_in < ours->pacsize_in) {
- SOCK_DEBUG(sk, "X.25: packet size inwards negotiated down\n");
+ net_dbg_ratelimited("X.25: packet size inwards negotiated down\n");
new->pacsize_in = theirs.pacsize_in;
}
if (theirs.pacsize_out < ours->pacsize_out) {
- SOCK_DEBUG(sk, "X.25: packet size outwards negotiated down\n");
+ net_dbg_ratelimited("X.25: packet size outwards negotiated down\n");
new->pacsize_out = theirs.pacsize_out;
}
}
if (theirs.winsize_in && theirs.winsize_out) {
if (theirs.winsize_in < ours->winsize_in) {
- SOCK_DEBUG(sk, "X.25: window size inwards negotiated down\n");
+ net_dbg_ratelimited("X.25: window size inwards negotiated down\n");
new->winsize_in = theirs.winsize_in;
}
if (theirs.winsize_out < ours->winsize_out) {
- SOCK_DEBUG(sk, "X.25: window size outwards negotiated down\n");
+ net_dbg_ratelimited("X.25: window size outwards negotiated down\n");
new->winsize_out = theirs.winsize_out;
}
}
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index dbc0940bf35f..f8922b0e23a4 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -72,7 +72,7 @@ int x25_output(struct sock *sk, struct sk_buff *skb)
kfree_skb(skb);
return sent;
}
- SOCK_DEBUG(sk, "x25_output: fragment alloc"
+ net_dbg_ratelimited("x25_output: fragment alloc"
" failed, err=%d, %d bytes "
"sent\n", err, sent);
return err;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 06cead2b8e34..caa340134b0e 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem)
return 0;
}
+#define XDP_UMEM_FLAGS_VALID ( \
+ XDP_UMEM_UNALIGNED_CHUNK_FLAG | \
+ XDP_UMEM_TX_SW_CSUM | \
+ 0)
+
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
@@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
return -EINVAL;
}
- if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+ if (mr->flags & ~XDP_UMEM_FLAGS_VALID)
return -EINVAL;
if (!unaligned_chunks && !is_power_of_2(chunk_size))
@@ -199,6 +204,9 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
return -EINVAL;
+ if (mr->tx_metadata_len >= 256 || mr->tx_metadata_len % 8)
+ return -EINVAL;
+
umem->size = size;
umem->headroom = headroom;
umem->chunk_size = chunk_size;
@@ -207,6 +215,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
umem->pgs = NULL;
umem->user = NULL;
umem->flags = mr->flags;
+ umem->tx_metadata_len = mr->tx_metadata_len;
INIT_LIST_HEAD(&umem->xsk_dma_list);
refcount_set(&umem->users, 1);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3da0b52f308d..b78c0e095e22 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -167,8 +167,10 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
contd = XDP_PKT_CONTD;
err = __xsk_rcv_zc(xs, xskb, len, contd);
- if (err || likely(!frags))
- goto out;
+ if (err)
+ goto err;
+ if (likely(!frags))
+ return 0;
xskb_list = &xskb->pool->xskb_list;
list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
@@ -177,11 +179,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
- return err;
+ goto err;
list_del(&pos->xskb_list_node);
}
-out:
+ return 0;
+err:
+ xsk_buff_free(xdp);
return err;
}
@@ -571,6 +575,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb)
static void xsk_destruct_skb(struct sk_buff *skb)
{
+ struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta;
+
+ if (compl->tx_timestamp) {
+ /* sw completion timestamp, not a real one */
+ *compl->tx_timestamp = ktime_get_tai_fast_ns();
+ }
+
xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb));
sock_wfree(skb);
}
@@ -655,8 +666,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
struct xdp_desc *desc)
{
+ struct xsk_tx_metadata *meta = NULL;
struct net_device *dev = xs->dev;
struct sk_buff *skb = xs->skb;
+ bool first_frag = false;
int err;
if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) {
@@ -687,6 +700,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
kfree_skb(skb);
goto free_err;
}
+
+ first_frag = true;
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
@@ -707,7 +722,40 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
memcpy(vaddr, buffer, len);
kunmap_local(vaddr);
- skb_add_rx_frag(skb, nr_frags, page, 0, len, 0);
+ skb_add_rx_frag(skb, nr_frags, page, 0, len, PAGE_SIZE);
+ refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
+ }
+
+ if (first_frag && desc->options & XDP_TX_METADATA) {
+ if (unlikely(xs->pool->tx_metadata_len == 0)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ meta = buffer - xs->pool->tx_metadata_len;
+ if (unlikely(!xsk_buff_valid_tx_metadata(meta))) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) {
+ if (unlikely(meta->request.csum_start +
+ meta->request.csum_offset +
+ sizeof(__sum16) > len)) {
+ err = -EINVAL;
+ goto free_err;
+ }
+
+ skb->csum_start = hr + meta->request.csum_start;
+ skb->csum_offset = meta->request.csum_offset;
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
+ if (unlikely(xs->pool->tx_sw_csum)) {
+ err = skb_checksum_help(skb);
+ if (err)
+ goto free_err;
+ }
+ }
}
}
@@ -715,6 +763,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb->priority = READ_ONCE(xs->sk.sk_priority);
skb->mark = READ_ONCE(xs->sk.sk_mark);
skb->destructor = xsk_destruct_skb;
+ xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta);
xsk_set_destructor_arg(skb);
return skb;
@@ -1282,6 +1331,14 @@ struct xdp_umem_reg_v1 {
__u32 headroom;
};
+struct xdp_umem_reg_v2 {
+ __u64 addr; /* Start of packet data area */
+ __u64 len; /* Length of packet data area */
+ __u32 chunk_size;
+ __u32 headroom;
+ __u32 flags;
+};
+
static int xsk_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
@@ -1325,8 +1382,10 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(struct xdp_umem_reg_v1))
return -EINVAL;
- else if (optlen < sizeof(mr))
+ else if (optlen < sizeof(struct xdp_umem_reg_v2))
mr_size = sizeof(struct xdp_umem_reg_v1);
+ else if (optlen < sizeof(mr))
+ mr_size = sizeof(struct xdp_umem_reg_v2);
if (copy_from_sockptr(&mr, optval, mr_size))
return -EFAULT;
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 49cb9f9a09be..ce60ecd48a4d 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -85,6 +85,8 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs,
XDP_PACKET_HEADROOM;
pool->umem = umem;
pool->addrs = umem->addrs;
+ pool->tx_metadata_len = umem->tx_metadata_len;
+ pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM;
INIT_LIST_HEAD(&pool->free_list);
INIT_LIST_HEAD(&pool->xskb_list);
INIT_LIST_HEAD(&pool->xsk_tx_list);
@@ -123,6 +125,18 @@ void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
}
EXPORT_SYMBOL(xp_set_rxq_info);
+void xp_fill_cb(struct xsk_buff_pool *pool, struct xsk_cb_desc *desc)
+{
+ u32 i;
+
+ for (i = 0; i < pool->heads_cnt; i++) {
+ struct xdp_buff_xsk *xskb = &pool->heads[i];
+
+ memcpy(xskb->cb + desc->off, desc->src, desc->bytes);
+ }
+}
+EXPORT_SYMBOL(xp_fill_cb);
+
static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
{
struct netdev_bpf bpf;
@@ -541,6 +555,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
xskb->xdp.data_meta = xskb->xdp.data;
+ xskb->xdp.flags = 0;
if (pool->dma_need_sync) {
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 13354a1e4280..6f2d1621c992 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -137,21 +137,23 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
static inline bool xp_unused_options_set(u32 options)
{
- return options & ~XDP_PKT_CONTD;
+ return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA);
}
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 offset = desc->addr & (pool->chunk_size - 1);
+ u64 addr = desc->addr - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
+ u64 offset = addr & (pool->chunk_size - 1);
if (!desc->len)
return false;
- if (offset + desc->len > pool->chunk_size)
+ if (offset + len > pool->chunk_size)
return false;
- if (desc->addr >= pool->addrs_cnt)
+ if (addr >= pool->addrs_cnt)
return false;
if (xp_unused_options_set(desc->options))
@@ -162,16 +164,17 @@ static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 addr = xp_unaligned_add_offset_to_addr(desc->addr);
+ u64 addr = xp_unaligned_add_offset_to_addr(desc->addr) - pool->tx_metadata_len;
+ u64 len = desc->len + pool->tx_metadata_len;
if (!desc->len)
return false;
- if (desc->len > pool->chunk_size)
+ if (len > pool->chunk_size)
return false;
- if (addr >= pool->addrs_cnt || addr + desc->len > pool->addrs_cnt ||
- xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+ if (addr >= pool->addrs_cnt || addr + len > pool->addrs_cnt ||
+ xp_desc_crosses_non_contig_pg(pool, addr, len))
return false;
if (xp_unused_options_set(desc->options))
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index cd47f88921f5..547cec77ba03 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
+obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 41533c631431..e6da7e8495c9 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -858,4 +858,5 @@ int xfrm_count_pfkey_enc_supported(void)
}
EXPORT_SYMBOL_GPL(xfrm_count_pfkey_enc_supported);
+MODULE_DESCRIPTION("XFRM Algorithm interface");
MODULE_LICENSE("GPL");
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 3784534c9185..653e51ae3964 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -407,7 +407,7 @@ bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct net_device *dev = x->xso.dev;
- if (!x->type_offload || x->encap)
+ if (!x->type_offload)
return false;
if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET ||
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 662c83beb345..e5722c95b8bb 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -704,9 +704,13 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct xfrm_state *x = skb_dst(skb)->xfrm;
+ int family;
int err;
- switch (x->outer_mode.family) {
+ family = (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ? x->outer_mode.family
+ : skb_dst(skb)->ops->family;
+
+ switch (family) {
case AF_INET:
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c13dc3ef7910..da6ecc6b3e15 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2694,7 +2694,9 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)
mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);
- family = xfrm[i]->props.family;
+ if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ family = xfrm[i]->props.family;
+
oif = fl->flowi_oif ? : fl->flowi_l3mdev;
dst = xfrm_dst_lookup(xfrm[i], tos, oif,
&saddr, &daddr, family, mark);
@@ -3416,7 +3418,7 @@ decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reve
}
fl4->flowi4_proto = flkeys->basic.ip_proto;
- fl4->flowi4_tos = flkeys->ip.tos;
+ fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK;
}
#if IS_ENABLED(CONFIG_IPV6)
@@ -4218,6 +4220,8 @@ void __init xfrm_init(void)
#ifdef CONFIG_XFRM_ESPINTCP
espintcp_init();
#endif
+
+ register_xfrm_state_bpf();
}
#ifdef CONFIG_AUDITSYSCALL
diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c
new file mode 100644
index 000000000000..9e20d4a377f7
--- /dev/null
+++ b/net/xfrm/xfrm_state_bpf.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Unstable XFRM state BPF helpers.
+ *
+ * Note that it is allowed to break compatibility for these functions since the
+ * interface they are exposed through to BPF programs is explicitly unstable.
+ */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <net/xdp.h>
+#include <net/xfrm.h>
+
+/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers
+ *
+ * Members:
+ * @error - Out parameter, set for any errors encountered
+ * Values:
+ * -EINVAL - netns_id is less than -1
+ * -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ
+ * -ENONET - No network namespace found for netns_id
+ * -ENOENT - No xfrm_state found
+ * @netns_id - Specify the network namespace for lookup
+ * Values:
+ * BPF_F_CURRENT_NETNS (-1)
+ * Use namespace associated with ctx
+ * [0, S32_MAX]
+ * Network Namespace ID
+ * @mark - XFRM mark to match on
+ * @daddr - Destination address to match on
+ * @spi - Security parameter index to match on
+ * @proto - IP protocol to match on (eg. IPPROTO_ESP)
+ * @family - Protocol family to match on (AF_INET/AF_INET6)
+ */
+struct bpf_xfrm_state_opts {
+ s32 error;
+ s32 netns_id;
+ u32 mark;
+ xfrm_address_t daddr;
+ __be32 spi;
+ u8 proto;
+ u16 family;
+};
+
+enum {
+ BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts),
+};
+
+__bpf_kfunc_start_defs();
+
+/* bpf_xdp_get_xfrm_state - Get XFRM state
+ *
+ * A `struct xfrm_state *`, if found, must be released with a corresponding
+ * bpf_xdp_xfrm_state_release.
+ *
+ * Parameters:
+ * @ctx - Pointer to ctx (xdp_md) in XDP program
+ * Cannot be NULL
+ * @opts - Options for lookup (documented above)
+ * Cannot be NULL
+ * @opts__sz - Length of the bpf_xfrm_state_opts structure
+ * Must be BPF_XFRM_STATE_OPTS_SZ
+ */
+__bpf_kfunc struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz)
+{
+ struct xdp_buff *xdp = (struct xdp_buff *)ctx;
+ struct net *net = dev_net(xdp->rxq->dev);
+ struct xfrm_state *x;
+
+ if (!opts || opts__sz < sizeof(opts->error))
+ return NULL;
+
+ if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) {
+ opts->error = -EINVAL;
+ return NULL;
+ }
+
+ if (opts->netns_id >= 0) {
+ net = get_net_ns_by_id(net, opts->netns_id);
+ if (unlikely(!net)) {
+ opts->error = -ENONET;
+ return NULL;
+ }
+ }
+
+ x = xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi,
+ opts->proto, opts->family);
+
+ if (opts->netns_id >= 0)
+ put_net(net);
+ if (!x)
+ opts->error = -ENOENT;
+
+ return x;
+}
+
+/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object
+ *
+ * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
+ * the program if any references remain in the program in all of the explored
+ * states.
+ *
+ * Parameters:
+ * @x - Pointer to referenced xfrm_state object, obtained using
+ * bpf_xdp_get_xfrm_state.
+ */
+__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
+{
+ xfrm_state_put(x);
+}
+
+__bpf_kfunc_end_defs();
+
+BTF_SET8_START(xfrm_state_kfunc_set)
+BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
+BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
+BTF_SET8_END(xfrm_state_kfunc_set)
+
+static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &xfrm_state_kfunc_set,
+};
+
+int __init register_xfrm_state_bpf(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
+ &xfrm_state_xdp_kfunc_set);
+}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ad01997c3aa9..912c1189ba41 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2017,6 +2017,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
if (xp->xfrm_nr == 0)
return 0;
+ if (xp->xfrm_nr > XFRM_MAX_DEPTH)
+ return -ENOBUFS;
+
for (i = 0; i < xp->xfrm_nr; i++) {
struct xfrm_user_tmpl *up = &vec[i];
struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
@@ -3888,5 +3891,6 @@ static void __exit xfrm_user_exit(void)
module_init(xfrm_user_init);
module_exit(xfrm_user_exit);
+MODULE_DESCRIPTION("XFRM User interface");
MODULE_LICENSE("GPL");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM);