aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/translation-table.c2
-rw-r--r--net/bluetooth/hci_core.c9
-rw-r--r--net/bluetooth/hci_debugfs.c48
-rw-r--r--net/bluetooth/hci_event.c40
-rw-r--r--net/bluetooth/hci_request.c4
-rw-r--r--net/bluetooth/l2cap_core.c8
-rw-r--r--net/bluetooth/l2cap_sock.c7
-rw-r--r--net/bluetooth/rfcomm/core.c2
-rw-r--r--net/bluetooth/sco.c26
-rw-r--r--net/bridge/netfilter/ebtables.c6
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/dev_ioctl.c2
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/rtnetlink.c11
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/sock.c52
-rw-r--r--net/core/sock_diag.c10
-rw-r--r--net/core/sock_map.c6
-rw-r--r--net/ethernet/eth.c12
-rw-r--r--net/hsr/hsr_framereg.c20
-rw-r--r--net/hsr/hsr_framereg.h1
-rw-r--r--net/hsr/hsr_main.c15
-rw-r--r--net/ipv4/arp.c3
-rw-r--r--net/ipv4/devinet.c21
-rw-r--r--net/ipv4/igmp.c28
-rw-r--r--net/ipv4/inet_connection_sock.c14
-rw-r--r--net/ipv4/inet_diag.c6
-rw-r--r--net/ipv4/inet_timewait_sock.c32
-rw-r--r--net/ipv4/ip_gre.c5
-rw-r--r--net/ipv4/ip_sockglue.c100
-rw-r--r--net/ipv4/ip_tunnel.c43
-rw-r--r--net/ipv4/ipmr.c13
-rw-r--r--net/ipv4/netfilter/arp_tables.c8
-rw-r--r--net/ipv4/netfilter/ip_tables.c8
-rw-r--r--net/ipv4/route.c7
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/udp.c16
-rw-r--r--net/ipv4/udp_offload.c13
-rw-r--r--net/ipv6/addrconf.c35
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/ip6_fib.c21
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c18
-rw-r--r--net/ipv6/netfilter/ip6_tables.c8
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/ipv6/seg6.c20
-rw-r--r--net/ipv6/udp.c7
-rw-r--r--net/ipv6/udp_offload.c8
-rw-r--r--net/iucv/iucv.c4
-rw-r--r--net/kcm/kcmsock.c3
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c4
-rw-r--r--net/mac80211/cfg.c5
-rw-r--r--net/mac80211/sta_info.c2
-rw-r--r--net/mac80211/tx.c2
-rw-r--r--net/mac802154/llsec.c18
-rw-r--r--net/mptcp/diag.c9
-rw-r--r--net/mptcp/protocol.c52
-rw-r--r--net/mptcp/subflow.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c6
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_tables_api.c85
-rw-r--r--net/netfilter/nft_chain_filter.c4
-rw-r--r--net/netfilter/nft_compat.c20
-rw-r--r--net/netfilter/nft_ct.c11
-rw-r--r--net/netfilter/nft_set_pipapo.c19
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netrom/af_netrom.c14
-rw-r--r--net/netrom/nr_dev.c2
-rw-r--r--net/netrom/nr_in.c6
-rw-r--r--net/netrom/nr_out.c2
-rw-r--r--net/netrom/nr_route.c8
-rw-r--r--net/netrom/nr_subr.c5
-rw-r--r--net/nfc/nci/core.c5
-rw-r--r--net/openvswitch/conntrack.c9
-rw-r--r--net/packet/af_packet.c16
-rw-r--r--net/rds/rdma.c3
-rw-r--r--net/rds/send.c11
-rw-r--r--net/sched/Kconfig42
-rw-r--r--net/sched/Makefile3
-rw-r--r--net/sched/act_skbmod.c10
-rw-r--r--net/sched/sch_atm.c709
-rw-r--r--net/sched/sch_cbq.c1816
-rw-r--r--net/sched/sch_dsmark.c521
-rw-r--r--net/smc/smc_pnet.c10
-rw-r--r--net/sunrpc/addr.c4
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c27
-rw-r--r--net/tls/tls_main.c2
-rw-r--r--net/tls/tls_sw.c12
-rw-r--r--net/unix/af_unix.c4
-rw-r--r--net/unix/garbage.c37
-rw-r--r--net/unix/scm.c12
-rw-r--r--net/wireless/nl80211.c3
-rw-r--r--net/x25/af_x25.c4
-rw-r--r--net/xdp/xsk.c2
-rw-r--r--net/xfrm/xfrm_user.c3
97 files changed, 826 insertions, 3495 deletions
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 9e8ebac9b7e7..f5019f698105 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -4188,7 +4188,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
spin_lock_bh(&bat_priv->tt.commit_lock);
- while (true) {
+ while (timeout) {
table_size = batadv_tt_local_table_transmit_size(bat_priv);
if (packet_size_max >= table_size)
break;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e33fe4b1c4e2..b9cf5bc9364c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2175,7 +2175,7 @@ int hci_get_dev_info(void __user *arg)
else
flags = hdev->flags;
- strcpy(di.name, hdev->name);
+ strscpy(di.name, hdev->name, sizeof(di.name));
di.bdaddr = hdev->bdaddr;
di.type = (hdev->bus & 0x0f) | ((hdev->dev_type & 0x03) << 4);
di.flags = flags;
@@ -2318,6 +2318,7 @@ static void hci_error_reset(struct work_struct *work)
{
struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset);
+ hci_dev_hold(hdev);
BT_DBG("%s", hdev->name);
if (hdev->hw_error)
@@ -2325,10 +2326,10 @@ static void hci_error_reset(struct work_struct *work)
else
bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code);
- if (hci_dev_do_close(hdev))
- return;
+ if (!hci_dev_do_close(hdev))
+ hci_dev_do_open(hdev);
- hci_dev_do_open(hdev);
+ hci_dev_put(hdev);
}
void hci_uuids_clear(struct hci_dev *hdev)
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index d4efc4aa55af..131bb56bf2af 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -216,10 +216,12 @@ static int conn_info_min_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val > hdev->conn_info_max_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val > hdev->conn_info_max_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_min_age = val;
hci_dev_unlock(hdev);
@@ -244,10 +246,12 @@ static int conn_info_max_age_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val < hdev->conn_info_min_age)
+ hci_dev_lock(hdev);
+ if (val == 0 || val < hdev->conn_info_min_age) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->conn_info_max_age = val;
hci_dev_unlock(hdev);
@@ -526,10 +530,12 @@ static int sniff_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val > hdev->sniff_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_min_interval = val;
hci_dev_unlock(hdev);
@@ -554,10 +560,12 @@ static int sniff_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
+ hci_dev_lock(hdev);
+ if (val == 0 || val % 2 || val < hdev->sniff_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->sniff_max_interval = val;
hci_dev_unlock(hdev);
@@ -798,10 +806,12 @@ static int conn_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_min_interval = val;
hci_dev_unlock(hdev);
@@ -826,10 +836,12 @@ static int conn_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_conn_max_interval = val;
hci_dev_unlock(hdev);
@@ -938,10 +950,12 @@ static int adv_min_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_min_interval = val;
hci_dev_unlock(hdev);
@@ -966,10 +980,12 @@ static int adv_max_interval_set(void *data, u64 val)
{
struct hci_dev *hdev = data;
- if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
+ hci_dev_lock(hdev);
+ if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) {
+ hci_dev_unlock(hdev);
return -EINVAL;
+ }
- hci_dev_lock(hdev);
hdev->le_adv_max_interval = val;
hci_dev_unlock(hdev);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 4027c79786fd..58c029958759 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2636,6 +2636,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
if (test_bit(HCI_ENCRYPT, &hdev->flags))
set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+ /* "Link key request" completed ahead of "connect request" completes */
+ if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
+ ev->link_type == ACL_LINK) {
+ struct link_key *key;
+ struct hci_cp_read_enc_key_size cp;
+
+ key = hci_find_link_key(hdev, &ev->bdaddr);
+ if (key) {
+ set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+
+ if (!(hdev->commands[20] & 0x10)) {
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ } else {
+ cp.handle = cpu_to_le16(conn->handle);
+ if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
+ sizeof(cp), &cp)) {
+ bt_dev_err(hdev, "sending read key size failed");
+ conn->enc_key_size = HCI_LINK_KEY_SIZE;
+ }
+ }
+
+ hci_encrypt_cfm(conn, ev->status);
+ }
+ }
+
/* Get remote features */
if (conn->type == ACL_LINK) {
struct hci_cp_read_remote_features cp;
@@ -2979,8 +3004,6 @@ static void hci_remote_name_evt(struct hci_dev *hdev, struct sk_buff *skb)
BT_DBG("%s", hdev->name);
- hci_conn_check_pending(hdev);
-
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
@@ -4612,9 +4635,12 @@ static void hci_io_capa_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
- if (!conn || !hci_conn_ssp_enabled(conn))
+ if (!conn || !hci_dev_test_flag(hdev, HCI_SSP_ENABLED))
goto unlock;
+ /* Assume remote supports SSP since it has triggered this event */
+ set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
+
hci_conn_hold(conn);
if (!hci_dev_test_flag(hdev, HCI_MGMT))
@@ -5922,6 +5948,10 @@ static void hci_le_remote_conn_param_req_evt(struct hci_dev *hdev,
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_UNKNOWN_CONN_ID);
+ if (max > hcon->le_conn_max_interval)
+ return send_conn_param_neg_reply(hdev, handle,
+ HCI_ERROR_INVALID_LL_PARAMS);
+
if (hci_check_conn_params(min, max, latency, timeout))
return send_conn_param_neg_reply(hdev, handle,
HCI_ERROR_INVALID_LL_PARAMS);
@@ -6139,10 +6169,10 @@ static void hci_store_wake_reason(struct hci_dev *hdev, u8 event,
* keep track of the bdaddr of the connection event that woke us up.
*/
if (event == HCI_EV_CONN_REQUEST) {
- bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_request->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_CONN_COMPLETE) {
- bacpy(&hdev->wake_addr, &conn_request->bdaddr);
+ bacpy(&hdev->wake_addr, &conn_complete->bdaddr);
hdev->wake_addr_type = BDADDR_BREDR;
} else if (event == HCI_EV_LE_META) {
struct hci_ev_le_meta *le_ev = (void *)skb->data;
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index a0f980e61505..7ce6db1ac558 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -107,8 +107,10 @@ static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
if (hdev->req_status == HCI_REQ_PEND) {
hdev->req_result = result;
hdev->req_status = HCI_REQ_DONE;
- if (skb)
+ if (skb) {
+ kfree_skb(hdev->req_skb);
hdev->req_skb = skb_get(skb);
+ }
wake_up_interruptible(&hdev->req_wait_q);
}
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a752032e12fc..580b6d6b970d 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5609,7 +5609,13 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn,
memset(&rsp, 0, sizeof(rsp));
- err = hci_check_conn_params(min, max, latency, to_multiplier);
+ if (max > hcon->le_conn_max_interval) {
+ BT_DBG("requested connection interval exceeds current bounds.");
+ err = -EINVAL;
+ } else {
+ err = hci_check_conn_params(min, max, latency, to_multiplier);
+ }
+
if (err)
rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED);
else
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 756523e5402a..3a2be1b4a574 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -456,7 +456,8 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
struct l2cap_chan *chan = l2cap_pi(sk)->chan;
struct l2cap_options opts;
struct l2cap_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
u32 opt;
BT_DBG("sk %p", sk);
@@ -503,7 +504,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
BT_DBG("mode 0x%2.2x", chan->mode);
- len = min_t(unsigned int, len, sizeof(opts));
+ len = min(len, sizeof(opts));
if (copy_to_user(optval, (char *) &opts, len))
err = -EFAULT;
@@ -553,7 +554,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
cinfo.hci_handle = chan->conn->hcon->handle;
memcpy(cinfo.dev_class, chan->conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *) &cinfo, len))
err = -EFAULT;
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 8d6fce9005bd..4f54c7df3a94 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -1937,7 +1937,7 @@ static struct rfcomm_session *rfcomm_process_rx(struct rfcomm_session *s)
/* Get data directly from socket receive queue without copying it. */
while ((skb = skb_dequeue(&sk->sk_receive_queue))) {
skb_orphan(skb);
- if (!skb_linearize(skb)) {
+ if (!skb_linearize(skb) && sk->sk_state != BT_CLOSED) {
s = rfcomm_recv_frame(s, skb);
if (!s)
break;
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 8244d3ae185b..2115ca6d7e17 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -825,7 +825,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
struct sock *sk = sock->sk;
- int len, err = 0;
+ int err = 0;
struct bt_voice voice;
u32 opt;
@@ -841,10 +841,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
}
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -861,11 +860,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
voice.setting = sco_pi(sk)->setting;
- len = min_t(unsigned int, sizeof(voice), optlen);
- if (copy_from_sockptr(&voice, optval, len)) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+ optlen);
+ if (err)
break;
- }
/* Explicitly check for these values */
if (voice.setting != BT_VOICE_TRANSPARENT &&
@@ -878,10 +876,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
break;
case BT_PKT_STATUS:
- if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
- err = -EFAULT;
+ err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+ if (err)
break;
- }
if (opt)
sco_pi(sk)->cmsg_mask |= SCO_CMSG_PKT_STATUS;
@@ -904,7 +901,8 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
struct sock *sk = sock->sk;
struct sco_options opts;
struct sco_conninfo cinfo;
- int len, err = 0;
+ int err = 0;
+ size_t len;
BT_DBG("sk %p", sk);
@@ -926,7 +924,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
BT_DBG("mtu %d", opts.mtu);
- len = min_t(unsigned int, len, sizeof(opts));
+ len = min(len, sizeof(opts));
if (copy_to_user(optval, (char *)&opts, len))
err = -EFAULT;
@@ -944,7 +942,7 @@ static int sco_sock_getsockopt_old(struct socket *sock, int optname,
cinfo.hci_handle = sco_pi(sk)->conn->hcon->handle;
memcpy(cinfo.dev_class, sco_pi(sk)->conn->hcon->dev_class, 3);
- len = min_t(unsigned int, len, sizeof(cinfo));
+ len = min(len, sizeof(cinfo));
if (copy_to_user(optval, (char *)&cinfo, len))
err = -EFAULT;
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bab14186f9ad..14a06d8b1a2d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1070,6 +1070,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
struct ebt_table_info *newinfo;
struct ebt_replace tmp;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1309,6 +1311,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
{
struct ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
@@ -2238,6 +2242,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg,
{
struct compat_ebt_replace hlp;
+ if (len < sizeof(hlp))
+ return -EINVAL;
if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
return -EFAULT;
diff --git a/net/core/dev.c b/net/core/dev.c
index d8582a485f2e..8dc7653a22df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2324,7 +2324,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_lock();
again:
list_for_each_entry_rcu(ptype, ptype_list, list) {
- if (ptype->ignore_outgoing)
+ if (READ_ONCE(ptype->ignore_outgoing))
continue;
/* Never send packets back to the socket
@@ -8795,7 +8795,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user);
int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name)
{
- size_t size = sizeof(sa->sa_data);
+ size_t size = sizeof(sa->sa_data_min);
struct net_device *dev;
int ret = 0;
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index e508901de2c9..a7aa5915aa10 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -245,7 +245,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
if (ifr->ifr_hwaddr.sa_family != dev->type)
return -EINVAL;
memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
- min(sizeof(ifr->ifr_hwaddr.sa_data),
+ min(sizeof(ifr->ifr_hwaddr.sa_data_min),
(size_t)dev->addr_len));
call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
return 0;
diff --git a/net/core/filter.c b/net/core/filter.c
index 6cfc8fb0562a..49e4d1535cc8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9903,8 +9903,7 @@ int sk_detach_filter(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_detach_filter);
-int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
- unsigned int len)
+int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
{
struct sock_fprog_kern *fprog;
struct sk_filter *filter;
@@ -9935,7 +9934,7 @@ int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
goto out;
ret = -EFAULT;
- if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
+ if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
goto out;
/* Instead of bytes, the API requests to return the number
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 021dcfdae283..8938320f7ba3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -4903,10 +4903,9 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct ifinfomsg *ifm;
struct net_device *dev;
- struct nlattr *br_spec, *attr = NULL;
+ struct nlattr *br_spec, *attr, *br_flags_attr = NULL;
int rem, err = -EOPNOTSUPP;
u16 flags = 0;
- bool have_flags = false;
if (nlmsg_len(nlh) < sizeof(*ifm))
return -EINVAL;
@@ -4924,11 +4923,11 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
- have_flags = true;
+ br_flags_attr = attr;
flags = nla_get_u16(attr);
}
@@ -4972,8 +4971,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}
- if (have_flags)
- memcpy(nla_data(attr), &flags, sizeof(flags));
+ if (br_flags_attr)
+ memcpy(nla_data(br_flags_attr), &flags, sizeof(flags));
out:
return err;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 3c7f160720d3..d09849cb60f0 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -105,7 +105,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
if (fd < 0 || !(file = fget_raw(fd)))
return -EBADF;
/* don't allow io_uring files */
- if (io_uring_get_socket(file)) {
+ if (io_is_uring_fops(file)) {
fput(file);
return -EINVAL;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index e8aa90b909ed..7be0310327db 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -644,8 +644,8 @@ out:
return ret;
}
-static int sock_getbindtodevice(struct sock *sk, char __user *optval,
- int __user *optlen, int len)
+static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
@@ -668,12 +668,12 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
len = strlen(devname) + 1;
ret = -EFAULT;
- if (copy_to_user(optval, devname, len))
+ if (copy_to_sockptr(optval, devname, len))
goto out;
zero:
ret = -EFAULT;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
goto out;
ret = 0;
@@ -1281,22 +1281,25 @@ static void cred_to_ucred(struct pid *pid, const struct cred *cred,
}
}
-static int groups_to_user(gid_t __user *dst, const struct group_info *src)
+static int groups_to_user(sockptr_t dst, const struct group_info *src)
{
struct user_namespace *user_ns = current_user_ns();
int i;
- for (i = 0; i < src->ngroups; i++)
- if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
+ for (i = 0; i < src->ngroups; i++) {
+ gid_t gid = from_kgid_munged(user_ns, src->gid[i]);
+
+ if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
return -EFAULT;
+ }
return 0;
}
-int sock_getsockopt(struct socket *sock, int level, int optname,
- char __user *optval, int __user *optlen)
+static int sk_getsockopt(struct sock *sk, int level, int optname,
+ sockptr_t optval, sockptr_t optlen)
{
- struct sock *sk = sock->sk;
+ struct socket *sock = sk->sk_socket;
union {
int val;
@@ -1312,7 +1315,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
int lv = sizeof(int);
int len;
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
if (len < 0)
return -EINVAL;
@@ -1445,7 +1448,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
spin_unlock(&sk->sk_peer_lock);
- if (copy_to_user(optval, &peercred, len))
+ if (copy_to_sockptr(optval, &peercred, len))
return -EFAULT;
goto lenout;
}
@@ -1463,11 +1466,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
if (len < n * sizeof(gid_t)) {
len = n * sizeof(gid_t);
put_cred(cred);
- return put_user(len, optlen) ? -EFAULT : -ERANGE;
+ return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE;
}
len = n * sizeof(gid_t);
- ret = groups_to_user((gid_t __user *)optval, cred->group_info);
+ ret = groups_to_user(optval, cred->group_info);
put_cred(cred);
if (ret)
return ret;
@@ -1483,7 +1486,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
return -ENOTCONN;
if (lv < len)
return -EINVAL;
- if (copy_to_user(optval, address, len))
+ if (copy_to_sockptr(optval, address, len))
return -EFAULT;
goto lenout;
}
@@ -1500,7 +1503,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_PEERSEC:
- return security_socket_getpeersec_stream(sock, optval, optlen, len);
+ return security_socket_getpeersec_stream(sock,
+ optval, optlen, len);
case SO_MARK:
v.val = sk->sk_mark;
@@ -1528,7 +1532,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
return sock_getbindtodevice(sk, optval, optlen, len);
case SO_GET_FILTER:
- len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
+ len = sk_get_filter(sk, optval, len);
if (len < 0)
return len;
@@ -1575,7 +1579,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
sk_get_meminfo(sk, meminfo);
len = min_t(unsigned int, len, sizeof(meminfo));
- if (copy_to_user(optval, &meminfo, len))
+ if (copy_to_sockptr(optval, &meminfo, len))
return -EFAULT;
goto lenout;
@@ -1625,14 +1629,22 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
if (len > lv)
len = lv;
- if (copy_to_user(optval, &v, len))
+ if (copy_to_sockptr(optval, &v, len))
return -EFAULT;
lenout:
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
return 0;
}
+int sock_getsockopt(struct socket *sock, int level, int optname,
+ char __user *optval, int __user *optlen)
+{
+ return sk_getsockopt(sock->sk, level, optname,
+ USER_SOCKPTR(optval),
+ USER_SOCKPTR(optlen));
+}
+
/*
* Initialize an sk_lock.
*
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index c9c45b935f99..bce65b519ee8 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -189,7 +189,7 @@ int sock_diag_register(const struct sock_diag_handler *hndl)
if (sock_diag_handlers[hndl->family])
err = -EBUSY;
else
- sock_diag_handlers[hndl->family] = hndl;
+ WRITE_ONCE(sock_diag_handlers[hndl->family], hndl);
mutex_unlock(&sock_diag_table_mutex);
return err;
@@ -205,7 +205,7 @@ void sock_diag_unregister(const struct sock_diag_handler *hnld)
mutex_lock(&sock_diag_table_mutex);
BUG_ON(sock_diag_handlers[family] != hnld);
- sock_diag_handlers[family] = NULL;
+ WRITE_ONCE(sock_diag_handlers[family], NULL);
mutex_unlock(&sock_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);
@@ -223,7 +223,7 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);
- if (sock_diag_handlers[req->sdiag_family] == NULL)
+ if (READ_ONCE(sock_diag_handlers[req->sdiag_family]) == NULL)
sock_load_diag_module(req->sdiag_family, 0);
mutex_lock(&sock_diag_table_mutex);
@@ -282,12 +282,12 @@ static int sock_diag_bind(struct net *net, int group)
switch (group) {
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
- if (!sock_diag_handlers[AF_INET])
+ if (!READ_ONCE(sock_diag_handlers[AF_INET]))
sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
- if (!sock_diag_handlers[AF_INET6])
+ if (!READ_ONCE(sock_diag_handlers[AF_INET6]))
sock_load_diag_module(AF_INET6, 0);
break;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index f375ef150149..52e395a189df 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -422,6 +422,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
struct sock *sk;
int err = 0;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
raw_spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
@@ -955,6 +958,9 @@ static int sock_hash_delete_elem(struct bpf_map *map, void *key)
struct bpf_shtab_elem *elem;
int ret = -ENOENT;
+ if (irqs_disabled())
+ return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index dac65180c4ef..61cb40368723 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -164,17 +164,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
eth = (struct ethhdr *)skb->data;
skb_pull_inline(skb, ETH_HLEN);
- if (unlikely(!ether_addr_equal_64bits(eth->h_dest,
- dev->dev_addr))) {
- if (unlikely(is_multicast_ether_addr_64bits(eth->h_dest))) {
- if (ether_addr_equal_64bits(eth->h_dest, dev->broadcast))
- skb->pkt_type = PACKET_BROADCAST;
- else
- skb->pkt_type = PACKET_MULTICAST;
- } else {
- skb->pkt_type = PACKET_OTHERHOST;
- }
- }
+ eth_skb_pkt_type(skb, dev);
/*
* Some variants of DSA tagging don't have an ethertype field
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 5c5d4485aaab..8edc0f792c05 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -237,6 +237,10 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct list_head *node_db,
*/
if (ethhdr->h_proto == htons(ETH_P_PRP) ||
ethhdr->h_proto == htons(ETH_P_HSR)) {
+ /* Check if skb contains hsr_ethhdr */
+ if (skb->mac_len < sizeof(struct hsr_ethhdr))
+ return NULL;
+
/* Use the existing sequence_nr from the tag as starting point
* for filtering duplicate frames.
*/
@@ -329,9 +333,12 @@ void hsr_handle_sup_frame(struct hsr_frame_info *frame)
node_real->addr_B_port = port_rcv->type;
spin_lock_bh(&hsr->list_lock);
- list_del_rcu(&node_curr->mac_list);
+ if (!node_curr->removed) {
+ list_del_rcu(&node_curr->mac_list);
+ node_curr->removed = true;
+ kfree_rcu(node_curr, rcu_head);
+ }
spin_unlock_bh(&hsr->list_lock);
- kfree_rcu(node_curr, rcu_head);
done:
/* PRP uses v0 header */
@@ -508,9 +515,12 @@ void hsr_prune_nodes(struct timer_list *t)
if (time_is_before_jiffies(timestamp +
msecs_to_jiffies(HSR_NODE_FORGET_TIME))) {
hsr_nl_nodedown(hsr, node->macaddress_A);
- list_del_rcu(&node->mac_list);
- /* Note that we need to free this entry later: */
- kfree_rcu(node, rcu_head);
+ if (!node->removed) {
+ list_del_rcu(&node->mac_list);
+ node->removed = true;
+ /* Note that we need to free this entry later: */
+ kfree_rcu(node, rcu_head);
+ }
}
}
spin_unlock_bh(&hsr->list_lock);
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index d68017ad29b4..f5ff87f049df 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -86,6 +86,7 @@ struct hsr_node {
bool san_a;
bool san_b;
u16 seq_out[HSR_PT_PORTS];
+ bool removed;
struct rcu_head rcu_head;
};
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index f7e284f23b1f..b727650d8a8e 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -148,14 +148,21 @@ static struct notifier_block hsr_nb = {
static int __init hsr_init(void)
{
- int res;
+ int err;
BUILD_BUG_ON(sizeof(struct hsr_tag) != HSR_HLEN);
- register_netdevice_notifier(&hsr_nb);
- res = hsr_netlink_init();
+ err = register_netdevice_notifier(&hsr_nb);
+ if (err)
+ return err;
+
+ err = hsr_netlink_init();
+ if (err) {
+ unregister_netdevice_notifier(&hsr_nb);
+ return err;
+ }
- return res;
+ return 0;
}
static void __exit hsr_exit(void)
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index e00788cd9ca4..55cf4e73f11d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1111,7 +1111,8 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev)
if (neigh) {
if (!(neigh->nud_state & NUD_NOARP)) {
read_lock_bh(&neigh->lock);
- memcpy(r->arp_ha.sa_data, neigh->ha, dev->addr_len);
+ memcpy(r->arp_ha.sa_data, neigh->ha,
+ min(dev->addr_len, (unsigned char)sizeof(r->arp_ha.sa_data_min)));
r->arp_flags = arp_state_to_flags(neigh);
read_unlock_bh(&neigh->lock);
r->arp_ha.sa_family = dev->type;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index da1ca8081c03..9ac7d47d27b8 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1798,6 +1798,21 @@ done:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv4.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
{
const struct nlmsghdr *nlh = cb->nlh;
@@ -1849,8 +1864,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
idx = 0;
head = &tgt_net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv4.dev_addr_genid) ^
- tgt_net->dev_base_seq;
+ cb->seq = inet_base_seq(tgt_net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -2249,8 +2263,7 @@ static int inet_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv4.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index cb55fede03c0..f0a313747b95 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2493,8 +2493,8 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
goto done;
}
newpsl->sl_max = newpsl->sl_count = msf->imsf_numsrc;
- memcpy(newpsl->sl_addr, msf->imsf_slist,
- msf->imsf_numsrc * sizeof(msf->imsf_slist[0]));
+ memcpy(newpsl->sl_addr, msf->imsf_slist_flex,
+ flex_array_size(msf, imsf_slist_flex, msf->imsf_numsrc));
err = ip_mc_add_src(in_dev, &msf->imsf_multiaddr,
msf->imsf_fmode, newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
@@ -2526,11 +2526,10 @@ done:
err = ip_mc_leave_group(sk, &imr);
return err;
}
-
int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
- struct ip_msfilter __user *optval, int __user *optlen)
+ sockptr_t optval, sockptr_t optlen)
{
- int err, len, count, copycount;
+ int err, len, count, copycount, msf_size;
struct ip_mreqn imr;
__be32 addr = msf->imsf_multiaddr;
struct ip_mc_socklist *pmc;
@@ -2571,14 +2570,17 @@ int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
count = psl->sl_count;
}
copycount = count < msf->imsf_numsrc ? count : msf->imsf_numsrc;
- len = copycount * sizeof(psl->sl_addr[0]);
+ len = flex_array_size(psl, sl_addr, copycount);
msf->imsf_numsrc = count;
- if (put_user(IP_MSFILTER_SIZE(copycount), optlen) ||
- copy_to_user(optval, msf, IP_MSFILTER_SIZE(0))) {
+ msf_size = IP_MSFILTER_SIZE(copycount);
+ if (copy_to_sockptr(optlen, &msf_size, sizeof(int)) ||
+ copy_to_sockptr(optval, msf, IP_MSFILTER_SIZE(0))) {
return -EFAULT;
}
if (len &&
- copy_to_user(&optval->imsf_slist[0], psl->sl_addr, len))
+ copy_to_sockptr_offset(optval,
+ offsetof(struct ip_msfilter, imsf_slist_flex),
+ psl->sl_addr, len))
return -EFAULT;
return 0;
done:
@@ -2586,7 +2588,7 @@ done:
}
int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
- struct sockaddr_storage __user *p)
+ sockptr_t optval, size_t ss_offset)
{
int i, count, copycount;
struct sockaddr_in *psin;
@@ -2616,15 +2618,17 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
count = psl ? psl->sl_count : 0;
copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
gsf->gf_numsrc = count;
- for (i = 0; i < copycount; i++, p++) {
+ for (i = 0; i < copycount; i++) {
struct sockaddr_storage ss;
psin = (struct sockaddr_in *)&ss;
memset(&ss, 0, sizeof(ss));
psin->sin_family = AF_INET;
psin->sin_addr.s_addr = psl->sl_addr[i];
- if (copy_to_user(p, &ss, sizeof(ss)))
+ if (copy_to_sockptr_offset(optval, ss_offset,
+ &ss, sizeof(ss)))
return -EFAULT;
+ ss_offset += sizeof(ss);
}
return 0;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b15c9ad0095a..6ebe43b4d28f 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -580,6 +580,20 @@ void inet_csk_clear_xmit_timers(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_clear_xmit_timers);
+void inet_csk_clear_xmit_timers_sync(struct sock *sk)
+{
+ struct inet_connection_sock *icsk = inet_csk(sk);
+
+ /* ongoing timer handlers need to acquire socket lock. */
+ sock_not_owned_by_me(sk);
+
+ icsk->icsk_pending = icsk->icsk_ack.pending = 0;
+
+ sk_stop_timer_sync(sk, &icsk->icsk_retransmit_timer);
+ sk_stop_timer_sync(sk, &icsk->icsk_delack_timer);
+ sk_stop_timer_sync(sk, &sk->sk_timer);
+}
+
void inet_csk_delete_keepalive_timer(struct sock *sk)
{
sk_stop_timer(sk, &sk->sk_timer);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fa9f1de58df4..27a5a7d66d18 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -57,7 +57,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int proto)
return ERR_PTR(-ENOENT);
}
- if (!inet_diag_table[proto])
+ if (!READ_ONCE(inet_diag_table[proto]))
sock_load_diag_module(AF_INET, proto);
mutex_lock(&inet_diag_table_mutex);
@@ -1413,7 +1413,7 @@ int inet_diag_register(const struct inet_diag_handler *h)
mutex_lock(&inet_diag_table_mutex);
err = -EEXIST;
if (!inet_diag_table[type]) {
- inet_diag_table[type] = h;
+ WRITE_ONCE(inet_diag_table[type], h);
err = 0;
}
mutex_unlock(&inet_diag_table_mutex);
@@ -1430,7 +1430,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h)
return;
mutex_lock(&inet_diag_table_mutex);
- inet_diag_table[type] = NULL;
+ WRITE_ONCE(inet_diag_table[type], NULL);
mutex_unlock(&inet_diag_table_mutex);
}
EXPORT_SYMBOL_GPL(inet_diag_unregister);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index c411c87ae865..85cb44bfa3ba 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -254,12 +254,12 @@ void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
}
EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
+/* Remove all non full sockets (TIME_WAIT and NEW_SYN_RECV) for dead netns */
void inet_twsk_purge(struct inet_hashinfo *hashinfo, int family)
{
- struct inet_timewait_sock *tw;
- struct sock *sk;
struct hlist_nulls_node *node;
unsigned int slot;
+ struct sock *sk;
for (slot = 0; slot <= hashinfo->ehash_mask; slot++) {
struct inet_ehash_bucket *head = &hashinfo->ehash[slot];
@@ -268,25 +268,35 @@ restart_rcu:
rcu_read_lock();
restart:
sk_nulls_for_each_rcu(sk, node, &head->chain) {
- if (sk->sk_state != TCP_TIME_WAIT)
+ int state = inet_sk_state_load(sk);
+
+ if ((1 << state) & ~(TCPF_TIME_WAIT |
+ TCPF_NEW_SYN_RECV))
continue;
- tw = inet_twsk(sk);
- if ((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->count))
+
+ if (sk->sk_family != family ||
+ refcount_read(&sock_net(sk)->count))
continue;
- if (unlikely(!refcount_inc_not_zero(&tw->tw_refcnt)))
+ if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
continue;
- if (unlikely((tw->tw_family != family) ||
- refcount_read(&twsk_net(tw)->count))) {
- inet_twsk_put(tw);
+ if (unlikely(sk->sk_family != family ||
+ refcount_read(&sock_net(sk)->count))) {
+ sock_gen_put(sk);
goto restart;
}
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule_put(tw);
+ if (state == TCP_TIME_WAIT) {
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ } else {
+ struct request_sock *req = inet_reqsk(sk);
+
+ inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
+ req);
+ }
local_bh_enable();
goto restart_rcu;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index a6ad0fe1387c..0ac652fef06d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -278,8 +278,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
tpi->flags | TUNNEL_NO_KEY,
iph->saddr, iph->daddr, 0);
} else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
ver = ershdr->ver;
+ iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
tpi->flags | TUNNEL_KEY,
iph->saddr, iph->daddr, tpi->key);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 1b35afd326b8..b300d0988d52 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -670,12 +670,11 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
struct sockaddr_storage *group,
struct sockaddr_storage *list)
{
- int msize = IP_MSFILTER_SIZE(numsrc);
struct ip_msfilter *msf;
struct sockaddr_in *psin;
int err, i;
- msf = kmalloc(msize, GFP_KERNEL);
+ msf = kmalloc(IP_MSFILTER_SIZE(numsrc), GFP_KERNEL);
if (!msf)
return -ENOBUFS;
@@ -691,7 +690,7 @@ static int set_mcast_msfilter(struct sock *sk, int ifindex,
if (psin->sin_family != AF_INET)
goto Eaddrnotavail;
- msf->imsf_slist[i] = psin->sin_addr.s_addr;
+ msf->imsf_slist_flex[i] = psin->sin_addr.s_addr;
}
err = ip_mc_msfilter(sk, msf, ifindex);
kfree(msf);
@@ -798,7 +797,8 @@ static int ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval, int optlen)
goto out_free_gsf;
err = set_mcast_msfilter(sk, gsf->gf_interface, gsf->gf_numsrc,
- gsf->gf_fmode, &gsf->gf_group, gsf->gf_slist);
+ gsf->gf_fmode, &gsf->gf_group,
+ gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return err;
@@ -807,7 +807,7 @@ out_free_gsf:
static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
unsigned int n;
void *p;
@@ -821,7 +821,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
p = kmalloc(optlen + 4, GFP_KERNEL);
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
err = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
@@ -834,7 +834,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_gsf;
err = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_gsf;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
@@ -842,7 +842,7 @@ static int compat_ip_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
- &gf32->gf_group, gf32->gf_slist);
+ &gf32->gf_group, gf32->gf_slist_flex);
out_free_gsf:
kfree(p);
return err;
@@ -1460,37 +1460,37 @@ static bool getsockopt_needs_rtnl(int optname)
return false;
}
-static int ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen, int len)
+static int ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
- struct group_filter __user *p = optval;
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter gsf;
- int num;
+ int num, gsf_size;
int err;
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gsf, p, size0))
+ if (copy_from_sockptr(&gsf, optval, size0))
return -EFAULT;
num = gsf.gf_numsrc;
- err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gsf, optval,
+ offsetof(struct group_filter, gf_slist_flex));
if (err)
return err;
if (gsf.gf_numsrc < num)
num = gsf.gf_numsrc;
- if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
- copy_to_user(p, &gsf, size0))
+ gsf_size = GROUP_FILTER_SIZE(num);
+ if (copy_to_sockptr(optlen, &gsf_size, sizeof(int)) ||
+ copy_to_sockptr(optval, &gsf, size0))
return -EFAULT;
return 0;
}
-static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
- int __user *optlen, int len)
+static int compat_ip_get_mcast_msfilter(struct sock *sk, sockptr_t optval,
+ sockptr_t optlen, int len)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
- struct compat_group_filter __user *p = optval;
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter gf32;
struct group_filter gf;
int num;
@@ -1498,7 +1498,7 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
if (len < size0)
return -EINVAL;
- if (copy_from_user(&gf32, p, size0))
+ if (copy_from_sockptr(&gf32, optval, size0))
return -EFAULT;
gf.gf_interface = gf32.gf_interface;
@@ -1506,21 +1506,24 @@ static int compat_ip_get_mcast_msfilter(struct sock *sk, void __user *optval,
num = gf.gf_numsrc = gf32.gf_numsrc;
gf.gf_group = gf32.gf_group;
- err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+ err = ip_mc_gsfget(sk, &gf, optval,
+ offsetof(struct compat_group_filter, gf_slist_flex));
if (err)
return err;
if (gf.gf_numsrc < num)
num = gf.gf_numsrc;
len = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
- if (put_user(len, optlen) ||
- put_user(gf.gf_fmode, &p->gf_fmode) ||
- put_user(gf.gf_numsrc, &p->gf_numsrc))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_fmode),
+ &gf.gf_fmode, sizeof(gf.gf_fmode)) ||
+ copy_to_sockptr_offset(optval, offsetof(struct compat_group_filter, gf_numsrc),
+ &gf.gf_numsrc, sizeof(gf.gf_numsrc)))
return -EFAULT;
return 0;
}
static int do_ip_getsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int __user *optlen)
+ sockptr_t optval, sockptr_t optlen)
{
struct inet_sock *inet = inet_sk(sk);
bool needs_rtnl = getsockopt_needs_rtnl(optname);
@@ -1533,7 +1536,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (ip_mroute_opt(optname))
return ip_mroute_getsockopt(sk, optname, optval, optlen);
- if (get_user(len, optlen))
+ if (copy_from_sockptr(&len, optlen, sizeof(int)))
return -EFAULT;
if (len < 0)
return -EINVAL;
@@ -1558,15 +1561,17 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
inet_opt->opt.optlen);
release_sock(sk);
- if (opt->optlen == 0)
- return put_user(0, optlen);
+ if (opt->optlen == 0) {
+ len = 0;
+ return copy_to_sockptr(optlen, &len, sizeof(int));
+ }
ip_options_undo(opt);
len = min_t(unsigned int, len, opt->optlen);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, opt->__data, len))
+ if (copy_to_sockptr(optval, opt->__data, len))
return -EFAULT;
return 0;
}
@@ -1657,9 +1662,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
addr.s_addr = inet->mc_addr;
release_sock(sk);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &addr, len))
+ if (copy_to_sockptr(optval, &addr, len))
return -EFAULT;
return 0;
}
@@ -1671,12 +1676,11 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
err = -EINVAL;
goto out;
}
- if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
+ if (copy_from_sockptr(&msf, optval, IP_MSFILTER_SIZE(0))) {
err = -EFAULT;
goto out;
}
- err = ip_mc_msfget(sk, &msf,
- (struct ip_msfilter __user *)optval, optlen);
+ err = ip_mc_msfget(sk, &msf, optval, optlen);
goto out;
}
case MCAST_MSFILTER:
@@ -1698,8 +1702,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (sk->sk_type != SOCK_STREAM)
return -ENOPROTOOPT;
- msg.msg_control_is_user = true;
- msg.msg_control_user = optval;
+ if (optval.is_kernel) {
+ msg.msg_control_is_user = false;
+ msg.msg_control = optval.kernel;
+ } else {
+ msg.msg_control_is_user = true;
+ msg.msg_control_user = optval.user;
+ }
msg.msg_controllen = len;
msg.msg_flags = in_compat_syscall() ? MSG_CMSG_COMPAT : 0;
@@ -1720,7 +1729,7 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
}
len -= msg.msg_controllen;
- return put_user(len, optlen);
+ return copy_to_sockptr(optlen, &len, sizeof(int));
}
case IP_FREEBIND:
val = inet->freebind;
@@ -1743,15 +1752,15 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
unsigned char ucval = (unsigned char)val;
len = 1;
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &ucval, 1))
+ if (copy_to_sockptr(optval, &ucval, 1))
return -EFAULT;
} else {
len = min_t(unsigned int, sizeof(int), len);
- if (put_user(len, optlen))
+ if (copy_to_sockptr(optlen, &len, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, len))
+ if (copy_to_sockptr(optval, &val, len))
return -EFAULT;
}
return 0;
@@ -1768,7 +1777,8 @@ int ip_getsockopt(struct sock *sk, int level,
{
int err;
- err = do_ip_getsockopt(sk, level, optname, optval, optlen);
+ err = do_ip_getsockopt(sk, level, optname,
+ USER_SOCKPTR(optval), USER_SOCKPTR(optlen));
#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 99f70b990eb1..0953d805cbbe 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -364,7 +364,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
bool log_ecn_error)
{
const struct iphdr *iph = ip_hdr(skb);
- int err;
+ int nh, err;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
@@ -390,8 +390,21 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+ iph = (struct iphdr *)(skb->head + nh);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -540,6 +553,20 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
+static void ip_tunnel_adj_headroom(struct net_device *dev, unsigned int headroom)
+{
+ /* we must cap headroom to some upperlimit, else pskb_expand_head
+ * will overflow header offsets in skb_headers_offset_update().
+ */
+ static const unsigned int max_allowed = 512;
+
+ if (headroom > max_allowed)
+ headroom = max_allowed;
+
+ if (headroom > READ_ONCE(dev->needed_headroom))
+ WRITE_ONCE(dev->needed_headroom, headroom);
+}
+
void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
u8 proto, int tunnel_hlen)
{
@@ -613,13 +640,13 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
}
headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, headroom);
-
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
@@ -797,16 +824,16 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > READ_ONCE(dev->needed_headroom))
- WRITE_ONCE(dev->needed_headroom, max_headroom);
- if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
dev->stats.tx_dropped++;
kfree_skb(skb);
return;
}
+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
df, !net_eq(tunnel->net, dev_net(dev)));
return;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index be1976536f1c..db184cb826b9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1540,7 +1540,8 @@ out:
}
/* Getsock opt support for the multicast routing system. */
-int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
+int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
+ sockptr_t optlen)
{
int olr;
int val;
@@ -1571,14 +1572,16 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
return -ENOPROTOOPT;
}
- if (get_user(olr, optlen))
+ if (copy_from_sockptr(&olr, optlen, sizeof(int)))
return -EFAULT;
- olr = min_t(unsigned int, olr, sizeof(int));
if (olr < 0)
return -EINVAL;
- if (put_user(olr, optlen))
+
+ olr = min_t(unsigned int, olr, sizeof(int));
+
+ if (copy_to_sockptr(optlen, &olr, sizeof(int)))
return -EFAULT;
- if (copy_to_user(optval, &val, olr))
+ if (copy_to_sockptr(optval, &val, olr))
return -EFAULT;
return 0;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index d6d45d820d79..5823e89b8a73 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -955,6 +955,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -963,6 +965,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1253,6 +1257,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct arpt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1261,6 +1267,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ec981618b7b2..22e9ff592cd7 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1109,6 +1109,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1117,6 +1119,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1493,6 +1497,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ipt_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1501,6 +1507,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d360c7d70e8a..cc409cc0789c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -955,13 +955,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
peer->rate_last = jiffies;
++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
- if (log_martians &&
+ if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
peer->n_redirects == ip_rt_redirect_number)
net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
-#endif
}
out_put_peer:
inet_putpeer(peer);
@@ -2090,6 +2088,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
int err = -EINVAL;
u32 tag = 0;
+ if (!in_dev)
+ return -EINVAL;
+
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a5c15e2d193f..ac6cb2dc6038 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2717,6 +2717,8 @@ void tcp_close(struct sock *sk, long timeout)
lock_sock(sk);
__tcp_close(sk, timeout);
release_sock(sk);
+ if (!sk->sk_net_refcnt)
+ inet_csk_clear_xmit_timers_sync(sk);
sock_put(sk);
}
EXPORT_SYMBOL(tcp_close);
@@ -3742,11 +3744,11 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case TCP_MAXSEG:
val = tp->mss_cache;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 476f79f1563a..16ff3962b24d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -602,6 +602,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk,
}
DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
void udp_encap_enable(void)
{
static_branch_inc(&udp_encap_needed_key);
@@ -1118,16 +1125,17 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (msg->msg_controllen) {
err = udp_cmsg_send(sk, msg, &ipc.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip_cmsg_send(sk, msg, &ipc,
sk->sk_family == AF_INET6);
+ connected = 0;
+ }
if (unlikely(err < 0)) {
kfree(ipc.opt);
return err;
}
if (ipc.opt)
free = 1;
- connected = 0;
}
if (!ipc.opt) {
struct ip_options_rcu *inet_opt;
@@ -2748,11 +2756,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case UDP_CORK:
val = READ_ONCE(up->corkflag);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index f4b8e56068e0..445d8bc30fdd 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -512,6 +512,11 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
unsigned int off = skb_gro_offset(skb);
int flush = 1;
+ /* We can do L4 aggregation only if the packet can't land in a tunnel
+ * otherwise we could corrupt the inner stream. Detecting such packets
+ * cannot be foolproof and the aggregation might still happen in some
+ * cases. Such packets should be caught in udp_unexpected_gso later.
+ */
NAPI_GRO_CB(skb)->is_flist = 0;
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
@@ -668,13 +673,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 79787a1f5ab3..8a6f4cdd5a48 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -698,6 +698,22 @@ errout:
return err;
}
+/* Combine dev_addr_genid and dev_base_seq to detect changes.
+ */
+static u32 inet6_base_seq(const struct net *net)
+{
+ u32 res = atomic_read(&net->ipv6.dev_addr_genid) +
+ net->dev_base_seq;
+
+ /* Must not return 0 (see nl_dump_check_consistent()).
+ * Chose a value far away from 0.
+ */
+ if (!res)
+ res = 0x80000000;
+ return res;
+}
+
+
static int inet6_netconf_dump_devconf(struct sk_buff *skb,
struct netlink_callback *cb)
{
@@ -731,8 +747,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb,
idx = 0;
head = &net->dev_index_head[h];
rcu_read_lock();
- cb->seq = atomic_read(&net->ipv6.dev_addr_genid) ^
- net->dev_base_seq;
+ cb->seq = inet6_base_seq(net);
hlist_for_each_entry_rcu(dev, head, index_hlist) {
if (idx < s_idx)
goto cont;
@@ -2029,9 +2044,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
if (ipv6_addr_equal(&ifp->addr, addr)) {
if (!dev || ifp->idev->dev == dev ||
!(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
- result = ifp;
- in6_ifa_hold(ifp);
- break;
+ if (in6_ifa_hold_safe(ifp)) {
+ result = ifp;
+ break;
+ }
}
}
}
@@ -5288,7 +5304,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
}
rcu_read_lock();
- cb->seq = atomic_read(&tgt_net->ipv6.dev_addr_genid) ^ tgt_net->dev_base_seq;
+ cb->seq = inet6_base_seq(tgt_net);
for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
idx = 0;
head = &tgt_net->dev_index_head[h];
@@ -5421,9 +5437,10 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
}
addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer);
- if (!addr)
- return -EINVAL;
-
+ if (!addr) {
+ err = -EINVAL;
+ goto errout;
+ }
ifm = nlmsg_data(nlh);
if (ifm->ifa_index)
dev = dev_get_by_index(tgt_net, ifm->ifa_index);
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 3e4c87b29b11..55cd23b7a935 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -446,6 +446,11 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
+ nla_total_size(16); /* src */
}
+static void fib6_rule_flush_cache(struct fib_rules_ops *ops)
+{
+ rt_genid_bump_ipv6(ops->fro_net);
+}
+
static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.family = AF_INET6,
.rule_size = sizeof(struct fib6_rule),
@@ -458,6 +463,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
+ .flush_cache = fib6_rule_flush_cache,
.nlgroup = RTNLGRP_IPV6_RULE,
.policy = fib6_rule_policy,
.owner = THIS_MODULE,
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 608205c632c8..b79e571e5a86 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -643,19 +643,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
if (!w) {
/* New dump:
*
- * 1. hook callback destructor.
- */
- cb->args[3] = (long)cb->done;
- cb->done = fib6_dump_done;
-
- /*
- * 2. allocate and initialize walker.
+ * 1. allocate and initialize walker.
*/
w = kzalloc(sizeof(*w), GFP_ATOMIC);
if (!w)
return -ENOMEM;
w->func = fib6_dump_node;
cb->args[2] = (long)w;
+
+ /* 2. hook callback destructor.
+ */
+ cb->args[3] = (long)cb->done;
+ cb->done = fib6_dump_done;
+
}
arg.skb = skb;
@@ -1373,7 +1373,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
struct nl_info *info, struct netlink_ext_ack *extack)
{
struct fib6_table *table = rt->fib6_table;
- struct fib6_node *fn, *pn = NULL;
+ struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+ struct fib6_node *pn = NULL;
+#endif
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
@@ -1397,9 +1400,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
goto out;
}
+#ifdef CONFIG_IPV6_SUBTREES
pn = fn;
-#ifdef CONFIG_IPV6_SUBTREES
if (rt->fib6_src.plen) {
struct fib6_node *sn;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 2df1036330f8..13ac0ccdc8d7 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -533,6 +533,9 @@ static int ip6erspan_rcv(struct sk_buff *skb,
struct ip6_tnl *tunnel;
u8 ver;
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 7b4b457a8b87..0ac527cd5d56 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
goto out_free_gsf;
- ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+ ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return ret;
@@ -234,7 +234,7 @@ out_free_gsf:
static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
void *p;
int ret;
@@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
ret = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_p;
@@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_p;
ret = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_p;
ret = ip6_mc_msfilter(sk, &(struct group_filter){
.gf_interface = gf32->gf_interface,
.gf_group = gf32->gf_group,
.gf_fmode = gf32->gf_fmode,
- .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+ .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
out_free_p:
kfree(p);
@@ -1051,7 +1051,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
@@ -1065,7 +1065,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
num = gsf.gf_numsrc;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
if (!err) {
if (num > gsf.gf_numsrc)
num = gsf.gf_numsrc;
@@ -1080,7 +1080,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
@@ -1103,7 +1103,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
release_sock(sk);
if (err)
return err;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 99bb11d16712..df7cd3d285e4 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1127,6 +1127,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1135,6 +1137,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
@@ -1503,6 +1507,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
void *loc_cpu_entry;
struct ip6t_entry *iter;
+ if (len < sizeof(tmp))
+ return -EINVAL;
if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1511,6 +1517,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
return -ENOMEM;
if (tmp.num_counters == 0)
return -EINVAL;
+ if ((u64)len < (u64)tmp.size + sizeof(tmp))
+ return -EINVAL;
tmp.name[sizeof(tmp.name)-1] = 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b23e42efb3df..2d53c362f309 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5235,19 +5235,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
err = __ip6_ins_rt(nh->fib6_info, info, extack);
- fib6_info_release(nh->fib6_info);
-
- if (!err) {
- /* save reference to last route successfully inserted */
- rt_last = nh->fib6_info;
-
- /* save reference to first route for notification */
- if (!rt_notif)
- rt_notif = nh->fib6_info;
- }
- /* nh->fib6_info is used or freed at this point, reset to NULL*/
- nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
NL_SET_ERR_MSG_MOD(extack,
@@ -5255,6 +5243,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = nh;
goto add_errout;
}
+ /* save reference to last route successfully inserted */
+ rt_last = nh->fib6_info;
+
+ /* save reference to first route for notification */
+ if (!rt_notif)
+ rt_notif = nh->fib6_info;
/* Because each route is added like a single route we remove
* these flags after the first nexthop: if there is a collision,
@@ -5315,8 +5309,7 @@ add_errout:
cleanup:
list_for_each_entry_safe(nh, nh_safe, &rt6_nh_list, next) {
- if (nh->fib6_info)
- fib6_info_release(nh->fib6_info);
+ fib6_info_release(nh->fib6_info);
list_del(&nh->next);
kfree(nh);
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 2278c0234c49..a8439fded12d 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -451,22 +451,24 @@ int __init seg6_init(void)
{
int err;
- err = genl_register_family(&seg6_genl_family);
+ err = register_pernet_subsys(&ip6_segments_ops);
if (err)
goto out;
- err = register_pernet_subsys(&ip6_segments_ops);
+ err = genl_register_family(&seg6_genl_family);
if (err)
- goto out_unregister_genl;
+ goto out_unregister_pernet;
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
err = seg6_iptunnel_init();
if (err)
- goto out_unregister_pernet;
+ goto out_unregister_genl;
err = seg6_local_init();
- if (err)
- goto out_unregister_pernet;
+ if (err) {
+ seg6_iptunnel_exit();
+ goto out_unregister_genl;
+ }
#endif
#ifdef CONFIG_IPV6_SEG6_HMAC
@@ -487,11 +489,11 @@ out_unregister_iptun:
#endif
#endif
#ifdef CONFIG_IPV6_SEG6_LWTUNNEL
-out_unregister_pernet:
- unregister_pernet_subsys(&ip6_segments_ops);
-#endif
out_unregister_genl:
genl_unregister_family(&seg6_genl_family);
+#endif
+out_unregister_pernet:
+ unregister_pernet_subsys(&ip6_segments_ops);
goto out;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5385037209a6..8c9672e7a7dd 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -474,7 +474,7 @@ csum_copy_err:
goto try_again;
}
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
void udpv6_encap_enable(void)
{
static_branch_inc(&udpv6_encap_needed_key);
@@ -1453,9 +1453,11 @@ do_udp_sendmsg:
ipc6.opt = opt;
err = udp_cmsg_send(sk, msg, &ipc6.gso_size);
- if (err > 0)
+ if (err > 0) {
err = ip6_datagram_send_ctl(sock_net(sk), sk, msg, &fl6,
&ipc6);
+ connected = false;
+ }
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
@@ -1467,7 +1469,6 @@ do_udp_sendmsg:
}
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
- connected = false;
}
if (!opt) {
opt = txopt_get(np);
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ebee748f25b9..7752e1e921f8 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -169,13 +169,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
- if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
- skb->csum_level++;
- } else {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = 0;
- }
+ __skb_incr_checksum_unnecessary(skb);
return 0;
}
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 6f84978a7726..ed0dbdbba4d9 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -156,7 +156,7 @@ static char iucv_error_pathid[16] = "INVALID PATHID";
static LIST_HEAD(iucv_handler_list);
/*
- * iucv_path_table: an array of iucv_path structures.
+ * iucv_path_table: array of pointers to iucv_path structures.
*/
static struct iucv_path **iucv_path_table;
static unsigned long iucv_max_pathid;
@@ -542,7 +542,7 @@ static int iucv_enable(void)
get_online_cpus();
rc = -ENOMEM;
- alloc_size = iucv_max_pathid * sizeof(struct iucv_path);
+ alloc_size = iucv_max_pathid * sizeof(*iucv_path_table);
iucv_path_table = kzalloc(alloc_size, GFP_KERNEL);
if (!iucv_path_table)
goto out;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 39b3c7fbf9f6..7420b4f19b45 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1275,10 +1275,11 @@ static int kcm_getsockopt(struct socket *sock, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
switch (optname) {
case KCM_RECV_DISABLE:
val = kcm->rx_disabled;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 9746c624a550..eb3d81bcce6d 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -628,7 +628,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
back_from_confirm:
lock_sock(sk);
- ulen = len + skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0;
+ ulen = len + (skb_queue_empty(&sk->sk_write_queue) ? transhdrlen : 0);
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, &ipc6,
&fl6, (struct rt6_info *)dst,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 5ecc0f200944..b1d89c850f68 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1357,11 +1357,11 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
- len = min_t(unsigned int, len, sizeof(int));
-
if (len < 0)
return -EINVAL;
+ len = min_t(unsigned int, len, sizeof(int));
+
err = -ENOTCONN;
if (!sk->sk_user_data)
goto end;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 45bb6f275598..0c3da7771b48 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1811,15 +1811,14 @@ static int ieee80211_change_station(struct wiphy *wiphy,
}
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- sta->sdata->u.vlan.sta) {
- ieee80211_clear_fast_rx(sta);
+ sta->sdata->u.vlan.sta)
RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
- }
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
ieee80211_vif_dec_num_mcast(sta->sdata);
sta->sdata = vlansdata;
+ ieee80211_check_fast_rx(sta);
ieee80211_check_fast_xmit(sta);
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 2e84360990f0..44bd03c6b847 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -700,6 +700,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
if (ieee80211_vif_is_mesh(&sdata->vif))
mesh_accept_plinks_update(sdata);
+ ieee80211_check_fast_xmit(sta);
+
return 0;
out_remove:
sta_info_hash_del(local, sta);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 55abc06214c4..0d6d12fc3c07 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2959,7 +2959,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
sdata->vif.type == NL80211_IFTYPE_STATION)
goto out;
- if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded)
goto out;
if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 55550ead2ced..a4cc9d077c59 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -265,19 +265,27 @@ fail:
return -ENOMEM;
}
+static void mac802154_llsec_key_del_rcu(struct rcu_head *rcu)
+{
+ struct ieee802154_llsec_key_entry *pos;
+ struct mac802154_llsec_key *mkey;
+
+ pos = container_of(rcu, struct ieee802154_llsec_key_entry, rcu);
+ mkey = container_of(pos->key, struct mac802154_llsec_key, key);
+
+ llsec_key_put(mkey);
+ kfree_sensitive(pos);
+}
+
int mac802154_llsec_key_del(struct mac802154_llsec *sec,
const struct ieee802154_llsec_key_id *key)
{
struct ieee802154_llsec_key_entry *pos;
list_for_each_entry(pos, &sec->table.keys, list) {
- struct mac802154_llsec_key *mkey;
-
- mkey = container_of(pos->key, struct mac802154_llsec_key, key);
-
if (llsec_key_id_equal(&pos->id, key)) {
list_del_rcu(&pos->list);
- llsec_key_put(mkey);
+ call_rcu(&pos->rcu, mac802154_llsec_key_del_rcu);
return 0;
}
}
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index a536586742f2..d7ca71c59754 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -13,17 +13,22 @@
#include <uapi/linux/mptcp.h>
#include "protocol.h"
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
+ bool slow;
int err;
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return 0;
+
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
+ slow = lock_sock_fast(sk);
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
@@ -69,11 +74,13 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
}
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_cancel(skb, start);
return err;
}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 72d944e6a641..6be7e7592291 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2052,8 +2052,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ struct ipv6_pinfo *newnp;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
#endif
+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+ const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
struct sock *mptcp_sk_clone(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct request_sock *req)
@@ -2073,6 +2115,13 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
__mptcp_init_sock(nsk);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ mptcp_copy_ip6_options(nsk, sk);
+ else
+#endif
+ mptcp_copy_ip_options(nsk, sk);
+
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
@@ -2169,9 +2218,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
__MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
local_bh_enable();
- } else {
- MPTCP_INC_STATS(sock_net(sk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
}
out:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 607519246bf2..276fe9f44df7 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -595,6 +595,9 @@ create_child:
if (fallback_is_fatal)
goto dispose_child;
+ if (fallback)
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+
subflow_drop_ctx(child);
goto out;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index a0921adc31a9..1e689c714127 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -126,7 +126,8 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (sctph->source != cp->vport || payload_csum ||
skb->ip_summed == CHECKSUM_PARTIAL) {
sctph->source = cp->vport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ sctp_nat_csum(skb, sctph, sctphoff);
} else {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
@@ -174,7 +175,8 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
(skb->ip_summed == CHECKSUM_PARTIAL &&
!(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
sctph->dest = cp->dport;
- sctp_nat_csum(skb, sctph, sctphoff);
+ if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb))
+ sctp_nat_csum(skb, sctph, sctphoff);
} else if (skb->ip_summed != CHECKSUM_PARTIAL) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index e697a824b001..540d97715bd2 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -533,6 +533,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
/* Get fields bitmap */
if (nf_h323_error_boundary(bs, 0, f->sz))
return H323_ERROR_BOUND;
+ if (f->sz > 32)
+ return H323_ERROR_RANGE;
bmp = get_bitmap(bs, f->sz);
if (base)
*(unsigned int *)base = bmp;
@@ -589,6 +591,8 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f,
bmp2_len = get_bits(bs, 7) + 1;
if (nf_h323_error_boundary(bs, 0, bmp2_len))
return H323_ERROR_BOUND;
+ if (bmp2_len > 32)
+ return H323_ERROR_RANGE;
bmp2 = get_bitmap(bs, bmp2_len);
bmp |= bmp2 >> f->sz;
if (base)
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index e7545bcca805..6b2a215b2786 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -299,7 +299,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
- } else {
+ } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f586e8b3c6cf..858d09b54eaa 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1084,6 +1084,24 @@ static void nf_tables_table_disable(struct net *net, struct nft_table *table)
#define __NFT_TABLE_F_UPDATE (__NFT_TABLE_F_WAS_DORMANT | \
__NFT_TABLE_F_WAS_AWAKEN)
+static bool nft_table_pending_update(const struct nft_ctx *ctx)
+{
+ struct nftables_pernet *nft_net = net_generic(ctx->net, nf_tables_net_id);
+ struct nft_trans *trans;
+
+ if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ return true;
+
+ list_for_each_entry(trans, &nft_net->commit_list, list) {
+ if (trans->ctx.table == ctx->table &&
+ trans->msg_type == NFT_MSG_DELCHAIN &&
+ nft_is_base_chain(trans->ctx.chain))
+ return true;
+ }
+
+ return false;
+}
+
static int nf_tables_updtable(struct nft_ctx *ctx)
{
struct nft_trans *trans;
@@ -1097,11 +1115,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
if (flags & ~NFT_TABLE_F_DORMANT)
return -EINVAL;
- if (flags == ctx->table->flags)
+ if (flags == (ctx->table->flags & NFT_TABLE_F_MASK))
return 0;
/* No dormant off/on/off/on games in single transaction */
- if (ctx->table->flags & __NFT_TABLE_F_UPDATE)
+ if (nft_table_pending_update(ctx))
return -EINVAL;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
@@ -1132,6 +1150,7 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
return 0;
err_register_hooks:
+ ctx->table->flags |= NFT_TABLE_F_DORMANT;
nft_trans_destroy(trans);
return ret;
}
@@ -2224,6 +2243,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook;
+ if (table->flags & __NFT_TABLE_F_UPDATE)
+ return -EINVAL;
+
if (flags & NFT_CHAIN_BINDING)
return -EOPNOTSUPP;
@@ -2717,7 +2739,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
{
const struct nft_expr_type *type, *candidate = NULL;
- list_for_each_entry(type, &nf_tables_expressions, list) {
+ list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
if (!nla_strcmp(nla, type->name)) {
if (!type->family && !candidate)
candidate = type;
@@ -2749,9 +2771,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
if (nla == NULL)
return ERR_PTR(-EINVAL);
+ rcu_read_lock();
type = __nft_expr_type_get(family, nla);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -4409,6 +4435,12 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if ((flags & (NFT_SET_EVAL | NFT_SET_OBJECT)) ==
(NFT_SET_EVAL | NFT_SET_OBJECT))
return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT | NFT_SET_EVAL)) ==
+ (NFT_SET_ANONYMOUS | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+ if ((flags & (NFT_SET_CONSTANT | NFT_SET_TIMEOUT)) ==
+ (NFT_SET_CONSTANT | NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
}
dtype = 0;
@@ -4450,6 +4482,9 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
if (err)
return err;
@@ -4458,6 +4493,10 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
@@ -4753,6 +4792,7 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) {
list_del_rcu(&set->list);
+ set->dead = 1;
if (event)
nf_tables_set_notify(ctx, set, NFT_MSG_DELSET,
GFP_KERNEL);
@@ -6861,11 +6901,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
return err;
}
+/* call under rcu_read_lock */
static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
{
const struct nf_flowtable_type *type;
- list_for_each_entry(type, &nf_tables_flowtables, list) {
+ list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
if (family == type->family)
return type;
}
@@ -6877,9 +6918,13 @@ nft_flowtable_type_get(struct net *net, u8 family)
{
const struct nf_flowtable_type *type;
+ rcu_read_lock();
type = __nft_flowtable_type_get(family);
- if (type != NULL && try_module_get(type->owner))
+ if (type != NULL && try_module_get(type->owner)) {
+ rcu_read_unlock();
return type;
+ }
+ rcu_read_unlock();
lockdep_nfnl_nft_mutex_not_held();
#ifdef CONFIG_MODULES
@@ -8777,10 +8822,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
struct nft_trans *trans, *next;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
+ int err = 0;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
- return -EAGAIN;
+ err = -EAGAIN;
list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
list) {
@@ -8940,12 +8986,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nf_tables_abort_release(trans);
}
- if (action == NFNL_ABORT_AUTOLOAD)
- nf_tables_module_autoload(net);
- else
- nf_tables_module_autoload_cleanup(net);
-
- return 0;
+ return err;
}
static int nf_tables_abort(struct net *net, struct sk_buff *skb,
@@ -8959,6 +9000,16 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
ret = __nf_tables_abort(net, action);
nft_gc_seq_end(nft_net, gc_seq);
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ /* module autoload needs to happen after GC sequence update because it
+ * temporarily releases and grabs mutex again.
+ */
+ if (action == NFNL_ABORT_AUTOLOAD)
+ nf_tables_module_autoload(net);
+ else
+ nf_tables_module_autoload_cleanup(net);
+
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -9693,8 +9744,11 @@ static void __net_exit nf_tables_exit_net(struct net *net)
gc_seq = nft_gc_seq_begin(nft_net);
- if (!list_empty(&nft_net->commit_list))
- __nf_tables_abort(net, NFNL_ABORT_NONE);
+ WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+ if (!list_empty(&nft_net->module_list))
+ nf_tables_module_autoload_cleanup(net);
+
__nft_release_tables(net);
nft_gc_seq_end(nft_net, gc_seq);
@@ -9778,6 +9832,7 @@ static void __exit nf_tables_module_exit(void)
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
nft_chain_filter_fini();
nft_chain_route_fini();
+ nf_tables_trans_destroy_flush_work();
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index a18582a4ecf3..aad676402919 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -339,7 +339,9 @@ static void nft_netdev_event(unsigned long event, struct net_device *dev,
return;
if (n > 1) {
- nf_unregister_net_hook(ctx->net, &found->ops);
+ if (!(ctx->chain->table->flags & NFT_TABLE_F_DORMANT))
+ nf_unregister_net_hook(ctx->net, &found->ops);
+
list_del_rcu(&found->list);
kfree_rcu(found, rcu);
return;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 77c7362a7db8..3e0a6e7930c6 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -336,10 +336,20 @@ static int nft_target_validate(const struct nft_ctx *ctx,
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -584,10 +594,20 @@ static int nft_match_validate(const struct nft_ctx *ctx,
if (ctx->family != NFPROTO_IPV4 &&
ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET &&
ctx->family != NFPROTO_BRIDGE &&
ctx->family != NFPROTO_ARP)
return -EOPNOTSUPP;
+ ret = nft_chain_validate_hooks(ctx->chain,
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+ if (ret)
+ return ret;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 2b15dbbca98b..2a8dfa68f6e2 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1188,14 +1188,13 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
switch (priv->l3num) {
case NFPROTO_IPV4:
case NFPROTO_IPV6:
- if (priv->l3num != ctx->family)
- return -EINVAL;
+ if (priv->l3num == ctx->family || ctx->family == NFPROTO_INET)
+ break;
- fallthrough;
- case NFPROTO_INET:
- break;
+ return -EINVAL;
+ case NFPROTO_INET: /* tuple.src.l3num supports NFPROTO_IPV4/6 only */
default:
- return -EOPNOTSUPP;
+ return -EAFNOSUPPORT;
}
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 70a59a35d176..5a8521abd8f5 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1980,6 +1980,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
rules_fx = rules_f0;
nft_pipapo_for_each_field(f, i, m) {
+ bool last = i == m->field_count - 1;
+
if (!pipapo_match_field(f, start, rules_fx,
match_start, match_end))
break;
@@ -1992,16 +1994,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
- }
- if (i == m->field_count) {
- priv->dirty = true;
- pipapo_drop(m, rulemap);
- return;
+ if (last && f->mt[rulemap[i].to].e == e) {
+ priv->dirty = true;
+ pipapo_drop(m, rulemap);
+ return;
+ }
}
first_rule += rules_f0;
}
+
+ WARN_ON_ONCE(1); /* elem_priv not found */
}
/**
@@ -2226,8 +2230,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (m) {
rcu_barrier();
- nft_set_pipapo_match_destroy(ctx, set, m);
-
for_each_possible_cpu(cpu)
pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
@@ -2239,8 +2241,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->clone) {
m = priv->clone;
- if (priv->dirty)
- nft_set_pipapo_match_destroy(ctx, set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
for_each_possible_cpu(cpu)
pipapo_free_scratch(priv->clone, cpu);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 359f07a53ecc..a2b14434d7aa 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -157,7 +157,7 @@ static inline u32 netlink_group_mask(u32 group)
static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
gfp_t gfp_mask)
{
- unsigned int len = skb_end_offset(skb);
+ unsigned int len = skb->len;
struct sk_buff *new;
new = alloc_skb(len, gfp_mask);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 24747163122b..37d0bf6cab45 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -453,16 +453,16 @@ static int nr_create(struct net *net, struct socket *sock, int protocol,
nr_init_timers(sk);
nr->t1 =
- msecs_to_jiffies(sysctl_netrom_transport_timeout);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_timeout));
nr->t2 =
- msecs_to_jiffies(sysctl_netrom_transport_acknowledge_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_acknowledge_delay));
nr->n2 =
- msecs_to_jiffies(sysctl_netrom_transport_maximum_tries);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_maximum_tries));
nr->t4 =
- msecs_to_jiffies(sysctl_netrom_transport_busy_delay);
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_busy_delay));
nr->idle =
- msecs_to_jiffies(sysctl_netrom_transport_no_activity_timeout);
- nr->window = sysctl_netrom_transport_requested_window_size;
+ msecs_to_jiffies(READ_ONCE(sysctl_netrom_transport_no_activity_timeout));
+ nr->window = READ_ONCE(sysctl_netrom_transport_requested_window_size);
nr->bpqext = 1;
nr->state = NR_STATE_0;
@@ -954,7 +954,7 @@ int nr_rx_frame(struct sk_buff *skb, struct net_device *dev)
* G8PZT's Xrouter which is sending packets with command type 7
* as an extension of the protocol.
*/
- if (sysctl_netrom_reset_circuit &&
+ if (READ_ONCE(sysctl_netrom_reset_circuit) &&
(frametype != NR_RESET || flags != 0))
nr_transmit_reset(skb, 1);
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 29e418c8c6c3..4caee8754b79 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -81,7 +81,7 @@ static int nr_header(struct sk_buff *skb, struct net_device *dev,
buff[6] |= AX25_SSSID_SPARE;
buff += AX25_ADDR_LEN;
- *buff++ = sysctl_netrom_network_ttl_initialiser;
+ *buff++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
*buff++ = NR_PROTO_IP;
*buff++ = NR_PROTO_IP;
diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c
index 69e58906c32b..034f79d11ae1 100644
--- a/net/netrom/nr_in.c
+++ b/net/netrom/nr_in.c
@@ -97,7 +97,7 @@ static int nr_state1_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -128,7 +128,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb,
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
@@ -263,7 +263,7 @@ static int nr_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype
break;
case NR_RESET:
- if (sysctl_netrom_reset_circuit)
+ if (READ_ONCE(sysctl_netrom_reset_circuit))
nr_disconnect(sk, ECONNRESET);
break;
diff --git a/net/netrom/nr_out.c b/net/netrom/nr_out.c
index 44929657f5b7..5e531394a724 100644
--- a/net/netrom/nr_out.c
+++ b/net/netrom/nr_out.c
@@ -204,7 +204,7 @@ void nr_transmit_buffer(struct sock *sk, struct sk_buff *skb)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (!nr_route_frame(skb, NULL)) {
kfree_skb(skb);
diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c
index 78da5eab252a..895702337c92 100644
--- a/net/netrom/nr_route.c
+++ b/net/netrom/nr_route.c
@@ -153,7 +153,7 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic,
nr_neigh->digipeat = NULL;
nr_neigh->ax25 = NULL;
nr_neigh->dev = dev;
- nr_neigh->quality = sysctl_netrom_default_path_quality;
+ nr_neigh->quality = READ_ONCE(sysctl_netrom_default_path_quality);
nr_neigh->locked = 0;
nr_neigh->count = 0;
nr_neigh->number = nr_neigh_no++;
@@ -725,7 +725,7 @@ void nr_link_failed(ax25_cb *ax25, int reason)
nr_neigh->ax25 = NULL;
ax25_cb_put(ax25);
- if (++nr_neigh->failed < sysctl_netrom_link_fails_count) {
+ if (++nr_neigh->failed < READ_ONCE(sysctl_netrom_link_fails_count)) {
nr_neigh_put(nr_neigh);
return;
}
@@ -763,7 +763,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
if (ax25 != NULL) {
ret = nr_add_node(nr_src, "", &ax25->dest_addr, ax25->digipeat,
ax25->ax25_dev->dev, 0,
- sysctl_netrom_obsolescence_count_initialiser);
+ READ_ONCE(sysctl_netrom_obsolescence_count_initialiser));
if (ret)
return ret;
}
@@ -777,7 +777,7 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25)
return ret;
}
- if (!sysctl_netrom_routing_control && ax25 != NULL)
+ if (!READ_ONCE(sysctl_netrom_routing_control) && ax25 != NULL)
return 0;
/* Its Time-To-Live has expired */
diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c
index e2d2af924cff..c3bbd5880850 100644
--- a/net/netrom/nr_subr.c
+++ b/net/netrom/nr_subr.c
@@ -182,7 +182,8 @@ void nr_write_internal(struct sock *sk, int frametype)
*dptr++ = nr->my_id;
*dptr++ = frametype;
*dptr++ = nr->window;
- if (nr->bpqext) *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ if (nr->bpqext)
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
break;
case NR_DISCREQ:
@@ -236,7 +237,7 @@ void __nr_transmit_reply(struct sk_buff *skb, int mine, unsigned char cmdflags)
dptr[6] |= AX25_SSSID_SPARE;
dptr += AX25_ADDR_LEN;
- *dptr++ = sysctl_netrom_network_ttl_initialiser;
+ *dptr++ = READ_ONCE(sysctl_netrom_network_ttl_initialiser);
if (mine) {
*dptr++ = 0;
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 5bfaf06f7be7..d8002065baae 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1502,6 +1502,11 @@ static void nci_rx_work(struct work_struct *work)
nfc_send_to_raw_sock(ndev->nfc_dev, skb,
RAW_PAYLOAD_NCI, NFC_DIRECTION_RX);
+ if (!nci_plen(skb->data)) {
+ kfree_skb(skb);
+ break;
+ }
+
/* Process frame */
switch (nci_mt(skb->data)) {
case NCI_MT_RSP_PKT:
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 0f0f380e81a4..30f5e414018b 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1692,8 +1692,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (ct_info.timeout[0]) {
if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
ct_info.timeout))
- pr_info_ratelimited("Failed to associated timeout "
- "policy `%s'\n", ct_info.timeout);
+ OVS_NLERR(log,
+ "Failed to associated timeout policy '%s'",
+ ct_info.timeout);
else
ct_info.nf_ct_timeout = rcu_dereference(
nf_ct_timeout_find(ct_info.ct)->timeout);
@@ -1901,9 +1902,9 @@ static void ovs_ct_limit_exit(struct net *net, struct ovs_net *ovs_net)
for (i = 0; i < CT_LIMIT_HASH_BUCKETS; ++i) {
struct hlist_head *head = &info->limits[i];
struct ovs_ct_limit *ct_limit;
+ struct hlist_node *next;
- hlist_for_each_entry_rcu(ct_limit, head, hlist_node,
- lockdep_ovsl_is_held())
+ hlist_for_each_entry_safe(ct_limit, next, head, hlist_node)
kfree_rcu(ct_limit, rcu);
}
kfree(info->limits);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index bd383c739836..04f5fbb13156 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1871,7 +1871,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
*/
spkt->spkt_family = dev->type;
- strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
+ strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
spkt->spkt_protocol = skb->protocol;
/*
@@ -3254,7 +3254,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
int addr_len)
{
struct sock *sk = sock->sk;
- char name[sizeof(uaddr->sa_data) + 1];
+ char name[sizeof(uaddr->sa_data_min) + 1];
/*
* Check legality
@@ -3265,8 +3265,8 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
/* uaddr->sa_data comes from the userspace, it's not guaranteed to be
* zero-terminated.
*/
- memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
- name[sizeof(uaddr->sa_data)] = 0;
+ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min));
+ name[sizeof(uaddr->sa_data_min)] = 0;
return packet_do_bind(sk, name, 0, 0);
}
@@ -3540,11 +3540,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
return -EOPNOTSUPP;
uaddr->sa_family = AF_PACKET;
- memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
+ memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min));
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
if (dev)
- strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
+ strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min));
rcu_read_unlock();
return sizeof(*uaddr);
@@ -3955,7 +3955,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
if (val < 0 || val > 1)
return -EINVAL;
- po->prot_hook.ignore_outgoing = !!val;
+ WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val);
return 0;
}
case PACKET_TX_HAS_OFF:
@@ -4087,7 +4087,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_IGNORE_OUTGOING:
- val = po->prot_hook.ignore_outgoing;
+ val = READ_ONCE(po->prot_hook.ignore_outgoing);
break;
case PACKET_ROLLOVER_STATS:
if (!po->rollover)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 6f1a50d50d06..3df0affff6b0 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -301,6 +301,9 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
kfree(sg);
}
ret = PTR_ERR(trans_private);
+ /* Trigger connection so that its ready for the next retry */
+ if (ret == -ENODEV && cp)
+ rds_conn_connect_if_down(cp->cp_conn);
goto out;
}
diff --git a/net/rds/send.c b/net/rds/send.c
index 985d0b7713ac..1923eaa91e93 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -103,13 +103,12 @@ EXPORT_SYMBOL_GPL(rds_send_path_reset);
static int acquire_in_xmit(struct rds_conn_path *cp)
{
- return test_and_set_bit(RDS_IN_XMIT, &cp->cp_flags) == 0;
+ return test_and_set_bit_lock(RDS_IN_XMIT, &cp->cp_flags) == 0;
}
static void release_in_xmit(struct rds_conn_path *cp)
{
- clear_bit(RDS_IN_XMIT, &cp->cp_flags);
- smp_mb__after_atomic();
+ clear_bit_unlock(RDS_IN_XMIT, &cp->cp_flags);
/*
* We don't use wait_on_bit()/wake_up_bit() because our waking is in a
* hot path and finding waiters is very rare. We don't want to walk
@@ -1314,12 +1313,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
/* Parse any control messages the user may have included. */
ret = rds_cmsg_send(rs, rm, msg, &allocated_mr, &vct);
- if (ret) {
- /* Trigger connection so that its ready for the next retry */
- if (ret == -EAGAIN)
- rds_conn_connect_if_down(conn);
+ if (ret)
goto out;
- }
if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
printk_ratelimited(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2046c16b29f0..e5f7675e587b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -45,23 +45,6 @@ if NET_SCHED
comment "Queueing/Scheduling"
-config NET_SCH_CBQ
- tristate "Class Based Queueing (CBQ)"
- help
- Say Y here if you want to use the Class-Based Queueing (CBQ) packet
- scheduling algorithm. This algorithm classifies the waiting packets
- into a tree-like hierarchy of classes; the leaves of this tree are
- in turn scheduled by separate algorithms.
-
- See the top of <file:net/sched/sch_cbq.c> for more details.
-
- CBQ is a commonly used scheduler, so if you're unsure, you should
- say Y here. Then say Y to all the queueing algorithms below that you
- want to use as leaf disciplines.
-
- To compile this code as a module, choose M here: the
- module will be called sch_cbq.
-
config NET_SCH_HTB
tristate "Hierarchical Token Bucket (HTB)"
help
@@ -85,20 +68,6 @@ config NET_SCH_HFSC
To compile this code as a module, choose M here: the
module will be called sch_hfsc.
-config NET_SCH_ATM
- tristate "ATM Virtual Circuits (ATM)"
- depends on ATM
- help
- Say Y here if you want to use the ATM pseudo-scheduler. This
- provides a framework for invoking classifiers, which in turn
- select classes of this queuing discipline. Each class maps
- the flow(s) it is handling to a given virtual circuit.
-
- See the top of <file:net/sched/sch_atm.c> for more details.
-
- To compile this code as a module, choose M here: the
- module will be called sch_atm.
-
config NET_SCH_PRIO
tristate "Multi Band Priority Queueing (PRIO)"
help
@@ -217,17 +186,6 @@ config NET_SCH_GRED
To compile this code as a module, choose M here: the
module will be called sch_gred.
-config NET_SCH_DSMARK
- tristate "Differentiated Services marker (DSMARK)"
- help
- Say Y if you want to schedule packets according to the
- Differentiated Services architecture proposed in RFC 2475.
- Technical information on this method, with pointers to associated
- RFCs, is available at <http://www.gta.ufrj.br/diffserv/>.
-
- To compile this code as a module, choose M here: the
- module will be called sch_dsmark.
-
config NET_SCH_NETEM
tristate "Network emulator (NETEM)"
help
diff --git a/net/sched/Makefile b/net/sched/Makefile
index df2bcd785f7d..1b8d0fc6614c 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -32,20 +32,17 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
-obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
-obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
-obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index aa98dcac94b9..934765a2edeb 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -219,13 +219,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_skbmod *d = to_skbmod(a);
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbmod_params *p;
- struct tc_skbmod opt = {
- .index = d->tcf_index,
- .refcnt = refcount_read(&d->tcf_refcnt) - ref,
- .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
- };
+ struct tc_skbmod opt;
struct tcf_t t;
+ memset(&opt, 0, sizeof(opt));
+ opt.index = d->tcf_index;
+ opt.refcnt = refcount_read(&d->tcf_refcnt) - ref,
+ opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
spin_lock_bh(&d->tcf_lock);
opt.action = d->tcf_action;
p = rcu_dereference_protected(d->skbmod_p,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
deleted file mode 100644
index 95967ce1f370..000000000000
--- a/net/sched/sch_atm.c
+++ /dev/null
@@ -1,709 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/atmdev.h>
-#include <linux/atmclip.h>
-#include <linux/rtnetlink.h>
-#include <linux/file.h> /* for fput */
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-/*
- * The ATM queuing discipline provides a framework for invoking classifiers
- * (aka "filters"), which in turn select classes of this queuing discipline.
- * Each class maps the flow(s) it is handling to a given VC. Multiple classes
- * may share the same VC.
- *
- * When creating a class, VCs are specified by passing the number of the open
- * socket descriptor by which the calling process references the VC. The kernel
- * keeps the VC open at least until all classes using it are removed.
- *
- * In this file, most functions are named atm_tc_* to avoid confusion with all
- * the atm_* in net/atm. This naming convention differs from what's used in the
- * rest of net/sched.
- *
- * Known bugs:
- * - sometimes messes up the IP stack
- * - any manipulations besides the few operations described in the README, are
- * untested and likely to crash the system
- * - should lock the flow while there is data in the queue (?)
- */
-
-#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
-
-struct atm_flow_data {
- struct Qdisc_class_common common;
- struct Qdisc *q; /* FIFO, TBF, etc. */
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
- struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
- void (*old_pop)(struct atm_vcc *vcc,
- struct sk_buff *skb); /* chaining */
- struct atm_qdisc_data *parent; /* parent qdisc */
- struct socket *sock; /* for closing */
- int ref; /* reference count */
- struct gnet_stats_basic_packed bstats;
- struct gnet_stats_queue qstats;
- struct list_head list;
- struct atm_flow_data *excess; /* flow for excess traffic;
- NULL to set CLP instead */
- int hdr_len;
- unsigned char hdr[]; /* header data; MUST BE LAST */
-};
-
-struct atm_qdisc_data {
- struct atm_flow_data link; /* unclassified skbs go here */
- struct list_head flows; /* NB: "link" is also on this
- list */
- struct tasklet_struct task; /* dequeue tasklet */
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- list_for_each_entry(flow, &p->flows, list) {
- if (flow->common.classid == classid)
- return flow;
- }
- return NULL;
-}
-
-static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
- sch, p, flow, new, old);
- if (list_empty(&flow->list))
- return -EINVAL;
- if (!new)
- new = &noop_qdisc;
- *old = flow->q;
- flow->q = new;
- if (*old)
- qdisc_reset(*old);
- return 0;
-}
-
-static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
-{
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
- return flow ? flow->q : NULL;
-}
-
-static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
-{
- struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
- flow = lookup_flow(sch, classid);
- pr_debug("%s: flow %p\n", __func__, flow);
- return (unsigned long)flow;
-}
-
-static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
- unsigned long parent, u32 classid)
-{
- struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
- flow = lookup_flow(sch, classid);
- if (flow)
- flow->ref++;
- pr_debug("%s: flow %p\n", __func__, flow);
- return (unsigned long)flow;
-}
-
-/*
- * atm_tc_put handles all destructions, including the ones that are explicitly
- * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
- * anything that still seems to be in use.
- */
-static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- if (--flow->ref)
- return;
- pr_debug("atm_tc_put: destroying\n");
- list_del_init(&flow->list);
- pr_debug("atm_tc_put: qdisc %p\n", flow->q);
- qdisc_put(flow->q);
- tcf_block_put(flow->block);
- if (flow->sock) {
- pr_debug("atm_tc_put: f_count %ld\n",
- file_count(flow->sock->file));
- flow->vcc->pop = flow->old_pop;
- sockfd_put(flow->sock);
- }
- if (flow->excess)
- atm_tc_put(sch, (unsigned long)flow->excess);
- if (flow != &p->link)
- kfree(flow);
- /*
- * If flow == &p->link, the qdisc no longer works at this point and
- * needs to be removed. (By the caller of atm_tc_put.)
- */
-}
-
-static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
-{
- struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
-
- pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
- VCC2FLOW(vcc)->old_pop(vcc, skb);
- tasklet_schedule(&p->task);
-}
-
-static const u8 llc_oui_ip[] = {
- 0xaa, /* DSAP: non-ISO */
- 0xaa, /* SSAP: non-ISO */
- 0x03, /* Ctrl: Unnumbered Information Command PDU */
- 0x00, /* OUI: EtherType */
- 0x00, 0x00,
- 0x08, 0x00
-}; /* Ethertype IP (0800) */
-
-static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
- [TCA_ATM_FD] = { .type = NLA_U32 },
- [TCA_ATM_EXCESS] = { .type = NLA_U32 },
-};
-
-static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
- struct atm_flow_data *excess = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_ATM_MAX + 1];
- struct socket *sock;
- int fd, error, hdr_len;
- void *hdr;
-
- pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
- "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
- /*
- * The concept of parents doesn't apply for this qdisc.
- */
- if (parent && parent != TC_H_ROOT && parent != sch->handle)
- return -EINVAL;
- /*
- * ATM classes cannot be changed. In order to change properties of the
- * ATM connection, that socket needs to be modified directly (via the
- * native ATM API. In order to send a flow to a different VC, the old
- * class needs to be removed and a new one added. (This may be changed
- * later.)
- */
- if (flow)
- return -EBUSY;
- if (opt == NULL)
- return -EINVAL;
-
- error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy,
- NULL);
- if (error < 0)
- return error;
-
- if (!tb[TCA_ATM_FD])
- return -EINVAL;
- fd = nla_get_u32(tb[TCA_ATM_FD]);
- pr_debug("atm_tc_change: fd %d\n", fd);
- if (tb[TCA_ATM_HDR]) {
- hdr_len = nla_len(tb[TCA_ATM_HDR]);
- hdr = nla_data(tb[TCA_ATM_HDR]);
- } else {
- hdr_len = RFC1483LLC_LEN;
- hdr = NULL; /* default LLC/SNAP for IP */
- }
- if (!tb[TCA_ATM_EXCESS])
- excess = NULL;
- else {
- excess = (struct atm_flow_data *)
- atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
- if (!excess)
- return -ENOENT;
- }
- pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
- opt->nla_type, nla_len(opt), hdr_len);
- sock = sockfd_lookup(fd, &error);
- if (!sock)
- return error; /* f_count++ */
- pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
- if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
- error = -EPROTOTYPE;
- goto err_out;
- }
- /* @@@ should check if the socket is really operational or we'll crash
- on vcc->send */
- if (classid) {
- if (TC_H_MAJ(classid ^ sch->handle)) {
- pr_debug("atm_tc_change: classid mismatch\n");
- error = -EINVAL;
- goto err_out;
- }
- } else {
- int i;
- unsigned long cl;
-
- for (i = 1; i < 0x8000; i++) {
- classid = TC_H_MAKE(sch->handle, 0x8000 | i);
- cl = atm_tc_find(sch, classid);
- if (!cl)
- break;
- }
- }
- pr_debug("atm_tc_change: new id %x\n", classid);
- flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
- pr_debug("atm_tc_change: flow %p\n", flow);
- if (!flow) {
- error = -ENOBUFS;
- goto err_out;
- }
-
- error = tcf_block_get(&flow->block, &flow->filter_list, sch,
- extack);
- if (error) {
- kfree(flow);
- goto err_out;
- }
-
- flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
- extack);
- if (!flow->q)
- flow->q = &noop_qdisc;
- pr_debug("atm_tc_change: qdisc %p\n", flow->q);
- flow->sock = sock;
- flow->vcc = ATM_SD(sock); /* speedup */
- flow->vcc->user_back = flow;
- pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
- flow->old_pop = flow->vcc->pop;
- flow->parent = p;
- flow->vcc->pop = sch_atm_pop;
- flow->common.classid = classid;
- flow->ref = 1;
- flow->excess = excess;
- list_add(&flow->list, &p->link.list);
- flow->hdr_len = hdr_len;
- if (hdr)
- memcpy(flow->hdr, hdr, hdr_len);
- else
- memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip));
- *arg = (unsigned long)flow;
- return 0;
-err_out:
- sockfd_put(sock);
- return error;
-}
-
-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- if (list_empty(&flow->list))
- return -EINVAL;
- if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
- return -EBUSY;
- /*
- * Reference count must be 2: one for "keepalive" (set at class
- * creation), and one for the reference held when calling delete.
- */
- if (flow->ref < 2) {
- pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
- return -EINVAL;
- }
- if (flow->ref > 2)
- return -EBUSY; /* catch references via excess, etc. */
- atm_tc_put(sch, arg);
- return 0;
-}
-
-static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
- if (walker->stop)
- return;
- list_for_each_entry(flow, &p->flows, list) {
- if (walker->count >= walker->skip &&
- walker->fn(sch, (unsigned long)flow, walker) < 0) {
- walker->stop = 1;
- break;
- }
- walker->count++;
- }
-}
-
-static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- return flow ? flow->block : p->link.block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
- struct tcf_result res;
- int result;
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-
- pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
- result = TC_ACT_OK; /* be nice to gcc */
- flow = NULL;
- if (TC_H_MAJ(skb->priority) != sch->handle ||
- !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
- struct tcf_proto *fl;
-
- list_for_each_entry(flow, &p->flows, list) {
- fl = rcu_dereference_bh(flow->filter_list);
- if (fl) {
- result = tcf_classify(skb, fl, &res, true);
- if (result < 0)
- continue;
- if (result == TC_ACT_SHOT)
- goto done;
-
- flow = (struct atm_flow_data *)res.class;
- if (!flow)
- flow = lookup_flow(sch, res.classid);
- goto drop;
- }
- }
- flow = NULL;
-done:
- ;
- }
- if (!flow) {
- flow = &p->link;
- } else {
- if (flow->vcc)
- ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
- /*@@@ looks good ... but it's not supposed to work :-) */
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- __qdisc_drop(skb, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- case TC_ACT_SHOT:
- __qdisc_drop(skb, to_free);
- goto drop;
- case TC_ACT_RECLASSIFY:
- if (flow->excess)
- flow = flow->excess;
- else
- ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP;
- break;
- }
-#endif
- }
-
- ret = qdisc_enqueue(skb, flow->q, to_free);
- if (ret != NET_XMIT_SUCCESS) {
-drop: __maybe_unused
- if (net_xmit_drop_count(ret)) {
- qdisc_qstats_drop(sch);
- if (flow)
- flow->qstats.drops++;
- }
- return ret;
- }
- /*
- * Okay, this may seem weird. We pretend we've dropped the packet if
- * it goes via ATM. The reason for this is that the outer qdisc
- * expects to be able to q->dequeue the packet later on if we return
- * success at this place. Also, sch->q.qdisc needs to reflect whether
- * there is a packet egligible for dequeuing or not. Note that the
- * statistics of the outer qdisc are necessarily wrong because of all
- * this. There's currently no correct solution for this.
- */
- if (flow == &p->link) {
- sch->q.qlen++;
- return NET_XMIT_SUCCESS;
- }
- tasklet_schedule(&p->task);
- return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
-
-/*
- * Dequeue packets and send them over ATM. Note that we quite deliberately
- * avoid checking net_device's flow control here, simply because sch_atm
- * uses its own channels, which have nothing to do with any CLIP/LANE/or
- * non-ATM interfaces.
- */
-
-static void sch_atm_dequeue(unsigned long data)
-{
- struct Qdisc *sch = (struct Qdisc *)data;
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
- struct sk_buff *skb;
-
- pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- if (flow == &p->link)
- continue;
- /*
- * If traffic is properly shaped, this won't generate nasty
- * little bursts. Otherwise, it may ... (but that's okay)
- */
- while ((skb = flow->q->ops->peek(flow->q))) {
- if (!atm_may_send(flow->vcc, skb->truesize))
- break;
-
- skb = qdisc_dequeue_peeked(flow->q);
- if (unlikely(!skb))
- break;
-
- qdisc_bstats_update(sch, skb);
- bstats_update(&flow->bstats, skb);
- pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
- /* remove any LL header somebody else has attached */
- skb_pull(skb, skb_network_offset(skb));
- if (skb_headroom(skb) < flow->hdr_len) {
- struct sk_buff *new;
-
- new = skb_realloc_headroom(skb, flow->hdr_len);
- dev_kfree_skb(skb);
- if (!new)
- continue;
- skb = new;
- }
- pr_debug("sch_atm_dequeue: ip %p, data %p\n",
- skb_network_header(skb), skb->data);
- ATM_SKB(skb)->vcc = flow->vcc;
- memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
- flow->hdr_len);
- refcount_add(skb->truesize,
- &sk_atm(flow->vcc)->sk_wmem_alloc);
- /* atm.atm_options are already set by atm_tc_enqueue */
- flow->vcc->send(flow->vcc, skb);
- }
- }
-}
-
-static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct sk_buff *skb;
-
- pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
- tasklet_schedule(&p->task);
- skb = qdisc_dequeue_peeked(p->link.q);
- if (skb)
- sch->q.qlen--;
- return skb;
-}
-
-static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
-
- return p->link.q->ops->peek(p->link.q);
-}
-
-static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- int err;
-
- pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
- INIT_LIST_HEAD(&p->flows);
- INIT_LIST_HEAD(&p->link.list);
- list_add(&p->link.list, &p->flows);
- p->link.q = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, sch->handle, extack);
- if (!p->link.q)
- p->link.q = &noop_qdisc;
- pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- p->link.vcc = NULL;
- p->link.sock = NULL;
- p->link.common.classid = sch->handle;
- p->link.ref = 1;
-
- err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
- extack);
- if (err)
- return err;
-
- tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch);
- return 0;
-}
-
-static void atm_tc_reset(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list)
- qdisc_reset(flow->q);
-}
-
-static void atm_tc_destroy(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow, *tmp;
-
- pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- tcf_block_put(flow->block);
- flow->block = NULL;
- }
-
- list_for_each_entry_safe(flow, tmp, &p->flows, list) {
- if (flow->ref > 1)
- pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
- atm_tc_put(sch, (unsigned long)flow);
- }
- tasklet_kill(&p->task);
-}
-
-static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
- struct nlattr *nest;
-
- pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
- sch, p, flow, skb, tcm);
- if (list_empty(&flow->list))
- return -EINVAL;
- tcm->tcm_handle = flow->common.classid;
- tcm->tcm_info = flow->q->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr))
- goto nla_put_failure;
- if (flow->vcc) {
- struct sockaddr_atmpvc pvc;
- int state;
-
- memset(&pvc, 0, sizeof(pvc));
- pvc.sap_family = AF_ATMPVC;
- pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
- pvc.sap_addr.vpi = flow->vcc->vpi;
- pvc.sap_addr.vci = flow->vcc->vci;
- if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc))
- goto nla_put_failure;
- state = ATM_VF2VS(flow->vcc->flags);
- if (nla_put_u32(skb, TCA_ATM_STATE, state))
- goto nla_put_failure;
- }
- if (flow->excess) {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
- goto nla_put_failure;
- } else {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
- goto nla_put_failure;
- }
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-static int
-atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- struct gnet_dump *d)
-{
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &flow->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
- return -1;
-
- return 0;
-}
-
-static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- return 0;
-}
-
-static const struct Qdisc_class_ops atm_class_ops = {
- .graft = atm_tc_graft,
- .leaf = atm_tc_leaf,
- .find = atm_tc_find,
- .change = atm_tc_change,
- .delete = atm_tc_delete,
- .walk = atm_tc_walk,
- .tcf_block = atm_tc_tcf_block,
- .bind_tcf = atm_tc_bind_filter,
- .unbind_tcf = atm_tc_put,
- .dump = atm_tc_dump_class,
- .dump_stats = atm_tc_dump_class_stats,
-};
-
-static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
- .cl_ops = &atm_class_ops,
- .id = "atm",
- .priv_size = sizeof(struct atm_qdisc_data),
- .enqueue = atm_tc_enqueue,
- .dequeue = atm_tc_dequeue,
- .peek = atm_tc_peek,
- .init = atm_tc_init,
- .reset = atm_tc_reset,
- .destroy = atm_tc_destroy,
- .dump = atm_tc_dump,
- .owner = THIS_MODULE,
-};
-
-static int __init atm_init(void)
-{
- return register_qdisc(&atm_qdisc_ops);
-}
-
-static void __exit atm_exit(void)
-{
- unregister_qdisc(&atm_qdisc_ops);
-}
-
-module_init(atm_init)
-module_exit(atm_exit)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
deleted file mode 100644
index 3da5eb313c24..000000000000
--- a/net/sched/sch_cbq.c
+++ /dev/null
@@ -1,1816 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/sch_cbq.c Class-Based Queueing discipline.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-
-/* Class-Based Queueing (CBQ) algorithm.
- =======================================
-
- Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
- Management Models for Packet Networks",
- IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
-
- [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
-
- [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
- Parameters", 1996
-
- [4] Sally Floyd and Michael Speer, "Experimental Results
- for Class-Based Queueing", 1998, not published.
-
- -----------------------------------------------------------------------
-
- Algorithm skeleton was taken from NS simulator cbq.cc.
- If someone wants to check this code against the LBL version,
- he should take into account that ONLY the skeleton was borrowed,
- the implementation is different. Particularly:
-
- --- The WRR algorithm is different. Our version looks more
- reasonable (I hope) and works when quanta are allowed to be
- less than MTU, which is always the case when real time classes
- have small rates. Note, that the statement of [3] is
- incomplete, delay may actually be estimated even if class
- per-round allotment is less than MTU. Namely, if per-round
- allotment is W*r_i, and r_1+...+r_k = r < 1
-
- delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
-
- In the worst case we have IntServ estimate with D = W*r+k*MTU
- and C = MTU*r. The proof (if correct at all) is trivial.
-
-
- --- It seems that cbq-2.0 is not very accurate. At least, I cannot
- interpret some places, which look like wrong translations
- from NS. Anyone is advised to find these differences
- and explain to me, why I am wrong 8).
-
- --- Linux has no EOI event, so that we cannot estimate true class
- idle time. Workaround is to consider the next dequeue event
- as sign that previous packet is finished. This is wrong because of
- internal device queueing, but on a permanently loaded link it is true.
- Moreover, combined with clock integrator, this scheme looks
- very close to an ideal solution. */
-
-struct cbq_sched_data;
-
-
-struct cbq_class {
- struct Qdisc_class_common common;
- struct cbq_class *next_alive; /* next class with backlog in this priority band */
-
-/* Parameters */
- unsigned char priority; /* class priority */
- unsigned char priority2; /* priority to be used after overlimit */
- unsigned char ewma_log; /* time constant for idle time calculation */
-
- u32 defmap;
-
- /* Link-sharing scheduler parameters */
- long maxidle; /* Class parameters: see below. */
- long offtime;
- long minidle;
- u32 avpkt;
- struct qdisc_rate_table *R_tab;
-
- /* General scheduler (WRR) parameters */
- long allot;
- long quantum; /* Allotment per WRR round */
- long weight; /* Relative allotment: see below */
-
- struct Qdisc *qdisc; /* Ptr to CBQ discipline */
- struct cbq_class *split; /* Ptr to split node */
- struct cbq_class *share; /* Ptr to LS parent in the class tree */
- struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
- struct cbq_class *borrow; /* NULL if class is bandwidth limited;
- parent otherwise */
- struct cbq_class *sibling; /* Sibling chain */
- struct cbq_class *children; /* Pointer to children chain */
-
- struct Qdisc *q; /* Elementary queueing discipline */
-
-
-/* Variables */
- unsigned char cpriority; /* Effective priority */
- unsigned char delayed;
- unsigned char level; /* level of the class in hierarchy:
- 0 for leaf classes, and maximal
- level of children + 1 for nodes.
- */
-
- psched_time_t last; /* Last end of service */
- psched_time_t undertime;
- long avgidle;
- long deficit; /* Saved deficit for WRR */
- psched_time_t penalized;
- struct gnet_stats_basic_packed bstats;
- struct gnet_stats_queue qstats;
- struct net_rate_estimator __rcu *rate_est;
- struct tc_cbq_xstats xstats;
-
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
-
- int filters;
-
- struct cbq_class *defaults[TC_PRIO_MAX + 1];
-};
-
-struct cbq_sched_data {
- struct Qdisc_class_hash clhash; /* Hash table of all classes */
- int nclasses[TC_CBQ_MAXPRIO + 1];
- unsigned int quanta[TC_CBQ_MAXPRIO + 1];
-
- struct cbq_class link;
-
- unsigned int activemask;
- struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
- with backlog */
-
-#ifdef CONFIG_NET_CLS_ACT
- struct cbq_class *rx_class;
-#endif
- struct cbq_class *tx_class;
- struct cbq_class *tx_borrowed;
- int tx_len;
- psched_time_t now; /* Cached timestamp */
- unsigned int pmask;
-
- struct hrtimer delay_timer;
- struct qdisc_watchdog watchdog; /* Watchdog timer,
- started when CBQ has
- backlog, but cannot
- transmit just now */
- psched_tdiff_t wd_expires;
- int toplevel;
- u32 hgenerator;
-};
-
-
-#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
-
-static inline struct cbq_class *
-cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
-{
- struct Qdisc_class_common *clc;
-
- clc = qdisc_class_find(&q->clhash, classid);
- if (clc == NULL)
- return NULL;
- return container_of(clc, struct cbq_class, common);
-}
-
-#ifdef CONFIG_NET_CLS_ACT
-
-static struct cbq_class *
-cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
-{
- struct cbq_class *cl;
-
- for (cl = this->tparent; cl; cl = cl->tparent) {
- struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
-
- if (new != NULL && new != this)
- return new;
- }
- return NULL;
-}
-
-#endif
-
-/* Classify packet. The procedure is pretty complicated, but
- * it allows us to combine link sharing and priority scheduling
- * transparently.
- *
- * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
- * so that it resolves to split nodes. Then packets are classified
- * by logical priority, or a more specific classifier may be attached
- * to the split node.
- */
-
-static struct cbq_class *
-cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *head = &q->link;
- struct cbq_class **defmap;
- struct cbq_class *cl = NULL;
- u32 prio = skb->priority;
- struct tcf_proto *fl;
- struct tcf_result res;
-
- /*
- * Step 1. If skb->priority points to one of our classes, use it.
- */
- if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
- (cl = cbq_class_lookup(q, prio)) != NULL)
- return cl;
-
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- for (;;) {
- int result = 0;
- defmap = head->defaults;
-
- fl = rcu_dereference_bh(head->filter_list);
- /*
- * Step 2+n. Apply classifier.
- */
- result = tcf_classify(skb, fl, &res, true);
- if (!fl || result < 0)
- goto fallback;
- if (result == TC_ACT_SHOT)
- return NULL;
-
- cl = (void *)res.class;
- if (!cl) {
- if (TC_H_MAJ(res.classid))
- cl = cbq_class_lookup(q, res.classid);
- else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
- cl = defmap[TC_PRIO_BESTEFFORT];
-
- if (cl == NULL)
- goto fallback;
- }
- if (cl->level >= head->level)
- goto fallback;
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- fallthrough;
- case TC_ACT_RECLASSIFY:
- return cbq_reclassify(skb, cl);
- }
-#endif
- if (cl->level == 0)
- return cl;
-
- /*
- * Step 3+n. If classifier selected a link sharing class,
- * apply agency specific classifier.
- * Repeat this procdure until we hit a leaf node.
- */
- head = cl;
- }
-
-fallback:
- cl = head;
-
- /*
- * Step 4. No success...
- */
- if (TC_H_MAJ(prio) == 0 &&
- !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
- !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
- return head;
-
- return cl;
-}
-
-/*
- * A packet has just been enqueued on the empty class.
- * cbq_activate_class adds it to the tail of active class list
- * of its priority band.
- */
-
-static inline void cbq_activate_class(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- int prio = cl->cpriority;
- struct cbq_class *cl_tail;
-
- cl_tail = q->active[prio];
- q->active[prio] = cl;
-
- if (cl_tail != NULL) {
- cl->next_alive = cl_tail->next_alive;
- cl_tail->next_alive = cl;
- } else {
- cl->next_alive = cl;
- q->activemask |= (1<<prio);
- }
-}
-
-/*
- * Unlink class from active chain.
- * Note that this same procedure is done directly in cbq_dequeue*
- * during round-robin procedure.
- */
-
-static void cbq_deactivate_class(struct cbq_class *this)
-{
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- int prio = this->cpriority;
- struct cbq_class *cl;
- struct cbq_class *cl_prev = q->active[prio];
-
- do {
- cl = cl_prev->next_alive;
- if (cl == this) {
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
-
- if (cl == q->active[prio]) {
- q->active[prio] = cl_prev;
- if (cl == q->active[prio]) {
- q->active[prio] = NULL;
- q->activemask &= ~(1<<prio);
- return;
- }
- }
- return;
- }
- } while ((cl_prev = cl) != q->active[prio]);
-}
-
-static void
-cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- int toplevel = q->toplevel;
-
- if (toplevel > cl->level) {
- psched_time_t now = psched_get_time();
-
- do {
- if (cl->undertime < now) {
- q->toplevel = cl->level;
- return;
- }
- } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
- }
-}
-
-static int
-cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- int ret;
- struct cbq_class *cl = cbq_classify(skb, sch, &ret);
-
-#ifdef CONFIG_NET_CLS_ACT
- q->rx_class = cl;
-#endif
- if (cl == NULL) {
- if (ret & __NET_XMIT_BYPASS)
- qdisc_qstats_drop(sch);
- __qdisc_drop(skb, to_free);
- return ret;
- }
-
- ret = qdisc_enqueue(skb, cl->q, to_free);
- if (ret == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- cbq_mark_toplevel(q, cl);
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return ret;
- }
-
- if (net_xmit_drop_count(ret)) {
- qdisc_qstats_drop(sch);
- cbq_mark_toplevel(q, cl);
- cl->qstats.drops++;
- }
- return ret;
-}
-
-/* Overlimit action: penalize leaf class by adding offtime */
-static void cbq_overlimit(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = cl->undertime - q->now;
-
- if (!cl->delayed) {
- delay += cl->offtime;
-
- /*
- * Class goes to sleep, so that it will have no
- * chance to work avgidle. Let's forgive it 8)
- *
- * BTW cbq-2.0 has a crap in this
- * place, apparently they forgot to shift it by cl->ewma_log.
- */
- if (cl->avgidle < 0)
- delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
- if (cl->avgidle < cl->minidle)
- cl->avgidle = cl->minidle;
- if (delay <= 0)
- delay = 1;
- cl->undertime = q->now + delay;
-
- cl->xstats.overactions++;
- cl->delayed = 1;
- }
- if (q->wd_expires == 0 || q->wd_expires > delay)
- q->wd_expires = delay;
-
- /* Dirty work! We must schedule wakeups based on
- * real available rate, rather than leaf rate,
- * which may be tiny (even zero).
- */
- if (q->toplevel == TC_CBQ_MAXLEVEL) {
- struct cbq_class *b;
- psched_tdiff_t base_delay = q->wd_expires;
-
- for (b = cl->borrow; b; b = b->borrow) {
- delay = b->undertime - q->now;
- if (delay < base_delay) {
- if (delay <= 0)
- delay = 1;
- base_delay = delay;
- }
- }
-
- q->wd_expires = base_delay;
- }
-}
-
-static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
- psched_time_t now)
-{
- struct cbq_class *cl;
- struct cbq_class *cl_prev = q->active[prio];
- psched_time_t sched = now;
-
- if (cl_prev == NULL)
- return 0;
-
- do {
- cl = cl_prev->next_alive;
- if (now - cl->penalized > 0) {
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
- cl->cpriority = cl->priority;
- cl->delayed = 0;
- cbq_activate_class(cl);
-
- if (cl == q->active[prio]) {
- q->active[prio] = cl_prev;
- if (cl == q->active[prio]) {
- q->active[prio] = NULL;
- return 0;
- }
- }
-
- cl = cl_prev->next_alive;
- } else if (sched - cl->penalized > 0)
- sched = cl->penalized;
- } while ((cl_prev = cl) != q->active[prio]);
-
- return sched - now;
-}
-
-static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
-{
- struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
- delay_timer);
- struct Qdisc *sch = q->watchdog.qdisc;
- psched_time_t now;
- psched_tdiff_t delay = 0;
- unsigned int pmask;
-
- now = psched_get_time();
-
- pmask = q->pmask;
- q->pmask = 0;
-
- while (pmask) {
- int prio = ffz(~pmask);
- psched_tdiff_t tmp;
-
- pmask &= ~(1<<prio);
-
- tmp = cbq_undelay_prio(q, prio, now);
- if (tmp > 0) {
- q->pmask |= 1<<prio;
- if (tmp < delay || delay == 0)
- delay = tmp;
- }
- }
-
- if (delay) {
- ktime_t time;
-
- time = 0;
- time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
- hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
- }
-
- __netif_schedule(qdisc_root(sch));
- return HRTIMER_NORESTART;
-}
-
-/*
- * It is mission critical procedure.
- *
- * We "regenerate" toplevel cutoff, if transmitting class
- * has backlog and it is not regulated. It is not part of
- * original CBQ description, but looks more reasonable.
- * Probably, it is wrong. This question needs further investigation.
- */
-
-static inline void
-cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
- struct cbq_class *borrowed)
-{
- if (cl && q->toplevel >= borrowed->level) {
- if (cl->q->q.qlen > 1) {
- do {
- if (borrowed->undertime == PSCHED_PASTPERFECT) {
- q->toplevel = borrowed->level;
- return;
- }
- } while ((borrowed = borrowed->borrow) != NULL);
- }
-#if 0
- /* It is not necessary now. Uncommenting it
- will save CPU cycles, but decrease fairness.
- */
- q->toplevel = TC_CBQ_MAXLEVEL;
-#endif
- }
-}
-
-static void
-cbq_update(struct cbq_sched_data *q)
-{
- struct cbq_class *this = q->tx_class;
- struct cbq_class *cl = this;
- int len = q->tx_len;
- psched_time_t now;
-
- q->tx_class = NULL;
- /* Time integrator. We calculate EOS time
- * by adding expected packet transmission time.
- */
- now = q->now + L2T(&q->link, len);
-
- for ( ; cl; cl = cl->share) {
- long avgidle = cl->avgidle;
- long idle;
-
- cl->bstats.packets++;
- cl->bstats.bytes += len;
-
- /*
- * (now - last) is total time between packet right edges.
- * (last_pktlen/rate) is "virtual" busy time, so that
- *
- * idle = (now - last) - last_pktlen/rate
- */
-
- idle = now - cl->last;
- if ((unsigned long)idle > 128*1024*1024) {
- avgidle = cl->maxidle;
- } else {
- idle -= L2T(cl, len);
-
- /* true_avgidle := (1-W)*true_avgidle + W*idle,
- * where W=2^{-ewma_log}. But cl->avgidle is scaled:
- * cl->avgidle == true_avgidle/W,
- * hence:
- */
- avgidle += idle - (avgidle>>cl->ewma_log);
- }
-
- if (avgidle <= 0) {
- /* Overlimit or at-limit */
-
- if (avgidle < cl->minidle)
- avgidle = cl->minidle;
-
- cl->avgidle = avgidle;
-
- /* Calculate expected time, when this class
- * will be allowed to send.
- * It will occur, when:
- * (1-W)*true_avgidle + W*delay = 0, i.e.
- * idle = (1/W - 1)*(-true_avgidle)
- * or
- * idle = (1 - W)*(-cl->avgidle);
- */
- idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
-
- /*
- * That is not all.
- * To maintain the rate allocated to the class,
- * we add to undertime virtual clock,
- * necessary to complete transmitted packet.
- * (len/phys_bandwidth has been already passed
- * to the moment of cbq_update)
- */
-
- idle -= L2T(&q->link, len);
- idle += L2T(cl, len);
-
- cl->undertime = now + idle;
- } else {
- /* Underlimit */
-
- cl->undertime = PSCHED_PASTPERFECT;
- if (avgidle > cl->maxidle)
- cl->avgidle = cl->maxidle;
- else
- cl->avgidle = avgidle;
- }
- if ((s64)(now - cl->last) > 0)
- cl->last = now;
- }
-
- cbq_update_toplevel(q, this, q->tx_borrowed);
-}
-
-static inline struct cbq_class *
-cbq_under_limit(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *this_cl = cl;
-
- if (cl->tparent == NULL)
- return cl;
-
- if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
- cl->delayed = 0;
- return cl;
- }
-
- do {
- /* It is very suspicious place. Now overlimit
- * action is generated for not bounded classes
- * only if link is completely congested.
- * Though it is in agree with ancestor-only paradigm,
- * it looks very stupid. Particularly,
- * it means that this chunk of code will either
- * never be called or result in strong amplification
- * of burstiness. Dangerous, silly, and, however,
- * no another solution exists.
- */
- cl = cl->borrow;
- if (!cl) {
- this_cl->qstats.overlimits++;
- cbq_overlimit(this_cl);
- return NULL;
- }
- if (cl->level > q->toplevel)
- return NULL;
- } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
-
- cl->delayed = 0;
- return cl;
-}
-
-static inline struct sk_buff *
-cbq_dequeue_prio(struct Qdisc *sch, int prio)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl_tail, *cl_prev, *cl;
- struct sk_buff *skb;
- int deficit;
-
- cl_tail = cl_prev = q->active[prio];
- cl = cl_prev->next_alive;
-
- do {
- deficit = 0;
-
- /* Start round */
- do {
- struct cbq_class *borrow = cl;
-
- if (cl->q->q.qlen &&
- (borrow = cbq_under_limit(cl)) == NULL)
- goto skip_class;
-
- if (cl->deficit <= 0) {
- /* Class exhausted its allotment per
- * this round. Switch to the next one.
- */
- deficit = 1;
- cl->deficit += cl->quantum;
- goto next_class;
- }
-
- skb = cl->q->dequeue(cl->q);
-
- /* Class did not give us any skb :-(
- * It could occur even if cl->q->q.qlen != 0
- * f.e. if cl->q == "tbf"
- */
- if (skb == NULL)
- goto skip_class;
-
- cl->deficit -= qdisc_pkt_len(skb);
- q->tx_class = cl;
- q->tx_borrowed = borrow;
- if (borrow != cl) {
-#ifndef CBQ_XSTATS_BORROWS_BYTES
- borrow->xstats.borrows++;
- cl->xstats.borrows++;
-#else
- borrow->xstats.borrows += qdisc_pkt_len(skb);
- cl->xstats.borrows += qdisc_pkt_len(skb);
-#endif
- }
- q->tx_len = qdisc_pkt_len(skb);
-
- if (cl->deficit <= 0) {
- q->active[prio] = cl;
- cl = cl->next_alive;
- cl->deficit += cl->quantum;
- }
- return skb;
-
-skip_class:
- if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
- /* Class is empty or penalized.
- * Unlink it from active chain.
- */
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
-
- /* Did cl_tail point to it? */
- if (cl == cl_tail) {
- /* Repair it! */
- cl_tail = cl_prev;
-
- /* Was it the last class in this band? */
- if (cl == cl_tail) {
- /* Kill the band! */
- q->active[prio] = NULL;
- q->activemask &= ~(1<<prio);
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
- return NULL;
- }
-
- q->active[prio] = cl_tail;
- }
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
-
- cl = cl_prev;
- }
-
-next_class:
- cl_prev = cl;
- cl = cl->next_alive;
- } while (cl_prev != cl_tail);
- } while (deficit);
-
- q->active[prio] = cl_prev;
-
- return NULL;
-}
-
-static inline struct sk_buff *
-cbq_dequeue_1(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct sk_buff *skb;
- unsigned int activemask;
-
- activemask = q->activemask & 0xFF;
- while (activemask) {
- int prio = ffz(~activemask);
- activemask &= ~(1<<prio);
- skb = cbq_dequeue_prio(sch, prio);
- if (skb)
- return skb;
- }
- return NULL;
-}
-
-static struct sk_buff *
-cbq_dequeue(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- struct cbq_sched_data *q = qdisc_priv(sch);
- psched_time_t now;
-
- now = psched_get_time();
-
- if (q->tx_class)
- cbq_update(q);
-
- q->now = now;
-
- for (;;) {
- q->wd_expires = 0;
-
- skb = cbq_dequeue_1(sch);
- if (skb) {
- qdisc_bstats_update(sch, skb);
- sch->q.qlen--;
- return skb;
- }
-
- /* All the classes are overlimit.
- *
- * It is possible, if:
- *
- * 1. Scheduler is empty.
- * 2. Toplevel cutoff inhibited borrowing.
- * 3. Root class is overlimit.
- *
- * Reset 2d and 3d conditions and retry.
- *
- * Note, that NS and cbq-2.0 are buggy, peeking
- * an arbitrary class is appropriate for ancestor-only
- * sharing, but not for toplevel algorithm.
- *
- * Our version is better, but slower, because it requires
- * two passes, but it is unavoidable with top-level sharing.
- */
-
- if (q->toplevel == TC_CBQ_MAXLEVEL &&
- q->link.undertime == PSCHED_PASTPERFECT)
- break;
-
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->link.undertime = PSCHED_PASTPERFECT;
- }
-
- /* No packets in scheduler or nobody wants to give them to us :-(
- * Sigh... start watchdog timer in the last case.
- */
-
- if (sch->q.qlen) {
- qdisc_qstats_overlimit(sch);
- if (q->wd_expires)
- qdisc_watchdog_schedule(&q->watchdog,
- now + q->wd_expires);
- }
- return NULL;
-}
-
-/* CBQ class maintanance routines */
-
-static void cbq_adjust_levels(struct cbq_class *this)
-{
- if (this == NULL)
- return;
-
- do {
- int level = 0;
- struct cbq_class *cl;
-
- cl = this->children;
- if (cl) {
- do {
- if (cl->level > level)
- level = cl->level;
- } while ((cl = cl->sibling) != this->children);
- }
- this->level = level + 1;
- } while ((this = this->tparent) != NULL);
-}
-
-static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
-{
- struct cbq_class *cl;
- unsigned int h;
-
- if (q->quanta[prio] == 0)
- return;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- /* BUGGGG... Beware! This expression suffer of
- * arithmetic overflows!
- */
- if (cl->priority == prio) {
- cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
- q->quanta[prio];
- }
- if (cl->quantum <= 0 ||
- cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
- pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
- cl->common.classid, cl->quantum);
- cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
- }
- }
- }
-}
-
-static void cbq_sync_defmap(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *split = cl->split;
- unsigned int h;
- int i;
-
- if (split == NULL)
- return;
-
- for (i = 0; i <= TC_PRIO_MAX; i++) {
- if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
- split->defaults[i] = NULL;
- }
-
- for (i = 0; i <= TC_PRIO_MAX; i++) {
- int level = split->level;
-
- if (split->defaults[i])
- continue;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- struct cbq_class *c;
-
- hlist_for_each_entry(c, &q->clhash.hash[h],
- common.hnode) {
- if (c->split == split && c->level < level &&
- c->defmap & (1<<i)) {
- split->defaults[i] = c;
- level = c->level;
- }
- }
- }
- }
-}
-
-static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
-{
- struct cbq_class *split = NULL;
-
- if (splitid == 0) {
- split = cl->split;
- if (!split)
- return;
- splitid = split->common.classid;
- }
-
- if (split == NULL || split->common.classid != splitid) {
- for (split = cl->tparent; split; split = split->tparent)
- if (split->common.classid == splitid)
- break;
- }
-
- if (split == NULL)
- return;
-
- if (cl->split != split) {
- cl->defmap = 0;
- cbq_sync_defmap(cl);
- cl->split = split;
- cl->defmap = def & mask;
- } else
- cl->defmap = (cl->defmap & ~mask) | (def & mask);
-
- cbq_sync_defmap(cl);
-}
-
-static void cbq_unlink_class(struct cbq_class *this)
-{
- struct cbq_class *cl, **clp;
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
-
- qdisc_class_hash_remove(&q->clhash, &this->common);
-
- if (this->tparent) {
- clp = &this->sibling;
- cl = *clp;
- do {
- if (cl == this) {
- *clp = cl->sibling;
- break;
- }
- clp = &cl->sibling;
- } while ((cl = *clp) != this->sibling);
-
- if (this->tparent->children == this) {
- this->tparent->children = this->sibling;
- if (this->sibling == this)
- this->tparent->children = NULL;
- }
- } else {
- WARN_ON(this->sibling != this);
- }
-}
-
-static void cbq_link_class(struct cbq_class *this)
-{
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- struct cbq_class *parent = this->tparent;
-
- this->sibling = this;
- qdisc_class_hash_insert(&q->clhash, &this->common);
-
- if (parent == NULL)
- return;
-
- if (parent->children == NULL) {
- parent->children = this;
- } else {
- this->sibling = parent->children->sibling;
- parent->children->sibling = this;
- }
-}
-
-static void
-cbq_reset(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl;
- int prio;
- unsigned int h;
-
- q->activemask = 0;
- q->pmask = 0;
- q->tx_class = NULL;
- q->tx_borrowed = NULL;
- qdisc_watchdog_cancel(&q->watchdog);
- hrtimer_cancel(&q->delay_timer);
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->now = psched_get_time();
-
- for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
- q->active[prio] = NULL;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- qdisc_reset(cl->q);
-
- cl->next_alive = NULL;
- cl->undertime = PSCHED_PASTPERFECT;
- cl->avgidle = cl->maxidle;
- cl->deficit = cl->quantum;
- cl->cpriority = cl->priority;
- }
- }
-}
-
-
-static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
-{
- if (lss->change & TCF_CBQ_LSS_FLAGS) {
- cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
- cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
- }
- if (lss->change & TCF_CBQ_LSS_EWMA)
- cl->ewma_log = lss->ewma_log;
- if (lss->change & TCF_CBQ_LSS_AVPKT)
- cl->avpkt = lss->avpkt;
- if (lss->change & TCF_CBQ_LSS_MINIDLE)
- cl->minidle = -(long)lss->minidle;
- if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
- cl->maxidle = lss->maxidle;
- cl->avgidle = lss->maxidle;
- }
- if (lss->change & TCF_CBQ_LSS_OFFTIME)
- cl->offtime = lss->offtime;
- return 0;
-}
-
-static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- q->nclasses[cl->priority]--;
- q->quanta[cl->priority] -= cl->weight;
- cbq_normalize_quanta(q, cl->priority);
-}
-
-static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- q->nclasses[cl->priority]++;
- q->quanta[cl->priority] += cl->weight;
- cbq_normalize_quanta(q, cl->priority);
-}
-
-static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-
- if (wrr->allot)
- cl->allot = wrr->allot;
- if (wrr->weight)
- cl->weight = wrr->weight;
- if (wrr->priority) {
- cl->priority = wrr->priority - 1;
- cl->cpriority = cl->priority;
- if (cl->priority >= cl->priority2)
- cl->priority2 = TC_CBQ_MAXPRIO - 1;
- }
-
- cbq_addprio(q, cl);
- return 0;
-}
-
-static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
-{
- cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
- return 0;
-}
-
-static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
- [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
- [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
- [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
- [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
- [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
- [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
- [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
-};
-
-static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
- struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- if (!opt) {
- NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
- return -EINVAL;
- }
-
- err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
- cbq_policy, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_CBQ_WRROPT]) {
- const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
-
- if (wrr->priority > TC_CBQ_MAXPRIO) {
- NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
- err = -EINVAL;
- }
- }
- return err;
-}
-
-static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_CBQ_MAX + 1];
- struct tc_ratespec *r;
- int err;
-
- qdisc_watchdog_init(&q->watchdog, sch);
- hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
- q->delay_timer.function = cbq_undelay;
-
- err = cbq_opt_parse(tb, opt, extack);
- if (err < 0)
- return err;
-
- if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
- NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
- return -EINVAL;
- }
-
- r = nla_data(tb[TCA_CBQ_RATE]);
-
- q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
- if (!q->link.R_tab)
- return -EINVAL;
-
- err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
- if (err)
- goto put_rtab;
-
- err = qdisc_class_hash_init(&q->clhash);
- if (err < 0)
- goto put_block;
-
- q->link.sibling = &q->link;
- q->link.common.classid = sch->handle;
- q->link.qdisc = sch;
- q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle, NULL);
- if (!q->link.q)
- q->link.q = &noop_qdisc;
- else
- qdisc_hash_add(q->link.q, true);
-
- q->link.priority = TC_CBQ_MAXPRIO - 1;
- q->link.priority2 = TC_CBQ_MAXPRIO - 1;
- q->link.cpriority = TC_CBQ_MAXPRIO - 1;
- q->link.allot = psched_mtu(qdisc_dev(sch));
- q->link.quantum = q->link.allot;
- q->link.weight = q->link.R_tab->rate.rate;
-
- q->link.ewma_log = TC_CBQ_DEF_EWMA;
- q->link.avpkt = q->link.allot/2;
- q->link.minidle = -0x7FFFFFFF;
-
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->now = psched_get_time();
-
- cbq_link_class(&q->link);
-
- if (tb[TCA_CBQ_LSSOPT])
- cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
-
- cbq_addprio(q, &q->link);
- return 0;
-
-put_block:
- tcf_block_put(q->link.block);
-
-put_rtab:
- qdisc_put_rtab(q->link.R_tab);
- return err;
-}
-
-static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
-
- if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_lssopt opt;
-
- opt.flags = 0;
- if (cl->borrow == NULL)
- opt.flags |= TCF_CBQ_LSS_BOUNDED;
- if (cl->share == NULL)
- opt.flags |= TCF_CBQ_LSS_ISOLATED;
- opt.ewma_log = cl->ewma_log;
- opt.level = cl->level;
- opt.avpkt = cl->avpkt;
- opt.maxidle = cl->maxidle;
- opt.minidle = (u32)(-cl->minidle);
- opt.offtime = cl->offtime;
- opt.change = ~0;
- if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_wrropt opt;
-
- memset(&opt, 0, sizeof(opt));
- opt.flags = 0;
- opt.allot = cl->allot;
- opt.priority = cl->priority + 1;
- opt.cpriority = cl->cpriority + 1;
- opt.weight = cl->weight;
- if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_fopt opt;
-
- if (cl->split || cl->defmap) {
- opt.split = cl->split ? cl->split->common.classid : 0;
- opt.defmap = cl->defmap;
- opt.defchange = ~0;
- if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
- goto nla_put_failure;
- }
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
-{
- if (cbq_dump_lss(skb, cl) < 0 ||
- cbq_dump_rate(skb, cl) < 0 ||
- cbq_dump_wrr(skb, cl) < 0 ||
- cbq_dump_fopt(skb, cl) < 0)
- return -1;
- return 0;
-}
-
-static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct nlattr *nest;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- if (cbq_dump_attr(skb, &q->link) < 0)
- goto nla_put_failure;
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static int
-cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- q->link.xstats.avgidle = q->link.avgidle;
- return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
-}
-
-static int
-cbq_dump_class(struct Qdisc *sch, unsigned long arg,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
- struct nlattr *nest;
-
- if (cl->tparent)
- tcm->tcm_parent = cl->tparent->common.classid;
- else
- tcm->tcm_parent = TC_H_ROOT;
- tcm->tcm_handle = cl->common.classid;
- tcm->tcm_info = cl->q->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- if (cbq_dump_attr(skb, cl) < 0)
- goto nla_put_failure;
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static int
-cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- struct gnet_dump *d)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
- __u32 qlen;
-
- cl->xstats.avgidle = cl->avgidle;
- cl->xstats.undertime = 0;
- qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
-
- if (cl->undertime != PSCHED_PASTPERFECT)
- cl->xstats.undertime = cl->undertime - q->now;
-
- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
- d, NULL, &cl->bstats) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
- return -1;
-
- return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
-}
-
-static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old, struct netlink_ext_ack *extack)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- cl->common.classid, extack);
- if (new == NULL)
- return -ENOBUFS;
- }
-
- *old = qdisc_replace(sch, new, &cl->q);
- return 0;
-}
-
-static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- return cl->q;
-}
-
-static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- cbq_deactivate_class(cl);
-}
-
-static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- return (unsigned long)cbq_class_lookup(q, classid);
-}
-
-static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- WARN_ON(cl->filters);
-
- tcf_block_put(cl->block);
- qdisc_put(cl->q);
- qdisc_put_rtab(cl->R_tab);
- gen_kill_estimator(&cl->rate_est);
- if (cl != &q->link)
- kfree(cl);
-}
-
-static void cbq_destroy(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct hlist_node *next;
- struct cbq_class *cl;
- unsigned int h;
-
-#ifdef CONFIG_NET_CLS_ACT
- q->rx_class = NULL;
-#endif
- /*
- * Filters must be destroyed first because we don't destroy the
- * classes from root to leafs which means that filters can still
- * be bound to classes which have been destroyed already. --TGR '04
- */
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- tcf_block_put(cl->block);
- cl->block = NULL;
- }
- }
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
- common.hnode)
- cbq_destroy_class(sch, cl);
- }
- qdisc_class_hash_destroy(&q->clhash);
-}
-
-static int
-cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
- unsigned long *arg, struct netlink_ext_ack *extack)
-{
- int err;
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)*arg;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_CBQ_MAX + 1];
- struct cbq_class *parent;
- struct qdisc_rate_table *rtab = NULL;
-
- err = cbq_opt_parse(tb, opt, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) {
- NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params");
- return -EOPNOTSUPP;
- }
-
- if (cl) {
- /* Check parent */
- if (parentid) {
- if (cl->tparent &&
- cl->tparent->common.classid != parentid) {
- NL_SET_ERR_MSG(extack, "Invalid parent id");
- return -EINVAL;
- }
- if (!cl->tparent && parentid != TC_H_ROOT) {
- NL_SET_ERR_MSG(extack, "Parent must be root");
- return -EINVAL;
- }
- }
-
- if (tb[TCA_CBQ_RATE]) {
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
- tb[TCA_CBQ_RTAB], extack);
- if (rtab == NULL)
- return -EINVAL;
- }
-
- if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
- NULL,
- qdisc_root_sleeping_running(sch),
- tca[TCA_RATE]);
- if (err) {
- NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
- qdisc_put_rtab(rtab);
- return err;
- }
- }
-
- /* Change class parameters */
- sch_tree_lock(sch);
-
- if (cl->next_alive != NULL)
- cbq_deactivate_class(cl);
-
- if (rtab) {
- qdisc_put_rtab(cl->R_tab);
- cl->R_tab = rtab;
- }
-
- if (tb[TCA_CBQ_LSSOPT])
- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
-
- if (tb[TCA_CBQ_WRROPT]) {
- cbq_rmprio(q, cl);
- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
- }
-
- if (tb[TCA_CBQ_FOPT])
- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
-
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
-
- sch_tree_unlock(sch);
-
- return 0;
- }
-
- if (parentid == TC_H_ROOT)
- return -EINVAL;
-
- if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) {
- NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing");
- return -EINVAL;
- }
-
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB],
- extack);
- if (rtab == NULL)
- return -EINVAL;
-
- if (classid) {
- err = -EINVAL;
- if (TC_H_MAJ(classid ^ sch->handle) ||
- cbq_class_lookup(q, classid)) {
- NL_SET_ERR_MSG(extack, "Specified class not found");
- goto failure;
- }
- } else {
- int i;
- classid = TC_H_MAKE(sch->handle, 0x8000);
-
- for (i = 0; i < 0x8000; i++) {
- if (++q->hgenerator >= 0x8000)
- q->hgenerator = 1;
- if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
- break;
- }
- err = -ENOSR;
- if (i >= 0x8000) {
- NL_SET_ERR_MSG(extack, "Unable to generate classid");
- goto failure;
- }
- classid = classid|q->hgenerator;
- }
-
- parent = &q->link;
- if (parentid) {
- parent = cbq_class_lookup(q, parentid);
- err = -EINVAL;
- if (!parent) {
- NL_SET_ERR_MSG(extack, "Failed to find parentid");
- goto failure;
- }
- }
-
- err = -ENOBUFS;
- cl = kzalloc(sizeof(*cl), GFP_KERNEL);
- if (cl == NULL)
- goto failure;
-
- err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
- if (err) {
- kfree(cl);
- goto failure;
- }
-
- if (tca[TCA_RATE]) {
- err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL,
- qdisc_root_sleeping_running(sch),
- tca[TCA_RATE]);
- if (err) {
- NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
- tcf_block_put(cl->block);
- kfree(cl);
- goto failure;
- }
- }
-
- cl->R_tab = rtab;
- rtab = NULL;
- cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
- NULL);
- if (!cl->q)
- cl->q = &noop_qdisc;
- else
- qdisc_hash_add(cl->q, true);
-
- cl->common.classid = classid;
- cl->tparent = parent;
- cl->qdisc = sch;
- cl->allot = parent->allot;
- cl->quantum = cl->allot;
- cl->weight = cl->R_tab->rate.rate;
-
- sch_tree_lock(sch);
- cbq_link_class(cl);
- cl->borrow = cl->tparent;
- if (cl->tparent != &q->link)
- cl->share = cl->tparent;
- cbq_adjust_levels(parent);
- cl->minidle = -0x7FFFFFFF;
- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
- if (cl->ewma_log == 0)
- cl->ewma_log = q->link.ewma_log;
- if (cl->maxidle == 0)
- cl->maxidle = q->link.maxidle;
- if (cl->avpkt == 0)
- cl->avpkt = q->link.avpkt;
- if (tb[TCA_CBQ_FOPT])
- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
- sch_tree_unlock(sch);
-
- qdisc_class_hash_grow(sch, &q->clhash);
-
- *arg = (unsigned long)cl;
- return 0;
-
-failure:
- qdisc_put_rtab(rtab);
- return err;
-}
-
-static int cbq_delete(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (cl->filters || cl->children || cl == &q->link)
- return -EBUSY;
-
- sch_tree_lock(sch);
-
- qdisc_purge_queue(cl->q);
-
- if (cl->next_alive)
- cbq_deactivate_class(cl);
-
- if (q->tx_borrowed == cl)
- q->tx_borrowed = q->tx_class;
- if (q->tx_class == cl) {
- q->tx_class = NULL;
- q->tx_borrowed = NULL;
- }
-#ifdef CONFIG_NET_CLS_ACT
- if (q->rx_class == cl)
- q->rx_class = NULL;
-#endif
-
- cbq_unlink_class(cl);
- cbq_adjust_levels(cl->tparent);
- cl->defmap = 0;
- cbq_sync_defmap(cl);
-
- cbq_rmprio(q, cl);
- sch_tree_unlock(sch);
-
- cbq_destroy_class(sch, cl);
- return 0;
-}
-
-static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (cl == NULL)
- cl = &q->link;
-
- return cl->block;
-}
-
-static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
- u32 classid)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *p = (struct cbq_class *)parent;
- struct cbq_class *cl = cbq_class_lookup(q, classid);
-
- if (cl) {
- if (p && p->level <= cl->level)
- return 0;
- cl->filters++;
- return (unsigned long)cl;
- }
- return 0;
-}
-
-static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- cl->filters--;
-}
-
-static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl;
- unsigned int h;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- if (arg->count < arg->skip) {
- arg->count++;
- continue;
- }
- if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
- arg->stop = 1;
- return;
- }
- arg->count++;
- }
- }
-}
-
-static const struct Qdisc_class_ops cbq_class_ops = {
- .graft = cbq_graft,
- .leaf = cbq_leaf,
- .qlen_notify = cbq_qlen_notify,
- .find = cbq_find,
- .change = cbq_change_class,
- .delete = cbq_delete,
- .walk = cbq_walk,
- .tcf_block = cbq_tcf_block,
- .bind_tcf = cbq_bind_filter,
- .unbind_tcf = cbq_unbind_filter,
- .dump = cbq_dump_class,
- .dump_stats = cbq_dump_class_stats,
-};
-
-static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
- .next = NULL,
- .cl_ops = &cbq_class_ops,
- .id = "cbq",
- .priv_size = sizeof(struct cbq_sched_data),
- .enqueue = cbq_enqueue,
- .dequeue = cbq_dequeue,
- .peek = qdisc_peek_dequeued,
- .init = cbq_init,
- .reset = cbq_reset,
- .destroy = cbq_destroy,
- .change = NULL,
- .dump = cbq_dump,
- .dump_stats = cbq_dump_stats,
- .owner = THIS_MODULE,
-};
-
-static int __init cbq_module_init(void)
-{
- return register_qdisc(&cbq_qdisc_ops);
-}
-static void __exit cbq_module_exit(void)
-{
- unregister_qdisc(&cbq_qdisc_ops);
-}
-module_init(cbq_module_init)
-module_exit(cbq_module_exit)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
deleted file mode 100644
index a75bc7f80cd7..000000000000
--- a/net/sched/sch_dsmark.c
+++ /dev/null
@@ -1,521 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_dsmark.c - Differentiated Services field marker */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/bitops.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <asm/byteorder.h>
-
-/*
- * classid class marking
- * ------- ----- -------
- * n/a 0 n/a
- * x:0 1 use entry [0]
- * ... ... ...
- * x:y y>0 y+1 use entry [y]
- * ... ... ...
- * x:indices-1 indices use entry [indices-1]
- * ... ... ...
- * x:y y+1 use entry [y & (indices-1)]
- * ... ... ...
- * 0xffff 0x10000 use entry [indices-1]
- */
-
-
-#define NO_DEFAULT_INDEX (1 << 16)
-
-struct mask_value {
- u8 mask;
- u8 value;
-};
-
-struct dsmark_qdisc_data {
- struct Qdisc *q;
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
- struct mask_value *mv;
- u16 indices;
- u8 set_tc_index;
- u32 default_index; /* index range is 0...0xffff */
-#define DSMARK_EMBEDDED_SZ 16
- struct mask_value embedded[DSMARK_EMBEDDED_SZ];
-};
-
-static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
-{
- return index <= p->indices && index > 0;
-}
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n",
- __func__, sch, p, new, old);
-
- if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle, NULL);
- if (new == NULL)
- new = &noop_qdisc;
- }
-
- *old = qdisc_replace(sch, new, &p->q);
- return 0;
-}
-
-static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- return p->q;
-}
-
-static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
-{
- return TC_H_MIN(classid) + 1;
-}
-
-static unsigned long dsmark_bind_filter(struct Qdisc *sch,
- unsigned long parent, u32 classid)
-{
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
- __func__, sch, qdisc_priv(sch), classid);
-
- return dsmark_find(sch, classid);
-}
-
-static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
-{
-}
-
-static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
- [TCA_DSMARK_INDICES] = { .type = NLA_U16 },
- [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 },
- [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG },
- [TCA_DSMARK_MASK] = { .type = NLA_U8 },
- [TCA_DSMARK_VALUE] = { .type = NLA_U8 },
-};
-
-static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_DSMARK_MAX + 1];
- int err = -EINVAL;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
- __func__, sch, p, classid, parent, *arg);
-
- if (!dsmark_valid_index(p, *arg)) {
- err = -ENOENT;
- goto errout;
- }
-
- if (!opt)
- goto errout;
-
- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
- dsmark_policy, NULL);
- if (err < 0)
- goto errout;
-
- if (tb[TCA_DSMARK_VALUE])
- p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]);
-
- if (tb[TCA_DSMARK_MASK])
- p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
-
- err = 0;
-
-errout:
- return err;
-}
-
-static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- if (!dsmark_valid_index(p, arg))
- return -EINVAL;
-
- p->mv[arg - 1].mask = 0xff;
- p->mv[arg - 1].value = 0;
-
- return 0;
-}
-
-static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- int i;
-
- pr_debug("%s(sch %p,[qdisc %p],walker %p)\n",
- __func__, sch, p, walker);
-
- if (walker->stop)
- return;
-
- for (i = 0; i < p->indices; i++) {
- if (p->mv[i].mask == 0xff && !p->mv[i].value)
- goto ignore;
- if (walker->count >= walker->skip) {
- if (walker->fn(sch, i + 1, walker) < 0) {
- walker->stop = 1;
- break;
- }
- }
-ignore:
- walker->count++;
- }
-}
-
-static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- return p->block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- unsigned int len = qdisc_pkt_len(skb);
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- int err;
-
- pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
-
- if (p->set_tc_index) {
- int wlen = skb_network_offset(skb);
-
- switch (skb_protocol(skb, true)) {
- case htons(ETH_P_IP):
- wlen += sizeof(struct iphdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
- goto drop;
-
- skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
- & ~INET_ECN_MASK;
- break;
-
- case htons(ETH_P_IPV6):
- wlen += sizeof(struct ipv6hdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
- goto drop;
-
- skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
- & ~INET_ECN_MASK;
- break;
- default:
- skb->tc_index = 0;
- break;
- }
- }
-
- if (TC_H_MAJ(skb->priority) == sch->handle)
- skb->tc_index = TC_H_MIN(skb->priority);
- else {
- struct tcf_result res;
- struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tcf_classify(skb, fl, &res, false);
-
- pr_debug("result %d class 0x%04x\n", result, res.classid);
-
- switch (result) {
-#ifdef CONFIG_NET_CLS_ACT
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- __qdisc_drop(skb, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
-
- case TC_ACT_SHOT:
- goto drop;
-#endif
- case TC_ACT_OK:
- skb->tc_index = TC_H_MIN(res.classid);
- break;
-
- default:
- if (p->default_index != NO_DEFAULT_INDEX)
- skb->tc_index = p->default_index;
- break;
- }
- }
-
- err = qdisc_enqueue(skb, p->q, to_free);
- if (err != NET_XMIT_SUCCESS) {
- if (net_xmit_drop_count(err))
- qdisc_qstats_drop(sch);
- return err;
- }
-
- sch->qstats.backlog += len;
- sch->q.qlen++;
-
- return NET_XMIT_SUCCESS;
-
-drop:
- qdisc_drop(skb, sch, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
-
-static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct sk_buff *skb;
- u32 index;
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- skb = qdisc_dequeue_peeked(p->q);
- if (skb == NULL)
- return NULL;
-
- qdisc_bstats_update(sch, skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
-
- index = skb->tc_index & (p->indices - 1);
- pr_debug("index %d->%d\n", skb->tc_index, index);
-
- switch (skb_protocol(skb, true)) {
- case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
- p->mv[index].value);
- break;
- case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask,
- p->mv[index].value);
- break;
- default:
- /*
- * Only complain if a change was actually attempted.
- * This way, we can send non-IP traffic through dsmark
- * and don't need yet another qdisc as a bypass.
- */
- if (p->mv[index].mask != 0xff || p->mv[index].value)
- pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(skb_protocol(skb, true)));
- break;
- }
-
- return skb;
-}
-
-static struct sk_buff *dsmark_peek(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- return p->q->ops->peek(p->q);
-}
-
-static int dsmark_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *tb[TCA_DSMARK_MAX + 1];
- int err = -EINVAL;
- u32 default_index = NO_DEFAULT_INDEX;
- u16 indices;
- int i;
-
- pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
-
- if (!opt)
- goto errout;
-
- err = tcf_block_get(&p->block, &p->filter_list, sch, extack);
- if (err)
- return err;
-
- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
- dsmark_policy, NULL);
- if (err < 0)
- goto errout;
-
- err = -EINVAL;
- if (!tb[TCA_DSMARK_INDICES])
- goto errout;
- indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
-
- if (hweight32(indices) != 1)
- goto errout;
-
- if (tb[TCA_DSMARK_DEFAULT_INDEX])
- default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
-
- if (indices <= DSMARK_EMBEDDED_SZ)
- p->mv = p->embedded;
- else
- p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL);
- if (!p->mv) {
- err = -ENOMEM;
- goto errout;
- }
- for (i = 0; i < indices; i++) {
- p->mv[i].mask = 0xff;
- p->mv[i].value = 0;
- }
- p->indices = indices;
- p->default_index = default_index;
- p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
-
- p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle,
- NULL);
- if (p->q == NULL)
- p->q = &noop_qdisc;
- else
- qdisc_hash_add(p->q, true);
-
- pr_debug("%s: qdisc %p\n", __func__, p->q);
-
- err = 0;
-errout:
- return err;
-}
-
-static void dsmark_reset(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- if (p->q)
- qdisc_reset(p->q);
-}
-
-static void dsmark_destroy(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- tcf_block_put(p->block);
- qdisc_put(p->q);
- if (p->mv != p->embedded)
- kfree(p->mv);
-}
-
-static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opts = NULL;
-
- pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl);
-
- if (!dsmark_valid_index(p, cl))
- return -EINVAL;
-
- tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
- tcm->tcm_info = p->q->handle;
-
- opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (opts == NULL)
- goto nla_put_failure;
- if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
- nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value))
- goto nla_put_failure;
-
- return nla_nest_end(skb, opts);
-
-nla_put_failure:
- nla_nest_cancel(skb, opts);
- return -EMSGSIZE;
-}
-
-static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opts = NULL;
-
- opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (opts == NULL)
- goto nla_put_failure;
- if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices))
- goto nla_put_failure;
-
- if (p->default_index != NO_DEFAULT_INDEX &&
- nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index))
- goto nla_put_failure;
-
- if (p->set_tc_index &&
- nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX))
- goto nla_put_failure;
-
- return nla_nest_end(skb, opts);
-
-nla_put_failure:
- nla_nest_cancel(skb, opts);
- return -EMSGSIZE;
-}
-
-static const struct Qdisc_class_ops dsmark_class_ops = {
- .graft = dsmark_graft,
- .leaf = dsmark_leaf,
- .find = dsmark_find,
- .change = dsmark_change,
- .delete = dsmark_delete,
- .walk = dsmark_walk,
- .tcf_block = dsmark_tcf_block,
- .bind_tcf = dsmark_bind_filter,
- .unbind_tcf = dsmark_unbind_filter,
- .dump = dsmark_dump_class,
-};
-
-static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
- .next = NULL,
- .cl_ops = &dsmark_class_ops,
- .id = "dsmark",
- .priv_size = sizeof(struct dsmark_qdisc_data),
- .enqueue = dsmark_enqueue,
- .dequeue = dsmark_dequeue,
- .peek = dsmark_peek,
- .init = dsmark_init,
- .reset = dsmark_reset,
- .destroy = dsmark_destroy,
- .change = NULL,
- .dump = dsmark_dump,
- .owner = THIS_MODULE,
-};
-
-static int __init dsmark_module_init(void)
-{
- return register_qdisc(&dsmark_qdisc_ops);
-}
-
-static void __exit dsmark_module_exit(void)
-{
- unregister_qdisc(&dsmark_qdisc_ops);
-}
-
-module_init(dsmark_module_init)
-module_exit(dsmark_module_exit)
-
-MODULE_LICENSE("GPL");
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 30bae60d626c..ed9cfa11b589 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -797,6 +797,16 @@ static void smc_pnet_create_pnetids_list(struct net *net)
u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
struct net_device *dev;
+ /* Newly created netns do not have devices.
+ * Do not even acquire rtnl.
+ */
+ if (list_empty(&net->dev_base_head))
+ return;
+
+ /* Note: This might not be needed, because smc_pnet_netdev_event()
+ * is also calling smc_pnet_add_base_pnetid() when handling
+ * NETDEV_UP event.
+ */
rtnl_lock();
for_each_netdev(net, dev)
smc_pnet_add_base_pnetid(net, dev, ndev_pnetid);
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index d435bffc6199..97ff11973c49 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -284,10 +284,10 @@ char *rpc_sockaddr2uaddr(const struct sockaddr *sap, gfp_t gfp_flags)
}
if (snprintf(portbuf, sizeof(portbuf),
- ".%u.%u", port >> 8, port & 0xff) > (int)sizeof(portbuf))
+ ".%u.%u", port >> 8, port & 0xff) >= (int)sizeof(portbuf))
return NULL;
- if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) > sizeof(addrbuf))
+ if (strlcat(addrbuf, portbuf, sizeof(addrbuf)) >= sizeof(addrbuf))
return NULL;
return kstrdup(addrbuf, gfp_flags);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 2ff7b7083eba..e265b8d38aa1 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) {
- kfree(oa->data);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto free_oa;
}
oa->data[0].option.data = CREDS_VALUE;
@@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
/* option buffer */
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds);
if (err)
- return err;
+ goto free_creds;
oa->data[0].value.len = 1; /* presence */
} else {
/* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy);
if (err)
- return err;
+ goto free_creds;
}
}
return 0;
+
+free_creds:
+ kfree(creds);
+free_oa:
+ kfree(oa->data);
+ oa->data = NULL;
+ return err;
}
static int gssx_dec_status(struct xdr_stream *xdr,
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 7ee3c8b03a39..2bbacd9b97e5 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -800,7 +800,7 @@ static void tls_update(struct sock *sk, struct proto *p,
}
}
-static int tls_get_info(const struct sock *sk, struct sk_buff *skb)
+static int tls_get_info(struct sock *sk, struct sk_buff *skb)
{
u16 version, cipher_type;
struct tls_context *ctx;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index dd980438f201..46f1c19f7c60 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1754,6 +1754,7 @@ int tls_sw_recvmsg(struct sock *sk,
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct tls_prot_info *prot = &tls_ctx->prot_info;
struct sk_psock *psock;
+ int num_async, pending;
unsigned char control = 0;
ssize_t decrypted = 0;
struct strp_msg *rxm;
@@ -1766,8 +1767,6 @@ int tls_sw_recvmsg(struct sock *sk,
bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
bool is_peek = flags & MSG_PEEK;
bool bpf_strp_enabled;
- int num_async = 0;
- int pending;
flags |= nonblock;
@@ -1784,17 +1783,18 @@ int tls_sw_recvmsg(struct sock *sk,
if (err < 0) {
tls_err_abort(sk, err);
goto end;
- } else {
- copied = err;
}
- if (len <= copied)
- goto recv_end;
+ copied = err;
+ if (len <= copied || (copied && control != TLS_RECORD_TYPE_DATA))
+ goto end;
target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
len = len - copied;
timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ decrypted = 0;
+ num_async = 0;
while (len && (decrypted + copied < target || ctx->recv_pkt)) {
bool retain_skb = false;
bool zc = false;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index b003d0597f4b..224b1fdc8227 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -817,11 +817,11 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
sk->sk_write_space = unix_write_space;
sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
sk->sk_destruct = unix_sock_destructor;
- u = unix_sk(sk);
+ u = unix_sk(sk);
+ u->inflight = 0;
u->path.dentry = NULL;
u->path.mnt = NULL;
spin_lock_init(&u->lock);
- atomic_long_set(&u->inflight, 0);
INIT_LIST_HEAD(&u->link);
mutex_init(&u->iolock); /* single task reading lock */
mutex_init(&u->bindlock); /* single task binding lock */
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index dc2763540393..133ba5be4b58 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -166,17 +166,18 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
static void dec_inflight(struct unix_sock *usk)
{
- atomic_long_dec(&usk->inflight);
+ usk->inflight--;
}
static void inc_inflight(struct unix_sock *usk)
{
- atomic_long_inc(&usk->inflight);
+ usk->inflight++;
}
static void inc_inflight_move_tail(struct unix_sock *u)
{
- atomic_long_inc(&u->inflight);
+ u->inflight++;
+
/* If this still might be part of a cycle, move it to the end
* of the list, so that it's checked even if it was already
* passed over
@@ -198,7 +199,7 @@ void wait_for_unix_gc(void)
if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
!READ_ONCE(gc_in_progress))
unix_gc();
- wait_event(unix_gc_wait, gc_in_progress == false);
+ wait_event(unix_gc_wait, !READ_ONCE(gc_in_progress));
}
/* The external entry point: unix_gc() */
@@ -234,20 +235,34 @@ void unix_gc(void)
* receive queues. Other, non candidate sockets _can_ be
* added to queue, so we must make sure only to touch
* candidates.
+ *
+ * Embryos, though never candidates themselves, affect which
+ * candidates are reachable by the garbage collector. Before
+ * being added to a listener's queue, an embryo may already
+ * receive data carrying SCM_RIGHTS, potentially making the
+ * passed socket a candidate that is not yet reachable by the
+ * collector. It becomes reachable once the embryo is
+ * enqueued. Therefore, we must ensure that no SCM-laden
+ * embryo appears in a (candidate) listener's queue between
+ * consecutive scan_children() calls.
*/
list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+ struct sock *sk = &u->sk;
long total_refs;
- long inflight_refs;
- total_refs = file_count(u->sk.sk_socket->file);
- inflight_refs = atomic_long_read(&u->inflight);
+ total_refs = file_count(sk->sk_socket->file);
- BUG_ON(inflight_refs < 1);
- BUG_ON(total_refs < inflight_refs);
- if (total_refs == inflight_refs) {
+ BUG_ON(!u->inflight);
+ BUG_ON(total_refs < u->inflight);
+ if (total_refs == u->inflight) {
list_move_tail(&u->link, &gc_candidates);
__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+ if (sk->sk_state == TCP_LISTEN) {
+ unix_state_lock_nested(sk, U_LOCK_GC_LISTENER);
+ unix_state_unlock(sk);
+ }
}
}
@@ -271,7 +286,7 @@ void unix_gc(void)
/* Move cursor to after the current position. */
list_move(&cursor, &u->link);
- if (atomic_long_read(&u->inflight) > 0) {
+ if (u->inflight) {
list_move_tail(&u->link, &not_cycle_list);
__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
scan_children(&u->sk, inc_inflight_move_tail, NULL);
diff --git a/net/unix/scm.c b/net/unix/scm.c
index e8e2a00bb0f5..4eff7da9f6f9 100644
--- a/net/unix/scm.c
+++ b/net/unix/scm.c
@@ -34,10 +34,8 @@ struct sock *unix_get_socket(struct file *filp)
/* PF_UNIX ? */
if (s && sock->ops && sock->ops->family == PF_UNIX)
u_sock = s;
- } else {
- /* Could be an io_uring instance */
- u_sock = io_uring_get_socket(filp);
}
+
return u_sock;
}
EXPORT_SYMBOL(unix_get_socket);
@@ -54,12 +52,13 @@ void unix_inflight(struct user_struct *user, struct file *fp)
if (s) {
struct unix_sock *u = unix_sk(s);
- if (atomic_long_inc_return(&u->inflight) == 1) {
+ if (!u->inflight) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
} else {
BUG_ON(list_empty(&u->link));
}
+ u->inflight++;
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
@@ -76,10 +75,11 @@ void unix_notinflight(struct user_struct *user, struct file *fp)
if (s) {
struct unix_sock *u = unix_sk(s);
- BUG_ON(!atomic_long_read(&u->inflight));
+ BUG_ON(!u->inflight);
BUG_ON(list_empty(&u->link));
- if (atomic_long_dec_and_test(&u->inflight))
+ u->inflight--;
+ if (!u->inflight)
list_del_init(&u->link);
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0ac829c8f188..933591f9704b 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3595,6 +3595,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
if_idx++;
}
+ if_start = 0;
wp_idx++;
}
out:
@@ -3771,6 +3772,8 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
if (ntype != NL80211_IFTYPE_MESH_POINT)
return -EINVAL;
+ if (otype != NL80211_IFTYPE_MESH_POINT)
+ return -EINVAL;
if (netif_running(dev))
return -EBUSY;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 161dc194e634..a7ecf2956cdd 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -470,12 +470,12 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
if (get_user(len, optlen))
goto out;
- len = min_t(unsigned int, len, sizeof(int));
-
rc = -EINVAL;
if (len < 0)
goto out;
+ len = min_t(unsigned int, len, sizeof(int));
+
rc = -EFAULT;
if (put_user(len, optlen))
goto out;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index d04f91f4d09d..562d69f17b4c 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -895,6 +895,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
struct xsk_queue **q;
int entries;
+ if (optlen < sizeof(entries))
+ return -EINVAL;
if (copy_from_sockptr(&entries, optval, sizeof(entries)))
return -EFAULT;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 8fce2e93bb3b..070946d09381 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1753,6 +1753,9 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
if (xp->xfrm_nr == 0)
return 0;
+ if (xp->xfrm_nr > XFRM_MAX_DEPTH)
+ return -ENOBUFS;
+
for (i = 0; i < xp->xfrm_nr; i++) {
struct xfrm_user_tmpl *up = &vec[i];
struct xfrm_tmpl *kp = &xp->xfrm_vec[i];