aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/l2cap_core.c3
-rw-r--r--net/bluetooth/sco.c4
-rw-r--r--net/bridge/br_forward.c9
-rw-r--r--net/core/filter.c115
-rw-r--r--net/core/net_namespace.c13
-rw-r--r--net/core/rtnetlink.c2
-rw-r--r--net/core/skbuff.c27
-rw-r--r--net/core/skmsg.c53
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/sock_map.c3
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_bpf.c51
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_output.c4
-rw-r--r--net/ipv4/udp_offload.c12
-rw-r--r--net/ipv4/xfrm4_input.c6
-rw-r--r--net/ipv6/fib6_rules.c6
-rw-r--r--net/ipv6/xfrm6_input.c6
-rw-r--r--net/l2tp/l2tp_eth.c3
-rw-r--r--net/mac80211/ieee80211_i.h4
-rw-r--r--net/mptcp/protocol.c3
-rw-r--r--net/netlink/af_netlink.c23
-rw-r--r--net/nsh/nsh.c14
-rw-r--r--net/phonet/pn_netlink.c2
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/svc_xprt.c16
-rw-r--r--net/tipc/msg.c8
-rw-r--r--net/tls/tls_sw.c199
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/trace.h2
-rw-r--r--net/xfrm/xfrm_input.c8
32 files changed, 418 insertions, 203 deletions
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 11bfc8737e6c..900b35297585 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -435,6 +435,9 @@ static void l2cap_chan_timeout(struct work_struct *work)
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
+ if (!conn)
+ return;
+
mutex_lock(&conn->chan_lock);
/* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling
* this work. No need to call l2cap_chan_hold(chan) here again.
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 57c6a4f845a3..431e09cac178 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work)
struct sock *sk;
sco_conn_lock(conn);
+ if (!conn->hcon) {
+ sco_conn_unlock(conn);
+ return;
+ }
sk = conn->sk;
if (sk)
sock_hold(sk);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 011bd3c59da1..1b66c276118a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -253,6 +253,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
{
struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
const unsigned char *src = eth_hdr(skb)->h_source;
+ struct sk_buff *nskb;
if (!should_deliver(p, skb))
return;
@@ -261,12 +262,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb,
if (skb->dev == p->dev && ether_addr_equal(src, addr))
return;
- skb = skb_copy(skb, GFP_ATOMIC);
- if (!skb) {
+ __skb_push(skb, ETH_HLEN);
+ nskb = pskb_copy(skb, GFP_ATOMIC);
+ __skb_pull(skb, ETH_HLEN);
+ if (!nskb) {
DEV_STATS_INC(dev, tx_dropped);
return;
}
+ skb = nskb;
+ __skb_pull(skb, ETH_HLEN);
if (!is_broadcast_ether_addr(addr))
memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN);
diff --git a/net/core/filter.c b/net/core/filter.c
index 457d1a164ad5..47eb1bd47aa6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3987,37 +3987,75 @@ u32 xdp_master_redirect(struct xdp_buff *xdp)
}
EXPORT_SYMBOL_GPL(xdp_master_redirect);
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
+ struct net_device *dev,
+ struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ int err;
+
+ ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->map_type = BPF_MAP_TYPE_UNSPEC;
+
+ err = __xsk_map_redirect(fwd, xdp);
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
+ return 0;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
+ return err;
+}
+
+static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
+ struct net_device *dev,
+ struct xdp_frame *xdpf,
+ struct bpf_prog *xdp_prog)
+{
+ enum bpf_map_type map_type = ri->map_type;
+ void *fwd = ri->tgt_value;
+ u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
struct bpf_map *map;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
+ if (unlikely(!xdpf)) {
+ err = -EOVERFLOW;
+ goto err;
+ }
+
switch (map_type) {
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
- err = dev_map_enqueue_multi(xdp, dev, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ err = dev_map_enqueue_multi(xdpf, dev, map,
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
- err = dev_map_enqueue(fwd, xdp, dev);
+ err = dev_map_enqueue(fwd, xdpf, dev);
}
break;
case BPF_MAP_TYPE_CPUMAP:
- err = cpu_map_enqueue(fwd, xdp, dev);
- break;
- case BPF_MAP_TYPE_XSKMAP:
- err = __xsk_map_redirect(fwd, xdp);
+ err = cpu_map_enqueue(fwd, xdpf, dev);
break;
case BPF_MAP_TYPE_UNSPEC:
if (map_id == INT_MAX) {
@@ -4026,7 +4064,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
err = -EINVAL;
break;
}
- err = dev_xdp_enqueue(fwd, xdp, dev);
+ err = dev_xdp_enqueue(fwd, xdpf, dev);
break;
}
fallthrough;
@@ -4043,14 +4081,40 @@ err:
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
return err;
}
+
+int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+ return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
+ xdp_prog);
+}
EXPORT_SYMBOL_GPL(xdp_do_redirect);
+int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
+ struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
+{
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+ enum bpf_map_type map_type = ri->map_type;
+
+ if (map_type == BPF_MAP_TYPE_XSKMAP)
+ return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+
+ return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
+}
+EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
+
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog,
- void *fwd,
- enum bpf_map_type map_type, u32 map_id)
+ struct bpf_prog *xdp_prog, void *fwd,
+ enum bpf_map_type map_type, u32 map_id,
+ u32 flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map;
@@ -4060,11 +4124,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
case BPF_MAP_TYPE_DEVMAP:
fallthrough;
case BPF_MAP_TYPE_DEVMAP_HASH:
- map = READ_ONCE(ri->map);
- if (unlikely(map)) {
+ if (unlikely(flags & BPF_F_BROADCAST)) {
+ map = READ_ONCE(ri->map);
+
+ /* The map pointer is cleared when the map is being torn
+ * down by bpf_clear_redirect_map()
+ */
+ if (unlikely(!map)) {
+ err = -ENOENT;
+ break;
+ }
+
WRITE_ONCE(ri->map, NULL);
err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
- ri->flags & BPF_F_EXCLUDE_INGRESS);
+ flags & BPF_F_EXCLUDE_INGRESS);
} else {
err = dev_map_generic_redirect(fwd, skb, xdp_prog);
}
@@ -4101,9 +4174,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
enum bpf_map_type map_type = ri->map_type;
void *fwd = ri->tgt_value;
u32 map_id = ri->map_id;
+ u32 flags = ri->flags;
int err;
ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
+ ri->flags = 0;
ri->map_type = BPF_MAP_TYPE_UNSPEC;
if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
@@ -4123,7 +4198,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
return 0;
}
- return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
+ return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags);
err:
_trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
return err;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index dcddc54d0840..a209db33fa5f 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -68,12 +68,15 @@ DEFINE_COOKIE(net_cookie);
static struct net_generic *net_alloc_generic(void)
{
+ unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
+ unsigned int generic_size;
struct net_generic *ng;
- unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
+
+ generic_size = offsetof(struct net_generic, ptr[gen_ptrs]);
ng = kzalloc(generic_size, GFP_KERNEL);
if (ng)
- ng->s.len = max_gen_ptrs;
+ ng->s.len = gen_ptrs;
return ng;
}
@@ -1211,7 +1214,11 @@ static int register_pernet_operations(struct list_head *list,
if (error < 0)
return error;
*ops->id = error;
- max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
+ /* This does not require READ_ONCE as writers already hold
+ * pernet_ops_rwsem. But WRITE_ONCE is needed to protect
+ * net_alloc_generic.
+ */
+ WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1));
}
error = __register_pernet_operations(list, ops);
if (error) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ef218e290dfb..d25632fbfa89 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2383,7 +2383,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
- nla_len(attr) < NLA_HDRLEN) {
+ nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) {
return -EINVAL;
}
if (len >= MAX_VLAN_LIST_LEN)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a42431860af9..4ec8cfd357eb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1583,11 +1583,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb)
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
{
- int headerlen = skb_headroom(skb);
- unsigned int size = skb_end_offset(skb) + skb->data_len;
- struct sk_buff *n = __alloc_skb(size, gfp_mask,
- skb_alloc_rx_flag(skb), NUMA_NO_NODE);
+ struct sk_buff *n;
+ unsigned int size;
+ int headerlen;
+
+ if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
+ return NULL;
+ headerlen = skb_headroom(skb);
+ size = skb_end_offset(skb) + skb->data_len;
+ n = __alloc_skb(size, gfp_mask,
+ skb_alloc_rx_flag(skb), NUMA_NO_NODE);
if (!n)
return NULL;
@@ -1899,12 +1905,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
/*
* Allocate the copy buffer
*/
- struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom,
- gfp_mask, skb_alloc_rx_flag(skb),
- NUMA_NO_NODE);
- int oldheadroom = skb_headroom(skb);
int head_copy_len, head_copy_off;
+ struct sk_buff *n;
+ int oldheadroom;
+
+ if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST))
+ return NULL;
+ oldheadroom = skb_headroom(skb);
+ n = __alloc_skb(newheadroom + skb->len + newtailroom,
+ gfp_mask, skb_alloc_rx_flag(skb),
+ NUMA_NO_NODE);
if (!n)
return NULL;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9cd14212dcd0..ec8671eccae0 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
msg_rx = sk_psock_peek_msg(psock);
}
out:
- if (psock->work_state.skb && copied > 0)
- schedule_work(&psock->work);
return copied;
}
EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
@@ -617,42 +615,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
static void sk_psock_skb_state(struct sk_psock *psock,
struct sk_psock_work_state *state,
- struct sk_buff *skb,
int len, int off)
{
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
- state->skb = skb;
state->len = len;
state->off = off;
- } else {
- sock_drop(psock->sk, skb);
}
spin_unlock_bh(&psock->ingress_lock);
}
static void sk_psock_backlog(struct work_struct *work)
{
- struct sk_psock *psock = container_of(work, struct sk_psock, work);
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
struct sk_psock_work_state *state = &psock->work_state;
struct sk_buff *skb = NULL;
+ u32 len = 0, off = 0;
bool ingress;
- u32 len, off;
int ret;
mutex_lock(&psock->work_mutex);
- if (unlikely(state->skb)) {
- spin_lock_bh(&psock->ingress_lock);
- skb = state->skb;
+ if (unlikely(state->len)) {
len = state->len;
off = state->off;
- state->skb = NULL;
- spin_unlock_bh(&psock->ingress_lock);
}
- if (skb)
- goto start;
- while ((skb = skb_dequeue(&psock->ingress_skb))) {
+ while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
off = 0;
if (skb_bpf_strparser(skb)) {
@@ -661,7 +650,6 @@ static void sk_psock_backlog(struct work_struct *work)
off = stm->offset;
len = stm->full_len;
}
-start:
ingress = skb_bpf_ingress(skb);
skb_bpf_redirect_clear(skb);
do {
@@ -671,22 +659,28 @@ start:
len, ingress);
if (ret <= 0) {
if (ret == -EAGAIN) {
- sk_psock_skb_state(psock, state, skb,
- len, off);
+ sk_psock_skb_state(psock, state, len, off);
+
+ /* Delay slightly to prioritize any
+ * other work that might be here.
+ */
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ schedule_delayed_work(&psock->work, 1);
goto end;
}
/* Hard errors break pipe and stop xmit. */
sk_psock_report_error(psock, ret ? -ret : EPIPE);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
- sock_drop(psock->sk, skb);
goto end;
}
off += ret;
len -= ret;
} while (len);
- if (!ingress)
+ skb = skb_dequeue(&psock->ingress_skb);
+ if (!ingress) {
kfree_skb(skb);
+ }
}
end:
mutex_unlock(&psock->work_mutex);
@@ -727,7 +721,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
INIT_LIST_HEAD(&psock->link);
spin_lock_init(&psock->link_lock);
- INIT_WORK(&psock->work, sk_psock_backlog);
+ INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
mutex_init(&psock->work_mutex);
INIT_LIST_HEAD(&psock->ingress_msg);
spin_lock_init(&psock->ingress_lock);
@@ -779,11 +773,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
skb_bpf_redirect_clear(skb);
sock_drop(psock->sk, skb);
}
- kfree_skb(psock->work_state.skb);
- /* We null the skb here to ensure that calls to sk_psock_backlog
- * do not pick up the free'd skb.
- */
- psock->work_state.skb = NULL;
__sk_psock_purge_ingress_msg(psock);
}
@@ -802,7 +791,6 @@ void sk_psock_stop(struct sk_psock *psock)
spin_lock_bh(&psock->ingress_lock);
sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
sk_psock_cork_free(psock);
- __sk_psock_zap_ingress(psock);
spin_unlock_bh(&psock->ingress_lock);
}
@@ -816,7 +804,8 @@ static void sk_psock_destroy(struct work_struct *work)
sk_psock_done_strp(psock);
- cancel_work_sync(&psock->work);
+ cancel_delayed_work_sync(&psock->work);
+ __sk_psock_zap_ingress(psock);
mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
@@ -931,7 +920,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
}
skb_queue_tail(&psock_other->ingress_skb, skb);
- schedule_work(&psock_other->work);
+ schedule_delayed_work(&psock_other->work, 0);
spin_unlock_bh(&psock_other->ingress_lock);
return 0;
}
@@ -1011,7 +1000,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
spin_lock_bh(&psock->ingress_lock);
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
skb_queue_tail(&psock->ingress_skb, skb);
- schedule_work(&psock->work);
+ schedule_delayed_work(&psock->work, 0);
err = 0;
}
spin_unlock_bh(&psock->ingress_lock);
@@ -1042,7 +1031,7 @@ static void sk_psock_write_space(struct sock *sk)
psock = sk_psock(sk);
if (likely(psock)) {
if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
- schedule_work(&psock->work);
+ schedule_delayed_work(&psock->work, 0);
write_space = psock->saved_write_space;
}
rcu_read_unlock();
diff --git a/net/core/sock.c b/net/core/sock.c
index b41d5e6953c7..399d0b801b09 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -459,7 +459,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
unsigned long flags;
struct sk_buff_head *list = &sk->sk_receive_queue;
- if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
+ if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
atomic_inc(&sk->sk_drops);
trace_sock_rcvqueue_full(sk, skb);
return -ENOMEM;
@@ -511,7 +511,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
skb->dev = NULL;
- if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
+ if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
atomic_inc(&sk->sk_drops);
goto discard_and_relse;
}
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 4e42bc679bac..2ded250ac0d2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1577,9 +1577,10 @@ void sock_map_close(struct sock *sk, long timeout)
rcu_read_unlock();
sk_psock_stop(psock);
release_sock(sk);
- cancel_work_sync(&psock->work);
+ cancel_delayed_work_sync(&psock->work);
sk_psock_put(sk, psock);
}
+
/* Make sure we do not recurse. This is a bug.
* Leak the socket instead of crashing on a stack overflow.
*/
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 17033e0cfecb..e348b69ef0f5 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2692,7 +2692,7 @@ void tcp_shutdown(struct sock *sk, int how)
/* If we've already sent a FIN, or it's a closed state, skip this. */
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_SYN_SENT |
- TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) {
+ TCPF_CLOSE_WAIT)) {
/* Clear out any half completed packets. FIN if needed. */
if (tcp_close_state(sk))
tcp_send_fin(sk);
@@ -2803,7 +2803,7 @@ void __tcp_close(struct sock *sk, long timeout)
* machine. State transitions:
*
* TCP_ESTABLISHED -> TCP_FIN_WAIT1
- * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible)
+ * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult)
* TCP_CLOSE_WAIT -> TCP_LAST_ACK
*
* are legal only when FIN has been sent (i.e. in window),
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index e3a9477293ce..5fdef5ddfbbe 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -174,6 +174,24 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
return ret;
}
+static bool is_next_msg_fin(struct sk_psock *psock)
+{
+ struct scatterlist *sge;
+ struct sk_msg *msg_rx;
+ int i;
+
+ msg_rx = sk_psock_peek_msg(psock);
+ i = msg_rx->sg.start;
+ sge = sk_msg_elem(msg_rx, i);
+ if (!sge->length) {
+ struct sk_buff *skb = msg_rx->skb;
+
+ if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ return true;
+ }
+ return false;
+}
+
static int tcp_bpf_recvmsg_parser(struct sock *sk,
struct msghdr *msg,
size_t len,
@@ -195,8 +213,41 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
lock_sock(sk);
+
+ /* We may have received data on the sk_receive_queue pre-accept and
+ * then we can not use read_skb in this context because we haven't
+ * assigned a sk_socket yet so have no link to the ops. The work-around
+ * is to check the sk_receive_queue and in these cases read skbs off
+ * queue again. The read_skb hook is not running at this point because
+ * of lock_sock so we avoid having multiple runners in read_skb.
+ */
+ if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+ tcp_data_ready(sk);
+ /* This handles the ENOMEM errors if we both receive data
+ * pre accept and are already under memory pressure. At least
+ * let user know to retry.
+ */
+ if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
+ copied = -EAGAIN;
+ goto out;
+ }
+ }
+
msg_bytes_ready:
copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+ /* The typical case for EFAULT is the socket was gracefully
+ * shutdown with a FIN pkt. So check here the other case is
+ * some error on copy_page_to_iter which would be unexpected.
+ * On fin return correct return code to zero.
+ */
+ if (copied == -EFAULT) {
+ bool is_fin = is_next_msg_fin(psock);
+
+ if (is_fin) {
+ copied = 0;
+ goto out;
+ }
+ }
if (!copied) {
long timeo;
int data;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e51b5d887c24..52a9d7f96da4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6543,6 +6543,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
tcp_initialize_rcv_mss(sk);
tcp_fast_path_on(tp);
+ if (sk->sk_shutdown & SEND_SHUTDOWN)
+ tcp_shutdown(sk, SEND_SHUTDOWN);
break;
case TCP_FIN_WAIT1: {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0666be6b9ec9..e162bed1916a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -153,6 +153,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
if (tcptw->tw_ts_recent_stamp &&
(!twp || (reuse && time_after32(ktime_get_seconds(),
tcptw->tw_ts_recent_stamp)))) {
+ /* inet_twsk_hashdance() sets sk_refcnt after putting twsk
+ * and releasing the bucket lock.
+ */
+ if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt)))
+ return 0;
+
/* In case of repair and re-using TIME-WAIT sockets we still
* want to be sure that it is safe as above but honor the
* sequence numbers and time stamps set as part of the repair
@@ -173,7 +179,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
}
- sock_hold(sktw);
+
return 1;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d8817d6c7b96..0fb84e57a2d4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3441,7 +3441,9 @@ void tcp_send_fin(struct sock *sk)
return;
}
} else {
- skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER,
+ sk_gfp_mask(sk, GFP_ATOMIC |
+ __GFP_NOWARN));
if (unlikely(!skb))
return;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 7c6ac47b0bb1..c61268849948 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -434,6 +434,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
struct sk_buff *p;
unsigned int ulen;
int ret = 0;
+ int flush;
/* requires non zero csum, for symmetry with GSO */
if (!uh->check) {
@@ -467,13 +468,22 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head,
return p;
}
+ flush = NAPI_GRO_CB(p)->flush;
+
+ if (NAPI_GRO_CB(p)->flush_id != 1 ||
+ NAPI_GRO_CB(p)->count != 1 ||
+ !NAPI_GRO_CB(p)->is_atomic)
+ flush |= NAPI_GRO_CB(p)->flush_id;
+ else
+ NAPI_GRO_CB(p)->is_atomic = false;
+
/* Terminate the flow on len mismatch or if it grow "too much".
* Under small packet flood GRO count could elsewhere grow a lot
* leading to excessive truesize values.
* On len mismatch merge the first packet shorter than gso_size,
* otherwise complete the GRO packet.
*/
- if (ulen > ntohs(uh2->len)) {
+ if (ulen > ntohs(uh2->len) || flush) {
pp = p;
} else {
if (NAPI_GRO_CB(skb)->is_flist) {
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index eac206a290d0..1f50517289fd 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -61,7 +61,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
ip_send_check(iph);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8e9e80eb0f32..a4caaead74c1 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -232,8 +232,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
rt = pol_lookup_func(lookup,
net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
+ struct inet6_dev *idev = ip6_dst_idev(&rt->dst);
+
+ if (!idev)
+ goto again;
err = fib6_rule_saddr(net, rule, flags, flp6,
- ip6_dst_idev(&rt->dst)->dev);
+ idev->dev);
if (err == -EAGAIN)
goto again;
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4907ab241d6b..7dbefbb338ca 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -56,7 +56,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
skb_postpush_rcsum(skb, skb_network_header(skb), nhlen);
if (xo && (xo->flags & XFRM_GRO)) {
- skb_mac_header_rebuild(skb);
+ /* The full l2 header needs to be preserved so that re-injecting the packet at l2
+ * works correctly in the presence of vlan tags.
+ */
+ skb_mac_header_rebuild_full(skb, xo->orig_mac_len);
+ skb_reset_network_header(skb);
skb_reset_transport_header(skb);
return 0;
}
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 6cd97c75445c..9a36e174984c 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -136,6 +136,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
/* checksums verified by L2TP */
skb->ip_summed = CHECKSUM_NONE;
+ /* drop outer flow-hash */
+ skb_clear_hash(skb);
+
skb_dst_drop(skb);
nf_reset_ct(skb);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 21549a440b38..03f8c8bdab76 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -113,7 +113,7 @@ struct ieee80211_bss {
};
/**
- * enum ieee80211_corrupt_data_flags - BSS data corruption flags
+ * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags
* @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted
* @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted
*
@@ -126,7 +126,7 @@ enum ieee80211_bss_corrupt_data_flags {
};
/**
- * enum ieee80211_valid_data_flags - BSS valid data flags
+ * enum ieee80211_bss_valid_data_flags - BSS valid data flags
* @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE
* @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cde62dafda49..3c3f630f4943 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3395,6 +3395,9 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
mptcp_subflow_early_fallback(msk, subflow);
}
+
+ WRITE_ONCE(msk->write_seq, subflow->idsn);
+ WRITE_ONCE(msk->snd_nxt, subflow->idsn);
if (likely(!__mptcp_check_fallback(msk)))
MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 82df02695bbd..216445dd44db 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk)
if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
if (!test_and_set_bit(NETLINK_S_CONGESTED,
&nlk_sk(sk)->state)) {
- sk->sk_err = ENOBUFS;
+ WRITE_ONCE(sk->sk_err, ENOBUFS);
sk_error_report(sk);
}
}
@@ -1591,7 +1591,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
goto out;
}
- sk->sk_err = p->code;
+ WRITE_ONCE(sk->sk_err, p->code);
sk_error_report(sk);
out:
return ret;
@@ -1935,7 +1935,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
int noblock = flags & MSG_DONTWAIT;
- size_t copied;
+ size_t copied, max_recvmsg_len;
struct sk_buff *skb, *data_skb;
int err, ret;
@@ -1968,9 +1968,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
#endif
/* Record the max length of recvmsg() calls for future allocations */
- nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len);
- nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len,
- SKB_WITH_OVERHEAD(32768));
+ max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len);
+ max_recvmsg_len = min_t(size_t, max_recvmsg_len,
+ SKB_WITH_OVERHEAD(32768));
+ WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len);
copied = data_skb->len;
if (len < copied) {
@@ -2005,7 +2006,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
- sk->sk_err = -ret;
+ WRITE_ONCE(sk->sk_err, -ret);
sk_error_report(sk);
}
}
@@ -2219,6 +2220,7 @@ static int netlink_dump(struct sock *sk)
struct netlink_ext_ack extack = {};
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
+ size_t max_recvmsg_len;
struct module *module;
int err = -ENOBUFS;
int alloc_min_size;
@@ -2241,8 +2243,9 @@ static int netlink_dump(struct sock *sk)
cb = &nlk->cb;
alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
- if (alloc_min_size < nlk->max_recvmsg_len) {
- alloc_size = nlk->max_recvmsg_len;
+ max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len);
+ if (alloc_min_size < max_recvmsg_len) {
+ alloc_size = max_recvmsg_len;
skb = alloc_skb(alloc_size,
(GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) |
__GFP_NOWARN | __GFP_NORETRY);
@@ -2439,7 +2442,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
skb = nlmsg_new(payload + tlvlen, GFP_KERNEL);
if (!skb) {
- NETLINK_CB(in_skb).sk->sk_err = ENOBUFS;
+ WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS);
sk_error_report(NETLINK_CB(in_skb).sk);
return;
}
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 0f23e5e8e03e..3e0fc71d95a1 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -76,13 +76,15 @@ EXPORT_SYMBOL_GPL(nsh_pop);
static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
+ unsigned int outer_hlen, mac_len, nsh_len;
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
- unsigned int nsh_len, mac_len;
- __be16 proto;
+ __be16 outer_proto, proto;
skb_reset_network_header(skb);
+ outer_proto = skb->protocol;
+ outer_hlen = skb_mac_header_len(skb);
mac_len = skb->mac_len;
if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN)))
@@ -112,10 +114,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
}
for (skb = segs; skb; skb = skb->next) {
- skb->protocol = htons(ETH_P_NSH);
- __skb_push(skb, nsh_len);
- skb->mac_header = mac_offset;
- skb->network_header = skb->mac_header + mac_len;
+ skb->protocol = outer_proto;
+ __skb_push(skb, nsh_len + outer_hlen);
+ skb_reset_mac_header(skb);
+ skb_set_network_header(skb, outer_hlen);
skb->mac_len = mac_len;
}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 59aebe296890..dd4c7e9a634f 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst)
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) +
+ skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) +
nla_total_size(1) + nla_total_size(4), GFP_KERNEL);
if (skb == NULL)
goto errout;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index af1ca707c3d3..59fd6dedbbed 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -398,7 +398,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
clnt->cl_maxproc = version->nrprocs;
clnt->cl_prog = args->prognumber ? : program->number;
clnt->cl_vers = version->number;
- clnt->cl_stats = program->stats;
+ clnt->cl_stats = args->stats ? : program->stats;
clnt->cl_metrics = rpc_alloc_iostats(clnt);
rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects);
err = -ENOMEM;
@@ -677,6 +677,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
.version = clnt->cl_vers,
.authflavor = clnt->cl_auth->au_flavor,
.cred = clnt->cl_cred,
+ .stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
@@ -699,6 +700,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
.version = clnt->cl_vers,
.authflavor = flavor,
.cred = clnt->cl_cred,
+ .stats = clnt->cl_stats,
};
return __rpc_clone_client(&args, clnt);
}
@@ -979,6 +981,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
.version = vers,
.authflavor = old->cl_auth->au_flavor,
.cred = old->cl_cred,
+ .stats = old->cl_stats,
};
struct rpc_clnt *clnt;
int err;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d3ea5b4c854f..2e8367cc528a 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -700,8 +700,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
/* Made progress, don't sleep yet */
continue;
- set_current_state(TASK_INTERRUPTIBLE);
- if (signalled() || kthread_should_stop()) {
+ set_current_state(TASK_IDLE);
+ if (kthread_should_stop()) {
set_current_state(TASK_RUNNING);
return -EINTR;
}
@@ -736,7 +736,7 @@ rqst_should_sleep(struct svc_rqst *rqstp)
return false;
/* are we shutting down? */
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
return false;
/* are we freezing? */
@@ -758,11 +758,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (rqstp->rq_xprt)
goto out_found;
- /*
- * We have to be able to interrupt this wait
- * to bring down the daemons ...
- */
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_IDLE);
smp_mb__before_atomic();
clear_bit(SP_CONGESTED, &pool->sp_flags);
clear_bit(RQ_BUSY, &rqstp->rq_flags);
@@ -784,7 +780,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
if (!time_left)
atomic_long_inc(&pool->sp_stats.threads_timedout);
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
return ERR_PTR(-EINTR);
return ERR_PTR(-EAGAIN);
out_found:
@@ -882,7 +878,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
try_to_freeze();
cond_resched();
err = -EINTR;
- if (signalled() || kthread_should_stop())
+ if (kthread_should_stop())
goto out;
xprt = svc_get_next_xprt(rqstp, timeout);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 5c9fd4791c4b..76284fc538eb 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (fragid == FIRST_FRAGMENT) {
if (unlikely(head))
goto err;
- *buf = NULL;
if (skb_has_frag_list(frag) && __skb_linearize(frag))
goto err;
+ *buf = NULL;
frag = skb_unshare(frag, GFP_ATOMIC);
if (unlikely(!frag))
goto err;
@@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
if (!head)
goto err;
+ /* Either the input skb ownership is transferred to headskb
+ * or the input skb is freed, clear the reference to avoid
+ * bad access on error path.
+ */
+ *buf = NULL;
if (skb_try_coalesce(head, frag, &headstolen, &delta)) {
kfree_skb_partial(frag, headstolen);
} else {
@@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf)
*headbuf = NULL;
return 1;
}
- *buf = NULL;
return 0;
err:
kfree_skb(*buf);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index fc55b65695e5..90f6cbe5cd5d 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -174,7 +174,17 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
struct scatterlist *sg;
struct sk_buff *skb;
unsigned int pages;
- int pending;
+
+ /* If requests get too backlogged crypto API returns -EBUSY and calls
+ * ->complete(-EINPROGRESS) immediately followed by ->complete(0)
+ * to make waiting for backlog to flush with crypto_wait_req() easier.
+ * First wait converts -EBUSY -> -EINPROGRESS, and the second one
+ * -EINPROGRESS -> 0.
+ * We have a single struct crypto_async_request per direction, this
+ * scheme doesn't help us, so just ignore the first ->complete().
+ */
+ if (err == -EINPROGRESS)
+ return;
skb = (struct sk_buff *)req->data;
tls_ctx = tls_get_ctx(skb->sk);
@@ -221,12 +231,17 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err)
kfree(aead_req);
- spin_lock_bh(&ctx->decrypt_compl_lock);
- pending = atomic_dec_return(&ctx->decrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (atomic_dec_and_test(&ctx->decrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
+}
+
+static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->decrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->decrypt_pending);
+
+ return ctx->async_wait.err;
}
static int tls_do_decryption(struct sock *sk,
@@ -260,6 +275,7 @@ static int tls_do_decryption(struct sock *sk,
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
tls_decrypt_done, skb);
+ BUILD_BUG_ON_INVALID(atomic_read(&ctx->decrypt_pending) < 1);
atomic_inc(&ctx->decrypt_pending);
} else {
aead_request_set_callback(aead_req,
@@ -268,6 +284,10 @@ static int tls_do_decryption(struct sock *sk,
}
ret = crypto_aead_decrypt(aead_req);
+ if (ret == -EBUSY) {
+ ret = tls_decrypt_async_wait(ctx);
+ ret = ret ?: -EINPROGRESS;
+ }
if (ret == -EINPROGRESS) {
if (darg->async)
return 0;
@@ -449,7 +469,9 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
struct sk_msg *msg_en;
struct tls_rec *rec;
bool ready = false;
- int pending;
+
+ if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */
+ return;
rec = container_of(aead_req, struct tls_rec, aead_req);
msg_en = &rec->msg_encrypted;
@@ -484,12 +506,8 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
ready = true;
}
- spin_lock_bh(&ctx->encrypt_compl_lock);
- pending = atomic_dec_return(&ctx->encrypt_pending);
-
- if (!pending && ctx->async_notify)
+ if (atomic_dec_and_test(&ctx->encrypt_pending))
complete(&ctx->async_wait.completion);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
if (!ready)
return;
@@ -499,6 +517,15 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
schedule_delayed_work(&ctx->tx_work.work, 1);
}
+static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx)
+{
+ if (!atomic_dec_and_test(&ctx->encrypt_pending))
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ atomic_inc(&ctx->encrypt_pending);
+
+ return ctx->async_wait.err;
+}
+
static int tls_do_encryption(struct sock *sk,
struct tls_context *tls_ctx,
struct tls_sw_context_tx *ctx,
@@ -538,9 +565,14 @@ static int tls_do_encryption(struct sock *sk,
/* Add the record in tx_list */
list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
+ BUILD_BUG_ON_INVALID(atomic_read(&ctx->encrypt_pending) < 1);
atomic_inc(&ctx->encrypt_pending);
rc = crypto_aead_encrypt(aead_req);
+ if (rc == -EBUSY) {
+ rc = tls_encrypt_async_wait(ctx);
+ rc = rc ?: -EINPROGRESS;
+ }
if (!rc || rc != -EINPROGRESS) {
atomic_dec(&ctx->encrypt_pending);
sge->offset -= prot->prepend_size;
@@ -949,7 +981,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int num_zc = 0;
int orig_size;
int ret = 0;
- int pending;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_CMSG_COMPAT))
@@ -1118,24 +1149,12 @@ trim_sgl:
if (!num_async) {
goto send_end;
} else if (num_zc) {
- /* Wait for pending encryptions to get completed */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
-
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- else
- reinit_completion(&ctx->async_wait.completion);
-
- /* There can be no concurrent accesses, since we have no
- * pending encrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
+ int err;
- if (ctx->async_wait.err) {
- ret = ctx->async_wait.err;
+ /* Wait for pending encryptions to get completed */
+ err = tls_encrypt_async_wait(ctx);
+ if (err) {
+ ret = err;
copied = 0;
}
}
@@ -1913,31 +1932,16 @@ pick_next_record:
recv_end:
if (async) {
- int pending;
-
/* Wait for all previously submitted records to be decrypted */
- spin_lock_bh(&ctx->decrypt_compl_lock);
- ctx->async_notify = true;
- pending = atomic_read(&ctx->decrypt_pending);
- spin_unlock_bh(&ctx->decrypt_compl_lock);
- if (pending) {
- err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
- if (err) {
- /* one of async decrypt failed */
- tls_err_abort(sk, err);
- copied = 0;
- decrypted = 0;
- goto end;
- }
- } else {
- reinit_completion(&ctx->async_wait.completion);
+ err = tls_decrypt_async_wait(ctx);
+ if (err) {
+ /* one of async decrypt failed */
+ tls_err_abort(sk, err);
+ copied = 0;
+ decrypted = 0;
+ goto end;
}
- /* There can be no concurrent accesses, since we have no
- * pending decrypt operations
- */
- WRITE_ONCE(ctx->async_notify, false);
-
/* Drain records from the rx_list & copy if required */
if (is_peek || is_kvec)
err = process_rx_list(ctx, msg, &control, copied,
@@ -2154,16 +2158,9 @@ void tls_sw_release_resources_tx(struct sock *sk)
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
struct tls_rec *rec, *tmp;
- int pending;
/* Wait for any pending async encryptions to complete */
- spin_lock_bh(&ctx->encrypt_compl_lock);
- ctx->async_notify = true;
- pending = atomic_read(&ctx->encrypt_pending);
- spin_unlock_bh(&ctx->encrypt_compl_lock);
-
- if (pending)
- crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ tls_encrypt_async_wait(ctx);
tls_tx_records(sk, -1);
@@ -2301,6 +2298,46 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx)
strp_check_rcv(&rx_ctx->strp);
}
+static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk)
+{
+ struct tls_sw_context_tx *sw_ctx_tx;
+
+ if (!ctx->priv_ctx_tx) {
+ sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
+ if (!sw_ctx_tx)
+ return NULL;
+ } else {
+ sw_ctx_tx = ctx->priv_ctx_tx;
+ }
+
+ crypto_init_wait(&sw_ctx_tx->async_wait);
+ atomic_set(&sw_ctx_tx->encrypt_pending, 1);
+ INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
+ INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
+ sw_ctx_tx->tx_work.sk = sk;
+
+ return sw_ctx_tx;
+}
+
+static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx)
+{
+ struct tls_sw_context_rx *sw_ctx_rx;
+
+ if (!ctx->priv_ctx_rx) {
+ sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
+ if (!sw_ctx_rx)
+ return NULL;
+ } else {
+ sw_ctx_rx = ctx->priv_ctx_rx;
+ }
+
+ crypto_init_wait(&sw_ctx_rx->async_wait);
+ atomic_set(&sw_ctx_rx->decrypt_pending, 1);
+ skb_queue_head_init(&sw_ctx_rx->rx_list);
+
+ return sw_ctx_rx;
+}
+
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -2327,46 +2364,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
}
if (tx) {
- if (!ctx->priv_ctx_tx) {
- sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
- if (!sw_ctx_tx) {
- rc = -ENOMEM;
- goto out;
- }
- ctx->priv_ctx_tx = sw_ctx_tx;
- } else {
- sw_ctx_tx =
- (struct tls_sw_context_tx *)ctx->priv_ctx_tx;
- }
- } else {
- if (!ctx->priv_ctx_rx) {
- sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
- if (!sw_ctx_rx) {
- rc = -ENOMEM;
- goto out;
- }
- ctx->priv_ctx_rx = sw_ctx_rx;
- } else {
- sw_ctx_rx =
- (struct tls_sw_context_rx *)ctx->priv_ctx_rx;
- }
- }
+ ctx->priv_ctx_tx = init_ctx_tx(ctx, sk);
+ if (!ctx->priv_ctx_tx)
+ return -ENOMEM;
- if (tx) {
- crypto_init_wait(&sw_ctx_tx->async_wait);
- spin_lock_init(&sw_ctx_tx->encrypt_compl_lock);
+ sw_ctx_tx = ctx->priv_ctx_tx;
crypto_info = &ctx->crypto_send.info;
cctx = &ctx->tx;
aead = &sw_ctx_tx->aead_send;
- INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
- INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
- sw_ctx_tx->tx_work.sk = sk;
} else {
- crypto_init_wait(&sw_ctx_rx->async_wait);
- spin_lock_init(&sw_ctx_rx->decrypt_compl_lock);
+ ctx->priv_ctx_rx = init_ctx_rx(ctx);
+ if (!ctx->priv_ctx_rx)
+ return -ENOMEM;
+
+ sw_ctx_rx = ctx->priv_ctx_rx;
crypto_info = &ctx->crypto_recv.info;
cctx = &ctx->rx;
- skb_queue_head_init(&sw_ctx_rx->rx_list);
aead = &sw_ctx_rx->aead_recv;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 99149b10f86f..d758ec565589 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12890,6 +12890,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
error:
for (i = 0; i < new_coalesce.n_rules; i++) {
tmp_rule = &new_coalesce.rules[i];
+ if (!tmp_rule)
+ continue;
for (j = 0; j < tmp_rule->n_patterns; j++)
kfree(tmp_rule->patterns[j].mask);
kfree(tmp_rule->patterns);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 19b78d472283..dafea8bfcf3c 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -963,7 +963,7 @@ TRACE_EVENT(rdev_get_mpp,
TRACE_EVENT(rdev_dump_mpp,
TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx,
u8 *dst, u8 *mpp),
- TP_ARGS(wiphy, netdev, _idx, mpp, dst),
+ TP_ARGS(wiphy, netdev, _idx, dst, mpp),
TP_STRUCT__entry(
WIPHY_ENTRY
NETDEV_ENTRY
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index a6861832710d..7f326a01cbce 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -400,11 +400,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
*/
static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ip_hdr(skb)->tot_len = htons(skb->len + ihl);
@@ -415,11 +419,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb)
static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
+ struct xfrm_offload *xo = xfrm_offload(skb);
int ihl = skb->data - skb_transport_header(skb);
if (skb->transport_header != skb->network_header) {
memmove(skb_transport_header(skb),
skb_network_header(skb), ihl);
+ if (xo)
+ xo->orig_mac_len =
+ skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0;
skb->network_header = skb->transport_header;
}
ipv6_hdr(skb)->payload_len = htons(skb->len + ihl -