diff options
Diffstat (limited to 'net')
32 files changed, 418 insertions, 203 deletions
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 11bfc8737e6c..900b35297585 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -435,6 +435,9 @@ static void l2cap_chan_timeout(struct work_struct *work) BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); + if (!conn) + return; + mutex_lock(&conn->chan_lock); /* __set_chan_timer() calls l2cap_chan_hold(chan) while scheduling * this work. No need to call l2cap_chan_hold(chan) here again. diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 57c6a4f845a3..431e09cac178 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -83,6 +83,10 @@ static void sco_sock_timeout(struct work_struct *work) struct sock *sk; sco_conn_lock(conn); + if (!conn->hcon) { + sco_conn_unlock(conn); + return; + } sk = conn->sk; if (sk) sock_hold(sk); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 011bd3c59da1..1b66c276118a 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -253,6 +253,7 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, { struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev; const unsigned char *src = eth_hdr(skb)->h_source; + struct sk_buff *nskb; if (!should_deliver(p, skb)) return; @@ -261,12 +262,16 @@ static void maybe_deliver_addr(struct net_bridge_port *p, struct sk_buff *skb, if (skb->dev == p->dev && ether_addr_equal(src, addr)) return; - skb = skb_copy(skb, GFP_ATOMIC); - if (!skb) { + __skb_push(skb, ETH_HLEN); + nskb = pskb_copy(skb, GFP_ATOMIC); + __skb_pull(skb, ETH_HLEN); + if (!nskb) { DEV_STATS_INC(dev, tx_dropped); return; } + skb = nskb; + __skb_pull(skb, ETH_HLEN); if (!is_broadcast_ether_addr(addr)) memcpy(eth_hdr(skb)->h_dest, addr, ETH_ALEN); diff --git a/net/core/filter.c b/net/core/filter.c index 457d1a164ad5..47eb1bd47aa6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3987,37 +3987,75 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) } EXPORT_SYMBOL_GPL(xdp_master_redirect); -int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) +static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri, + struct net_device *dev, + struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + int err; + + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC; + + err = __xsk_map_redirect(fwd, xdp); + if (unlikely(err)) + goto err; + + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); + return 0; +err: + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); + return err; +} + +static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri, + struct net_device *dev, + struct xdp_frame *xdpf, + struct bpf_prog *xdp_prog) +{ + enum bpf_map_type map_type = ri->map_type; + void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; + u32 flags = ri->flags; struct bpf_map *map; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; + if (unlikely(!xdpf)) { + err = -EOVERFLOW; + goto err; + } + switch (map_type) { case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); - err = dev_map_enqueue_multi(xdp, dev, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + err = dev_map_enqueue_multi(xdpf, dev, map, + flags & BPF_F_EXCLUDE_INGRESS); } else { - err = dev_map_enqueue(fwd, xdp, dev); + err = dev_map_enqueue(fwd, xdpf, dev); } break; case BPF_MAP_TYPE_CPUMAP: - err = cpu_map_enqueue(fwd, xdp, dev); - break; - case BPF_MAP_TYPE_XSKMAP: - err = __xsk_map_redirect(fwd, xdp); + err = cpu_map_enqueue(fwd, xdpf, dev); break; case BPF_MAP_TYPE_UNSPEC: if (map_id == INT_MAX) { @@ -4026,7 +4064,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, err = -EINVAL; break; } - err = dev_xdp_enqueue(fwd, xdp, dev); + err = dev_xdp_enqueue(fwd, xdpf, dev); break; } fallthrough; @@ -4043,14 +4081,40 @@ err: _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + enum bpf_map_type map_type = ri->map_type; + + if (map_type == BPF_MAP_TYPE_XSKMAP) + return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); + + return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp), + xdp_prog); +} EXPORT_SYMBOL_GPL(xdp_do_redirect); +int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp, + struct xdp_frame *xdpf, struct bpf_prog *xdp_prog) +{ + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + enum bpf_map_type map_type = ri->map_type; + + if (map_type == BPF_MAP_TYPE_XSKMAP) + return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog); + + return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog); +} +EXPORT_SYMBOL_GPL(xdp_do_redirect_frame); + static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, - void *fwd, - enum bpf_map_type map_type, u32 map_id) + struct bpf_prog *xdp_prog, void *fwd, + enum bpf_map_type map_type, u32 map_id, + u32 flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); struct bpf_map *map; @@ -4060,11 +4124,20 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, case BPF_MAP_TYPE_DEVMAP: fallthrough; case BPF_MAP_TYPE_DEVMAP_HASH: - map = READ_ONCE(ri->map); - if (unlikely(map)) { + if (unlikely(flags & BPF_F_BROADCAST)) { + map = READ_ONCE(ri->map); + + /* The map pointer is cleared when the map is being torn + * down by bpf_clear_redirect_map() + */ + if (unlikely(!map)) { + err = -ENOENT; + break; + } + WRITE_ONCE(ri->map, NULL); err = dev_map_redirect_multi(dev, skb, xdp_prog, map, - ri->flags & BPF_F_EXCLUDE_INGRESS); + flags & BPF_F_EXCLUDE_INGRESS); } else { err = dev_map_generic_redirect(fwd, skb, xdp_prog); } @@ -4101,9 +4174,11 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; u32 map_id = ri->map_id; + u32 flags = ri->flags; int err; ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->flags = 0; ri->map_type = BPF_MAP_TYPE_UNSPEC; if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { @@ -4123,7 +4198,7 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, return 0; } - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id, flags); err: _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index dcddc54d0840..a209db33fa5f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -68,12 +68,15 @@ DEFINE_COOKIE(net_cookie); static struct net_generic *net_alloc_generic(void) { + unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs); + unsigned int generic_size; struct net_generic *ng; - unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); + + generic_size = offsetof(struct net_generic, ptr[gen_ptrs]); ng = kzalloc(generic_size, GFP_KERNEL); if (ng) - ng->s.len = max_gen_ptrs; + ng->s.len = gen_ptrs; return ng; } @@ -1211,7 +1214,11 @@ static int register_pernet_operations(struct list_head *list, if (error < 0) return error; *ops->id = error; - max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); + /* This does not require READ_ONCE as writers already hold + * pernet_ops_rwsem. But WRITE_ONCE is needed to protect + * net_alloc_generic. + */ + WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1)); } error = __register_pernet_operations(list, ops); if (error) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index ef218e290dfb..d25632fbfa89 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2383,7 +2383,7 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { if (nla_type(attr) != IFLA_VF_VLAN_INFO || - nla_len(attr) < NLA_HDRLEN) { + nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) { return -EINVAL; } if (len >= MAX_VLAN_LIST_LEN) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a42431860af9..4ec8cfd357eb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1583,11 +1583,17 @@ static inline int skb_alloc_rx_flag(const struct sk_buff *skb) struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) { - int headerlen = skb_headroom(skb); - unsigned int size = skb_end_offset(skb) + skb->data_len; - struct sk_buff *n = __alloc_skb(size, gfp_mask, - skb_alloc_rx_flag(skb), NUMA_NO_NODE); + struct sk_buff *n; + unsigned int size; + int headerlen; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + headerlen = skb_headroom(skb); + size = skb_end_offset(skb) + skb->data_len; + n = __alloc_skb(size, gfp_mask, + skb_alloc_rx_flag(skb), NUMA_NO_NODE); if (!n) return NULL; @@ -1899,12 +1905,17 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, /* * Allocate the copy buffer */ - struct sk_buff *n = __alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask, skb_alloc_rx_flag(skb), - NUMA_NO_NODE); - int oldheadroom = skb_headroom(skb); int head_copy_len, head_copy_off; + struct sk_buff *n; + int oldheadroom; + + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) + return NULL; + oldheadroom = skb_headroom(skb); + n = __alloc_skb(newheadroom + skb->len + newtailroom, + gfp_mask, skb_alloc_rx_flag(skb), + NUMA_NO_NODE); if (!n) return NULL; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9cd14212dcd0..ec8671eccae0 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, msg_rx = sk_psock_peek_msg(psock); } out: - if (psock->work_state.skb && copied > 0) - schedule_work(&psock->work); return copied; } EXPORT_SYMBOL_GPL(sk_msg_recvmsg); @@ -617,42 +615,33 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb, static void sk_psock_skb_state(struct sk_psock *psock, struct sk_psock_work_state *state, - struct sk_buff *skb, int len, int off) { spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { - state->skb = skb; state->len = len; state->off = off; - } else { - sock_drop(psock->sk, skb); } spin_unlock_bh(&psock->ingress_lock); } static void sk_psock_backlog(struct work_struct *work) { - struct sk_psock *psock = container_of(work, struct sk_psock, work); + struct delayed_work *dwork = to_delayed_work(work); + struct sk_psock *psock = container_of(dwork, struct sk_psock, work); struct sk_psock_work_state *state = &psock->work_state; struct sk_buff *skb = NULL; + u32 len = 0, off = 0; bool ingress; - u32 len, off; int ret; mutex_lock(&psock->work_mutex); - if (unlikely(state->skb)) { - spin_lock_bh(&psock->ingress_lock); - skb = state->skb; + if (unlikely(state->len)) { len = state->len; off = state->off; - state->skb = NULL; - spin_unlock_bh(&psock->ingress_lock); } - if (skb) - goto start; - while ((skb = skb_dequeue(&psock->ingress_skb))) { + while ((skb = skb_peek(&psock->ingress_skb))) { len = skb->len; off = 0; if (skb_bpf_strparser(skb)) { @@ -661,7 +650,6 @@ static void sk_psock_backlog(struct work_struct *work) off = stm->offset; len = stm->full_len; } -start: ingress = skb_bpf_ingress(skb); skb_bpf_redirect_clear(skb); do { @@ -671,22 +659,28 @@ start: len, ingress); if (ret <= 0) { if (ret == -EAGAIN) { - sk_psock_skb_state(psock, state, skb, - len, off); + sk_psock_skb_state(psock, state, len, off); + + /* Delay slightly to prioritize any + * other work that might be here. + */ + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + schedule_delayed_work(&psock->work, 1); goto end; } /* Hard errors break pipe and stop xmit. */ sk_psock_report_error(psock, ret ? -ret : EPIPE); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); - sock_drop(psock->sk, skb); goto end; } off += ret; len -= ret; } while (len); - if (!ingress) + skb = skb_dequeue(&psock->ingress_skb); + if (!ingress) { kfree_skb(skb); + } } end: mutex_unlock(&psock->work_mutex); @@ -727,7 +721,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node) INIT_LIST_HEAD(&psock->link); spin_lock_init(&psock->link_lock); - INIT_WORK(&psock->work, sk_psock_backlog); + INIT_DELAYED_WORK(&psock->work, sk_psock_backlog); mutex_init(&psock->work_mutex); INIT_LIST_HEAD(&psock->ingress_msg); spin_lock_init(&psock->ingress_lock); @@ -779,11 +773,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock) skb_bpf_redirect_clear(skb); sock_drop(psock->sk, skb); } - kfree_skb(psock->work_state.skb); - /* We null the skb here to ensure that calls to sk_psock_backlog - * do not pick up the free'd skb. - */ - psock->work_state.skb = NULL; __sk_psock_purge_ingress_msg(psock); } @@ -802,7 +791,6 @@ void sk_psock_stop(struct sk_psock *psock) spin_lock_bh(&psock->ingress_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); sk_psock_cork_free(psock); - __sk_psock_zap_ingress(psock); spin_unlock_bh(&psock->ingress_lock); } @@ -816,7 +804,8 @@ static void sk_psock_destroy(struct work_struct *work) sk_psock_done_strp(psock); - cancel_work_sync(&psock->work); + cancel_delayed_work_sync(&psock->work); + __sk_psock_zap_ingress(psock); mutex_destroy(&psock->work_mutex); psock_progs_drop(&psock->progs); @@ -931,7 +920,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb) } skb_queue_tail(&psock_other->ingress_skb, skb); - schedule_work(&psock_other->work); + schedule_delayed_work(&psock_other->work, 0); spin_unlock_bh(&psock_other->ingress_lock); return 0; } @@ -1011,7 +1000,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb, spin_lock_bh(&psock->ingress_lock); if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { skb_queue_tail(&psock->ingress_skb, skb); - schedule_work(&psock->work); + schedule_delayed_work(&psock->work, 0); err = 0; } spin_unlock_bh(&psock->ingress_lock); @@ -1042,7 +1031,7 @@ static void sk_psock_write_space(struct sock *sk) psock = sk_psock(sk); if (likely(psock)) { if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) - schedule_work(&psock->work); + schedule_delayed_work(&psock->work, 0); write_space = psock->saved_write_space; } rcu_read_unlock(); diff --git a/net/core/sock.c b/net/core/sock.c index b41d5e6953c7..399d0b801b09 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -459,7 +459,7 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) unsigned long flags; struct sk_buff_head *list = &sk->sk_receive_queue; - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { + if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); trace_sock_rcvqueue_full(sk, skb); return -ENOMEM; @@ -511,7 +511,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, skb->dev = NULL; - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { + if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 4e42bc679bac..2ded250ac0d2 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1577,9 +1577,10 @@ void sock_map_close(struct sock *sk, long timeout) rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); - cancel_work_sync(&psock->work); + cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); } + /* Make sure we do not recurse. This is a bug. * Leak the socket instead of crashing on a stack overflow. */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 17033e0cfecb..e348b69ef0f5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2692,7 +2692,7 @@ void tcp_shutdown(struct sock *sk, int how) /* If we've already sent a FIN, or it's a closed state, skip this. */ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_SYN_SENT | - TCPF_SYN_RECV | TCPF_CLOSE_WAIT)) { + TCPF_CLOSE_WAIT)) { /* Clear out any half completed packets. FIN if needed. */ if (tcp_close_state(sk)) tcp_send_fin(sk); @@ -2803,7 +2803,7 @@ void __tcp_close(struct sock *sk, long timeout) * machine. State transitions: * * TCP_ESTABLISHED -> TCP_FIN_WAIT1 - * TCP_SYN_RECV -> TCP_FIN_WAIT1 (forget it, it's impossible) + * TCP_SYN_RECV -> TCP_FIN_WAIT1 (it is difficult) * TCP_CLOSE_WAIT -> TCP_LAST_ACK * * are legal only when FIN has been sent (i.e. in window), diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index e3a9477293ce..5fdef5ddfbbe 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -174,6 +174,24 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock, return ret; } +static bool is_next_msg_fin(struct sk_psock *psock) +{ + struct scatterlist *sge; + struct sk_msg *msg_rx; + int i; + + msg_rx = sk_psock_peek_msg(psock); + i = msg_rx->sg.start; + sge = sk_msg_elem(msg_rx, i); + if (!sge->length) { + struct sk_buff *skb = msg_rx->skb; + + if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + return true; + } + return false; +} + static int tcp_bpf_recvmsg_parser(struct sock *sk, struct msghdr *msg, size_t len, @@ -195,8 +213,41 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); + + /* We may have received data on the sk_receive_queue pre-accept and + * then we can not use read_skb in this context because we haven't + * assigned a sk_socket yet so have no link to the ops. The work-around + * is to check the sk_receive_queue and in these cases read skbs off + * queue again. The read_skb hook is not running at this point because + * of lock_sock so we avoid having multiple runners in read_skb. + */ + if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) { + tcp_data_ready(sk); + /* This handles the ENOMEM errors if we both receive data + * pre accept and are already under memory pressure. At least + * let user know to retry. + */ + if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) { + copied = -EAGAIN; + goto out; + } + } + msg_bytes_ready: copied = sk_msg_recvmsg(sk, psock, msg, len, flags); + /* The typical case for EFAULT is the socket was gracefully + * shutdown with a FIN pkt. So check here the other case is + * some error on copy_page_to_iter which would be unexpected. + * On fin return correct return code to zero. + */ + if (copied == -EFAULT) { + bool is_fin = is_next_msg_fin(psock); + + if (is_fin) { + copied = 0; + goto out; + } + } if (!copied) { long timeo; int data; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e51b5d887c24..52a9d7f96da4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -6543,6 +6543,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) tcp_initialize_rcv_mss(sk); tcp_fast_path_on(tp); + if (sk->sk_shutdown & SEND_SHUTDOWN) + tcp_shutdown(sk, SEND_SHUTDOWN); break; case TCP_FIN_WAIT1: { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0666be6b9ec9..e162bed1916a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -153,6 +153,12 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) if (tcptw->tw_ts_recent_stamp && (!twp || (reuse && time_after32(ktime_get_seconds(), tcptw->tw_ts_recent_stamp)))) { + /* inet_twsk_hashdance() sets sk_refcnt after putting twsk + * and releasing the bucket lock. + */ + if (unlikely(!refcount_inc_not_zero(&sktw->sk_refcnt))) + return 0; + /* In case of repair and re-using TIME-WAIT sockets we still * want to be sure that it is safe as above but honor the * sequence numbers and time stamps set as part of the repair @@ -173,7 +179,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) tp->rx_opt.ts_recent = tcptw->tw_ts_recent; tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; } - sock_hold(sktw); + return 1; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d8817d6c7b96..0fb84e57a2d4 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3441,7 +3441,9 @@ void tcp_send_fin(struct sock *sk) return; } } else { - skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + skb = alloc_skb_fclone(MAX_TCP_HEADER, + sk_gfp_mask(sk, GFP_ATOMIC | + __GFP_NOWARN)); if (unlikely(!skb)) return; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 7c6ac47b0bb1..c61268849948 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -434,6 +434,7 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *p; unsigned int ulen; int ret = 0; + int flush; /* requires non zero csum, for symmetry with GSO */ if (!uh->check) { @@ -467,13 +468,22 @@ static struct sk_buff *udp_gro_receive_segment(struct list_head *head, return p; } + flush = NAPI_GRO_CB(p)->flush; + + if (NAPI_GRO_CB(p)->flush_id != 1 || + NAPI_GRO_CB(p)->count != 1 || + !NAPI_GRO_CB(p)->is_atomic) + flush |= NAPI_GRO_CB(p)->flush_id; + else + NAPI_GRO_CB(p)->is_atomic = false; + /* Terminate the flow on len mismatch or if it grow "too much". * Under small packet flood GRO count could elsewhere grow a lot * leading to excessive truesize values. * On len mismatch merge the first packet shorter than gso_size, * otherwise complete the GRO packet. */ - if (ulen > ntohs(uh2->len)) { + if (ulen > ntohs(uh2->len) || flush) { pp = p; } else { if (NAPI_GRO_CB(skb)->is_flist) { diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index eac206a290d0..1f50517289fd 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -61,7 +61,11 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) ip_send_check(iph); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8e9e80eb0f32..a4caaead74c1 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -232,8 +232,12 @@ static int __fib6_rule_action(struct fib_rule *rule, struct flowi *flp, rt = pol_lookup_func(lookup, net, table, flp6, arg->lookup_data, flags); if (rt != net->ipv6.ip6_null_entry) { + struct inet6_dev *idev = ip6_dst_idev(&rt->dst); + + if (!idev) + goto again; err = fib6_rule_saddr(net, rule, flags, flp6, - ip6_dst_idev(&rt->dst)->dev); + idev->dev); if (err == -EAGAIN) goto again; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 4907ab241d6b..7dbefbb338ca 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -56,7 +56,11 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { - skb_mac_header_rebuild(skb); + /* The full l2 header needs to be preserved so that re-injecting the packet at l2 + * works correctly in the presence of vlan tags. + */ + skb_mac_header_rebuild_full(skb, xo->orig_mac_len); + skb_reset_network_header(skb); skb_reset_transport_header(skb); return 0; } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 6cd97c75445c..9a36e174984c 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -136,6 +136,9 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, /* checksums verified by L2TP */ skb->ip_summed = CHECKSUM_NONE; + /* drop outer flow-hash */ + skb_clear_hash(skb); + skb_dst_drop(skb); nf_reset_ct(skb); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 21549a440b38..03f8c8bdab76 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -113,7 +113,7 @@ struct ieee80211_bss { }; /** - * enum ieee80211_corrupt_data_flags - BSS data corruption flags + * enum ieee80211_bss_corrupt_data_flags - BSS data corruption flags * @IEEE80211_BSS_CORRUPT_BEACON: last beacon frame received was corrupted * @IEEE80211_BSS_CORRUPT_PROBE_RESP: last probe response received was corrupted * @@ -126,7 +126,7 @@ enum ieee80211_bss_corrupt_data_flags { }; /** - * enum ieee80211_valid_data_flags - BSS valid data flags + * enum ieee80211_bss_valid_data_flags - BSS valid data flags * @IEEE80211_BSS_VALID_WMM: WMM/UAPSD data was gathered from non-corrupt IE * @IEEE80211_BSS_VALID_RATES: Supported rates were gathered from non-corrupt IE * @IEEE80211_BSS_VALID_ERP: ERP flag was gathered from non-corrupt IE diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cde62dafda49..3c3f630f4943 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3395,6 +3395,9 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 82df02695bbd..216445dd44db 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -352,7 +352,7 @@ static void netlink_overrun(struct sock *sk) if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) { if (!test_and_set_bit(NETLINK_S_CONGESTED, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; + WRITE_ONCE(sk->sk_err, ENOBUFS); sk_error_report(sk); } } @@ -1591,7 +1591,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) goto out; } - sk->sk_err = p->code; + WRITE_ONCE(sk->sk_err, p->code); sk_error_report(sk); out: return ret; @@ -1935,7 +1935,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags & MSG_DONTWAIT; - size_t copied; + size_t copied, max_recvmsg_len; struct sk_buff *skb, *data_skb; int err, ret; @@ -1968,9 +1968,10 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, #endif /* Record the max length of recvmsg() calls for future allocations */ - nlk->max_recvmsg_len = max(nlk->max_recvmsg_len, len); - nlk->max_recvmsg_len = min_t(size_t, nlk->max_recvmsg_len, - SKB_WITH_OVERHEAD(32768)); + max_recvmsg_len = max(READ_ONCE(nlk->max_recvmsg_len), len); + max_recvmsg_len = min_t(size_t, max_recvmsg_len, + SKB_WITH_OVERHEAD(32768)); + WRITE_ONCE(nlk->max_recvmsg_len, max_recvmsg_len); copied = data_skb->len; if (len < copied) { @@ -2005,7 +2006,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) { ret = netlink_dump(sk); if (ret) { - sk->sk_err = -ret; + WRITE_ONCE(sk->sk_err, -ret); sk_error_report(sk); } } @@ -2219,6 +2220,7 @@ static int netlink_dump(struct sock *sk) struct netlink_ext_ack extack = {}; struct netlink_callback *cb; struct sk_buff *skb = NULL; + size_t max_recvmsg_len; struct module *module; int err = -ENOBUFS; int alloc_min_size; @@ -2241,8 +2243,9 @@ static int netlink_dump(struct sock *sk) cb = &nlk->cb; alloc_min_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE); - if (alloc_min_size < nlk->max_recvmsg_len) { - alloc_size = nlk->max_recvmsg_len; + max_recvmsg_len = READ_ONCE(nlk->max_recvmsg_len); + if (alloc_min_size < max_recvmsg_len) { + alloc_size = max_recvmsg_len; skb = alloc_skb(alloc_size, (GFP_KERNEL & ~__GFP_DIRECT_RECLAIM) | __GFP_NOWARN | __GFP_NORETRY); @@ -2439,7 +2442,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) { - NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + WRITE_ONCE(NETLINK_CB(in_skb).sk->sk_err, ENOBUFS); sk_error_report(NETLINK_CB(in_skb).sk); return; } diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c index 0f23e5e8e03e..3e0fc71d95a1 100644 --- a/net/nsh/nsh.c +++ b/net/nsh/nsh.c @@ -76,13 +76,15 @@ EXPORT_SYMBOL_GPL(nsh_pop); static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, netdev_features_t features) { + unsigned int outer_hlen, mac_len, nsh_len; struct sk_buff *segs = ERR_PTR(-EINVAL); u16 mac_offset = skb->mac_header; - unsigned int nsh_len, mac_len; - __be16 proto; + __be16 outer_proto, proto; skb_reset_network_header(skb); + outer_proto = skb->protocol; + outer_hlen = skb_mac_header_len(skb); mac_len = skb->mac_len; if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) @@ -112,10 +114,10 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, } for (skb = segs; skb; skb = skb->next) { - skb->protocol = htons(ETH_P_NSH); - __skb_push(skb, nsh_len); - skb->mac_header = mac_offset; - skb->network_header = skb->mac_header + mac_len; + skb->protocol = outer_proto; + __skb_push(skb, nsh_len + outer_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, outer_hlen); skb->mac_len = mac_len; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 59aebe296890..dd4c7e9a634f 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -193,7 +193,7 @@ void rtm_phonet_notify(int event, struct net_device *dev, u8 dst) struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(1) + nla_total_size(4), GFP_KERNEL); if (skb == NULL) goto errout; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index af1ca707c3d3..59fd6dedbbed 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -398,7 +398,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, clnt->cl_maxproc = version->nrprocs; clnt->cl_prog = args->prognumber ? : program->number; clnt->cl_vers = version->number; - clnt->cl_stats = program->stats; + clnt->cl_stats = args->stats ? : program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); rpc_init_pipe_dir_head(&clnt->cl_pipedir_objects); err = -ENOMEM; @@ -677,6 +677,7 @@ struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt) .version = clnt->cl_vers, .authflavor = clnt->cl_auth->au_flavor, .cred = clnt->cl_cred, + .stats = clnt->cl_stats, }; return __rpc_clone_client(&args, clnt); } @@ -699,6 +700,7 @@ rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor) .version = clnt->cl_vers, .authflavor = flavor, .cred = clnt->cl_cred, + .stats = clnt->cl_stats, }; return __rpc_clone_client(&args, clnt); } @@ -979,6 +981,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, .version = vers, .authflavor = old->cl_auth->au_flavor, .cred = old->cl_cred, + .stats = old->cl_stats, }; struct rpc_clnt *clnt; int err; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d3ea5b4c854f..2e8367cc528a 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -700,8 +700,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) /* Made progress, don't sleep yet */ continue; - set_current_state(TASK_INTERRUPTIBLE); - if (signalled() || kthread_should_stop()) { + set_current_state(TASK_IDLE); + if (kthread_should_stop()) { set_current_state(TASK_RUNNING); return -EINTR; } @@ -736,7 +736,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return false; /* are we freezing? */ @@ -758,11 +758,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_xprt) goto out_found; - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... - */ - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_IDLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); @@ -784,7 +780,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); out_found: @@ -882,7 +878,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); err = -EINTR; - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) goto out; xprt = svc_get_next_xprt(rqstp, timeout); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 5c9fd4791c4b..76284fc538eb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -142,9 +142,9 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - *buf = NULL; if (skb_has_frag_list(frag) && __skb_linearize(frag)) goto err; + *buf = NULL; frag = skb_unshare(frag, GFP_ATOMIC); if (unlikely(!frag)) goto err; @@ -156,6 +156,11 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (!head) goto err; + /* Either the input skb ownership is transferred to headskb + * or the input skb is freed, clear the reference to avoid + * bad access on error path. + */ + *buf = NULL; if (skb_try_coalesce(head, frag, &headstolen, &delta)) { kfree_skb_partial(frag, headstolen); } else { @@ -179,7 +184,6 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) *headbuf = NULL; return 1; } - *buf = NULL; return 0; err: kfree_skb(*buf); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fc55b65695e5..90f6cbe5cd5d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -174,7 +174,17 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) struct scatterlist *sg; struct sk_buff *skb; unsigned int pages; - int pending; + + /* If requests get too backlogged crypto API returns -EBUSY and calls + * ->complete(-EINPROGRESS) immediately followed by ->complete(0) + * to make waiting for backlog to flush with crypto_wait_req() easier. + * First wait converts -EBUSY -> -EINPROGRESS, and the second one + * -EINPROGRESS -> 0. + * We have a single struct crypto_async_request per direction, this + * scheme doesn't help us, so just ignore the first ->complete(). + */ + if (err == -EINPROGRESS) + return; skb = (struct sk_buff *)req->data; tls_ctx = tls_get_ctx(skb->sk); @@ -221,12 +231,17 @@ static void tls_decrypt_done(struct crypto_async_request *req, int err) kfree(aead_req); - spin_lock_bh(&ctx->decrypt_compl_lock); - pending = atomic_dec_return(&ctx->decrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->decrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->decrypt_compl_lock); +} + +static int tls_decrypt_async_wait(struct tls_sw_context_rx *ctx) +{ + if (!atomic_dec_and_test(&ctx->decrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->decrypt_pending); + + return ctx->async_wait.err; } static int tls_do_decryption(struct sock *sk, @@ -260,6 +275,7 @@ static int tls_do_decryption(struct sock *sk, aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, tls_decrypt_done, skb); + BUILD_BUG_ON_INVALID(atomic_read(&ctx->decrypt_pending) < 1); atomic_inc(&ctx->decrypt_pending); } else { aead_request_set_callback(aead_req, @@ -268,6 +284,10 @@ static int tls_do_decryption(struct sock *sk, } ret = crypto_aead_decrypt(aead_req); + if (ret == -EBUSY) { + ret = tls_decrypt_async_wait(ctx); + ret = ret ?: -EINPROGRESS; + } if (ret == -EINPROGRESS) { if (darg->async) return 0; @@ -449,7 +469,9 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) struct sk_msg *msg_en; struct tls_rec *rec; bool ready = false; - int pending; + + if (err == -EINPROGRESS) /* see the comment in tls_decrypt_done() */ + return; rec = container_of(aead_req, struct tls_rec, aead_req); msg_en = &rec->msg_encrypted; @@ -484,12 +506,8 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) ready = true; } - spin_lock_bh(&ctx->encrypt_compl_lock); - pending = atomic_dec_return(&ctx->encrypt_pending); - - if (!pending && ctx->async_notify) + if (atomic_dec_and_test(&ctx->encrypt_pending)) complete(&ctx->async_wait.completion); - spin_unlock_bh(&ctx->encrypt_compl_lock); if (!ready) return; @@ -499,6 +517,15 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) schedule_delayed_work(&ctx->tx_work.work, 1); } +static int tls_encrypt_async_wait(struct tls_sw_context_tx *ctx) +{ + if (!atomic_dec_and_test(&ctx->encrypt_pending)) + crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + atomic_inc(&ctx->encrypt_pending); + + return ctx->async_wait.err; +} + static int tls_do_encryption(struct sock *sk, struct tls_context *tls_ctx, struct tls_sw_context_tx *ctx, @@ -538,9 +565,14 @@ static int tls_do_encryption(struct sock *sk, /* Add the record in tx_list */ list_add_tail((struct list_head *)&rec->list, &ctx->tx_list); + BUILD_BUG_ON_INVALID(atomic_read(&ctx->encrypt_pending) < 1); atomic_inc(&ctx->encrypt_pending); rc = crypto_aead_encrypt(aead_req); + if (rc == -EBUSY) { + rc = tls_encrypt_async_wait(ctx); + rc = rc ?: -EINPROGRESS; + } if (!rc || rc != -EINPROGRESS) { atomic_dec(&ctx->encrypt_pending); sge->offset -= prot->prepend_size; @@ -949,7 +981,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) int num_zc = 0; int orig_size; int ret = 0; - int pending; if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_CMSG_COMPAT)) @@ -1118,24 +1149,12 @@ trim_sgl: if (!num_async) { goto send_end; } else if (num_zc) { - /* Wait for pending encryptions to get completed */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - else - reinit_completion(&ctx->async_wait.completion); - - /* There can be no concurrent accesses, since we have no - * pending encrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); + int err; - if (ctx->async_wait.err) { - ret = ctx->async_wait.err; + /* Wait for pending encryptions to get completed */ + err = tls_encrypt_async_wait(ctx); + if (err) { + ret = err; copied = 0; } } @@ -1913,31 +1932,16 @@ pick_next_record: recv_end: if (async) { - int pending; - /* Wait for all previously submitted records to be decrypted */ - spin_lock_bh(&ctx->decrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->decrypt_pending); - spin_unlock_bh(&ctx->decrypt_compl_lock); - if (pending) { - err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait); - if (err) { - /* one of async decrypt failed */ - tls_err_abort(sk, err); - copied = 0; - decrypted = 0; - goto end; - } - } else { - reinit_completion(&ctx->async_wait.completion); + err = tls_decrypt_async_wait(ctx); + if (err) { + /* one of async decrypt failed */ + tls_err_abort(sk, err); + copied = 0; + decrypted = 0; + goto end; } - /* There can be no concurrent accesses, since we have no - * pending decrypt operations - */ - WRITE_ONCE(ctx->async_notify, false); - /* Drain records from the rx_list & copy if required */ if (is_peek || is_kvec) err = process_rx_list(ctx, msg, &control, copied, @@ -2154,16 +2158,9 @@ void tls_sw_release_resources_tx(struct sock *sk) struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); struct tls_rec *rec, *tmp; - int pending; /* Wait for any pending async encryptions to complete */ - spin_lock_bh(&ctx->encrypt_compl_lock); - ctx->async_notify = true; - pending = atomic_read(&ctx->encrypt_pending); - spin_unlock_bh(&ctx->encrypt_compl_lock); - - if (pending) - crypto_wait_req(-EINPROGRESS, &ctx->async_wait); + tls_encrypt_async_wait(ctx); tls_tx_records(sk, -1); @@ -2301,6 +2298,46 @@ void tls_sw_strparser_arm(struct sock *sk, struct tls_context *tls_ctx) strp_check_rcv(&rx_ctx->strp); } +static struct tls_sw_context_tx *init_ctx_tx(struct tls_context *ctx, struct sock *sk) +{ + struct tls_sw_context_tx *sw_ctx_tx; + + if (!ctx->priv_ctx_tx) { + sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); + if (!sw_ctx_tx) + return NULL; + } else { + sw_ctx_tx = ctx->priv_ctx_tx; + } + + crypto_init_wait(&sw_ctx_tx->async_wait); + atomic_set(&sw_ctx_tx->encrypt_pending, 1); + INIT_LIST_HEAD(&sw_ctx_tx->tx_list); + INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); + sw_ctx_tx->tx_work.sk = sk; + + return sw_ctx_tx; +} + +static struct tls_sw_context_rx *init_ctx_rx(struct tls_context *ctx) +{ + struct tls_sw_context_rx *sw_ctx_rx; + + if (!ctx->priv_ctx_rx) { + sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); + if (!sw_ctx_rx) + return NULL; + } else { + sw_ctx_rx = ctx->priv_ctx_rx; + } + + crypto_init_wait(&sw_ctx_rx->async_wait); + atomic_set(&sw_ctx_rx->decrypt_pending, 1); + skb_queue_head_init(&sw_ctx_rx->rx_list); + + return sw_ctx_rx; +} + int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) { struct tls_context *tls_ctx = tls_get_ctx(sk); @@ -2327,46 +2364,22 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) } if (tx) { - if (!ctx->priv_ctx_tx) { - sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL); - if (!sw_ctx_tx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_tx = sw_ctx_tx; - } else { - sw_ctx_tx = - (struct tls_sw_context_tx *)ctx->priv_ctx_tx; - } - } else { - if (!ctx->priv_ctx_rx) { - sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL); - if (!sw_ctx_rx) { - rc = -ENOMEM; - goto out; - } - ctx->priv_ctx_rx = sw_ctx_rx; - } else { - sw_ctx_rx = - (struct tls_sw_context_rx *)ctx->priv_ctx_rx; - } - } + ctx->priv_ctx_tx = init_ctx_tx(ctx, sk); + if (!ctx->priv_ctx_tx) + return -ENOMEM; - if (tx) { - crypto_init_wait(&sw_ctx_tx->async_wait); - spin_lock_init(&sw_ctx_tx->encrypt_compl_lock); + sw_ctx_tx = ctx->priv_ctx_tx; crypto_info = &ctx->crypto_send.info; cctx = &ctx->tx; aead = &sw_ctx_tx->aead_send; - INIT_LIST_HEAD(&sw_ctx_tx->tx_list); - INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler); - sw_ctx_tx->tx_work.sk = sk; } else { - crypto_init_wait(&sw_ctx_rx->async_wait); - spin_lock_init(&sw_ctx_rx->decrypt_compl_lock); + ctx->priv_ctx_rx = init_ctx_rx(ctx); + if (!ctx->priv_ctx_rx) + return -ENOMEM; + + sw_ctx_rx = ctx->priv_ctx_rx; crypto_info = &ctx->crypto_recv.info; cctx = &ctx->rx; - skb_queue_head_init(&sw_ctx_rx->rx_list); aead = &sw_ctx_rx->aead_recv; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 99149b10f86f..d758ec565589 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12890,6 +12890,8 @@ static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info) error: for (i = 0; i < new_coalesce.n_rules; i++) { tmp_rule = &new_coalesce.rules[i]; + if (!tmp_rule) + continue; for (j = 0; j < tmp_rule->n_patterns; j++) kfree(tmp_rule->patterns[j].mask); kfree(tmp_rule->patterns); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 19b78d472283..dafea8bfcf3c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -963,7 +963,7 @@ TRACE_EVENT(rdev_get_mpp, TRACE_EVENT(rdev_dump_mpp, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, u8 *dst, u8 *mpp), - TP_ARGS(wiphy, netdev, _idx, mpp, dst), + TP_ARGS(wiphy, netdev, _idx, dst, mpp), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index a6861832710d..7f326a01cbce 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -400,11 +400,15 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) { + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ip_hdr(skb)->tot_len = htons(skb->len + ihl); @@ -415,11 +419,15 @@ static int xfrm4_transport_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_transport_input(struct xfrm_state *x, struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) + struct xfrm_offload *xo = xfrm_offload(skb); int ihl = skb->data - skb_transport_header(skb); if (skb->transport_header != skb->network_header) { memmove(skb_transport_header(skb), skb_network_header(skb), ihl); + if (xo) + xo->orig_mac_len = + skb_mac_header_was_set(skb) ? skb_mac_header_len(skb) : 0; skb->network_header = skb->transport_header; } ipv6_hdr(skb)->payload_len = htons(skb->len + ihl - |