aboutsummaryrefslogtreecommitdiffstats
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/act_api.h3
-rw-r--r--include/net/addrconf.h1
-rw-r--r--include/net/af_rxrpc.h14
-rw-r--r--include/net/arp.h4
-rw-r--r--include/net/bonding.h4
-rw-r--r--include/net/busy_poll.h6
-rw-r--r--include/net/cfg80211.h5
-rw-r--r--include/net/dst.h25
-rw-r--r--include/net/dst_ops.h3
-rw-r--r--include/net/fib_rules.h1
-rw-r--r--include/net/flow_dissector.h12
-rw-r--r--include/net/fq.h2
-rw-r--r--include/net/fq_impl.h8
-rw-r--r--include/net/genetlink.h8
-rw-r--r--include/net/inet_connection_sock.h4
-rw-r--r--include/net/inet_ecn.h80
-rw-r--r--include/net/inet_hashtables.h18
-rw-r--r--include/net/ip.h5
-rw-r--r--include/net/ip6_route.h1
-rw-r--r--include/net/ip_fib.h1
-rw-r--r--include/net/ip_vs.h11
-rw-r--r--include/net/ipv6.h2
-rw-r--r--include/net/ipv6_stubs.h6
-rw-r--r--include/net/ipx.h5
-rw-r--r--include/net/llc_conn.h2
-rw-r--r--include/net/mac80211.h4
-rw-r--r--include/net/ndisc.h8
-rw-r--r--include/net/neighbour.h7
-rw-r--r--include/net/net_namespace.h7
-rw-r--r--include/net/netfilter/nf_conntrack.h2
-rw-r--r--include/net/netfilter/nf_tables.h3
-rw-r--r--include/net/netns/nftables.h1
-rw-r--r--include/net/pkt_cls.h33
-rw-r--r--include/net/pkt_sched.h11
-rw-r--r--include/net/sch_generic.h35
-rw-r--r--include/net/sctp/constants.h8
-rw-r--r--include/net/sctp/structs.h3
-rw-r--r--include/net/sock.h74
-rw-r--r--include/net/tc_act/tc_police.h4
-rw-r--r--include/net/tc_act/tc_sample.h2
-rw-r--r--include/net/tcp.h50
-rw-r--r--include/net/tls.h23
-rw-r--r--include/net/udp.h10
-rw-r--r--include/net/xfrm.h16
44 files changed, 363 insertions, 169 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 3a1a72990fce..0ed71a918569 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -69,7 +69,8 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm)
{
dtm->install = jiffies_to_clock_t(jiffies - stm->install);
dtm->lastuse = jiffies_to_clock_t(jiffies - stm->lastuse);
- dtm->firstuse = jiffies_to_clock_t(jiffies - stm->firstuse);
+ dtm->firstuse = stm->firstuse ?
+ jiffies_to_clock_t(jiffies - stm->firstuse) : 0;
dtm->expires = jiffies_to_clock_t(stm->expires);
}
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 3f62b347b04a..ab8b3eb53d4b 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -273,6 +273,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex,
const struct in6_addr *addr);
int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
const struct in6_addr *addr);
+void __ipv6_sock_ac_close(struct sock *sk);
void ipv6_sock_ac_close(struct sock *sk);
int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index 1abae3c340a5..55b980b21f4b 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -16,6 +16,12 @@ struct sock;
struct socket;
struct rxrpc_call;
+enum rxrpc_interruptibility {
+ RXRPC_INTERRUPTIBLE, /* Call is interruptible */
+ RXRPC_PREINTERRUPTIBLE, /* Call can be cancelled whilst waiting for a slot */
+ RXRPC_UNINTERRUPTIBLE, /* Call should not be interruptible at all */
+};
+
/*
* Debug ID counter for tracing.
*/
@@ -41,7 +47,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *,
gfp_t,
rxrpc_notify_rx_t,
bool,
- bool,
+ enum rxrpc_interruptibility,
unsigned int);
int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
struct msghdr *, size_t,
@@ -53,14 +59,12 @@ bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
struct sockaddr_rxrpc *);
-u64 rxrpc_kernel_get_rtt(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
rxrpc_user_attach_call_t, unsigned long, gfp_t,
unsigned int);
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
-bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *,
- u32 *);
-void rxrpc_kernel_probe_life(struct socket *, struct rxrpc_call *);
+bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
bool rxrpc_kernel_get_reply_time(struct socket *, struct rxrpc_call *,
ktime_t *);
diff --git a/include/net/arp.h b/include/net/arp.h
index c8f580a0e6b1..4950191f6b2b 100644
--- a/include/net/arp.h
+++ b/include/net/arp.h
@@ -57,8 +57,8 @@ static inline void __ipv4_confirm_neigh(struct net_device *dev, u32 key)
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
diff --git a/include/net/bonding.h b/include/net/bonding.h
index b46d68acf701..b26e440a1fa4 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -149,7 +149,6 @@ struct slave {
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
s8 link; /* one of BOND_LINK_XXXX */
s8 link_new_state; /* one of BOND_LINK_XXXX */
- s8 new_link;
u8 backup:1, /* indicates backup slave. Value corresponds with
BOND_STATE_ACTIVE and BOND_STATE_BACKUP */
inactive:1, /* indicates inactive slave */
@@ -229,6 +228,7 @@ struct bonding {
struct dentry *debug_dir;
#endif /* CONFIG_DEBUG_FS */
struct rtnl_link_stats64 bond_stats;
+ struct lock_class_key stats_lock_key;
};
#define bond_slave_get_rcu(dev) \
@@ -539,7 +539,7 @@ static inline void bond_propose_link_state(struct slave *slave, int state)
static inline void bond_commit_link_state(struct slave *slave, bool notify)
{
- if (slave->link == slave->link_new_state)
+ if (slave->link_new_state == BOND_LINK_NOCHANGE)
return;
slave->link = slave->link_new_state;
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 127a5c4e3699..86e028388bad 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -122,7 +122,7 @@ static inline void skb_mark_napi_id(struct sk_buff *skb,
static inline void sk_mark_napi_id(struct sock *sk, const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- sk->sk_napi_id = skb->napi_id;
+ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
sk_rx_queue_set(sk, skb);
}
@@ -132,8 +132,8 @@ static inline void sk_mark_napi_id_once(struct sock *sk,
const struct sk_buff *skb)
{
#ifdef CONFIG_NET_RX_BUSY_POLL
- if (!sk->sk_napi_id)
- sk->sk_napi_id = skb->napi_id;
+ if (!READ_ONCE(sk->sk_napi_id))
+ WRITE_ONCE(sk->sk_napi_id, skb->napi_id);
#endif
}
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b13bd05befa..73bfed327f68 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3413,6 +3413,9 @@ struct cfg80211_update_owe_info {
*
* @start_radar_detection: Start radar detection in the driver.
*
+ * @end_cac: End running CAC, probably because a related CAC
+ * was finished on another phy.
+ *
* @update_ft_ies: Provide updated Fast BSS Transition information to the
* driver. If the SME is in the driver/firmware, this information can be
* used in building Authentication and Reassociation Request frames.
@@ -3739,6 +3742,8 @@ struct cfg80211_ops {
struct net_device *dev,
struct cfg80211_chan_def *chandef,
u32 cac_time_ms);
+ void (*end_cac)(struct wiphy *wiphy,
+ struct net_device *dev);
int (*update_ft_ies)(struct wiphy *wiphy, struct net_device *dev,
struct cfg80211_update_ft_ies_params *ftie);
int (*crit_proto_start)(struct wiphy *wiphy,
diff --git a/include/net/dst.h b/include/net/dst.h
index f8206d3fed2f..2fbb03797d14 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -82,7 +82,7 @@ struct dst_entry {
struct dst_metrics {
u32 metrics[RTAX_MAX];
refcount_t refcnt;
-};
+} __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */
extern const struct dst_metrics dst_default_metrics;
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
@@ -401,7 +401,15 @@ static inline struct neighbour *dst_neigh_lookup(const struct dst_entry *dst, co
static inline struct neighbour *dst_neigh_lookup_skb(const struct dst_entry *dst,
struct sk_buff *skb)
{
- struct neighbour *n = dst->ops->neigh_lookup(dst, skb, NULL);
+ struct neighbour *n = NULL;
+
+ /* The packets from tunnel devices (eg bareudp) may have only
+ * metadata in the dst pointer of skb. Hence a pointer check of
+ * neigh_lookup is needed.
+ */
+ if (dst->ops->neigh_lookup)
+ n = dst->ops->neigh_lookup(dst, skb, NULL);
+
return IS_ERR(n) ? NULL : n;
}
@@ -516,7 +524,16 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
struct dst_entry *dst = skb_dst(skb);
if (dst && dst->ops->update_pmtu)
- dst->ops->update_pmtu(dst, NULL, skb, mtu);
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+}
+
+/* update dst pmtu but not do neighbor confirm */
+static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
@@ -526,7 +543,7 @@ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
u32 encap_mtu = dst_mtu(encap_dst);
if (skb->len > encap_mtu - headroom)
- skb_dst_update_pmtu(skb, encap_mtu - headroom);
+ skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
}
#endif /* _NET_DST_H */
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 5ec645f27ee3..443863c7b8da 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -27,7 +27,8 @@ struct dst_ops {
struct dst_entry * (*negative_advice)(struct dst_entry *);
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu);
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh);
void (*redirect)(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index b473df5b9512..10886c19cfc5 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -107,6 +107,7 @@ struct fib_rule_notifier_info {
[FRA_OIFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, \
[FRA_PRIORITY] = { .type = NLA_U32 }, \
[FRA_FWMARK] = { .type = NLA_U32 }, \
+ [FRA_TUN_ID] = { .type = NLA_U64 }, \
[FRA_FWMASK] = { .type = NLA_U32 }, \
[FRA_TABLE] = { .type = NLA_U32 }, \
[FRA_SUPPRESS_PREFIXLEN] = { .type = NLA_U32 }, \
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index dfabc0503446..457ac35050cf 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -4,6 +4,8 @@
#include <linux/types.h>
#include <linux/in6.h>
+#include <linux/siphash.h>
+#include <linux/string.h>
#include <uapi/linux/if_ether.h>
/**
@@ -253,7 +255,7 @@ struct flow_keys_basic {
struct flow_keys {
struct flow_dissector_key_control control;
#define FLOW_KEYS_HASH_START_FIELD basic
- struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_basic basic __aligned(SIPHASH_ALIGNMENT);
struct flow_dissector_key_tags tags;
struct flow_dissector_key_vlan vlan;
struct flow_dissector_key_vlan cvlan;
@@ -313,4 +315,12 @@ struct bpf_flow_dissector {
void *data_end;
};
+static inline void
+flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
+ struct flow_dissector_key_basic *key_basic)
+{
+ memset(key_control, 0, sizeof(*key_control));
+ memset(key_basic, 0, sizeof(*key_basic));
+}
+
#endif
diff --git a/include/net/fq.h b/include/net/fq.h
index d126b5d20261..2ad85e683041 100644
--- a/include/net/fq.h
+++ b/include/net/fq.h
@@ -69,7 +69,7 @@ struct fq {
struct list_head backlogs;
spinlock_t lock;
u32 flows_cnt;
- u32 perturbation;
+ siphash_key_t perturbation;
u32 limit;
u32 memory_limit;
u32 memory_usage;
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index be40a4b327e3..38a9a3d1222b 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -108,7 +108,7 @@ begin:
static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
{
- u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
+ u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
return reciprocal_scale(hash, fq->flows_cnt);
}
@@ -308,12 +308,12 @@ static int fq_init(struct fq *fq, int flows_cnt)
INIT_LIST_HEAD(&fq->backlogs);
spin_lock_init(&fq->lock);
fq->flows_cnt = max_t(u32, flows_cnt, 1);
- fq->perturbation = prandom_u32();
+ get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
fq->quantum = 300;
fq->limit = 8192;
fq->memory_limit = 16 << 20; /* 16 MBytes */
- fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
+ fq->flows = kvcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
if (!fq->flows)
return -ENOMEM;
@@ -331,7 +331,7 @@ static void fq_reset(struct fq *fq,
for (i = 0; i < fq->flows_cnt; i++)
fq_flow_reset(fq, &fq->flows[i], free_func);
- kfree(fq->flows);
+ kvfree(fq->flows);
fq->flows = NULL;
}
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 9292f1c588b7..2d9e67a69cbe 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -35,12 +35,6 @@ struct genl_info;
* do additional, common, filtering and return an error
* @post_doit: called after an operation's doit callback, it may
* undo operations done by pre_doit, for example release locks
- * @mcast_bind: a socket bound to the given multicast group (which
- * is given as the offset into the groups array)
- * @mcast_unbind: a socket was unbound from the given multicast group.
- * Note that unbind() will not be called symmetrically if the
- * generic netlink family is removed while there are still open
- * sockets.
* @attrbuf: buffer to store parsed attributes (private)
* @mcgrps: multicast groups used by this family
* @n_mcgrps: number of multicast groups
@@ -64,8 +58,6 @@ struct genl_family {
void (*post_doit)(const struct genl_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
- int (*mcast_bind)(struct net *net, int group);
- void (*mcast_unbind)(struct net *net, int group);
struct nlattr ** attrbuf; /* private */
const struct genl_ops * ops;
const struct genl_multicast_group *mcgrps;
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index c57d53e7e02c..1ba40380b4df 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -309,6 +309,10 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, unsigned int optlen);
+/* update the fast reuse flag when adding a socket */
+void inet_csk_update_fastreuse(struct inet_bind_bucket *tb,
+ struct sock *sk);
+
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
#define TCP_PINGPONG_THRESH 3
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index c8e2bebd8d93..e1eaf1780288 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -4,6 +4,7 @@
#include <linux/ip.h>
#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
#include <net/inet_sock.h>
#include <net/dsfield.h>
@@ -99,6 +100,20 @@ static inline int IP_ECN_set_ce(struct iphdr *iph)
return 1;
}
+static inline int IP_ECN_set_ect1(struct iphdr *iph)
+{
+ u32 check = (__force u32)iph->check;
+
+ if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
+ return 0;
+
+ check += (__force u16)htons(0x100);
+
+ iph->check = (__force __sum16)(check + (check>=0xFFFF));
+ iph->tos ^= INET_ECN_MASK;
+ return 1;
+}
+
static inline void IP_ECN_clear(struct iphdr *iph)
{
iph->tos &= ~INET_ECN_MASK;
@@ -134,6 +149,22 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
return 1;
}
+static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
+{
+ __be32 from, to;
+
+ if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
+ return 0;
+
+ from = *(__be32 *)iph;
+ to = from ^ htonl(INET_ECN_MASK << 20);
+ *(__be32 *)iph = to;
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
+ (__force __wsum)to);
+ return 1;
+}
+
static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
{
dscp &= ~INET_ECN_MASK;
@@ -142,7 +173,7 @@ static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
static inline int INET_ECN_set_ce(struct sk_buff *skb)
{
- switch (skb->protocol) {
+ switch (skb_protocol(skb, true)) {
case cpu_to_be16(ETH_P_IP):
if (skb_network_header(skb) + sizeof(struct iphdr) <=
skb_tail_pointer(skb))
@@ -159,6 +190,25 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb)
return 0;
}
+static inline int INET_ECN_set_ect1(struct sk_buff *skb)
+{
+ switch (skb_protocol(skb, true)) {
+ case cpu_to_be16(ETH_P_IP):
+ if (skb_network_header(skb) + sizeof(struct iphdr) <=
+ skb_tail_pointer(skb))
+ return IP_ECN_set_ect1(ip_hdr(skb));
+ break;
+
+ case cpu_to_be16(ETH_P_IPV6):
+ if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
+ skb_tail_pointer(skb))
+ return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
+ break;
+ }
+
+ return 0;
+}
+
/*
* RFC 6040 4.2
* To decapsulate the inner header at the tunnel egress, a compliant
@@ -208,8 +258,12 @@ static inline int INET_ECN_decapsulate(struct sk_buff *skb,
int rc;
rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
- if (!rc && set_ce)
- INET_ECN_set_ce(skb);
+ if (!rc) {
+ if (set_ce)
+ INET_ECN_set_ce(skb);
+ else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
+ INET_ECN_set_ect1(skb);
+ }
return rc;
}
@@ -219,12 +273,16 @@ static inline int IP_ECN_decapsulate(const struct iphdr *oiph,
{
__u8 inner;
- if (skb->protocol == htons(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ break;
+ case htons(ETH_P_IPV6):
inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
+ break;
+ default:
return 0;
+ }
return INET_ECN_decapsulate(skb, oiph->tos, inner);
}
@@ -234,12 +292,16 @@ static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
{
__u8 inner;
- if (skb->protocol == htons(ETH_P_IP))
+ switch (skb_protocol(skb, true)) {
+ case htons(ETH_P_IP):
inner = ip_hdr(skb)->tos;
- else if (skb->protocol == htons(ETH_P_IPV6))
+ break;
+ case htons(ETH_P_IPV6):
inner = ipv6_get_dsfield(ipv6_hdr(skb));
- else
+ break;
+ default:
return 0;
+ }
return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index af2b4c065a04..59802eb8d2cc 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -103,13 +103,19 @@ struct inet_bind_hashbucket {
struct hlist_head chain;
};
-/*
- * Sockets can be hashed in established or listening table
+/* Sockets can be hashed in established or listening table.
+ * We must use different 'nulls' end-of-chain value for all hash buckets :
+ * A socket might transition from ESTABLISH to LISTEN state without
+ * RCU grace period. A lookup in ehash table needs to handle this case.
*/
+#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
unsigned int count;
- struct hlist_head head;
+ union {
+ struct hlist_head head;
+ struct hlist_nulls_head nulls_head;
+ };
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
@@ -179,6 +185,12 @@ static inline spinlock_t *inet_ehash_lockp(
int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo);
+static inline void inet_hashinfo2_free_mod(struct inet_hashinfo *h)
+{
+ kfree(h->lhash2);
+ h->lhash2 = NULL;
+}
+
static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
{
kvfree(hashinfo->ehash_locks);
diff --git a/include/net/ip.h b/include/net/ip.h
index 49c672c8cdae..561ab1755f73 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -721,4 +721,9 @@ int ip_misc_proc_init(void);
int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
struct netlink_ext_ack *extack);
+static inline bool inetdev_valid_mtu(unsigned int mtu)
+{
+ return likely(mtu >= IPV4_MIN_MTU);
+}
+
#endif /* _IP_H */
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index ee7405e759ba..77c5a171703d 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -235,6 +235,7 @@ static inline bool ipv6_anycast_destination(const struct dst_entry *dst,
return rt->rt6i_flags & RTF_ANYCAST ||
(rt->rt6i_dst.plen < 127 &&
+ !(rt->rt6i_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) &&
ipv6_addr_equal(&rt->rt6i_dst.addr, daddr));
}
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index bbeff32fb6cb..3cddf849d5b4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -242,7 +242,6 @@ struct fib_dump_filter {
u32 table_id;
/* filter_set is an optimization that an entry is set */
bool filter_set;
- bool dump_all_families;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index b36a1df93e7c..01e034774c5b 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -887,6 +887,7 @@ struct netns_ipvs {
struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
+ int old_secure_tcp;
atomic_t dropentry;
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
@@ -1615,18 +1616,16 @@ static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
}
#endif /* CONFIG_IP_VS_NFCT */
-/* Really using conntrack? */
-static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp,
- struct sk_buff *skb)
+/* Using old conntrack that can not be redirected to another real server? */
+static inline bool ip_vs_conn_uses_old_conntrack(struct ip_vs_conn *cp,
+ struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_NFCT
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- if (!(cp->flags & IP_VS_CONN_F_NFCT))
- return false;
ct = nf_ct_get(skb, &ctinfo);
- if (ct)
+ if (ct && nf_ct_is_confirmed(ct))
return true;
#endif
return false;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 60d9480bc4d1..e3ec0e4c6d0e 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -955,7 +955,7 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
struct flowi6 *fl6);
-struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst);
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst,
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 6c0c4fde16f8..174a5ae48889 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -24,8 +24,10 @@ struct ipv6_stub {
const struct in6_addr *addr);
int (*ipv6_sock_mc_drop)(struct sock *sk, int ifindex,
const struct in6_addr *addr);
- int (*ipv6_dst_lookup)(struct net *net, struct sock *sk,
- struct dst_entry **dst, struct flowi6 *fl6);
+ struct dst_entry *(*ipv6_dst_lookup_flow)(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ const struct in6_addr *final_dst);
int (*ipv6_route_input)(struct sk_buff *skb);
struct fib6_table *(*fib6_get_table)(struct net *net, u32 id);
diff --git a/include/net/ipx.h b/include/net/ipx.h
index baf090390998..9d1342807b59 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -47,11 +47,6 @@ struct ipxhdr {
/* From af_ipx.c */
extern int sysctl_ipx_pprop_broadcasting;
-static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb)
-{
- return (struct ipxhdr *)skb_transport_header(skb);
-}
-
struct ipx_interface {
/* IPX address */
__be32 if_netnum;
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index df528a623548..ea985aa7a6c5 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -104,7 +104,7 @@ void llc_sk_reset(struct sock *sk);
/* Access to a connection */
int llc_conn_state_process(struct sock *sk, struct sk_buff *skb);
-int llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
+void llc_conn_send_pdu(struct sock *sk, struct sk_buff *skb);
void llc_conn_rtn_pdu(struct sock *sk, struct sk_buff *skb);
void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 456f2edf78dc..3a8e445257cd 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -5911,7 +5911,9 @@ enum rate_control_capabilities {
struct rate_control_ops {
unsigned long capa;
const char *name;
- void *(*alloc)(struct ieee80211_hw *hw, struct dentry *debugfsdir);
+ void *(*alloc)(struct ieee80211_hw *hw);
+ void (*add_debugfs)(struct ieee80211_hw *hw, void *priv,
+ struct dentry *debugfsdir);
void (*free)(void *priv);
void *(*alloc_sta)(void *priv, struct ieee80211_sta *sta, gfp_t gfp);
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 366150053043..83b5d79fd07a 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -413,8 +413,8 @@ static inline void __ipv6_confirm_neigh(struct net_device *dev,
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
@@ -430,8 +430,8 @@ static inline void __ipv6_confirm_neigh_stub(struct net_device *dev,
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
}
rcu_read_unlock_bh();
}
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 50a67bd6a434..8ec77bfdc1a4 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -72,7 +72,6 @@ struct neigh_parms {
struct net_device *dev;
struct list_head list;
int (*neigh_setup)(struct neighbour *);
- void (*neigh_cleanup)(struct neighbour *);
struct neigh_table *tbl;
void *sysctl_table;
@@ -439,8 +438,8 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
unsigned long now = jiffies;
- if (neigh->used != now)
- neigh->used = now;
+ if (READ_ONCE(neigh->used) != now)
+ WRITE_ONCE(neigh->used, now);
if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE)))
return __neigh_event_send(neigh, skb);
return 0;
@@ -468,7 +467,7 @@ static inline int neigh_hh_output(const struct hh_cache *hh, struct sk_buff *skb
do {
seq = read_seqbegin(&hh->hh_lock);
- hh_len = hh->hh_len;
+ hh_len = READ_ONCE(hh->hh_len);
if (likely(hh_len <= HH_DATA_MOD)) {
hh_alen = HH_DATA_MOD;
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 12689ddfc24c..7a4f05f3c7f8 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -329,7 +329,7 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
#define __net_initconst __initconst
#endif
-int peernet2id_alloc(struct net *net, struct net *peer);
+int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp);
int peernet2id(struct net *net, struct net *peer);
bool peernet_has_id(struct net *net, struct net *peer);
struct net *get_net_ns_by_id(struct net *net, int id);
@@ -353,8 +353,13 @@ struct pernet_operations {
* synchronize_rcu() related to these pernet_operations,
* instead of separate synchronize_rcu() for every net.
* Please, avoid synchronize_rcu() at all, where it's possible.
+ *
+ * Note that a combination of pre_exit() and exit() can
+ * be used, since a synchronize_rcu() is guaranteed between
+ * the calls.
*/
int (*init)(struct net *net);
+ void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
unsigned int *id;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index d2bc733a2ef1..e79087419924 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -90,7 +90,7 @@ struct nf_conn {
struct hlist_node nat_bysource;
#endif
/* all members below initialized via memset */
- u8 __nfct_init_offset[0];
+ struct { } __nfct_init_offset;
/* If we were expected by an expectation, this will be it */
struct nf_conn *master;
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 930d062940b7..1634397e302f 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -792,7 +792,8 @@ struct nft_expr_ops {
*/
struct nft_expr {
const struct nft_expr_ops *ops;
- unsigned char data[];
+ unsigned char data[]
+ __attribute__((aligned(__alignof__(u64))));
};
static inline void *nft_expr_priv(const struct nft_expr *expr)
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index 286fd960896f..a1a8d45adb42 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -7,6 +7,7 @@
struct netns_nftables {
struct list_head tables;
struct list_head commit_list;
+ struct list_head module_list;
struct mutex commit_mutex;
unsigned int base_seq;
u8 gencursor;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 514e3c80ecc1..bc692c7c893d 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -241,31 +241,38 @@ __cls_set_class(unsigned long *clp, unsigned long cl)
return xchg(clp, cl);
}
-static inline unsigned long
-cls_set_class(struct Qdisc *q, unsigned long *clp, unsigned long cl)
+static inline void
+__tcf_bind_filter(struct Qdisc *q, struct tcf_result *r, unsigned long base)
{
- unsigned long old_cl;
+ unsigned long cl;
- sch_tree_lock(q);
- old_cl = __cls_set_class(clp, cl);
- sch_tree_unlock(q);
- return old_cl;
+ cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
+ cl = __cls_set_class(&r->class, cl);
+ if (cl)
+ q->ops->cl_ops->unbind_tcf(q, cl);
}
static inline void
tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base)
{
struct Qdisc *q = tp->chain->block->q;
- unsigned long cl;
/* Check q as it is not set for shared blocks. In that case,
* setting class is not supported.
*/
if (!q)
return;
- cl = q->ops->cl_ops->bind_tcf(q, base, r->classid);
- cl = cls_set_class(q, &r->class, cl);
- if (cl)
+ sch_tree_lock(q);
+ __tcf_bind_filter(q, r, base);
+ sch_tree_unlock(q);
+}
+
+static inline void
+__tcf_unbind_filter(struct Qdisc *q, struct tcf_result *r)
+{
+ unsigned long cl;
+
+ if ((cl = __cls_set_class(&r->class, 0)) != 0)
q->ops->cl_ops->unbind_tcf(q, cl);
}
@@ -273,12 +280,10 @@ static inline void
tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
{
struct Qdisc *q = tp->chain->block->q;
- unsigned long cl;
if (!q)
return;
- if ((cl = __cls_set_class(&r->class, 0)) != 0)
- q->ops->cl_ops->unbind_tcf(q, cl);
+ __tcf_unbind_filter(q, r);
}
struct tcf_exts {
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index aa99c73c3fbd..2abecd015f6d 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -128,17 +128,6 @@ static inline void qdisc_run(struct Qdisc *q)
}
}
-static inline __be16 tc_skb_protocol(const struct sk_buff *skb)
-{
- /* We need to take extra care in case the skb came via
- * vlan accelerated path. In that case, use skb->vlan_proto
- * as the original vlan header was already stripped.
- */
- if (skb_vlan_tag_present(skb))
- return skb->vlan_proto;
- return skb->protocol;
-}
-
/* Calculate maximal size of packet seen by hard_start_xmit
routine of this device.
*/
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 21f434f3ac9e..1430cc4a27a3 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -13,6 +13,7 @@
#include <linux/refcount.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
+#include <linux/hashtable.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
@@ -151,8 +152,8 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{
if (qdisc_is_percpu_stats(qdisc))
- return qdisc->empty;
- return !qdisc->q.qlen;
+ return READ_ONCE(qdisc->empty);
+ return !READ_ONCE(qdisc->q.qlen);
}
static inline bool qdisc_run_begin(struct Qdisc *qdisc)
@@ -160,7 +161,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
if (qdisc->flags & TCQ_F_NOLOCK) {
if (!spin_trylock(&qdisc->seqlock))
return false;
- qdisc->empty = false;
+ WRITE_ONCE(qdisc->empty, false);
} else if (qdisc_is_running(qdisc)) {
return false;
}
@@ -315,7 +316,8 @@ struct tcf_proto_ops {
int (*reoffload)(struct tcf_proto *tp, bool add,
tc_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack);
- void (*bind_class)(void *, u32, unsigned long);
+ void (*bind_class)(void *, u32, unsigned long,
+ void *, unsigned long);
void * (*tmplt_create)(struct net *net,
struct tcf_chain *chain,
struct nlattr **tca,
@@ -361,6 +363,7 @@ struct tcf_proto {
bool deleting;
refcount_t refcnt;
struct rcu_head rcu;
+ struct hlist_node destroy_ht_node;
};
struct qdisc_skb_cb {
@@ -398,6 +401,7 @@ struct tcf_block {
struct mutex lock;
struct list_head chain_list;
u32 index; /* block index for shared blocks */
+ u32 classid; /* which class this block belongs to */
refcount_t refcnt;
struct net *net;
struct Qdisc *q;
@@ -411,6 +415,8 @@ struct tcf_block {
struct list_head filter_chain_list;
} chain0;
struct rcu_head rcu;
+ DECLARE_HASHTABLE(proto_destroy_ht, 7);
+ struct mutex proto_destroy_lock; /* Lock for proto_destroy hashtable. */
};
#ifdef CONFIG_PROVE_LOCKING
@@ -522,6 +528,11 @@ static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc)
return q;
}
+static inline struct Qdisc *qdisc_root_bh(const struct Qdisc *qdisc)
+{
+ return rcu_dereference_bh(qdisc->dev_queue->qdisc);
+}
+
static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc)
{
return qdisc->dev_queue->qdisc_sleeping;
@@ -688,22 +699,6 @@ void __qdisc_calculate_pkt_len(struct sk_buff *skb,
const struct qdisc_size_table *stab);
int skb_do_redirect(struct sk_buff *);
-static inline void skb_reset_tc(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_redirected = 0;
-#endif
-}
-
-static inline bool skb_is_tc_redirected(const struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_CLS_ACT
- return skb->tc_redirected;
-#else
- return false;
-#endif
-}
-
static inline bool skb_at_tc_ingress(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_CLS_ACT
diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h
index 823afc42a3aa..06e1deeef464 100644
--- a/include/net/sctp/constants.h
+++ b/include/net/sctp/constants.h
@@ -341,11 +341,13 @@ enum {
ipv4_is_anycast_6to4(a))
/* Flags used for the bind address copy functions. */
-#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by
+#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by
local sock family */
-#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by
+#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by
+ local sock family */
+#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by
peer */
-#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by
+#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by
peer */
/* Reasons to retransmit. */
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0767701ef362..f128b09aa822 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1240,6 +1240,9 @@ struct sctp_ep_common {
/* What socket does this endpoint belong to? */
struct sock *sk;
+ /* Cache netns and it won't change once set */
+ struct net *net;
+
/* This is where we receive inbound chunks. */
struct sctp_inq inqueue;
diff --git a/include/net/sock.h b/include/net/sock.h
index 526de911cd91..86ec9c407746 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -723,6 +723,11 @@ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_h
hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}
+static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
+{
+ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
+}
+
static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
sock_hold(sk);
@@ -844,6 +849,8 @@ static inline int sk_memalloc_socks(void)
{
return static_branch_unlikely(&memalloc_socks_key);
}
+
+void __receive_sock(struct file *file);
#else
static inline int sk_memalloc_socks(void)
@@ -851,6 +858,8 @@ static inline int sk_memalloc_socks(void)
return 0;
}
+static inline void __receive_sock(struct file *file)
+{ }
#endif
static inline gfp_t sk_gfp_mask(const struct sock *sk, gfp_t gfp_mask)
@@ -878,12 +887,17 @@ static inline bool sk_acceptq_is_full(const struct sock *sk)
*/
static inline int sk_stream_min_wspace(const struct sock *sk)
{
- return sk->sk_wmem_queued >> 1;
+ return READ_ONCE(sk->sk_wmem_queued) >> 1;
}
static inline int sk_stream_wspace(const struct sock *sk)
{
- return sk->sk_sndbuf - sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
+}
+
+static inline void sk_wmem_queued_add(struct sock *sk, int val)
+{
+ WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}
void sk_stream_write_space(struct sock *sk);
@@ -949,8 +963,8 @@ static inline void sk_incoming_cpu_update(struct sock *sk)
{
int cpu = raw_smp_processor_id();
- if (unlikely(sk->sk_incoming_cpu != cpu))
- sk->sk_incoming_cpu = cpu;
+ if (unlikely(READ_ONCE(sk->sk_incoming_cpu) != cpu))
+ WRITE_ONCE(sk->sk_incoming_cpu, cpu);
}
static inline void sock_rps_record_flow_hash(__u32 hash)
@@ -1207,7 +1221,7 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
- if (sk->sk_wmem_queued >= sk->sk_sndbuf)
+ if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
return false;
return sk->sk_prot->stream_memory_free ?
@@ -1467,7 +1481,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
- sk->sk_wmem_queued -= skb->truesize;
+ sk_wmem_queued_add(sk, -skb->truesize);
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
@@ -1793,7 +1807,6 @@ static inline int sk_rx_queue_get(const struct sock *sk)
static inline void sk_set_socket(struct sock *sk, struct socket *sock)
{
- sk_tx_queue_clear(sk);
sk->sk_socket = sock;
}
@@ -1935,8 +1948,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
static inline void sk_dst_confirm(struct sock *sk)
{
- if (!sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 1;
+ if (!READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 1);
}
static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
@@ -1946,10 +1959,10 @@ static inline void sock_confirm_neigh(struct sk_buff *skb, struct neighbour *n)
unsigned long now = jiffies;
/* avoid dirtying neighbour */
- if (n->confirmed != now)
- n->confirmed = now;
- if (sk && sk->sk_dst_pending_confirm)
- sk->sk_dst_pending_confirm = 0;
+ if (READ_ONCE(n->confirmed) != now)
+ WRITE_ONCE(n->confirmed, now);
+ if (sk && READ_ONCE(sk->sk_dst_pending_confirm))
+ WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
}
}
@@ -2014,7 +2027,7 @@ static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *fro
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
}
@@ -2220,10 +2233,14 @@ static inline void sk_wake_async(const struct sock *sk, int how, int band)
static inline void sk_stream_moderate_sndbuf(struct sock *sk)
{
- if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) {
- sk->sk_sndbuf = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
- sk->sk_sndbuf = max_t(u32, sk->sk_sndbuf, SOCK_MIN_SNDBUF);
- }
+ u32 val;
+
+ if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
+ return;
+
+ val = min(sk->sk_sndbuf, sk->sk_wmem_queued >> 1);
+
+ WRITE_ONCE(sk->sk_sndbuf, max_t(u32, val, SOCK_MIN_SNDBUF));
}
struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
@@ -2233,12 +2250,17 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
* sk_page_frag - return an appropriate page_frag
* @sk: socket
*
- * If socket allocation mode allows current thread to sleep, it means its
- * safe to use the per task page_frag instead of the per socket one.
+ * Use the per task page_frag instead of the per socket one for
+ * optimization when we know that we're in the normal context and owns
+ * everything that's associated with %current.
+ *
+ * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest
+ * inside other socket operations and end up recursing into sk_page_frag()
+ * while it's already in use.
*/
static inline struct page_frag *sk_page_frag(struct sock *sk)
{
- if (gfpflags_allow_blocking(sk->sk_allocation))
+ if (gfpflags_normal_context(sk->sk_allocation))
return &current->task_frag;
return &sk->sk_frag;
@@ -2251,7 +2273,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag);
*/
static inline bool sock_writeable(const struct sock *sk)
{
- return refcount_read(&sk->sk_wmem_alloc) < (sk->sk_sndbuf >> 1);
+ return refcount_read(&sk->sk_wmem_alloc) < (READ_ONCE(sk->sk_sndbuf) >> 1);
}
static inline gfp_t gfp_any(void)
@@ -2326,7 +2348,7 @@ static inline ktime_t sock_read_timestamp(struct sock *sk)
return kt;
#else
- return sk->sk_stamp;
+ return READ_ONCE(sk->sk_stamp);
#endif
}
@@ -2337,7 +2359,7 @@ static inline void sock_write_timestamp(struct sock *sk, ktime_t kt)
sk->sk_stamp = kt;
write_sequnlock(&sk->sk_stamp_seq);
#else
- sk->sk_stamp = kt;
+ WRITE_ONCE(sk->sk_stamp, kt);
#endif
}
@@ -2568,9 +2590,9 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
*/
static inline void sk_pacing_shift_update(struct sock *sk, int val)
{
- if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val)
return;
- sk->sk_pacing_shift = val;
+ WRITE_ONCE(sk->sk_pacing_shift, val);
}
/* if a socket is bound to a device, check that the given device
diff --git a/include/net/tc_act/tc_police.h b/include/net/tc_act/tc_police.h
index 8b9ef3664262..cfdc7cb82cad 100644
--- a/include/net/tc_act/tc_police.h
+++ b/include/net/tc_act/tc_police.h
@@ -54,7 +54,7 @@ static inline u64 tcf_police_rate_bytes_ps(const struct tc_action *act)
struct tcf_police *police = to_police(act);
struct tcf_police_params *params;
- params = rcu_dereference_bh(police->params);
+ params = rcu_dereference_bh_rtnl(police->params);
return params->rate.rate_bytes_ps;
}
@@ -63,7 +63,7 @@ static inline s64 tcf_police_tcfp_burst(const struct tc_action *act)
struct tcf_police *police = to_police(act);
struct tcf_police_params *params;
- params = rcu_dereference_bh(police->params);
+ params = rcu_dereference_bh_rtnl(police->params);
return params->tcfp_burst;
}
diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h
index 0a559d4b6f0f..b4fce0fae645 100644
--- a/include/net/tc_act/tc_sample.h
+++ b/include/net/tc_act/tc_sample.h
@@ -44,7 +44,7 @@ static inline int tcf_sample_trunc_size(const struct tc_action *a)
static inline struct psample_group *
tcf_sample_psample_group(const struct tc_action *a)
{
- return rcu_dereference(to_sample(a)->psample_group);
+ return rcu_dereference_rtnl(to_sample(a)->psample_group);
}
#endif /* __NET_TC_SAMPLE_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2ee06191c488..a893ad19ac4f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -49,7 +49,7 @@ extern struct inet_hashinfo tcp_hashinfo;
extern struct percpu_counter tcp_orphan_count;
void tcp_time_wait(struct sock *sk, int state, int timeo);
-#define MAX_TCP_HEADER (128 + MAX_HEADER)
+#define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40
#define TCP_MIN_SND_MSS 48
#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
@@ -483,15 +483,16 @@ static inline void tcp_synq_overflow(const struct sock *sk)
reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts);
- if (time_after32(now, last_overflow + HZ))
+ if (!time_between32(now, last_overflow,
+ last_overflow + HZ))
WRITE_ONCE(reuse->synq_overflow_ts, now);
return;
}
}
- last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- if (time_after32(now, last_overflow + HZ))
- tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
+ last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+ if (!time_between32(now, last_overflow, last_overflow + HZ))
+ WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now);
}
/* syncookies: no recent synqueue overflow on this listening socket? */
@@ -506,13 +507,23 @@ static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
reuse = rcu_dereference(sk->sk_reuseport_cb);
if (likely(reuse)) {
last_overflow = READ_ONCE(reuse->synq_overflow_ts);
- return time_after32(now, last_overflow +
- TCP_SYNCOOKIE_VALID);
+ return !time_between32(now, last_overflow - HZ,
+ last_overflow +
+ TCP_SYNCOOKIE_VALID);
}
}
- last_overflow = tcp_sk(sk)->rx_opt.ts_recent_stamp;
- return time_after32(now, last_overflow + TCP_SYNCOOKIE_VALID);
+ last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp);
+
+ /* If last_overflow <= jiffies <= last_overflow + TCP_SYNCOOKIE_VALID,
+ * then we're under synflood. However, we have to use
+ * 'last_overflow - HZ' as lower bound. That's because a concurrent
+ * tcp_synq_overflow() could update .ts_recent_stamp after we read
+ * jiffies but before we store .ts_recent_stamp into last_overflow,
+ * which could lead to rejecting a valid syncookie.
+ */
+ return !time_between32(now, last_overflow - HZ,
+ last_overflow + TCP_SYNCOOKIE_VALID);
}
static inline u32 tcp_cookie_time(void)
@@ -1369,13 +1380,27 @@ static inline int tcp_win_from_space(const struct sock *sk, int space)
/* Note: caller must be prepared to deal with negative returns */
static inline int tcp_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf - sk->sk_backlog.len -
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) -
+ READ_ONCE(sk->sk_backlog.len) -
atomic_read(&sk->sk_rmem_alloc));
}
static inline int tcp_full_space(const struct sock *sk)
{
- return tcp_win_from_space(sk, sk->sk_rcvbuf);
+ return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf));
+}
+
+/* We provision sk_rcvbuf around 200% of sk_rcvlowat.
+ * If 87.5 % (7/8) of the space has been consumed, we want to override
+ * SO_RCVLOWAT constraint, since we are receiving skbs with too small
+ * len/truesize ratio.
+ */
+static inline bool tcp_rmem_pressure(const struct sock *sk)
+{
+ int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
+ int threshold = rcvbuf - (rcvbuf >> 3);
+
+ return atomic_read(&sk->sk_rmem_alloc) > threshold;
}
extern void tcp_openreq_init_rwin(struct request_sock *req,
@@ -1873,7 +1898,8 @@ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
static inline bool tcp_stream_memory_free(const struct sock *sk, int wake)
{
const struct tcp_sock *tp = tcp_sk(sk);
- u32 notsent_bytes = tp->write_seq - tp->snd_nxt;
+ u32 notsent_bytes = READ_ONCE(tp->write_seq) -
+ READ_ONCE(tp->snd_nxt);
return (notsent_bytes << wake) < tcp_notsent_lowat(tp);
}
diff --git a/include/net/tls.h b/include/net/tls.h
index 889df0312cd1..f458227161a9 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -40,6 +40,7 @@
#include <linux/socket.h>
#include <linux/tcp.h>
#include <linux/skmsg.h>
+#include <linux/mutex.h>
#include <net/tcp.h>
#include <net/strparser.h>
@@ -120,7 +121,6 @@ struct tls_rec {
struct list_head list;
int tx_ready;
int tx_flags;
- int inplace_crypto;
struct sk_msg msg_plaintext;
struct sk_msg msg_encrypted;
@@ -156,6 +156,8 @@ struct tls_sw_context_tx {
struct tls_rec *open_rec;
struct list_head tx_list;
atomic_t encrypt_pending;
+ /* protect crypto_wait with encrypt_pending */
+ spinlock_t encrypt_compl_lock;
int async_notify;
int async_capable;
@@ -175,6 +177,8 @@ struct tls_sw_context_rx {
int async_capable;
bool decrypted;
atomic_t decrypt_pending;
+ /* protect crypto_wait with decrypt_pending*/
+ spinlock_t decrypt_compl_lock;
bool async_notify;
};
@@ -265,6 +269,10 @@ struct tls_context {
bool in_tcp_sendpages;
bool pending_open_record_frags;
+ struct mutex tx_lock; /* protects partially_sent_* fields and
+ * per-type TX fields
+ */
+
int (*push_pending_record)(struct sock *sk, int flags);
void (*sk_write_space)(struct sock *sk);
@@ -322,6 +330,8 @@ int tls_sk_attach(struct sock *sk, int optname, char __user *optval,
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx);
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags);
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
void tls_sw_close(struct sock *sk, long timeout);
@@ -362,7 +372,7 @@ int tls_push_sg(struct sock *sk, struct tls_context *ctx,
int flags);
int tls_push_partial_record(struct sock *sk, struct tls_context *ctx,
int flags);
-bool tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
+void tls_free_partial_record(struct sock *sk, struct tls_context *ctx);
static inline struct tls_msg *tls_msg(struct sk_buff *skb)
{
@@ -537,6 +547,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
return !!tls_sw_ctx_tx(ctx);
}
+static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
+{
+ struct tls_context *ctx = tls_get_ctx(sk);
+
+ if (!ctx)
+ return false;
+ return !!tls_sw_ctx_rx(ctx);
+}
+
void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
diff --git a/include/net/udp.h b/include/net/udp.h
index 79d141d2103b..f2c1178f6e12 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -476,6 +476,16 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
if (!inet_get_convert_csum(sk))
features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+ /* UDP segmentation expects packets of type CHECKSUM_PARTIAL or
+ * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial
+ * packets in udp_gro_complete_segment. As does UDP GSO, verified by
+ * udp_send_skb. But when those packets are looped in dev_loopback_xmit
+ * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this
+ * specific case, where PARTIAL is both correct and required.
+ */
+ if (skb->pkt_type == PACKET_LOOPBACK)
+ skb->ip_summed = CHECKSUM_PARTIAL;
+
/* the GSO CB lays after the UDP one, no need to save and restore any
* CB fragment
*/
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a2907873ed56..e410d02d0524 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -950,7 +950,7 @@ struct xfrm_dst {
static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
- if (dst->xfrm) {
+ if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
return xdst->path;
@@ -962,7 +962,7 @@ static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
{
#ifdef CONFIG_XFRM
- if (dst->xfrm) {
+ if (dst->xfrm || (dst->flags & DST_XFRM_QUEUE)) {
struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
return xdst->child;
}
@@ -1019,6 +1019,7 @@ struct xfrm_offload {
#define XFRM_GRO 32
#define XFRM_ESP_NO_TRAILER 64
#define XFRM_DEV_RESUME 128
+#define XFRM_XMIT 256
__u32 status;
#define CRYPTO_SUCCESS 1
@@ -1644,13 +1645,16 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
void *);
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net);
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id,
- u8 type, int dir,
+struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net,
+ const struct xfrm_mark *mark,
+ u32 if_id, u8 type, int dir,
struct xfrm_selector *sel,
struct xfrm_sec_ctx *ctx, int delete,
int *err);
-struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, u8,
- int dir, u32 id, int delete, int *err);
+struct xfrm_policy *xfrm_policy_byid(struct net *net,
+ const struct xfrm_mark *mark, u32 if_id,
+ u8 type, int dir, u32 id, int delete,
+ int *err);
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
void xfrm_policy_hash_rebuild(struct net *net);
u32 xfrm_get_acqseq(void);