aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/datagram.c20
-rw-r--r--net/ipv4/fib_semantics.c4
-rw-r--r--net/ipv4/gre_offload.c5
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/igmp.c14
-rw-r--r--net/ipv4/inetpeer.c18
-rw-r--r--net/ipv4/ip_forward.c4
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_options.c4
-rw-r--r--net/ipv4/ip_output.c19
-rw-r--r--net/ipv4/ip_tunnel.c61
-rw-r--r--net/ipv4/ip_tunnel_core.c5
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipip.c5
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/netfilter/arp_tables.c6
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c5
-rw-r--r--net/ipv4/ping.c15
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c88
-rw-r--r--net/ipv4/tcp.c76
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_input.c29
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_offload.c2
-rw-r--r--net/ipv4/tcp_output.c17
-rw-r--r--net/ipv4/tcp_vegas.c3
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
32 files changed, 234 insertions, 200 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 19ab78aca547..07bd8edef417 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1434,6 +1434,9 @@ static int inet_gro_complete(struct sk_buff *skb, int nhoff)
int proto = iph->protocol;
int err = -ENOSYS;
+ if (skb->encapsulation)
+ skb_set_inner_network_header(skb, nhoff);
+
csum_replace2(&iph->check, iph->tot_len, newlen);
iph->tot_len = newlen;
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 8b5134c582f1..a3095fdefbed 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -86,18 +86,26 @@ out:
}
EXPORT_SYMBOL(ip4_datagram_connect);
+/* Because UDP xmit path can manipulate sk_dst_cache without holding
+ * socket lock, we need to use sk_dst_set() here,
+ * even if we own the socket lock.
+ */
void ip4_datagram_release_cb(struct sock *sk)
{
const struct inet_sock *inet = inet_sk(sk);
const struct ip_options_rcu *inet_opt;
__be32 daddr = inet->inet_daddr;
+ struct dst_entry *dst;
struct flowi4 fl4;
struct rtable *rt;
- if (! __sk_dst_get(sk) || __sk_dst_check(sk, 0))
- return;
-
rcu_read_lock();
+
+ dst = __sk_dst_get(sk);
+ if (!dst || !dst->obsolete || dst->ops->check(dst, 0)) {
+ rcu_read_unlock();
+ return;
+ }
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
@@ -105,8 +113,10 @@ void ip4_datagram_release_cb(struct sock *sk)
inet->inet_saddr, inet->inet_dport,
inet->inet_sport, sk->sk_protocol,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if);
- if (!IS_ERR(rt))
- __sk_dst_set(sk, &rt->dst);
+
+ dst = !IS_ERR(rt) ? &rt->dst : NULL;
+ sk_dst_set(sk, dst);
+
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(ip4_datagram_release_cb);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b53f0bf84dca..017fa5e17594 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -535,7 +535,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
return 1;
attrlen = rtnh_attrlen(rtnh);
- if (attrlen < 0) {
+ if (attrlen > 0) {
struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
@@ -820,13 +820,13 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (fi == NULL)
goto failure;
+ fib_info_cnt++;
if (cfg->fc_mx) {
fi->fib_metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL);
if (!fi->fib_metrics)
goto failure;
} else
fi->fib_metrics = (u32 *) dst_default_metrics;
- fib_info_cnt++;
fi->fib_net = hold_net(net);
fi->fib_protocol = cfg->fc_protocol;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index f1d32280cb54..8c8493ea6b1c 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -50,7 +50,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
greh = (struct gre_base_hdr *)skb_transport_header(skb);
- ghl = skb_inner_network_header(skb) - skb_transport_header(skb);
+ ghl = skb_inner_mac_header(skb) - skb_transport_header(skb);
if (unlikely(ghl < sizeof(*greh)))
goto out;
@@ -255,6 +255,9 @@ static int gre_gro_complete(struct sk_buff *skb, int nhoff)
int err = -ENOENT;
__be16 type;
+ skb->encapsulation = 1;
+ skb_shinfo(skb)->gso_type = SKB_GSO_GRE;
+
type = greh->protocol;
if (greh->flags & GRE_KEY)
grehlen += GRE_HEADER_SECTION;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 0134663fdbce..1e4aa8354f93 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -732,8 +732,6 @@ static void icmp_unreach(struct sk_buff *skb)
/* fall through */
case 0:
info = ntohs(icmph->un.frag.mtu);
- if (!info)
- goto out;
}
break;
case ICMP_SR_FAILED:
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 97e4d1655d26..0ffcd4d64e0a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -369,7 +369,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&pip[1])[0] = IPOPT_RA;
((u8 *)&pip[1])[1] = 4;
((u8 *)&pip[1])[2] = 0;
@@ -714,7 +714,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -1952,6 +1952,10 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
rtnl_lock();
in_dev = ip_mc_find_dev(net, imr);
+ if (!in_dev) {
+ ret = -ENODEV;
+ goto out;
+ }
ifindex = imr->imr_ifindex;
for (imlp = &inet->mc_list;
(iml = rtnl_dereference(*imlp)) != NULL;
@@ -1969,16 +1973,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
*imlp = iml->next_rcu;
- if (in_dev)
- ip_mc_dec_group(in_dev, group);
+ ip_mc_dec_group(in_dev, group);
rtnl_unlock();
/* decrease mem now to avoid the memleak warning */
atomic_sub(sizeof(*iml), &sk->sk_omem_alloc);
kfree_rcu(iml, rcu);
return 0;
}
- if (!in_dev)
- ret = -ENODEV;
+out:
rtnl_unlock();
return ret;
}
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 48f424465112..bf2cb4a4714b 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -26,20 +26,7 @@
* Theory of operations.
* We keep one entry for each peer IP address. The nodes contains long-living
* information about the peer which doesn't depend on routes.
- * At this moment this information consists only of ID field for the next
- * outgoing IP packet. This field is incremented with each packet as encoded
- * in inet_getid() function (include/net/inetpeer.h).
- * At the moment of writing this notes identifier of IP packets is generated
- * to be unpredictable using this code only for packets subjected
- * (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size when local fragmentation is disabled use a constant ID and do
- * not use this code (see ip_select_ident() in include/net/ip.h).
*
- * Route cache entries hold references to our nodes.
- * New cache entries get references via lookup by destination IP address in
- * the avl tree. The reference is grabbed only when it's needed i.e. only
- * when we try to output IP packet which needs an unpredictable ID (see
- * __ip_select_ident() in net/ipv4/route.c).
* Nodes are removed only when reference counter goes to 0.
* When it's happened the node may be removed when a sufficient amount of
* time has been passed since its last use. The less-recently-used entry can
@@ -62,7 +49,6 @@
* refcnt: atomically against modifications on other CPU;
* usually under some other lock to prevent node disappearing
* daddr: unchangeable
- * ip_id_count: atomic value (no lock needed)
*/
static struct kmem_cache *peer_cachep __read_mostly;
@@ -497,10 +483,6 @@ relookup:
p->daddr = *daddr;
atomic_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
- atomic_set(&p->ip_id_count,
- (daddr->family == AF_INET) ?
- secure_ip_id(daddr->addr.a4) :
- secure_ipv6_id(daddr->addr.a6));
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
/* 60*HZ is arbitrary, but chosen enough high so that the first
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index f3869c186d97..1c6bd4359cbd 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -42,12 +42,12 @@
static bool ip_may_fragment(const struct sk_buff *skb)
{
return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
- !skb->local_df;
+ skb->local_df;
}
static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
{
- if (skb->len <= mtu || skb->local_df)
+ if (skb->len <= mtu)
return false;
if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ec4f762efda5..94213c891565 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -463,6 +463,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
+ dev->type = ARPHRD_IPGRE;
ip_tunnel_setup(dev, ipgre_net_id);
}
@@ -501,7 +502,6 @@ static int ipgre_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->type = ARPHRD_IPGRE;
dev->flags = IFF_NOARP;
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
dev->addr_len = 4;
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index f4ab72e19af9..96f90b89df32 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -288,6 +288,10 @@ int ip_options_compile(struct net *net,
optptr++;
continue;
}
+ if (unlikely(l < 2)) {
+ pp_ptr = optptr;
+ goto error;
+ }
optlen = optptr[1];
if (optlen < 2 || optlen > l) {
pp_ptr = optptr;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 73c6b63bba74..844323b6cfb9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -386,8 +386,7 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_more(skb, &rt->dst, sk,
- (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ ip_select_ident_segs(skb, sk, skb_shinfo(skb)->gso_segs ?: 1);
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
@@ -1338,7 +1337,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(skb, &rt->dst, sk);
+ ip_select_ident(skb, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
@@ -1488,6 +1487,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
struct sk_buff *nskb;
struct sock *sk;
struct inet_sock *inet;
+ int err;
if (ip_options_echo(&replyopts.opt.opt, skb))
return;
@@ -1526,8 +1526,13 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
sock_net_set(sk, net);
__skb_queue_head_init(&sk->sk_write_queue);
sk->sk_sndbuf = sysctl_wmem_default;
- ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
- &ipc, &rt, MSG_DONTWAIT);
+ err = ip_append_data(sk, &fl4, ip_reply_glue_bits, arg->iov->iov_base,
+ len, 0, &ipc, &rt, MSG_DONTWAIT);
+ if (unlikely(err)) {
+ ip_flush_pending_frames(sk);
+ goto out;
+ }
+
nskb = skb_peek(&sk->sk_write_queue);
if (nskb) {
if (arg->csumoffset >= 0)
@@ -1539,7 +1544,7 @@ void ip_send_unicast_reply(struct net *net, struct sk_buff *skb, __be32 daddr,
skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
-
+out:
put_cpu_var(unicast_sock);
ip_rt_put(rt);
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index a82a22d8f77f..0a4af0920af3 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -69,28 +69,25 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
}
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
- struct dst_entry *dst)
+ struct dst_entry *dst, __be32 saddr)
{
struct dst_entry *old_dst;
- if (dst) {
- if (dst->flags & DST_NOCACHE)
- dst = NULL;
- else
- dst_clone(dst);
- }
+ dst_clone(dst);
old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
dst_release(old_dst);
+ idst->saddr = saddr;
}
-static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst)
+static void tunnel_dst_set(struct ip_tunnel *t,
+ struct dst_entry *dst, __be32 saddr)
{
- __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst);
+ __tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst, saddr);
}
static void tunnel_dst_reset(struct ip_tunnel *t)
{
- tunnel_dst_set(t, NULL);
+ tunnel_dst_set(t, NULL, 0);
}
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
@@ -98,23 +95,29 @@ void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
int i;
for_each_possible_cpu(i)
- __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
+ __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
-static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
+static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
+ u32 cookie, __be32 *saddr)
{
+ struct ip_tunnel_dst *idst;
struct dst_entry *dst;
rcu_read_lock();
- dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
+ idst = this_cpu_ptr(t->dst_cache);
+ dst = rcu_dereference(idst->dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
if (dst) {
- if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- rcu_read_unlock();
+ if (!dst->obsolete || dst->ops->check(dst, cookie)) {
+ *saddr = idst->saddr;
+ } else {
tunnel_dst_reset(t);
- return NULL;
+ dst_release(dst);
+ dst = NULL;
}
- dst_hold(dst);
}
rcu_read_unlock();
return (struct rtable *)dst;
@@ -173,6 +176,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (remote != t->parms.iph.daddr ||
+ t->parms.iph.saddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -189,10 +193,11 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
head = &itn->tunnels[hash];
hlist_for_each_entry_rcu(t, head, hash_node) {
- if ((local != t->parms.iph.saddr &&
- (local != t->parms.iph.daddr ||
- !ipv4_is_multicast(local))) ||
- !(t->dev->flags & IFF_UP))
+ if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+ (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+ continue;
+
+ if (!(t->dev->flags & IFF_UP))
continue;
if (!ip_tunnel_key_match(&t->parms, flags, key))
@@ -209,6 +214,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
hlist_for_each_entry_rcu(t, head, hash_node) {
if (t->parms.i_key != key ||
+ t->parms.iph.saddr != 0 ||
+ t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
@@ -362,7 +369,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (!IS_ERR(rt)) {
tdev = rt->dst.dev;
- tunnel_dst_set(tunnel, &rt->dst);
+ tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
ip_rt_put(rt);
}
if (dev->type != ARPHRD_ETHER)
@@ -438,6 +445,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
+ skb_reset_network_header(skb);
+
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
if (log_ecn_error)
@@ -534,9 +543,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
int err;
- bool connected = true;
+ bool connected;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+ connected = (tunnel->parms.iph.daddr != 0);
dst = tnl_params->daddr;
if (dst == 0) {
@@ -603,7 +613,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
- rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
+ rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -613,7 +623,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
if (connected)
- tunnel_dst_set(tunnel, &rt->dst);
+ tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
}
if (rt->dst.dev == dev) {
@@ -872,6 +882,7 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
}
rtnl_unlock();
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 8d69626f2206..791a419f0699 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -74,7 +74,7 @@ int iptunnel_xmit(struct rtable *rt, struct sk_buff *skb,
iph->daddr = dst;
iph->saddr = src;
iph->ttl = ttl;
- __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
+ __ip_select_ident(iph, skb_shinfo(skb)->gso_segs ?: 1);
err = ip_local_out(skb);
if (unlikely(net_xmit_eval(err)))
@@ -91,11 +91,12 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
skb_pull_rcsum(skb, hdr_len);
if (inner_proto == htons(ETH_P_TEB)) {
- struct ethhdr *eh = (struct ethhdr *)skb->data;
+ struct ethhdr *eh;
if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
return -ENOMEM;
+ eh = (struct ethhdr *)skb->data;
if (likely(ntohs(eh->h_proto) >= ETH_P_802_3_MIN))
skb->protocol = eh->h_proto;
else
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 48eafae51769..e4a8f76c8995 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -207,6 +207,7 @@ static const struct net_device_ops vti_netdev_ops = {
static void vti_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &vti_netdev_ops;
+ dev->type = ARPHRD_TUNNEL;
ip_tunnel_setup(dev, vti_net_id);
}
@@ -218,7 +219,6 @@ static int vti_tunnel_init(struct net_device *dev)
memcpy(dev->dev_addr, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->type = ARPHRD_TUNNEL;
dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
dev->mtu = ETH_DATA_LEN;
dev->flags = IFF_NOARP;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 812b18351462..62eaa005e146 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -149,13 +149,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->dev->ifindex, 0, IPPROTO_IPIP, 0);
+ t->parms.link, 0, IPPROTO_IPIP, 0);
err = 0;
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
+ ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
IPPROTO_IPIP, 0);
err = 0;
goto out;
@@ -486,4 +486,5 @@ static void __exit ipip_fini(void)
module_init(ipip_init);
module_exit(ipip_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("ipip");
MODULE_ALIAS_NETDEV("tunl0");
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 28863570dd60..1149fc2290e2 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1663,7 +1663,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(skb, skb_dst(skb), NULL);
+ ip_select_ident(skb, NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 59da7cde0724..f95b6f93814b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1044,8 +1044,10 @@ static int __do_replace(struct net *net, const char *name,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 718dfbd30cbe..99e810f84671 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1231,8 +1231,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 12e13bd82b5b..f40f321b41fc 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,7 +22,6 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-/* Returns new sk_buff, or NULL */
static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
int err;
@@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
err = ip_defrag(skb, user);
local_bh_enable();
- if (!err)
+ if (!err) {
ip_send_check(ip_hdr(skb));
+ skb->local_df = 1;
+ }
return err;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 2d11c094296e..e21934b06d4c 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -252,26 +252,33 @@ int ping_init_sock(struct sock *sk)
{
struct net *net = sock_net(sk);
kgid_t group = current_egid();
- struct group_info *group_info = get_current_groups();
- int i, j, count = group_info->ngroups;
+ struct group_info *group_info;
+ int i, j, count;
kgid_t low, high;
+ int ret = 0;
inet_get_ping_group_range_net(net, &low, &high);
if (gid_lte(low, group) && gid_lte(group, high))
return 0;
+ group_info = get_current_groups();
+ count = group_info->ngroups;
for (i = 0; i < group_info->nblocks; i++) {
int cp_count = min_t(int, NGROUPS_PER_BLOCK, count);
for (j = 0; j < cp_count; j++) {
kgid_t gid = group_info->blocks[i][j];
if (gid_lte(low, gid) && gid_lte(gid, high))
- return 0;
+ goto out_release_group;
}
count -= cp_count;
}
- return -EACCES;
+ ret = -EACCES;
+
+out_release_group:
+ put_group_info(group_info);
+ return ret;
}
EXPORT_SYMBOL_GPL(ping_init_sock);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c04518f4850a..11c8d81fdc59 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -389,7 +389,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
iph->check = 0;
iph->tot_len = htons(length);
if (!iph->id)
- ip_select_ident(skb, &rt->dst, NULL);
+ ip_select_ident(skb, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4c011ec69ed4..487bb6252520 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -89,6 +89,7 @@
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
+#include <linux/jhash.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
@@ -462,39 +463,45 @@ static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
return neigh_create(&arp_tbl, pkey, dev);
}
-/*
- * Peer allocation may fail only in serious out-of-memory conditions. However
- * we still can generate some output.
- * Random ID selection looks a bit dangerous because we have no chances to
- * select ID being unique in a reasonable period of time.
- * But broken packet identifier may be better than no packet at all.
+#define IP_IDENTS_SZ 2048u
+struct ip_ident_bucket {
+ atomic_t id;
+ u32 stamp32;
+};
+
+static struct ip_ident_bucket *ip_idents __read_mostly;
+
+/* In order to protect privacy, we add a perturbation to identifiers
+ * if one generator is seldom used. This makes hard for an attacker
+ * to infer how many packets were sent between two points in time.
*/
-static void ip_select_fb_ident(struct iphdr *iph)
+u32 ip_idents_reserve(u32 hash, int segs)
{
- static DEFINE_SPINLOCK(ip_fb_id_lock);
- static u32 ip_fallback_id;
- u32 salt;
+ struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
+ u32 old = ACCESS_ONCE(bucket->stamp32);
+ u32 now = (u32)jiffies;
+ u32 delta = 0;
- spin_lock_bh(&ip_fb_id_lock);
- salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
- iph->id = htons(salt & 0xFFFF);
- ip_fallback_id = salt;
- spin_unlock_bh(&ip_fb_id_lock);
+ if (old != now && cmpxchg(&bucket->stamp32, old, now) == old)
+ delta = prandom_u32_max(now - old);
+
+ return atomic_add_return(segs + delta, &bucket->id) - segs;
}
+EXPORT_SYMBOL(ip_idents_reserve);
-void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
+void __ip_select_ident(struct iphdr *iph, int segs)
{
- struct net *net = dev_net(dst->dev);
- struct inet_peer *peer;
+ static u32 ip_idents_hashrnd __read_mostly;
+ u32 hash, id;
- peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
- if (peer) {
- iph->id = htons(inet_getid(peer, more));
- inet_putpeer(peer);
- return;
- }
+ net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
- ip_select_fb_ident(iph);
+ hash = jhash_3words((__force u32)iph->daddr,
+ (__force u32)iph->saddr,
+ iph->protocol,
+ ip_idents_hashrnd);
+ id = ip_idents_reserve(hash, segs);
+ iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
@@ -1029,7 +1036,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
const struct iphdr *iph = (const struct iphdr *) skb->data;
struct flowi4 fl4;
struct rtable *rt;
- struct dst_entry *dst;
+ struct dst_entry *odst = NULL;
bool new = false;
bh_lock_sock(sk);
@@ -1037,16 +1044,17 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
if (!ip_sk_accept_pmtu(sk))
goto out;
- rt = (struct rtable *) __sk_dst_get(sk);
+ odst = sk_dst_get(sk);
- if (sock_owned_by_user(sk) || !rt) {
+ if (sock_owned_by_user(sk) || !odst) {
__ipv4_sk_update_pmtu(skb, sk, mtu);
goto out;
}
__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
- if (!__sk_dst_check(sk, 0)) {
+ rt = (struct rtable *)odst;
+ if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
if (IS_ERR(rt))
goto out;
@@ -1056,8 +1064,7 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);
- dst = dst_check(&rt->dst, 0);
- if (!dst) {
+ if (!dst_check(&rt->dst, 0)) {
if (new)
dst_release(&rt->dst);
@@ -1069,10 +1076,11 @@ void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
}
if (new)
- __sk_dst_set(sk, &rt->dst);
+ sk_dst_set(sk, &rt->dst);
out:
bh_unlock_sock(sk);
+ dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
@@ -1526,7 +1534,7 @@ static int __mkroute_input(struct sk_buff *skb,
struct in_device *out_dev;
unsigned int flags = 0;
bool do_cache;
- u32 itag;
+ u32 itag = 0;
/* get a working reference to the output device */
out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
@@ -2260,9 +2268,9 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
return rt;
if (flp4->flowi4_proto)
- rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
- flowi4_to_flowi(flp4),
- sk, 0);
+ rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
+ flowi4_to_flowi(flp4),
+ sk, 0);
return rt;
}
@@ -2364,7 +2372,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
}
} else
#endif
- if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
+ if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
goto nla_put_failure;
}
@@ -2717,6 +2725,12 @@ int __init ip_rt_init(void)
{
int rc = 0;
+ ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
+ if (!ip_idents)
+ panic("IP: failed to allocate ip_idents\n");
+
+ prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));
+
#ifdef CONFIG_IP_ROUTE_CLASSID
ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
if (!ip_rt_acct)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 97c8f5620c43..29d240b87af1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1108,7 +1108,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (unlikely(tp->repair)) {
if (tp->repair_queue == TCP_RECV_QUEUE) {
copied = tcp_send_rcvq(sk, msg, size);
- goto out;
+ goto out_nopush;
}
err = -EINVAL;
@@ -1175,13 +1175,6 @@ new_segment:
goto wait_for_memory;
/*
- * All packets are restored as if they have
- * already been sent.
- */
- if (tp->repair)
- TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
- /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -1190,6 +1183,13 @@ new_segment:
skb_entail(sk, skb);
copy = size_goal;
max = size_goal;
+
+ /* All packets are restored as if they have
+ * already been sent. skb_mstamp isn't set to
+ * avoid wrong rtt estimation.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
}
/* Try to append data to the end of skb. */
@@ -1282,6 +1282,7 @@ wait_for_memory:
out:
if (copied)
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+out_nopush:
release_sock(sk);
return copied + copied_syn;
@@ -2953,61 +2954,42 @@ EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool __read_mostly;
+static DEFINE_PER_CPU(struct tcp_md5sig_pool, tcp_md5sig_pool);
static DEFINE_MUTEX(tcp_md5sig_mutex);
-
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
-{
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
-
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
- }
- free_percpu(pool);
-}
+static bool tcp_md5sig_pool_populated = false;
static void __tcp_alloc_md5sig_pool(void)
{
int cpu;
- struct tcp_md5sig_pool __percpu *pool;
-
- pool = alloc_percpu(struct tcp_md5sig_pool);
- if (!pool)
- return;
for_each_possible_cpu(cpu) {
- struct crypto_hash *hash;
-
- hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR_OR_NULL(hash))
- goto out_free;
+ if (!per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm) {
+ struct crypto_hash *hash;
- per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
+ hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR_OR_NULL(hash))
+ return;
+ per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash;
+ }
}
- /* before setting tcp_md5sig_pool, we must commit all writes
- * to memory. See ACCESS_ONCE() in tcp_get_md5sig_pool()
+ /* before setting tcp_md5sig_pool_populated, we must commit all writes
+ * to memory. See smp_rmb() in tcp_get_md5sig_pool()
*/
smp_wmb();
- tcp_md5sig_pool = pool;
- return;
-out_free:
- __tcp_free_md5sig_pool(pool);
+ tcp_md5sig_pool_populated = true;
}
bool tcp_alloc_md5sig_pool(void)
{
- if (unlikely(!tcp_md5sig_pool)) {
+ if (unlikely(!tcp_md5sig_pool_populated)) {
mutex_lock(&tcp_md5sig_mutex);
- if (!tcp_md5sig_pool)
+ if (!tcp_md5sig_pool_populated)
__tcp_alloc_md5sig_pool();
mutex_unlock(&tcp_md5sig_mutex);
}
- return tcp_md5sig_pool != NULL;
+ return tcp_md5sig_pool_populated;
}
EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
@@ -3021,13 +3003,13 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool __percpu *p;
-
local_bh_disable();
- p = ACCESS_ONCE(tcp_md5sig_pool);
- if (p)
- return __this_cpu_ptr(p);
+ if (tcp_md5sig_pool_populated) {
+ /* coupled with smp_wmb() in __tcp_alloc_md5sig_pool() */
+ smp_rmb();
+ return this_cpu_ptr(&tcp_md5sig_pool);
+ }
local_bh_enable();
return NULL;
}
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 828e4c3ffbaf..121a9a22dc98 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -409,7 +409,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
ratio += cnt;
- ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
+ ca->delayed_ack = clamp(ratio, 1U, ACK_RATIO_LIMIT);
}
/* Some calls are for duplicates without timetamps */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eeaac399420d..22917918fa80 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1113,7 +1113,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb,
}
/* D-SACK for already forgotten data... Do dumb counting. */
- if (dup_sack && tp->undo_marker && tp->undo_retrans &&
+ if (dup_sack && tp->undo_marker && tp->undo_retrans > 0 &&
!after(end_seq_0, prior_snd_una) &&
after(end_seq_0, tp->undo_marker))
tp->undo_retrans--;
@@ -1169,7 +1169,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb,
unsigned int new_len = (pkt_len / mss) * mss;
if (!in_sack && new_len < pkt_len) {
new_len += mss;
- if (new_len > skb->len)
+ if (new_len >= skb->len)
return 0;
}
pkt_len = new_len;
@@ -1193,7 +1193,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
/* Account D-SACK for retransmitted packet. */
if (dup_sack && (sacked & TCPCB_RETRANS)) {
- if (tp->undo_marker && tp->undo_retrans &&
+ if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
tp->undo_retrans--;
if (sacked & TCPCB_SACKED_ACKED)
@@ -1894,7 +1894,7 @@ static void tcp_clear_retrans_partial(struct tcp_sock *tp)
tp->lost_out = 0;
tp->undo_marker = 0;
- tp->undo_retrans = 0;
+ tp->undo_retrans = -1;
}
void tcp_clear_retrans(struct tcp_sock *tp)
@@ -2663,7 +2663,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tp->retrans_out;
+ tp->undo_retrans = tp->retrans_out ? : -1;
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
@@ -2678,18 +2678,16 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
*/
static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
bool recovered = !before(tp->snd_una, tp->high_seq);
if (tp->frto) { /* F-RTO RFC5682 sec 3.1 (sack enhanced version). */
- if (flag & FLAG_ORIG_SACK_ACKED) {
- /* Step 3.b. A timeout is spurious if not all data are
- * lost, i.e., never-retransmitted data are (s)acked.
- */
- tcp_try_undo_loss(sk, true);
+ /* Step 3.b. A timeout is spurious if not all data are
+ * lost, i.e., never-retransmitted data are (s)acked.
+ */
+ if (tcp_try_undo_loss(sk, flag & FLAG_ORIG_SACK_ACKED))
return;
- }
+
if (after(tp->snd_nxt, tp->high_seq) &&
(flag & FLAG_DATA_SACKED || is_dupack)) {
tp->frto = 0; /* Loss was real: 2nd part of step 3.a */
@@ -2705,12 +2703,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
if (recovered) {
/* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
- icsk->icsk_retransmits = 0;
tcp_try_undo_recovery(sk);
return;
}
- if (flag & FLAG_DATA_ACKED)
- icsk->icsk_retransmits = 0;
if (tcp_is_reno(tp)) {
/* A Reno DUPACK means new data in F-RTO step 2.b above are
* delivered. Lower inflight to clock out (re)tranmissions.
@@ -3399,8 +3394,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
- if (after(ack, prior_snd_una))
+ if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
+ icsk->icsk_retransmits = 0;
+ }
prior_fackets = tp->fackets_out;
prior_in_flight = tcp_packets_in_flight(tp);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1e4eac779f51..a782d5be132e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -269,7 +269,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
* It can be called through tcp_release_cb() if socket was owned by user
* at the time tcp_v4_err() was called to handle ICMP message.
*/
-static void tcp_v4_mtu_reduced(struct sock *sk)
+void tcp_v4_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
struct inet_sock *inet = inet_sk(sk);
@@ -300,6 +300,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
tcp_simple_retransmit(sk);
} /* else let the usual retransmit timer handle it */
}
+EXPORT_SYMBOL(tcp_v4_mtu_reduced);
static void do_redirect(struct sk_buff *skb, struct sock *sk)
{
@@ -2117,6 +2118,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
.compat_setsockopt = compat_ip_setsockopt,
.compat_getsockopt = compat_ip_getsockopt,
#endif
+ .mtu_reduced = tcp_v4_mtu_reduced,
};
EXPORT_SYMBOL(ipv4_specific);
@@ -2736,7 +2738,6 @@ struct proto tcp_prot = {
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
- .mtu_reduced = tcp_v4_mtu_reduced,
.hash = inet_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index b92b81718ca4..c25953a386d0 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -310,7 +310,7 @@ static int tcp4_gro_complete(struct sk_buff *skb, int thoff)
th->check = ~tcp_v4_check(skb->len - thoff, iph->saddr,
iph->daddr, 0);
- skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+ skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
return tcp_gro_complete(skb);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 17a11e65e57f..91b98e5a17aa 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -787,7 +787,7 @@ void tcp_release_cb(struct sock *sk)
__sock_put(sk);
}
if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
- sk->sk_prot->mtu_reduced(sk);
+ inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
__sock_put(sk);
}
}
@@ -1876,8 +1876,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
- if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
+ if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
+ /* "when" is used as a start point for the retransmit timer */
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
goto repair; /* Skip network transmission */
+ }
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
@@ -2066,9 +2069,7 @@ void tcp_send_loss_probe(struct sock *sk)
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
- /* Probe with zero data doesn't trigger fast recovery. */
- if (skb->len > 0)
- err = __tcp_retransmit_skb(sk, skb);
+ err = __tcp_retransmit_skb(sk, skb);
/* Record snd_nxt for loss detection. */
if (likely(!err))
@@ -2448,8 +2449,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (!tp->retrans_stamp)
tp->retrans_stamp = TCP_SKB_CB(skb)->when;
- tp->undo_retrans += tcp_skb_pcount(skb);
-
/* snd_nxt is stored to detect loss of retransmitted segment,
* see tcp_input.c tcp_sacktag_write_queue().
*/
@@ -2457,6 +2456,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
} else {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
}
+
+ if (tp->undo_retrans < 0)
+ tp->undo_retrans = 0;
+ tp->undo_retrans += tcp_skb_pcount(skb);
return err;
}
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 06cae62bf208..6b1a5fd60598 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -219,7 +219,8 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked,
* This is:
* (actual rate in segments) * baseRTT
*/
- target_cwnd = tp->snd_cwnd * vegas->baseRTT / rtt;
+ target_cwnd = (u64)tp->snd_cwnd * vegas->baseRTT;
+ do_div(target_cwnd, rtt);
/* Calculate the difference between the window we had,
* and the window we would like to have. This quantity
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 326475a94865..603ad498e18a 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -145,7 +145,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked,
rtt = veno->minrtt;
- target_cwnd = (tp->snd_cwnd * veno->basertt);
+ target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
target_cwnd <<= V_PARAM_SHIFT;
do_div(target_cwnd, rtt);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 77bd16fa9f34..b25e852625d8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1833,6 +1833,10 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net,
unsigned int count, slot = udp_hashfn(net, hnum, udp_table.mask);
struct udp_hslot *hslot = &udp_table.hash[slot];
+ /* Do not bother scanning a too big list */
+ if (hslot->count > 10)
+ return NULL;
+
rcu_read_lock();
begin:
count = 0;
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index 31b18152528f..1f564a1487a3 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -117,12 +117,12 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- ip_select_ident(skb, dst->child, NULL);
top_iph->ttl = ip4_dst_hoplimit(dst->child);
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
+ ip_select_ident(skb, NULL);
return 0;
}