aboutsummaryrefslogtreecommitdiffstats
path: root/net/mptcp
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp')
-rw-r--r--net/mptcp/diag.c11
-rw-r--r--net/mptcp/fastopen.c6
-rw-r--r--net/mptcp/mib.c1
-rw-r--r--net/mptcp/mib.h8
-rw-r--r--net/mptcp/mptcp_pm_gen.c2
-rw-r--r--net/mptcp/mptcp_pm_gen.h2
-rw-r--r--net/mptcp/options.c17
-rw-r--r--net/mptcp/pm_netlink.c76
-rw-r--r--net/mptcp/pm_userspace.c46
-rw-r--r--net/mptcp/protocol.c252
-rw-r--r--net/mptcp/protocol.h62
-rw-r--r--net/mptcp/sockopt.c29
-rw-r--r--net/mptcp/subflow.c105
13 files changed, 393 insertions, 224 deletions
diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c
index a536586742f2..7017dd60659d 100644
--- a/net/mptcp/diag.c
+++ b/net/mptcp/diag.c
@@ -13,17 +13,22 @@
#include <uapi/linux/mptcp.h>
#include "protocol.h"
-static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
+static int subflow_get_info(struct sock *sk, struct sk_buff *skb)
{
struct mptcp_subflow_context *sf;
struct nlattr *start;
u32 flags = 0;
+ bool slow;
int err;
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return 0;
+
start = nla_nest_start_noflag(skb, INET_ULP_INFO_MPTCP);
if (!start)
return -EMSGSIZE;
+ slow = lock_sock_fast(sk);
rcu_read_lock();
sf = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
if (!sf) {
@@ -63,17 +68,19 @@ static int subflow_get_info(const struct sock *sk, struct sk_buff *skb)
sf->map_data_len) ||
nla_put_u32(skb, MPTCP_SUBFLOW_ATTR_FLAGS, flags) ||
nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_REM, sf->remote_id) ||
- nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, sf->local_id)) {
+ nla_put_u8(skb, MPTCP_SUBFLOW_ATTR_ID_LOC, subflow_get_local_id(sf))) {
err = -EMSGSIZE;
goto nla_failure;
}
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_end(skb, start);
return 0;
nla_failure:
rcu_read_unlock();
+ unlock_sock_fast(sk, slow);
nla_nest_cancel(skb, start);
return err;
}
diff --git a/net/mptcp/fastopen.c b/net/mptcp/fastopen.c
index 74698582a285..ad28da655f8b 100644
--- a/net/mptcp/fastopen.c
+++ b/net/mptcp/fastopen.c
@@ -59,13 +59,12 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
mptcp_data_unlock(sk);
}
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
struct sock *sk = (struct sock *)msk;
struct sk_buff *skb;
- mptcp_data_lock(sk);
skb = skb_peek_tail(&sk->sk_receive_queue);
if (skb) {
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);
@@ -77,5 +76,4 @@ void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_
}
pr_debug("msk=%p ack_seq=%llx", msk, msk->ack_seq);
- mptcp_data_unlock(sk);
}
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index a0990c365a2e..c30405e76833 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -66,6 +66,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED),
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
+ SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
SNMP_MIB_SENTINEL
};
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index cae71d947252..dd7fd1f246b5 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -65,6 +65,7 @@ enum linux_mptcp_mib_field {
* conflict with another subflow while updating msk rcv wnd
*/
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
+ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
__MPTCP_MIB_MAX
};
@@ -95,4 +96,11 @@ static inline void __MPTCP_INC_STATS(struct net *net,
__SNMP_INC_STATS(net->mib.mptcp_statistics, field);
}
+static inline void MPTCP_DEC_STATS(struct net *net,
+ enum linux_mptcp_mib_field field)
+{
+ if (likely(net->mib.mptcp_statistics))
+ SNMP_DEC_STATS(net->mib.mptcp_statistics, field);
+}
+
bool mptcp_mib_alloc(struct net *net);
diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c
index a2325e70ddab..670da7822e6c 100644
--- a/net/mptcp/mptcp_pm_gen.c
+++ b/net/mptcp/mptcp_pm_gen.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
/* Do not edit directly, auto-generated from: */
-/* Documentation/netlink/specs/mptcp.yaml */
+/* Documentation/netlink/specs/mptcp_pm.yaml */
/* YNL-GEN kernel source */
#include <net/netlink.h>
diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h
index 10579d184587..ac9fc7225b6a 100644
--- a/net/mptcp/mptcp_pm_gen.h
+++ b/net/mptcp/mptcp_pm_gen.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/* Do not edit directly, auto-generated from: */
-/* Documentation/netlink/specs/mptcp.yaml */
+/* Documentation/netlink/specs/mptcp_pm.yaml */
/* YNL-GEN kernel header */
#ifndef _LINUX_MPTCP_PM_GEN_H
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index c53914012d01..63fc0758c22d 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -123,8 +123,8 @@ static void mptcp_parse_option(const struct sk_buff *skb,
break;
case MPTCPOPT_MP_JOIN:
- mp_opt->suboptions |= OPTIONS_MPTCP_MPJ;
if (opsize == TCPOLEN_MPTCP_MPJ_SYN) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYN;
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->token = get_unaligned_be32(ptr);
@@ -135,6 +135,7 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->backup, mp_opt->join_id,
mp_opt->token, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYNACK;
mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP;
mp_opt->join_id = *ptr++;
mp_opt->thmac = get_unaligned_be64(ptr);
@@ -145,11 +146,10 @@ static void mptcp_parse_option(const struct sk_buff *skb,
mp_opt->backup, mp_opt->join_id,
mp_opt->thmac, mp_opt->nonce);
} else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) {
+ mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK;
ptr += 2;
memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN);
pr_debug("MP_JOIN hmac");
- } else {
- mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ;
}
break;
@@ -962,9 +962,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
/* subflows are fully established as soon as we get any
* additional ack, including ADD_ADDR.
*/
- subflow->fully_established = 1;
- WRITE_ONCE(msk->fully_established, true);
- goto check_notify;
+ goto set_fully_established;
}
/* If the first established packet does not contain MP_CAPABLE + data
@@ -983,10 +981,13 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
if (mp_opt->deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
-set_fully_established:
if (unlikely(!READ_ONCE(msk->pm.server_side)))
pr_warn_once("bogus mpc option on established client sk");
- mptcp_subflow_fully_established(subflow, mp_opt);
+
+set_fully_established:
+ mptcp_data_lock((struct sock *)msk);
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
+ mptcp_data_unlock((struct sock *)msk);
check_notify:
/* if the subflow is not already linked into the conn_list, we can't
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index bf4d96f6f99a..58d17d9604e7 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -396,19 +396,6 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
-static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned int nr,
- const struct mptcp_addr_info *addr)
-{
- int i;
-
- for (i = 0; i < nr; i++) {
- if (addrs[i].id == addr->id)
- return true;
- }
-
- return false;
-}
-
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
@@ -440,18 +427,34 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
msk->pm.subflows++;
addrs[i++] = remote;
} else {
+ DECLARE_BITMAP(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ /* Forbid creation of new subflows matching existing
+ * ones, possibly already created by incoming ADD_ADDR
+ */
+ bitmap_zero(unavail_id, MPTCP_PM_MAX_ADDR_ID + 1);
+ mptcp_for_each_subflow(msk, subflow)
+ if (READ_ONCE(subflow->local_id) == local->id)
+ __set_bit(subflow->remote_id, unavail_id);
+
mptcp_for_each_subflow(msk, subflow) {
ssk = mptcp_subflow_tcp_sock(subflow);
remote_address((struct sock_common *)ssk, &addrs[i]);
- addrs[i].id = subflow->remote_id;
+ addrs[i].id = READ_ONCE(subflow->remote_id);
if (deny_id0 && !addrs[i].id)
continue;
+ if (test_bit(addrs[i].id, unavail_id))
+ continue;
+
if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
continue;
- if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
- msk->pm.subflows < subflows_max) {
+ if (msk->pm.subflows < subflows_max) {
+ /* forbid creating multiple address towards
+ * this id
+ */
+ __set_bit(addrs[i].id, unavail_id);
msk->pm.subflows++;
i++;
}
@@ -799,18 +802,18 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
mptcp_for_each_subflow_safe(msk, subflow, tmp) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ u8 remote_id = READ_ONCE(subflow->remote_id);
int how = RCV_SHUTDOWN | SEND_SHUTDOWN;
- u8 id = subflow->local_id;
+ u8 id = subflow_get_local_id(subflow);
- if (rm_type == MPTCP_MIB_RMADDR && subflow->remote_id != rm_id)
+ if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id)
continue;
if (rm_type == MPTCP_MIB_RMSUBFLOW && !mptcp_local_id_match(msk, id, rm_id))
continue;
pr_debug(" -> %s rm_list_ids[%d]=%u local_id=%u remote_id=%u mpc_id=%u",
rm_type == MPTCP_MIB_RMADDR ? "address" : "subflow",
- i, rm_id, subflow->local_id, subflow->remote_id,
- msk->mpc_endpoint_id);
+ i, rm_id, id, remote_id, msk->mpc_endpoint_id);
spin_unlock_bh(&msk->pm.lock);
mptcp_subflow_shutdown(sk, ssk, how);
@@ -901,7 +904,8 @@ static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
}
static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
struct mptcp_pm_addr_entry *cur, *del_entry = NULL;
unsigned int addr_max;
@@ -949,7 +953,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
}
}
- if (!entry->addr.id) {
+ if (!entry->addr.id && needs_id) {
find_next:
entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
@@ -960,7 +964,7 @@ find_next:
}
}
- if (!entry->addr.id)
+ if (!entry->addr.id && needs_id)
goto out;
__set_bit(entry->addr.id, pernet->id_bitmap);
@@ -1048,6 +1052,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
+ /* We don't use mptcp_set_state() here because it needs to be called
+ * under the msk socket lock. For the moment, that will not bring
+ * anything more than only calling inet_sk_state_store(), because the
+ * old status is known (TCP_CLOSE).
+ */
inet_sk_state_store(newsk, TCP_LISTEN);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
@@ -1087,7 +1096,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
entry->ifindex = 0;
entry->flags = MPTCP_PM_ADDR_FLAG_IMPLICIT;
entry->lsk = NULL;
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true);
if (ret < 0)
kfree(entry);
@@ -1100,7 +1109,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc
static const struct genl_multicast_group mptcp_pm_mcgrps[] = {
[MPTCP_PM_CMD_GRP_OFFSET] = { .name = MPTCP_PM_CMD_GRP_NAME, },
[MPTCP_PM_EV_GRP_OFFSET] = { .name = MPTCP_PM_EV_GRP_NAME,
- .flags = GENL_UNS_ADMIN_PERM,
+ .flags = GENL_MCAST_CAP_NET_ADMIN,
},
};
@@ -1280,6 +1289,18 @@ next:
return 0;
}
+static bool mptcp_pm_has_addr_attr_id(const struct nlattr *attr,
+ struct genl_info *info)
+{
+ struct nlattr *tb[MPTCP_PM_ADDR_ATTR_MAX + 1];
+
+ if (!nla_parse_nested_deprecated(tb, MPTCP_PM_ADDR_ATTR_MAX, attr,
+ mptcp_pm_address_nl_policy, info->extack) &&
+ tb[MPTCP_PM_ADDR_ATTR_ID])
+ return true;
+ return false;
+}
+
int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr *attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
@@ -1321,7 +1342,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
goto out_free;
}
}
- ret = mptcp_pm_nl_append_new_local_addr(pernet, entry);
+ ret = mptcp_pm_nl_append_new_local_addr(pernet, entry,
+ !mptcp_pm_has_addr_attr_id(attr, info));
if (ret < 0) {
GENL_SET_ERR_MSG_FMT(info, "too many addresses or duplicate one: %d", ret);
goto out_free;
@@ -1975,7 +1997,7 @@ static int mptcp_event_add_subflow(struct sk_buff *skb, const struct sock *ssk)
if (WARN_ON_ONCE(!sf))
return -EINVAL;
- if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, sf->local_id))
+ if (nla_put_u8(skb, MPTCP_ATTR_LOC_ID, subflow_get_local_id(sf)))
return -EMSGSIZE;
if (nla_put_u8(skb, MPTCP_ATTR_REM_ID, sf->remote_id))
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 5c01b9bc619a..bc97cc30f013 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -26,7 +26,8 @@ void mptcp_free_local_addr_list(struct mptcp_sock *msk)
}
static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
- struct mptcp_pm_addr_entry *entry)
+ struct mptcp_pm_addr_entry *entry,
+ bool needs_id)
{
DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
struct mptcp_pm_addr_entry *match = NULL;
@@ -41,7 +42,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
addr_match = mptcp_addresses_equal(&e->addr, &entry->addr, true);
- if (addr_match && entry->addr.id == 0)
+ if (addr_match && entry->addr.id == 0 && needs_id)
entry->addr.id = e->addr.id;
id_match = (e->addr.id == entry->addr.id);
if (addr_match && id_match) {
@@ -64,7 +65,7 @@ static int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
}
*e = *entry;
- if (!e->addr.id)
+ if (!e->addr.id && needs_id)
e->addr.id = find_next_zero_bit(id_bitmap,
MPTCP_PM_MAX_ADDR_ID + 1,
1);
@@ -130,10 +131,21 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
struct mptcp_addr_info *skc)
{
- struct mptcp_pm_addr_entry new_entry;
+ struct mptcp_pm_addr_entry *entry = NULL, *e, new_entry;
__be16 msk_sport = ((struct inet_sock *)
inet_sk((struct sock *)msk))->inet_sport;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(e, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&e->addr, skc, false)) {
+ entry = e;
+ break;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+ if (entry)
+ return entry->addr.id;
+
memset(&new_entry, 0, sizeof(struct mptcp_pm_addr_entry));
new_entry.addr = *skc;
new_entry.addr.id = 0;
@@ -142,7 +154,7 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk,
if (new_entry.addr.port == msk_sport)
new_entry.addr.port = 0;
- return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry);
+ return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true);
}
int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
@@ -187,7 +199,7 @@ int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info)
goto announce_err;
}
- err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &addr_val, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto announce_err;
@@ -222,7 +234,7 @@ static int mptcp_userspace_pm_remove_id_zero_address(struct mptcp_sock *msk,
lock_sock(sk);
mptcp_for_each_subflow(msk, subflow) {
- if (subflow->local_id == 0) {
+ if (READ_ONCE(subflow->local_id) == 0) {
has_id_0 = true;
break;
}
@@ -276,12 +288,12 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
if (!mptcp_pm_is_userspace(msk)) {
GENL_SET_ERR_MSG(info, "invalid request; userspace PM not selected");
- goto remove_err;
+ goto out;
}
if (id_val == 0) {
err = mptcp_userspace_pm_remove_id_zero_address(msk, info);
- goto remove_err;
+ goto out;
}
lock_sock(sk);
@@ -296,7 +308,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
release_sock(sk);
- goto remove_err;
+ goto out;
}
list_move(&match->list, &free_list);
@@ -310,7 +322,7 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
}
err = 0;
- remove_err:
+out:
sock_put(sk);
return err;
}
@@ -367,7 +379,7 @@ int mptcp_pm_nl_subflow_create_doit(struct sk_buff *skb, struct genl_info *info)
}
local.addr = addr_l;
- err = mptcp_userspace_pm_append_new_local_addr(msk, &local);
+ err = mptcp_userspace_pm_append_new_local_addr(msk, &local, false);
if (err < 0) {
GENL_SET_ERR_MSG(info, "did not match address and id");
goto create_err;
@@ -483,6 +495,16 @@ int mptcp_pm_nl_subflow_destroy_doit(struct sk_buff *skb, struct genl_info *info
goto destroy_err;
}
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (addr_l.family == AF_INET && ipv6_addr_v4mapped(&addr_r.addr6)) {
+ ipv6_addr_set_v4mapped(addr_l.addr.s_addr, &addr_l.addr6);
+ addr_l.family = AF_INET6;
+ }
+ if (addr_r.family == AF_INET && ipv6_addr_v4mapped(&addr_l.addr6)) {
+ ipv6_addr_set_v4mapped(addr_r.addr.s_addr, &addr_r.addr6);
+ addr_r.family = AF_INET6;
+ }
+#endif
if (addr_l.family != addr_r.family) {
GENL_SET_ERR_MSG(info, "address families do not match");
err = -EINVAL;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 5cd5c3f535a8..7833a49f6214 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -55,28 +55,14 @@ static u64 mptcp_wnd_end(const struct mptcp_sock *msk)
return READ_ONCE(msk->wnd_end);
}
-static bool mptcp_is_tcpsk(struct sock *sk)
+static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk)
{
- struct socket *sock = sk->sk_socket;
-
- if (unlikely(sk->sk_prot == &tcp_prot)) {
- /* we are being invoked after mptcp_accept() has
- * accepted a non-mp-capable flow: sk is a tcp_sk,
- * not an mptcp one.
- *
- * Hand the socket over to tcp so all further socket ops
- * bypass mptcp.
- */
- WRITE_ONCE(sock->ops, &inet_stream_ops);
- return true;
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- } else if (unlikely(sk->sk_prot == &tcpv6_prot)) {
- WRITE_ONCE(sock->ops, &inet6_stream_ops);
- return true;
+ if (sk->sk_prot == &tcpv6_prot)
+ return &inet6_stream_ops;
#endif
- }
-
- return false;
+ WARN_ON_ONCE(sk->sk_prot != &tcp_prot);
+ return &inet_stream_ops;
}
static int __mptcp_socket_create(struct mptcp_sock *msk)
@@ -99,7 +85,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
- subflow->local_id_valid = 1;
+ WRITE_ONCE(subflow->local_id, 0);
mptcp_sock_graft(msk->first, sk->sk_socket);
iput(SOCK_INODE(ssock));
@@ -443,11 +429,11 @@ static void mptcp_check_data_fin_ack(struct sock *sk)
switch (sk->sk_state) {
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
+ mptcp_set_state(sk, TCP_FIN_WAIT2);
break;
case TCP_CLOSING:
case TCP_LAST_ACK:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
}
@@ -608,13 +594,13 @@ static bool mptcp_check_data_fin(struct sock *sk)
switch (sk->sk_state) {
case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
+ mptcp_set_state(sk, TCP_CLOSE_WAIT);
break;
case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
+ mptcp_set_state(sk, TCP_CLOSING);
break;
case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
break;
default:
/* Other states not expected */
@@ -789,7 +775,7 @@ static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
*/
ssk_state = inet_sk_state_load(ssk);
if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
- inet_sk_state_store(sk, ssk_state);
+ mptcp_set_state(sk, ssk_state);
WRITE_ONCE(sk->sk_err, -err);
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -1274,6 +1260,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
mpext = mptcp_get_ext(skb);
if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
+ tcp_mark_push(tcp_sk(ssk), skb);
goto alloc_skb;
}
@@ -1519,8 +1506,11 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
void mptcp_check_and_set_pending(struct sock *sk)
{
- if (mptcp_send_head(sk))
- mptcp_sk(sk)->push_pending |= BIT(MPTCP_PUSH_PENDING);
+ if (mptcp_send_head(sk)) {
+ mptcp_data_lock(sk);
+ mptcp_sk(sk)->cb_flags |= BIT(MPTCP_PUSH_PENDING);
+ mptcp_data_unlock(sk);
+ }
}
static int __subflow_push_pending(struct sock *sk, struct sock *ssk,
@@ -1974,6 +1964,9 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
if (copied <= 0)
return;
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, msk->first);
+
msk->rcvq_space.copied += copied;
mstamp = div_u64(tcp_clock_ns(), NSEC_PER_USEC);
@@ -2328,9 +2321,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
if (__mptcp_check_fallback(msk))
return false;
- if (tcp_rtx_and_write_queues_empty(sk))
- return false;
-
/* the closing socket has some data untransmitted and/or unacked:
* some data in the mptcp rtx queue has not really xmitted yet.
* keep it simple and re-inject the whole mptcp level rtx queue
@@ -2477,7 +2467,7 @@ out:
inet_sk_state_load(msk->first) == TCP_CLOSE) {
if (sk->sk_state != TCP_ESTABLISHED ||
msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_close_wake_up(sk);
} else {
mptcp_start_tout_timer(sk);
@@ -2572,7 +2562,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
WRITE_ONCE(sk->sk_err, ECONNRESET);
}
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@@ -2707,7 +2697,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@@ -2885,6 +2875,24 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
release_sock(ssk);
}
+void mptcp_set_state(struct sock *sk, int state)
+{
+ int oldstate = sk->sk_state;
+
+ switch (state) {
+ case TCP_ESTABLISHED:
+ if (oldstate != TCP_ESTABLISHED)
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ break;
+
+ default:
+ if (oldstate == TCP_ESTABLISHED)
+ MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB);
+ }
+
+ inet_sk_state_store(sk, state);
+}
+
static const unsigned char new_state[16] = {
/* current state: new state: action: */
[0 /* (Invalid) */] = TCP_CLOSE,
@@ -2907,7 +2915,7 @@ static int mptcp_close_state(struct sock *sk)
int next = (int)new_state[sk->sk_state];
int ns = next & TCP_STATE_MASK;
- inet_sk_state_store(sk, ns);
+ mptcp_set_state(sk, ns);
return next & TCP_ACTION_FIN;
}
@@ -3018,7 +3026,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
goto cleanup;
}
@@ -3061,7 +3069,7 @@ cleanup:
* state, let's not keep resources busy for no reasons
*/
if (subflows_alive == 0)
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
@@ -3127,7 +3135,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
return -EBUSY;
mptcp_check_listen_stop(sk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
mptcp_stop_rtx_timer(sk);
mptcp_stop_tout_timer(sk);
@@ -3141,7 +3149,6 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
WRITE_ONCE(msk->flags, 0);
msk->cb_flags = 0;
- msk->push_pending = 0;
msk->recovery = false;
msk->can_ack = false;
msk->fully_established = false;
@@ -3157,6 +3164,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
msk->bytes_received = 0;
msk->bytes_sent = 0;
msk->bytes_retrans = 0;
+ msk->rcvspace_init = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3170,8 +3178,50 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}
+
+static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk)
+{
+ const struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
+ struct ipv6_pinfo *newnp;
+
+ newnp = inet6_sk(newsk);
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ if (!opt)
+ net_warn_ratelimited("%s: Failed to copy ip6 options\n", __func__);
+ }
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+}
#endif
+static void mptcp_copy_ip_options(struct sock *newsk, const struct sock *sk)
+{
+ struct ip_options_rcu *inet_opt, *newopt = NULL;
+ const struct inet_sock *inet = inet_sk(sk);
+ struct inet_sock *newinet;
+
+ newinet = inet_sk(newsk);
+
+ rcu_read_lock();
+ inet_opt = rcu_dereference(inet->inet_opt);
+ if (inet_opt) {
+ newopt = sock_kmalloc(newsk, sizeof(*inet_opt) +
+ inet_opt->opt.optlen, GFP_ATOMIC);
+ if (newopt)
+ memcpy(newopt, inet_opt, sizeof(*inet_opt) +
+ inet_opt->opt.optlen);
+ else
+ net_warn_ratelimited("%s: Failed to copy ip options\n", __func__);
+ }
+ RCU_INIT_POINTER(newinet->inet_opt, newopt);
+ rcu_read_unlock();
+}
+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
const struct mptcp_options_received *mp_opt,
struct sock *ssk,
@@ -3179,6 +3229,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
if (!nsk)
@@ -3191,6 +3242,13 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_init_sock(nsk);
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (nsk->sk_family == AF_INET6)
+ mptcp_copy_ip6_options(nsk, sk);
+ else
+#endif
+ mptcp_copy_ip_options(nsk, sk);
+
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
@@ -3202,7 +3260,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->write_seq = subflow_req->idsn + 1;
msk->snd_nxt = msk->write_seq;
msk->snd_una = msk->write_seq;
- msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
+ msk->wnd_end = msk->snd_nxt + tcp_sk(ssk)->snd_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
mptcp_init_sched(msk, mptcp_sk(sk)->sched);
@@ -3215,11 +3273,12 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
/* this can't race with mptcp_close(), as the msk is
* not yet exposted to user-space
*/
- inet_sk_state_store(nsk, TCP_ESTABLISHED);
+ mptcp_set_state(nsk, TCP_ESTABLISHED);
/* The msk maintain a ref to each subflow in the connections list */
WRITE_ONCE(msk->first, ssk);
- list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ subflow = mptcp_subflow_ctx(ssk);
+ list_add(&subflow->node, &msk->conn_list);
sock_hold(ssk);
/* new mpc subflow takes ownership of the newly
@@ -3234,6 +3293,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
__mptcp_propagate_sndbuf(nsk, ssk);
mptcp_rcv_space_init(msk, ssk);
+
+ if (mp_opt->suboptions & OPTION_MPTCP_MPC_ACK)
+ __mptcp_subflow_fully_established(msk, subflow, mp_opt);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
@@ -3244,6 +3306,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
{
const struct tcp_sock *tp = tcp_sk(ssk);
+ msk->rcvspace_init = 1;
msk->rcvq_space.copied = 0;
msk->rcvq_space.rtt_us = 0;
@@ -3254,46 +3317,6 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk)
TCP_INIT_CWND * tp->advmss);
if (msk->rcvq_space.space == 0)
msk->rcvq_space.space = TCP_INIT_CWND * TCP_MSS_DEFAULT;
-
- WRITE_ONCE(msk->wnd_end, msk->snd_nxt + tcp_sk(ssk)->snd_wnd);
-}
-
-static struct sock *mptcp_accept(struct sock *ssk, int flags, int *err,
- bool kern)
-{
- struct sock *newsk;
-
- pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
- newsk = inet_csk_accept(ssk, flags, err, kern);
- if (!newsk)
- return NULL;
-
- pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
- if (sk_is_mptcp(newsk)) {
- struct mptcp_subflow_context *subflow;
- struct sock *new_mptcp_sock;
-
- subflow = mptcp_subflow_ctx(newsk);
- new_mptcp_sock = subflow->conn;
-
- /* is_mptcp should be false if subflow->conn is missing, see
- * subflow_syn_recv_sock()
- */
- if (WARN_ON_ONCE(!new_mptcp_sock)) {
- tcp_sk(newsk)->is_mptcp = 0;
- goto out;
- }
-
- newsk = new_mptcp_sock;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
- } else {
- MPTCP_INC_STATS(sock_net(ssk),
- MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
- }
-
-out:
- newsk->sk_kern_sock = kern;
- return newsk;
}
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
@@ -3367,8 +3390,7 @@ static void mptcp_release_cb(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(sk);
for (;;) {
- unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
- msk->push_pending;
+ unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED);
struct list_head join_list;
if (!flags)
@@ -3384,7 +3406,6 @@ static void mptcp_release_cb(struct sock *sk)
* datapath acquires the msk socket spinlock while helding
* the subflow socket lock
*/
- msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
@@ -3512,13 +3533,8 @@ void mptcp_finish_connect(struct sock *ssk)
* accessing the field below
*/
WRITE_ONCE(msk->local_key, subflow->local_key);
- WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
- WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- WRITE_ONCE(msk->snd_una, msk->write_seq);
mptcp_pm_new_connection(msk, ssk, 0);
-
- mptcp_rcv_space_init(msk, ssk);
}
void mptcp_sock_graft(struct sock *sk, struct socket *parent)
@@ -3674,7 +3690,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (IS_ERR(ssk))
return PTR_ERR(ssk);
- inet_sk_state_store(sk, TCP_SYN_SENT);
+ mptcp_set_state(sk, TCP_SYN_SENT);
subflow = mptcp_subflow_ctx(ssk);
#ifdef CONFIG_TCP_MD5SIG
/* no MPTCP if MD5SIG is enabled on this socket or we may run out of
@@ -3724,7 +3740,7 @@ out:
if (unlikely(err)) {
/* avoid leaving a dangling token in an unconnected socket */
mptcp_token_destroy(msk);
- inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_set_state(sk, TCP_CLOSE);
return err;
}
@@ -3739,7 +3755,6 @@ static struct proto mptcp_prot = {
.connect = mptcp_connect,
.disconnect = mptcp_disconnect,
.close = mptcp_close,
- .accept = mptcp_accept,
.setsockopt = mptcp_setsockopt,
.getsockopt = mptcp_getsockopt,
.shutdown = mptcp_shutdown,
@@ -3814,13 +3829,13 @@ static int mptcp_listen(struct socket *sock, int backlog)
goto unlock;
}
- inet_sk_state_store(sk, TCP_LISTEN);
+ mptcp_set_state(sk, TCP_LISTEN);
sock_set_flag(sk, SOCK_RCU_FREE);
lock_sock(ssk);
err = __inet_listen_sk(ssk, backlog);
release_sock(ssk);
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ mptcp_set_state(sk, inet_sk_state_load(ssk));
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -3849,18 +3864,36 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssk)
return -EINVAL;
- newsk = mptcp_accept(ssk, flags, &err, kern);
+ pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk));
+ newsk = inet_csk_accept(ssk, flags, &err, kern);
if (!newsk)
return err;
- lock_sock(newsk);
-
- __inet_accept(sock, newsock, newsk);
- if (!mptcp_is_tcpsk(newsock->sk)) {
- struct mptcp_sock *msk = mptcp_sk(newsk);
+ pr_debug("newsk=%p, subflow is mptcp=%d", newsk, sk_is_mptcp(newsk));
+ if (sk_is_mptcp(newsk)) {
struct mptcp_subflow_context *subflow;
+ struct sock *new_mptcp_sock;
+
+ subflow = mptcp_subflow_ctx(newsk);
+ new_mptcp_sock = subflow->conn;
+
+ /* is_mptcp should be false if subflow->conn is missing, see
+ * subflow_syn_recv_sock()
+ */
+ if (WARN_ON_ONCE(!new_mptcp_sock)) {
+ tcp_sk(newsk)->is_mptcp = 0;
+ goto tcpfallback;
+ }
+
+ newsk = new_mptcp_sock;
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
+
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+ msk = mptcp_sk(newsk);
msk->in_accept_queue = 0;
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
@@ -3880,8 +3913,23 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
__mptcp_close_ssk(newsk, msk->first,
mptcp_subflow_ctx(msk->first), 0);
if (unlikely(list_is_singular(&msk->conn_list)))
- inet_sk_state_store(newsk, TCP_CLOSE);
+ mptcp_set_state(newsk, TCP_CLOSE);
}
+ } else {
+ MPTCP_INC_STATS(sock_net(ssk),
+ MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
+tcpfallback:
+ newsk->sk_kern_sock = kern;
+ lock_sock(newsk);
+ __inet_accept(sock, newsock, newsk);
+ /* we are being invoked after accepting a non-mp-capable
+ * flow: sk is a tcp_sk, not an mptcp one.
+ *
+ * Hand the socket over to tcp so all further socket ops
+ * bypass mptcp.
+ */
+ WRITE_ONCE(newsock->sk->sk_socket->ops,
+ mptcp_fallback_tcp_ops(newsock->sk));
}
release_sock(newsk);
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index aa1a93fe40ff..07f6242afc1a 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -286,7 +286,6 @@ struct mptcp_sock {
int rmem_released;
unsigned long flags;
unsigned long cb_flags;
- unsigned long push_pending;
bool recovery; /* closing subflow write queue reinjected */
bool can_ack;
bool fully_established;
@@ -305,7 +304,8 @@ struct mptcp_sock {
nodelay:1,
fastopening:1,
in_accept_queue:1,
- free_first:1;
+ free_first:1,
+ rcvspace_init:1;
struct work_struct work;
struct sk_buff *ooo_last_skb;
struct rb_root out_of_order_queue;
@@ -491,10 +491,9 @@ struct mptcp_subflow_context {
remote_key_valid : 1, /* received the peer key from */
disposable : 1, /* ctx can be free at ulp release time */
stale : 1, /* unable to snd/rcv data, do not use for xmit */
- local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1, /* at least one csum validated */
is_mptfo : 1, /* subflow is doing TFO */
- __unused : 9;
+ __unused : 10;
bool data_avail;
bool scheduled;
u32 remote_nonce;
@@ -505,7 +504,7 @@ struct mptcp_subflow_context {
u8 hmac[MPTCPOPT_HMAC_LEN]; /* MPJ subflow only */
u64 iasn; /* initial ack sequence number, MPC subflows only */
};
- u8 local_id;
+ s16 local_id; /* if negative not initialized yet */
u8 remote_id;
u8 reset_seen:1;
u8 reset_transient:1;
@@ -556,6 +555,7 @@ mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow)
{
memset(&subflow->reset, 0, sizeof(subflow->reset));
subflow->request_mptcp = 1;
+ WRITE_ONCE(subflow->local_id, -1);
}
static inline u64
@@ -622,8 +622,9 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
unsigned int mptcp_close_timeout(const struct sock *sk);
int mptcp_get_pm_type(const struct net *net);
const char *mptcp_get_scheduler(const struct net *net);
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
bool __mptcp_retransmit_pending_data(struct sock *sk);
void mptcp_check_and_set_pending(struct sock *sk);
void __mptcp_push_pending(struct sock *sk, unsigned int flags);
@@ -641,6 +642,7 @@ bool __mptcp_close(struct sock *sk, long timeout);
void mptcp_cancel_work(struct sock *sk);
void __mptcp_unaccepted_force_close(struct sock *sk);
void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
+void mptcp_set_state(struct sock *sk, int state);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
@@ -788,6 +790,16 @@ static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
READ_ONCE(msk->write_seq) == READ_ONCE(msk->snd_nxt);
}
+static inline void mptcp_write_space(struct sock *sk)
+{
+ if (sk_stream_is_writeable(sk)) {
+ /* pairs with memory barrier in mptcp_poll */
+ smp_mb();
+ if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
+ sk_stream_write_space(sk);
+ }
+}
+
static inline void __mptcp_sync_sndbuf(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
@@ -806,6 +818,7 @@ static inline void __mptcp_sync_sndbuf(struct sock *sk)
/* the msk max wmem limit is <nr_subflows> * tcp wmem[2] */
WRITE_ONCE(sk->sk_sndbuf, new_sndbuf);
+ mptcp_write_space(sk);
}
/* The called held both the msk socket and the subflow socket locks,
@@ -836,16 +849,6 @@ static inline void mptcp_propagate_sndbuf(struct sock *sk, struct sock *ssk)
local_bh_enable();
}
-static inline void mptcp_write_space(struct sock *sk)
-{
- if (sk_stream_is_writeable(sk)) {
- /* pairs with memory barrier in mptcp_poll */
- smp_mb();
- if (test_and_clear_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags))
- sk_stream_write_space(sk);
- }
-}
-
void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags);
#define MPTCP_TOKEN_MAX_RETRIES 4
@@ -951,8 +954,8 @@ void mptcp_event_pm_listener(const struct sock *ssk,
enum mptcp_event_type event);
bool mptcp_userspace_pm_active(const struct mptcp_sock *msk);
-void mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt);
+void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt);
void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subflow,
struct request_sock *req);
@@ -1020,6 +1023,15 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
+static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflow)
+{
+ int local_id = READ_ONCE(subflow->local_id);
+
+ if (local_id < 0)
+ return 0;
+ return local_id;
+}
+
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
@@ -1075,6 +1087,15 @@ static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
+static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
+{
+ struct sock *ssk = READ_ONCE(msk->first);
+
+ return ssk && ((1 << inet_sk_state_load(ssk)) &
+ (TCPF_ESTABLISHED | TCPF_SYN_SENT |
+ TCPF_SYN_RECV | TCPF_LISTEN));
+}
+
static inline void mptcp_do_fallback(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1118,7 +1139,8 @@ static inline bool subflow_simultaneous_connect(struct sock *sk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1) &&
+ return (1 << sk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING) &&
is_active_ssk(subflow) &&
!subflow->conn_finished;
}
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 353680733700..c40f1428e602 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -440,6 +440,8 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* should work fine */
case IP_FREEBIND:
case IP_TRANSPARENT:
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:
/* the following are control cmsg related */
case IP_PKTINFO:
@@ -455,7 +457,6 @@ static bool mptcp_supported_sockopt(int level, int optname)
/* common stuff that need some love */
case IP_TOS:
case IP_TTL:
- case IP_BIND_ADDRESS_NO_PORT:
case IP_MTU_DISCOVER:
case IP_RECVERR:
@@ -683,8 +684,8 @@ static int mptcp_setsockopt_sol_tcp_nodelay(struct mptcp_sock *msk, sockptr_t op
return 0;
}
-static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int optname,
- sockptr_t optval, unsigned int optlen)
+static int mptcp_setsockopt_sol_ip_set(struct mptcp_sock *msk, int optname,
+ sockptr_t optval, unsigned int optlen)
{
struct sock *sk = (struct sock *)msk;
struct sock *ssk;
@@ -710,6 +711,14 @@ static int mptcp_setsockopt_sol_ip_set_transparent(struct mptcp_sock *msk, int o
inet_assign_bit(TRANSPARENT, ssk,
inet_test_bit(TRANSPARENT, sk));
break;
+ case IP_BIND_ADDRESS_NO_PORT:
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ break;
+ case IP_LOCAL_PORT_RANGE:
+ WRITE_ONCE(inet_sk(ssk)->local_port_range,
+ READ_ONCE(inet_sk(sk)->local_port_range));
+ break;
default:
release_sock(sk);
WARN_ON_ONCE(1);
@@ -755,7 +764,9 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_FREEBIND:
case IP_TRANSPARENT:
- return mptcp_setsockopt_sol_ip_set_transparent(msk, optname, optval, optlen);
+ case IP_BIND_ADDRESS_NO_PORT:
+ case IP_LOCAL_PORT_RANGE:
+ return mptcp_setsockopt_sol_ip_set(msk, optname, optval, optlen);
case IP_TOS:
return mptcp_setsockopt_v4_set_tos(msk, optname, optval, optlen);
}
@@ -938,6 +949,8 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_bytes_sent = msk->bytes_sent;
info->mptcpi_bytes_received = msk->bytes_received;
info->mptcpi_bytes_retrans = msk->bytes_retrans;
+ info->mptcpi_subflows_total = info->mptcpi_subflows +
+ __mptcp_has_initial_subflow(msk);
unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -1348,6 +1361,12 @@ static int mptcp_getsockopt_v4(struct mptcp_sock *msk, int optname,
switch (optname) {
case IP_TOS:
return mptcp_put_int_option(msk, optval, optlen, READ_ONCE(inet_sk(sk)->tos));
+ case IP_BIND_ADDRESS_NO_PORT:
+ return mptcp_put_int_option(msk, optval, optlen,
+ inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ case IP_LOCAL_PORT_RANGE:
+ return mptcp_put_int_option(msk, optval, optlen,
+ READ_ONCE(inet_sk(sk)->local_port_range));
}
return -EOPNOTSUPP;
@@ -1448,6 +1467,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk));
inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk));
+ inet_assign_bit(BIND_ADDRESS_NO_PORT, ssk, inet_test_bit(BIND_ADDRESS_NO_PORT, sk));
+ WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
}
void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 852b3f4af000..71ba86246ff8 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -157,8 +157,8 @@ static int subflow_check_req(struct request_sock *req,
mptcp_get_options(skb, &mp_opt);
- opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
- opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYN);
+ opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYN);
if (opt_mp_capable) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE);
@@ -254,8 +254,8 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
subflow_init_req(req, sk_listener);
mptcp_get_options(skb, &mp_opt);
- opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
- opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
+ opt_mp_capable = !!(mp_opt.suboptions & OPTION_MPTCP_MPC_ACK);
+ opt_mp_join = !!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK);
if (opt_mp_capable && opt_mp_join)
return -EINVAL;
@@ -421,29 +421,26 @@ static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct soc
void __mptcp_sync_state(struct sock *sk, int state)
{
+ struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sock *ssk = msk->first;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ __mptcp_propagate_sndbuf(sk, ssk);
+ if (!msk->rcvspace_init)
+ mptcp_rcv_space_init(msk, ssk);
- __mptcp_propagate_sndbuf(sk, msk->first);
if (sk->sk_state == TCP_SYN_SENT) {
- inet_sk_state_store(sk, state);
+ /* subflow->idsn is always available is TCP_SYN_SENT state,
+ * even for the FASTOPEN scenarios
+ */
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->snd_nxt, msk->write_seq);
+ mptcp_set_state(sk, state);
sk->sk_state_change(sk);
}
}
-static void mptcp_propagate_state(struct sock *sk, struct sock *ssk)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- mptcp_data_lock(sk);
- if (!sock_owned_by_user(sk)) {
- __mptcp_sync_state(sk, ssk->sk_state);
- } else {
- msk->pending_state = ssk->sk_state;
- __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
- }
- mptcp_data_unlock(sk);
-}
-
static void subflow_set_remote_key(struct mptcp_sock *msk,
struct mptcp_subflow_context *subflow,
const struct mptcp_options_received *mp_opt)
@@ -465,6 +462,31 @@ static void subflow_set_remote_key(struct mptcp_sock *msk,
atomic64_set(&msk->rcv_wnd_sent, subflow->iasn);
}
+static void mptcp_propagate_state(struct sock *sk, struct sock *ssk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_data_lock(sk);
+ if (mp_opt) {
+ /* Options are available only in the non fallback cases
+ * avoid updating rx path fields otherwise
+ */
+ WRITE_ONCE(msk->snd_una, subflow->idsn + 1);
+ WRITE_ONCE(msk->wnd_end, subflow->idsn + 1 + tcp_sk(ssk)->snd_wnd);
+ subflow_set_remote_key(msk, subflow, mp_opt);
+ }
+
+ if (!sock_owned_by_user(sk)) {
+ __mptcp_sync_state(sk, ssk->sk_state);
+ } else {
+ msk->pending_state = ssk->sk_state;
+ __set_bit(MPTCP_SYNC_STATE, &msk->cb_flags);
+ }
+ mptcp_data_unlock(sk);
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -486,7 +508,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
MPTCP_INC_STATS(sock_net(sk),
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
mptcp_do_fallback(sk);
@@ -499,14 +521,13 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (mp_opt.deny_join_id0)
WRITE_ONCE(msk->pm.remote_deny_join_id0, true);
subflow->mp_capable = 1;
- subflow_set_remote_key(msk, subflow, &mp_opt);
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
mptcp_finish_connect(sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, &mp_opt);
} else if (subflow->request_join) {
u8 hmac[SHA256_DIGEST_SIZE];
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ)) {
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_SYNACK)) {
subflow->reset_reason = MPTCP_RST_EMPTCP;
goto do_reset;
}
@@ -514,7 +535,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
- subflow->remote_id = mp_opt.join_id;
+ WRITE_ONCE(subflow->remote_id, mp_opt.join_id);
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -545,8 +566,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
}
} else if (mptcp_check_fallback(sk)) {
fallback:
- mptcp_rcv_space_init(msk, sk);
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
return;
@@ -557,8 +577,8 @@ do_reset:
static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
{
- subflow->local_id = local_id;
- subflow->local_id_valid = 1;
+ WARN_ON_ONCE(local_id < 0 || local_id > 255);
+ WRITE_ONCE(subflow->local_id, local_id);
}
static int subflow_chk_local_id(struct sock *sk)
@@ -567,7 +587,7 @@ static int subflow_chk_local_id(struct sock *sk)
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int err;
- if (likely(subflow->local_id_valid))
+ if (likely(subflow->local_id >= 0))
return 0;
err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
@@ -731,17 +751,16 @@ void mptcp_subflow_drop_ctx(struct sock *ssk)
kfree_rcu(ctx, rcu);
}
-void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
- const struct mptcp_options_received *mp_opt)
+void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
+ struct mptcp_subflow_context *subflow,
+ const struct mptcp_options_received *mp_opt)
{
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-
subflow_set_remote_key(msk, subflow, mp_opt);
subflow->fully_established = 1;
WRITE_ONCE(msk->fully_established, true);
if (subflow->is_mptfo)
- mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
+ __mptcp_fastopen_gen_msk_ackseq(msk, subflow, mp_opt);
}
static struct sock *subflow_syn_recv_sock(const struct sock *sk,
@@ -783,12 +802,13 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* options.
*/
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC))
+ if (!(mp_opt.suboptions &
+ (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_ACK)))
fallback = true;
} else if (subflow_req->mp_join) {
mptcp_get_options(skb, &mp_opt);
- if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
+ if (!(mp_opt.suboptions & OPTION_MPTCP_MPJ_ACK) ||
!subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC);
@@ -833,7 +853,6 @@ create_child:
* mpc option
*/
if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
- mptcp_subflow_fully_established(ctx, &mp_opt);
mptcp_pm_fully_established(owner, child);
ctx->pm_notified = 1;
}
@@ -1548,7 +1567,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->remote_id = remote_id;
+ WRITE_ONCE(subflow->remote_id, remote_id);
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
subflow->subflow_id = msk->subflow_id++;
@@ -1712,6 +1731,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
pr_debug("subflow=%p", ctx);
ctx->tcp_sock = sk;
+ WRITE_ONCE(ctx->local_id, -1);
return ctx;
}
@@ -1743,10 +1763,9 @@ static void subflow_state_change(struct sock *sk)
msk = mptcp_sk(parent);
if (subflow_simultaneous_connect(sk)) {
mptcp_do_fallback(sk);
- mptcp_rcv_space_init(msk, sk);
pr_fallback(msk);
subflow->conn_finished = 1;
- mptcp_propagate_state(parent, sk);
+ mptcp_propagate_state(parent, sk, subflow, NULL);
}
/* as recvmsg() does not acquire the subflow socket for ssk selection
@@ -1948,14 +1967,14 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->idsn = subflow_req->idsn;
/* this is the first subflow, id is always 0 */
- new_ctx->local_id_valid = 1;
+ subflow_set_local_id(new_ctx, 0);
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->remote_key_valid = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->remote_id = subflow_req->remote_id;
+ WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id);
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;