// SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ #include #include #include #include #include #include #include #include #include #include #include #define BPF_PROG_TEST_TCP_HDR_OPTIONS #include "test_tcp_hdr_options.h" #ifndef sizeof_field #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) #endif __u8 test_kind = TCPOPT_EXP; __u16 test_magic = 0xeB9F; __u32 inherit_cb_flags = 0; struct bpf_test_option passive_synack_out = {}; struct bpf_test_option passive_fin_out = {}; struct bpf_test_option passive_estab_in = {}; struct bpf_test_option passive_fin_in = {}; struct bpf_test_option active_syn_out = {}; struct bpf_test_option active_fin_out = {}; struct bpf_test_option active_estab_in = {}; struct bpf_test_option active_fin_in = {}; struct { __uint(type, BPF_MAP_TYPE_SK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); __type(key, int); __type(value, struct hdr_stg); } hdr_stg_map SEC(".maps"); static bool skops_want_cookie(const struct bpf_sock_ops *skops) { return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE; } static bool skops_current_mss(const struct bpf_sock_ops *skops) { return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS; } static __u8 option_total_len(__u8 flags) { __u8 i, len = 1; /* +1 for flags */ if (!flags) return 0; /* RESEND bit does not use a byte */ for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++) len += !!TEST_OPTION_FLAGS(flags, i); if (test_kind == TCPOPT_EXP) return len + TCP_BPF_EXPOPT_BASE_LEN; else return len + 2; /* +1 kind, +1 kind-len */ } static void write_test_option(const struct bpf_test_option *test_opt, __u8 *data) { __u8 offset = 0; data[offset++] = test_opt->flags; if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS)) data[offset++] = test_opt->max_delack_ms; if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND)) data[offset++] = test_opt->rand; } static int store_option(struct bpf_sock_ops *skops, const struct bpf_test_option *test_opt) { union { struct tcp_exprm_opt exprm; struct tcp_opt regular; } write_opt; int err; if (test_kind == TCPOPT_EXP) { write_opt.exprm.kind = TCPOPT_EXP; write_opt.exprm.len = option_total_len(test_opt->flags); write_opt.exprm.magic = __bpf_htons(test_magic); write_opt.exprm.data32 = 0; write_test_option(test_opt, write_opt.exprm.data); err = bpf_store_hdr_opt(skops, &write_opt.exprm, sizeof(write_opt.exprm), 0); } else { write_opt.regular.kind = test_kind; write_opt.regular.len = option_total_len(test_opt->flags); write_opt.regular.data32 = 0; write_test_option(test_opt, write_opt.regular.data); err = bpf_store_hdr_opt(skops, &write_opt.regular, sizeof(write_opt.regular), 0); } if (err) RET_CG_ERR(err); return CG_OK; } static int parse_test_option(struct bpf_test_option *opt, const __u8 *start) { opt->flags = *start++; if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS)) opt->max_delack_ms = *start++; if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND)) opt->rand = *start++; return 0; } static int load_option(struct bpf_sock_ops *skops, struct bpf_test_option *test_opt, bool from_syn) { union { struct tcp_exprm_opt exprm; struct tcp_opt regular; } search_opt; int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; if (test_kind == TCPOPT_EXP) { search_opt.exprm.kind = TCPOPT_EXP; search_opt.exprm.len = 4; search_opt.exprm.magic = __bpf_htons(test_magic); search_opt.exprm.data32 = 0; ret = bpf_load_hdr_opt(skops, &search_opt.exprm, sizeof(search_opt.exprm), load_flags); if (ret < 0) return ret; return parse_test_option(test_opt, search_opt.exprm.data); } else { search_opt.regular.kind = test_kind; search_opt.regular.len = 0; search_opt.regular.data32 = 0; ret = bpf_load_hdr_opt(skops, &search_opt.regular, sizeof(search_opt.regular), load_flags); if (ret < 0) return ret; return parse_test_option(test_opt, search_opt.regular.data); } } static int synack_opt_len(struct bpf_sock_ops *skops) { struct bpf_test_option test_opt = {}; __u8 optlen; int err; if (!passive_synack_out.flags) return CG_OK; err = load_option(skops, &test_opt, true); /* bpf_test_option is not found */ if (err == -ENOMSG) return CG_OK; if (err) RET_CG_ERR(err); optlen = option_total_len(passive_synack_out.flags); if (optlen) { err = bpf_reserve_hdr_opt(skops, optlen, 0); if (err) RET_CG_ERR(err); } return CG_OK; } static int write_synack_opt(struct bpf_sock_ops *skops) { struct bpf_test_option opt; if (!passive_synack_out.flags) /* We should not even be called since no header * space has been reserved. */ RET_CG_ERR(0); opt = passive_synack_out; if (skops_want_cookie(skops)) SET_OPTION_FLAGS(opt.flags, OPTION_RESEND); return store_option(skops, &opt); } static int syn_opt_len(struct bpf_sock_ops *skops) { __u8 optlen; int err; if (!active_syn_out.flags) return CG_OK; optlen = option_total_len(active_syn_out.flags); if (optlen) { err = bpf_reserve_hdr_opt(skops, optlen, 0); if (err) RET_CG_ERR(err); } return CG_OK; } static int write_syn_opt(struct bpf_sock_ops *skops) { if (!active_syn_out.flags) RET_CG_ERR(0); return store_option(skops, &active_syn_out); } static int fin_opt_len(struct bpf_sock_ops *skops) { struct bpf_test_option *opt; struct hdr_stg *hdr_stg; __u8 optlen; int err; if (!skops->sk) RET_CG_ERR(0); hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); if (!hdr_stg) RET_CG_ERR(0); if (hdr_stg->active) opt = &active_fin_out; else opt = &passive_fin_out; optlen = option_total_len(opt->flags); if (optlen) { err = bpf_reserve_hdr_opt(skops, optlen, 0); if (err) RET_CG_ERR(err); } return CG_OK; } static int write_fin_opt(struct bpf_sock_ops *skops) { struct bpf_test_option *opt; struct hdr_stg *hdr_stg; if (!skops->sk) RET_CG_ERR(0); hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); if (!hdr_stg) RET_CG_ERR(0); if (hdr_stg->active) opt = &active_fin_out; else opt = &passive_fin_out; if (!opt->flags) RET_CG_ERR(0); return store_option(skops, opt); } static int resend_in_ack(struct bpf_sock_ops *skops) { struct hdr_stg *hdr_stg; if (!skops->sk) return -1; hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); if (!hdr_stg) return -1; return !!hdr_stg->resend_syn; } static int nodata_opt_len(struct bpf_sock_ops *skops) { int resend; resend = resend_in_ack(skops); if (resend < 0) RET_CG_ERR(0); if (resend) return syn_opt_len(skops); return CG_OK; } static int write_nodata_opt(struct bpf_sock_ops *skops) { int resend; resend = resend_in_ack(skops); if (resend < 0) RET_CG_ERR(0); if (resend) return write_syn_opt(skops); return CG_OK; } static int data_opt_len(struct bpf_sock_ops *skops) { /* Same as the nodata version. Mostly to show * an example usage on skops->skb_len. */ return nodata_opt_len(skops); } static int write_data_opt(struct bpf_sock_ops *skops) { return write_nodata_opt(skops); } static int current_mss_opt_len(struct bpf_sock_ops *skops) { /* Reserve maximum that may be needed */ int err; err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0); if (err) RET_CG_ERR(err); return CG_OK; } static int handle_hdr_opt_len(struct bpf_sock_ops *skops) { __u8 tcp_flags = skops_tcp_flags(skops); if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) return synack_opt_len(skops); if (tcp_flags & TCPHDR_SYN) return syn_opt_len(skops); if (tcp_flags & TCPHDR_FIN) return fin_opt_len(skops); if (skops_current_mss(skops)) /* The kernel is calculating the MSS */ return current_mss_opt_len(skops); if (skops->skb_len) return data_opt_len(skops); return nodata_opt_len(skops); } static int handle_write_hdr_opt(struct bpf_sock_ops *skops) { __u8 tcp_flags = skops_tcp_flags(skops); struct tcphdr *th; if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) return write_synack_opt(skops); if (tcp_flags & TCPHDR_SYN) return write_syn_opt(skops); if (tcp_flags & TCPHDR_FIN) return write_fin_opt(skops); th = skops->skb_data; if (th + 1 > skops->skb_data_end) RET_CG_ERR(0); if (skops->skb_len > tcp_hdrlen(th)) return write_data_opt(skops); return write_nodata_opt(skops); } static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms) { __u32 max_delack_us = max_delack_ms * 1000; return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX, &max_delack_us, sizeof(max_delack_us)); } static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms) { __u32 min_rto_us = peer_max_delack_ms * 1000; return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us, sizeof(min_rto_us)); } static int handle_active_estab(struct bpf_sock_ops *skops) { struct hdr_stg init_stg = { .active = true, }; int err; err = load_option(skops, &active_estab_in, false); if (err && err != -ENOMSG) RET_CG_ERR(err); init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags, OPTION_RESEND); if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg, BPF_SK_STORAGE_GET_F_CREATE)) RET_CG_ERR(0); if (init_stg.resend_syn) /* Don't clear the write_hdr cb now because * the ACK may get lost and retransmit may * be needed. * * PARSE_ALL_HDR cb flag is set to learn if this * resend_syn option has received by the peer. * * The header option will be resent until a valid * packet is received at handle_parse_hdr() * and all hdr cb flags will be cleared in * handle_parse_hdr(). */ set_parse_all_hdr_cb_flags(skops); else if (!active_fin_out.flags) /* No options will be written from now */ clear_hdr_cb_flags(skops); if (active_syn_out.max_delack_ms) { err = set_delack_max(skops, active_syn_out.max_delack_ms); if (err) RET_CG_ERR(err); } if (active_estab_in.max_delack_ms) { err = set_rto_min(skops, active_estab_in.max_delack_ms); if (err) RET_CG_ERR(err); } return CG_OK; } static int handle_passive_estab(struct bpf_sock_ops *skops) { struct hdr_stg init_stg = {}; struct tcphdr *th; int err; inherit_cb_flags = skops->bpf_sock_ops_cb_flags; err = load_option(skops, &passive_estab_in, true); if (err == -ENOENT) { /* saved_syn is not found. It was in syncookie mode. * We have asked the active side to resend the options * in ACK, so try to find the bpf_test_option from ACK now. */ err = load_option(skops, &passive_estab_in, false); init_stg.syncookie = true; } /* ENOMSG: The bpf_test_option is not found which is fine. * Bail out now for all other errors. */ if (err && err != -ENOMSG) RET_CG_ERR(err); th = skops->skb_data; if (th + 1 > skops->skb_data_end) RET_CG_ERR(0); if (th->syn) { /* Fastopen */ /* Cannot clear cb_flags to stop write_hdr cb. * synack is not sent yet for fast open. * Even it was, the synack may need to be retransmitted. * * PARSE_ALL_HDR cb flag is set to learn * if synack has reached the peer. * All cb_flags will be cleared in handle_parse_hdr(). */ set_parse_all_hdr_cb_flags(skops); init_stg.fastopen = true; } else if (!passive_fin_out.flags) { /* No options will be written from now */ clear_hdr_cb_flags(skops); } if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg, BPF_SK_STORAGE_GET_F_CREATE)) RET_CG_ERR(0); if (passive_synack_out.max_delack_ms) { err = set_delack_max(skops, passive_synack_out.max_delack_ms); if (err) RET_CG_ERR(err); } if (passive_estab_in.max_delack_ms) { err = set_rto_min(skops, passive_estab_in.max_delack_ms); if (err) RET_CG_ERR(err); } return CG_OK; } static int handle_parse_hdr(struct bpf_sock_ops *skops) { struct hdr_stg *hdr_stg; struct tcphdr *th; if (!skops->sk) RET_CG_ERR(0); th = skops->skb_data; if (th + 1 > skops->skb_data_end) RET_CG_ERR(0); hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); if (!hdr_stg) RET_CG_ERR(0); if (hdr_stg->resend_syn || hdr_stg->fastopen) /* The PARSE_ALL_HDR cb flag was turned on * to ensure that the previously written * options have reached the peer. * Those previously written option includes: * - Active side: resend_syn in ACK during syncookie * or * - Passive side: SYNACK during fastopen * * A valid packet has been received here after * the 3WHS, so the PARSE_ALL_HDR cb flag * can be cleared now. */ clear_parse_all_hdr_cb_flags(skops); if (hdr_stg->resend_syn && !active_fin_out.flags) /* Active side resent the syn option in ACK * because the server was in syncookie mode. * A valid packet has been received, so * clear header cb flags if there is no * more option to send. */ clear_hdr_cb_flags(skops); if (hdr_stg->fastopen && !passive_fin_out.flags) /* Passive side was in fastopen. * A valid packet has been received, so * the SYNACK has reached the peer. * Clear header cb flags if there is no more * option to send. */ clear_hdr_cb_flags(skops); if (th->fin) { struct bpf_test_option *fin_opt; int err; if (hdr_stg->active) fin_opt = &active_fin_in; else fin_opt = &passive_fin_in; err = load_option(skops, fin_opt, false); if (err && err != -ENOMSG) RET_CG_ERR(err); } return CG_OK; } SEC("sockops") int estab(struct bpf_sock_ops *skops) { int true_val = 1; switch (skops->op) { case BPF_SOCK_OPS_TCP_LISTEN_CB: bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, &true_val, sizeof(true_val)); set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG); break; case BPF_SOCK_OPS_TCP_CONNECT_CB: set_hdr_cb_flags(skops, 0); break; case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: return handle_parse_hdr(skops); case BPF_SOCK_OPS_HDR_OPT_LEN_CB: return handle_hdr_opt_len(skops); case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: return handle_write_hdr_opt(skops); case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: return handle_passive_estab(skops); case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: return handle_active_estab(skops); } return CG_OK; } char _license[] SEC("license") = "GPL";