// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2018 Chelsio Communications, Inc. * * Written by: Atul Gupta (atul.gupta@chelsio.com) */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "chtls.h" #include "chtls_cm.h" static bool is_tls_tx(struct chtls_sock *csk) { return csk->tlshws.txkey >= 0; } static bool is_tls_rx(struct chtls_sock *csk) { return csk->tlshws.rxkey >= 0; } static int data_sgl_len(const struct sk_buff *skb) { unsigned int cnt; cnt = skb_shinfo(skb)->nr_frags; return sgl_len(cnt) * 8; } static int nos_ivs(struct sock *sk, unsigned int size) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); return DIV_ROUND_UP(size, csk->tlshws.mfs); } static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb) { int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE; int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb); if ((hlen + KEY_ON_MEM_SZ + ivs_size) < MAX_IMM_OFLD_TX_DATA_WR_LEN) { ULP_SKB_CB(skb)->ulp.tls.iv = 1; return 1; } ULP_SKB_CB(skb)->ulp.tls.iv = 0; return 0; } static int max_ivs_size(struct sock *sk, int size) { return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE; } static int ivs_size(struct sock *sk, const struct sk_buff *skb) { return set_ivs_imm(sk, skb) ? 
(nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE) : 0; } static int flowc_wr_credits(int nparams, int *flowclenp) { int flowclen16, flowclen; flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); flowclen16 = DIV_ROUND_UP(flowclen, 16); flowclen = flowclen16 * 16; if (flowclenp) *flowclenp = flowclen; return flowclen16; } static struct sk_buff *create_flowc_wr_skb(struct sock *sk, struct fw_flowc_wr *flowc, int flowclen) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct sk_buff *skb; skb = alloc_skb(flowclen, GFP_ATOMIC); if (!skb) return NULL; __skb_put_data(skb, flowc, flowclen); skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); return skb; } static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc, int flowclen) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int flowclen16; int ret; flowclen16 = flowclen / 16; if (csk_flag(sk, CSK_TX_DATA_SENT)) { skb = create_flowc_wr_skb(sk, flowc, flowclen); if (!skb) return -ENOMEM; skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND); return 0; } ret = cxgb4_immdata_send(csk->egress_dev, csk->txq_idx, flowc, flowclen); if (!ret) return flowclen16; skb = create_flowc_wr_skb(sk, flowc, flowclen); if (!skb) return -ENOMEM; send_or_defer(sk, tp, skb, 0); return flowclen16; } static u8 tcp_state_to_flowc_state(u8 state) { switch (state) { case TCP_ESTABLISHED: return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; case TCP_CLOSE_WAIT: return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT; case TCP_FIN_WAIT1: return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1; case TCP_CLOSING: return FW_FLOWC_MNEM_TCPSTATE_CLOSING; case TCP_LAST_ACK: return FW_FLOWC_MNEM_TCPSTATE_LASTACK; case TCP_FIN_WAIT2: return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2; } return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; } int send_tx_flowc_wr(struct sock *sk, int compl, u32 snd_nxt, u32 rcv_nxt) { struct flowc_packed { struct fw_flowc_wr fc; struct fw_flowc_mnemval 
mnemval[FW_FLOWC_MNEM_MAX]; } __packed sflowc; int nparams, paramidx, flowclen16, flowclen; struct fw_flowc_wr *flowc; struct chtls_sock *csk; struct tcp_sock *tp; csk = rcu_dereference_sk_user_data(sk); tp = tcp_sk(sk); memset(&sflowc, 0, sizeof(sflowc)); flowc = &sflowc.fc; #define FLOWC_PARAM(__m, __v) \ do { \ flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ flowc->mnemval[paramidx].val = cpu_to_be32(__v); \ paramidx++; \ } while (0) paramidx = 0; FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf)); FLOWC_PARAM(CH, csk->tx_chan); FLOWC_PARAM(PORT, csk->tx_chan); FLOWC_PARAM(IQID, csk->rss_qid); FLOWC_PARAM(SNDNXT, tp->snd_nxt); FLOWC_PARAM(RCVNXT, tp->rcv_nxt); FLOWC_PARAM(SNDBUF, csk->sndbuf); FLOWC_PARAM(MSS, tp->mss_cache); FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state)); if (SND_WSCALE(tp)) FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp)); if (csk->ulp_mode == ULP_MODE_TLS) FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS); if (csk->tlshws.fcplenmax) FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax); nparams = paramidx; #undef FLOWC_PARAM flowclen16 = flowc_wr_credits(nparams, &flowclen); flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | FW_WR_COMPL_V(compl) | FW_FLOWC_WR_NPARAMS_V(nparams)); flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | FW_WR_FLOWID_V(csk->tid)); return send_flowc_wr(sk, flowc, flowclen); } /* Copy IVs to WR */ static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb) { struct chtls_sock *csk; unsigned char *iv_loc; struct chtls_hws *hws; unsigned char *ivs; u16 number_of_ivs; struct page *page; int err = 0; csk = rcu_dereference_sk_user_data(sk); hws = &csk->tlshws; number_of_ivs = nos_ivs(sk, skb->len); if (number_of_ivs > MAX_IVS_PAGE) { pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs); return -ENOMEM; } /* generate the IVs */ ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC); if (!ivs) return -ENOMEM; get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE); if 
(skb_ulp_tls_iv_imm(skb)) { /* send the IVs as immediate data in the WR */ iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs * CIPHER_BLOCK_SIZE); if (iv_loc) memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE); hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE; } else { /* Send the IVs as sgls */ /* Already accounted IV DSGL for credits */ skb_shinfo(skb)->nr_frags--; page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0); if (!page) { pr_info("%s : Page allocation for IVs failed\n", __func__); err = -ENOMEM; goto out; } memcpy(page_address(page), ivs, number_of_ivs * CIPHER_BLOCK_SIZE); skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, number_of_ivs * CIPHER_BLOCK_SIZE); hws->ivsize = 0; } out: kfree(ivs); return err; } /* Copy Key to WR */ static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb) { struct ulptx_sc_memrd *sc_memrd; struct chtls_sock *csk; struct chtls_dev *cdev; struct ulptx_idata *sc; struct chtls_hws *hws; u32 immdlen; int kaddr; csk = rcu_dereference_sk_user_data(sk); hws = &csk->tlshws; cdev = csk->cdev; immdlen = sizeof(*sc) + sizeof(*sc_memrd); kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey); sc = (struct ulptx_idata *)__skb_push(skb, immdlen); if (sc) { sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); sc->len = htonl(0); sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); sc_memrd->cmd_to_len = htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) | ULP_TX_SC_MORE_V(1) | ULPTX_LEN16_V(hws->keylen >> 4)); sc_memrd->addr = htonl(kaddr); } } static u64 tlstx_incr_seqnum(struct chtls_hws *hws) { return hws->tx_seq_no++; } static bool is_sg_request(const struct sk_buff *skb) { return skb->peeked || (skb->len > MAX_IMM_ULPTX_WR_LEN); } /* * Returns true if an sk_buff carries urgent data. 
*/ static bool skb_urgent(struct sk_buff *skb) { return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG; } /* TLS content type for CPL SFO */ static unsigned char tls_content_type(unsigned char content_type) { switch (content_type) { case TLS_HDR_TYPE_CCS: return CPL_TX_TLS_SFO_TYPE_CCS; case TLS_HDR_TYPE_ALERT: return CPL_TX_TLS_SFO_TYPE_ALERT; case TLS_HDR_TYPE_HANDSHAKE: return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; case TLS_HDR_TYPE_HEARTBEAT: return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; } return CPL_TX_TLS_SFO_TYPE_DATA; } static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, int dlen, int tls_immd, u32 credits, int expn, int pdus) { struct fw_tlstx_data_wr *req_wr; struct cpl_tx_tls_sfo *req_cpl; unsigned int wr_ulp_mode_force; struct tls_scmd *updated_scmd; unsigned char data_type; struct chtls_sock *csk; struct net_device *dev; struct chtls_hws *hws; struct tls_scmd *scmd; struct adapter *adap; unsigned char *req; int immd_len; int iv_imm; int len; csk = rcu_dereference_sk_user_data(sk); iv_imm = skb_ulp_tls_iv_imm(skb); dev = csk->egress_dev; adap = netdev2adap(dev); hws = &csk->tlshws; scmd = &hws->scmd; len = dlen + expn; dlen = (dlen < hws->mfs) ? dlen : hws->mfs; atomic_inc(&adap->chcr_stats.tls_pdu_tx); updated_scmd = scmd; updated_scmd->seqno_numivs &= 0xffffff80; updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus); hws->scmd = *updated_scmd; req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo)); req_cpl = (struct cpl_tx_tls_sfo *)req; req = (unsigned char *)__skb_push(skb, (sizeof(struct fw_tlstx_data_wr))); req_wr = (struct fw_tlstx_data_wr *)req; immd_len = (tls_immd ? 
dlen : 0); req_wr->op_to_immdlen = htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) | FW_TLSTX_DATA_WR_COMPL_V(1) | FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len)); req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) | FW_TLSTX_DATA_WR_LEN16_V(credits)); wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS); if (is_sg_request(skb)) wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : FW_OFLD_TX_DATA_WR_SHOVE_F); req_wr->lsodisable_to_flags = htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | TX_URG_V(skb_urgent(skb)) | T6_TX_FORCE_F | wr_ulp_mode_force | TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && skb_queue_empty(&csk->txq))); req_wr->ctxloc_to_exp = htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) | FW_TLSTX_DATA_WR_EXP_V(expn) | FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) | FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) | FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4)); /* Fill in the length */ req_wr->plen = htonl(len); req_wr->mfs = htons(hws->mfs); req_wr->adjustedplen_pkd = htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen)); req_wr->expinplenmax_pkd = htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion)); req_wr->pdusinplenmax_pkd = FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus); req_wr->r10 = 0; data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type); req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) | CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) | CPL_TX_TLS_SFO_CPL_LEN_V(2) | CPL_TX_TLS_SFO_SEG_LEN_V(dlen)); req_cpl->pld_len = htonl(len - expn); req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ? 
TLS_HDR_TYPE_HEARTBEAT : 0) | CPL_TX_TLS_SFO_PROTOVER_V(0)); /* create the s-command */ req_cpl->r1_lo = 0; req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs); req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen); req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws)); } /* * Calculate the TLS data expansion size */ static int chtls_expansion_size(struct sock *sk, int data_len, int fullpdu, unsigned short *pducnt) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_hws *hws = &csk->tlshws; struct tls_scmd *scmd = &hws->scmd; int fragsize = hws->mfs; int expnsize = 0; int fragleft; int fragcnt; int expppdu; if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) == SCMD_CIPH_MODE_AES_GCM) { expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + TLS_HEADER_LENGTH; if (fullpdu) { *pducnt = data_len / (expppdu + fragsize); if (*pducnt > 32) *pducnt = 32; else if (!*pducnt) *pducnt = 1; expnsize = (*pducnt) * expppdu; return expnsize; } fragcnt = (data_len / fragsize); expnsize = fragcnt * expppdu; fragleft = data_len % fragsize; if (fragleft > 0) expnsize += expppdu; } return expnsize; } /* WR with IV, KEY and CPL SFO added */ static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb, int tls_tx_imm, int tls_len, u32 credits) { unsigned short pdus_per_ulp = 0; struct chtls_sock *csk; struct chtls_hws *hws; int expn_sz; int pdus; csk = rcu_dereference_sk_user_data(sk); hws = &csk->tlshws; pdus = DIV_ROUND_UP(tls_len, hws->mfs); expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL); if (!hws->compute) { hws->expansion = chtls_expansion_size(sk, hws->fcplenmax, 1, &pdus_per_ulp); hws->pdus = pdus_per_ulp; hws->adjustlen = hws->pdus * ((hws->expansion / hws->pdus) + hws->mfs); hws->compute = 1; } if (tls_copy_ivs(sk, skb)) return; tls_copy_tx_key(sk, skb); tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus); hws->tx_seq_no += (pdus - 1); } static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, unsigned int immdlen, int 
len, u32 credits, u32 compl) { struct fw_ofld_tx_data_wr *req; unsigned int wr_ulp_mode_force; struct chtls_sock *csk; unsigned int opcode; csk = rcu_dereference_sk_user_data(sk); opcode = FW_OFLD_TX_DATA_WR; req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req)); req->op_to_immdlen = htonl(WR_OP_V(opcode) | FW_WR_COMPL_V(compl) | FW_WR_IMMDLEN_V(immdlen)); req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) | FW_WR_LEN16_V(credits)); wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode); if (is_sg_request(skb)) wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : FW_OFLD_TX_DATA_WR_SHOVE_F); req->tunnel_to_proxy = htonl(wr_ulp_mode_force | TX_URG_V(skb_urgent(skb)) | TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && skb_queue_empty(&csk->txq))); req->plen = htonl(len); } static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb, bool size) { int wr_size; wr_size = TLS_WR_CPL_LEN; wr_size += KEY_ON_MEM_SZ; wr_size += ivs_size(csk->sk, skb); if (size) return wr_size; /* frags counted for IV dsgl */ if (!skb_ulp_tls_iv_imm(skb)) skb_shinfo(skb)->nr_frags++; return wr_size; } static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb) { int length = skb->len; if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN) return false; if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { /* Check TLS header len for Immediate */ if (csk->ulp_mode == ULP_MODE_TLS && skb_ulp_tls_inline(skb)) length += chtls_wr_size(csk, skb, true); else length += sizeof(struct fw_ofld_tx_data_wr); return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; } return true; } static unsigned int calc_tx_flits(const struct sk_buff *skb, unsigned int immdlen) { unsigned int flits, cnt; flits = immdlen / 8; /* headers */ cnt = skb_shinfo(skb)->nr_frags; if (skb_tail_pointer(skb) != skb_transport_header(skb)) cnt++; return flits + sgl_len(cnt); } static void arp_failure_discard(void *handle, struct sk_buff *skb) { kfree_skb(skb); } int 
chtls_push_frames(struct chtls_sock *csk, int comp) { struct chtls_hws *hws = &csk->tlshws; struct tcp_sock *tp; struct sk_buff *skb; int total_size = 0; struct sock *sk; int wr_size; wr_size = sizeof(struct fw_ofld_tx_data_wr); sk = csk->sk; tp = tcp_sk(sk); if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) return 0; if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) return 0; while (csk->wr_credits && (skb = skb_peek(&csk->txq)) && (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) || skb_queue_len(&csk->txq) > 1)) { unsigned int credit_len = skb->len; unsigned int credits_needed; unsigned int completion = 0; int tls_len = skb->len;/* TLS data len before IV/key */ unsigned int immdlen; int len = skb->len; /* length [ulp bytes] inserted by hw */ int flowclen16 = 0; int tls_tx_imm = 0; immdlen = skb->len; if (!is_ofld_imm(csk, skb)) { immdlen = skb_transport_offset(skb); if (skb_ulp_tls_inline(skb)) wr_size = chtls_wr_size(csk, skb, false); credit_len = 8 * calc_tx_flits(skb, immdlen); } else { if (skb_ulp_tls_inline(skb)) { wr_size = chtls_wr_size(csk, skb, false); tls_tx_imm = 1; } } if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) credit_len += wr_size; credits_needed = DIV_ROUND_UP(credit_len, 16); if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt, tp->rcv_nxt); if (flowclen16 <= 0) break; csk->wr_credits -= flowclen16; csk->wr_unacked += flowclen16; csk->wr_nondata += flowclen16; csk_set_flag(csk, CSK_TX_DATA_SENT); } if (csk->wr_credits < credits_needed) { if (skb_ulp_tls_inline(skb) && !skb_ulp_tls_iv_imm(skb)) skb_shinfo(skb)->nr_frags--; break; } __skb_unlink(skb, &csk->txq); skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); if (hws->ofld) hws->txqid = (skb->queue_mapping >> 1); skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata); csk->wr_credits -= credits_needed; csk->wr_unacked += credits_needed; csk->wr_nondata = 0; enqueue_wr(csk, skb); if 
(likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { if ((comp && csk->wr_unacked == credits_needed) || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) || csk->wr_unacked >= csk->wr_max_credits / 2) { completion = 1; csk->wr_unacked = 0; } if (skb_ulp_tls_inline(skb)) make_tlstx_data_wr(sk, skb, tls_tx_imm, tls_len, credits_needed); else make_tx_data_wr(sk, skb, immdlen, len, credits_needed, completion); tp->snd_nxt += len; tp->lsndtime = tcp_jiffies32; if (completion) ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; } else { struct cpl_close_con_req *req = cplhdr(skb); unsigned int cmd = CPL_OPCODE_G(ntohl (OPCODE_TID(req))); if (cmd == CPL_CLOSE_CON_REQ) csk_set_flag(csk, CSK_CLOSE_CON_REQUESTED); if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) && (csk->wr_unacked >= csk->wr_max_credits / 2)) { req->wr.wr_hi |= htonl(FW_WR_COMPL_F); csk->wr_unacked = 0; } } total_size += skb->truesize; if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER) csk_set_flag(csk, CSK_TX_WAIT_IDLE); t4_set_arp_err_handler(skb, NULL, arp_failure_discard); cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); } sk->sk_wmem_queued -= total_size; return total_size; } static void mark_urg(struct tcp_sock *tp, int flags, struct sk_buff *skb) { if (unlikely(flags & MSG_OOB)) { tp->snd_up = tp->write_seq; ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG | ULPCB_FLAG_BARRIER | ULPCB_FLAG_NO_APPEND | ULPCB_FLAG_NEED_HDR; } } /* * Returns true if a connection should send more data to TCP engine */ static bool should_push(struct sock *sk) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_dev *cdev = csk->cdev; struct tcp_sock *tp = tcp_sk(sk); /* * If we've released our offload resources there's nothing to do ... */ if (!cdev) return false; /* * If there aren't any work requests in flight, or there isn't enough * data in flight, or Nagle is off then send the current TX_DATA * otherwise hold it and wait to accumulate more data. 
*/ return csk->wr_credits == csk->wr_max_credits || (tp->nonagle & TCP_NAGLE_OFF); } /* * Returns true if a TCP socket is corked. */ static bool corked(const struct tcp_sock *tp, int flags) { return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK); } /* * Returns true if a send should try to push new data. */ static bool send_should_push(struct sock *sk, int flags) { return should_push(sk) && !corked(tcp_sk(sk), flags); } void chtls_tcp_push(struct sock *sk, int flags) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); int qlen = skb_queue_len(&csk->txq); if (likely(qlen)) { struct sk_buff *skb = skb_peek_tail(&csk->txq); struct tcp_sock *tp = tcp_sk(sk); mark_urg(tp, flags, skb); if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) && corked(tp, flags)) { ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD; return; } ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD; if (qlen == 1 && ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || should_push(sk))) chtls_push_frames(csk, 1); } } /* * Calculate the size for a new send sk_buff. It's maximum size so we can * pack lots of data into it, unless we plan to send it immediately, in which * case we size it more tightly. * * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't * arise in normal cases and when it does we are just wasting memory. */ static int select_size(struct sock *sk, int io_len, int flags, int len) { const int pgbreak = SKB_MAX_HEAD(len); /* * If the data wouldn't fit in the main body anyway, put only the * header in the main body so it can use immediate data and place all * the payload in page fragments. */ if (io_len > pgbreak) return 0; /* * If we will be accumulating payload get a large main body. 
*/ if (!send_should_push(sk, flags)) return pgbreak; return io_len; } void skb_entail(struct sock *sk, struct sk_buff *skb, int flags) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct tcp_sock *tp = tcp_sk(sk); ULP_SKB_CB(skb)->seq = tp->write_seq; ULP_SKB_CB(skb)->flags = flags; __skb_queue_tail(&csk->txq, skb); sk->sk_wmem_queued += skb->truesize; if (TCP_PAGE(sk) && TCP_OFF(sk)) { put_page(TCP_PAGE(sk)); TCP_PAGE(sk) = NULL; TCP_OFF(sk) = 0; } } static struct sk_buff *get_tx_skb(struct sock *sk, int size) { struct sk_buff *skb; skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation); if (likely(skb)) { skb_reserve(skb, TX_HEADER_LEN); skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); skb_reset_transport_header(skb); } return skb; } static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct sk_buff *skb; skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN + KEY_ON_MEM_SZ + max_ivs_size(sk, size)), sk->sk_allocation); if (likely(skb)) { skb_reserve(skb, (TX_TLSHDR_LEN + KEY_ON_MEM_SZ + max_ivs_size(sk, size))); skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); skb_reset_transport_header(skb); ULP_SKB_CB(skb)->ulp.tls.ofld = 1; ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type; } return skb; } static void tx_skb_finalize(struct sk_buff *skb) { struct ulp_skb_cb *cb = ULP_SKB_CB(skb); if (!(cb->flags & ULPCB_FLAG_NO_HDR)) cb->flags = ULPCB_FLAG_NEED_HDR; cb->flags |= ULPCB_FLAG_NO_APPEND; } static void push_frames_if_head(struct sock *sk) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); if (skb_queue_len(&csk->txq) == 1) chtls_push_frames(csk, 1); } static int chtls_skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from, struct sk_buff *skb, struct page *page, int off, int copy) { int err; err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + off, copy, skb->len); if (err) return err; skb->len += copy; skb->data_len += copy; skb->truesize += 
copy; sk->sk_wmem_queued += copy; return 0; } /* Read TLS header to find content type and data length */ static int tls_header_read(struct tls_hdr *thdr, struct iov_iter *from) { if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr)) return -EFAULT; return (__force int)cpu_to_be16(thdr->length); } static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk) { return (cdev->max_host_sndbuf - sk->sk_wmem_queued); } static int csk_wait_memory(struct chtls_dev *cdev, struct sock *sk, long *timeo_p) { DEFINE_WAIT_FUNC(wait, woken_wake_function); int err = 0; long current_timeo; long vm_wait = 0; bool noblock; current_timeo = *timeo_p; noblock = (*timeo_p ? false : true); if (csk_mem_free(cdev, sk)) { current_timeo = (prandom_u32() % (HZ / 5)) + 2; vm_wait = (prandom_u32() % (HZ / 5)) + 2; } add_wait_queue(sk_sleep(sk), &wait); while (1) { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; if (!*timeo_p) { if (noblock) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); goto do_nonblock; } if (signal_pending(current)) goto do_interrupted; sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); if (csk_mem_free(cdev, sk) && !vm_wait) break; set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; sk_wait_event(sk, ¤t_timeo, sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN) || (csk_mem_free(cdev, sk) && !vm_wait), &wait); sk->sk_write_pending--; if (vm_wait) { vm_wait -= current_timeo; current_timeo = *timeo_p; if (current_timeo != MAX_SCHEDULE_TIMEOUT) { current_timeo -= vm_wait; if (current_timeo < 0) current_timeo = 0; } vm_wait = 0; } *timeo_p = current_timeo; } do_rm_wq: remove_wait_queue(sk_sleep(sk), &wait); return err; do_error: err = -EPIPE; goto do_rm_wq; do_nonblock: err = -EAGAIN; goto do_rm_wq; do_interrupted: err = sock_intr_errno(*timeo_p); goto do_rm_wq; } int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct chtls_dev 
*cdev = csk->cdev; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int mss, flags, err; int recordsz = 0; int copied = 0; long timeo; lock_sock(sk); flags = msg->msg_flags; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { err = sk_stream_wait_connect(sk, &timeo); if (err) goto out_err; } sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; mss = csk->mss; csk_set_flag(csk, CSK_TX_MORE_DATA); while (msg_data_left(msg)) { int copy = 0; skb = skb_peek_tail(&csk->txq); if (skb) { copy = mss - skb->len; skb->ip_summed = CHECKSUM_UNNECESSARY; } if (!csk_mem_free(cdev, sk)) goto wait_for_sndbuf; if (is_tls_tx(csk) && !csk->tlshws.txleft) { struct tls_hdr hdr; recordsz = tls_header_read(&hdr, &msg->msg_iter); size -= TLS_HEADER_LENGTH; copied += TLS_HEADER_LENGTH; csk->tlshws.txleft = recordsz; csk->tlshws.type = hdr.type; if (skb) ULP_SKB_CB(skb)->ulp.tls.type = hdr.type; } if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || copy <= 0) { new_buf: if (skb) { tx_skb_finalize(skb); push_frames_if_head(sk); } if (is_tls_tx(csk)) { skb = get_record_skb(sk, select_size(sk, recordsz, flags, TX_TLSHDR_LEN), false); } else { skb = get_tx_skb(sk, select_size(sk, size, flags, TX_HEADER_LEN)); } if (unlikely(!skb)) goto wait_for_memory; skb->ip_summed = CHECKSUM_UNNECESSARY; copy = mss; } if (copy > size) copy = size; if (skb_tailroom(skb) > 0) { copy = min(copy, skb_tailroom(skb)); if (is_tls_tx(csk)) copy = min_t(int, copy, csk->tlshws.txleft); err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; } else { int i = skb_shinfo(skb)->nr_frags; struct page *page = TCP_PAGE(sk); int pg_size = PAGE_SIZE; int off = TCP_OFF(sk); bool merge; if (page) pg_size = page_size(page); if (off < pg_size && skb_can_coalesce(skb, i, page, off)) { merge = 1; goto copy; } merge = 0; if (i == (is_tls_tx(csk) ? 
(MAX_SKB_FRAGS - 1) : MAX_SKB_FRAGS)) goto new_buf; if (page && off == pg_size) { put_page(page); TCP_PAGE(sk) = page = NULL; pg_size = PAGE_SIZE; } if (!page) { gfp_t gfp = sk->sk_allocation; int order = cdev->send_page_order; if (order) { page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY, order); if (page) pg_size <<= order; } if (!page) { page = alloc_page(gfp); pg_size = PAGE_SIZE; } if (!page) goto wait_for_memory; off = 0; } copy: if (copy > pg_size - off) copy = pg_size - off; if (is_tls_tx(csk)) copy = min_t(int, copy, csk->tlshws.txleft); err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, page, off, copy); if (unlikely(err)) { if (!TCP_PAGE(sk)) { TCP_PAGE(sk) = page; TCP_OFF(sk) = 0; } goto do_fault; } /* Update the skb. */ if (merge) { skb_frag_size_add( &skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); if (off + copy < pg_size) { /* space left keep page */ get_page(page); TCP_PAGE(sk) = page; } else { TCP_PAGE(sk) = NULL; } } TCP_OFF(sk) = off + copy; } if (unlikely(skb->len == mss)) tx_skb_finalize(skb); tp->write_seq += copy; copied += copy; size -= copy; if (is_tls_tx(csk)) csk->tlshws.txleft -= copy; if (corked(tp, flags) && (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; if (size == 0) goto out; if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) push_frames_if_head(sk); continue; wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: err = csk_wait_memory(cdev, sk, &timeo); if (err) goto do_error; } out: csk_reset_flag(csk, CSK_TX_MORE_DATA); if (copied) chtls_tcp_push(sk, flags); done: release_sock(sk); return copied; do_fault: if (!skb->len) { __skb_unlink(skb, &csk->txq); sk->sk_wmem_queued -= skb->truesize; __kfree_skb(skb); } do_error: if (copied) goto out; out_err: if (csk_conn_inline(csk)) csk_reset_flag(csk, CSK_TX_MORE_DATA); copied = sk_stream_error(sk, flags, err); goto done; } int 
chtls_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags) { struct chtls_sock *csk; struct chtls_dev *cdev; int mss, err, copied; struct tcp_sock *tp; long timeo; tp = tcp_sk(sk); copied = 0; csk = rcu_dereference_sk_user_data(sk); cdev = csk->cdev; timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); err = sk_stream_wait_connect(sk, &timeo); if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && err != 0) goto out_err; mss = csk->mss; csk_set_flag(csk, CSK_TX_MORE_DATA); while (size > 0) { struct sk_buff *skb = skb_peek_tail(&csk->txq); int copy, i; if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || (copy = mss - skb->len) <= 0) { new_buf: if (!csk_mem_free(cdev, sk)) goto wait_for_sndbuf; if (is_tls_tx(csk)) { skb = get_record_skb(sk, select_size(sk, size, flags, TX_TLSHDR_LEN), true); } else { skb = get_tx_skb(sk, 0); } if (!skb) goto wait_for_memory; copy = mss; } if (copy > size) copy = size; i = skb_shinfo(skb)->nr_frags; if (skb_can_coalesce(skb, i, page, offset)) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else if (i < MAX_SKB_FRAGS) { get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); } else { tx_skb_finalize(skb); push_frames_if_head(sk); goto new_buf; } skb->len += copy; if (skb->len == mss) tx_skb_finalize(skb); skb->data_len += copy; skb->truesize += copy; sk->sk_wmem_queued += copy; tp->write_seq += copy; copied += copy; offset += copy; size -= copy; if (corked(tp, flags) && (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; if (!size) break; if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)) push_frames_if_head(sk); continue; wait_for_sndbuf: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: err = csk_wait_memory(cdev, sk, &timeo); if (err) goto do_error; } out: csk_reset_flag(csk, CSK_TX_MORE_DATA); if (copied) chtls_tcp_push(sk, flags); done: release_sock(sk); return copied; do_error: if (copied) goto out; 
out_err: if (csk_conn_inline(csk)) csk_reset_flag(csk, CSK_TX_MORE_DATA); copied = sk_stream_error(sk, flags, err); goto done; } static void chtls_select_window(struct sock *sk) { struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int wnd = tp->rcv_wnd; wnd = max_t(unsigned int, wnd, tcp_full_space(sk)); wnd = max_t(unsigned int, MIN_RCV_WND, wnd); if (wnd > MAX_RCV_WND) wnd = MAX_RCV_WND; /* * Check if we need to grow the receive window in response to an increase in * the socket's receive buffer size. Some applications increase the buffer * size dynamically and rely on the window to grow accordingly. */ if (wnd > tp->rcv_wnd) { tp->rcv_wup -= wnd - tp->rcv_wnd; tp->rcv_wnd = wnd; /* Mark the receive window as updated */ csk_reset_flag(csk, CSK_UPDATE_RCV_WND); } } /* * Send RX credits through an RX_DATA_ACK CPL message. We are permitted * to return without sending the message in case we cannot allocate * an sk_buff. Returns the number of credits sent. */ static u32 send_rx_credits(struct chtls_sock *csk, u32 credits) { struct cpl_rx_data_ack *req; struct sk_buff *skb; skb = alloc_skb(sizeof(*req), GFP_ATOMIC); if (!skb) return 0; __skb_put(skb, sizeof(*req)); req = (struct cpl_rx_data_ack *)skb->head; set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id); INIT_TP_WR(req, csk->tid); OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, csk->tid)); req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) | RX_FORCE_ACK_F); cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); return credits; } #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \ TCPF_FIN_WAIT1 | \ TCPF_FIN_WAIT2) /* * Called after some received data has been read. It returns RX credits * to the HW for the amount of data processed. 
*/
static void chtls_cleanup_rbuf(struct sock *sk, int copied)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	const u32 thres = 15 * 1024;
	u32 credits;

	/* Credits are only returned in CREDIT_RETURN_STATE states. */
	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
		return;

	chtls_select_window(sk);

	/* Bytes consumed since the last window update was sent. */
	credits = tp->copied_seq - tp->rcv_wup;
	if (unlikely(!credits))
		return;

	/*
	 * Return credits once they reach the threshold, or earlier when
	 * holding them would leave less than 16KB of receive window, so
	 * that coalescing keeps working effectively.
	 */
	if (credits + 16384 >= tp->rcv_wnd || credits >= thres)
		tp->rcv_wup += send_rx_credits(csk, credits);
}

/* NOTE: this definition continues on the following source line. */
static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int nonblock, int flags, int *addr_len)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_hws *hws = &csk->tlshws;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long avail;
	int buffers_freed;
	int copied = 0;
	int target;
	long timeo;

	buffers_freed = 0;

	timeo = sock_rcvtimeo(sk, nonblock);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset = 0;

		if (unlikely(tp->urg_data &&
			     tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ?
sock_intr_errno(timeo) : -EAGAIN; break; } } skb = skb_peek(&sk->sk_receive_queue); if (skb) goto found_ok_skb; if (csk->wr_credits && skb_queue_len(&csk->txq) && chtls_push_frames(csk, csk->wr_credits == csk->wr_max_credits)) sk->sk_write_space(sk); if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { if (sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; if (!timeo) break; } else { if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { copied = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_state == TCP_CLOSE) { copied = -ENOTCONN; break; } if (!timeo) { copied = -EAGAIN; break; } if (signal_pending(current)) { copied = sock_intr_errno(timeo); break; } } if (READ_ONCE(sk->sk_backlog.tail)) { release_sock(sk); lock_sock(sk); chtls_cleanup_rbuf(sk, copied); continue; } if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (!skb->len) { skb_dst_set(skb, NULL); __skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); if (!copied && !timeo) { copied = -EAGAIN; break; } if (copied < target) { release_sock(sk); lock_sock(sk); continue; } break; } offset = hws->copied_seq; avail = skb->len - offset; if (len < avail) avail = len; if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - tp->copied_seq; if (urg_offset < avail) { if (urg_offset) { avail = urg_offset; } else if (!sock_flag(sk, SOCK_URGINLINE)) { /* First byte is urgent, skip */ tp->copied_seq++; offset++; avail--; if (!avail) goto skip_copy; } } } if (skb_copy_datagram_msg(skb, offset, msg, avail)) { if (!copied) { copied = -EFAULT; break; } } copied += avail; len -= avail; hws->copied_seq += avail; skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) tp->urg_data = 0; if ((avail + offset) >= skb->len) { if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { tp->copied_seq += skb->len; hws->rcvpld = skb->hdr_len; } 
else { tp->copied_seq += hws->rcvpld; } chtls_free_skb(sk, skb); buffers_freed++; hws->copied_seq = 0; if (copied >= target && !skb_peek(&sk->sk_receive_queue)) break; } } while (len > 0); if (buffers_freed) chtls_cleanup_rbuf(sk, copied); release_sock(sk); return copied; } /* * Peek at data in a socket's receive buffer. */ static int peekmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags) { struct tcp_sock *tp = tcp_sk(sk); u32 peek_seq, offset; struct sk_buff *skb; int copied = 0; size_t avail; /* amount of available data in current skb */ long timeo; lock_sock(sk); timeo = sock_rcvtimeo(sk, nonblock); peek_seq = tp->copied_seq; do { if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) { if (copied) break; if (signal_pending(current)) { copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; break; } } skb_queue_walk(&sk->sk_receive_queue, skb) { offset = peek_seq - ULP_SKB_CB(skb)->seq; if (offset < skb->len) goto found_ok_skb; } /* empty receive queue */ if (copied) break; if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { copied = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_state == TCP_CLOSE) { copied = -ENOTCONN; break; } if (!timeo) { copied = -EAGAIN; break; } if (signal_pending(current)) { copied = sock_intr_errno(timeo); break; } if (READ_ONCE(sk->sk_backlog.tail)) { /* Do not sleep, just process backlog. */ release_sock(sk); lock_sock(sk); } else { sk_wait_data(sk, &timeo, NULL); } if (unlikely(peek_seq != tp->copied_seq)) { if (net_ratelimit()) pr_info("TCP(%s:%d), race in MSG_PEEK.\n", current->comm, current->pid); peek_seq = tp->copied_seq; } continue; found_ok_skb: avail = skb->len - offset; if (len < avail) avail = len; /* * Do we have urgent data here? We need to skip over the * urgent byte. */ if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - peek_seq; if (urg_offset < avail) { /* * The amount of data we are preparing to copy * contains urgent data. 
*/ if (!urg_offset) { /* First byte is urgent */ if (!sock_flag(sk, SOCK_URGINLINE)) { peek_seq++; offset++; avail--; } if (!avail) continue; } else { /* stop short of the urgent data */ avail = urg_offset; } } } /* * If MSG_TRUNC is specified the data is discarded. */ if (likely(!(flags & MSG_TRUNC))) if (skb_copy_datagram_msg(skb, offset, msg, len)) { if (!copied) { copied = -EFAULT; break; } } peek_seq += avail; copied += avail; len -= avail; } while (len > 0); release_sock(sk); return copied; } int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { struct tcp_sock *tp = tcp_sk(sk); struct chtls_sock *csk; unsigned long avail; /* amount of available data in current skb */ int buffers_freed; int copied = 0; long timeo; int target; /* Read at least this many bytes */ buffers_freed = 0; if (unlikely(flags & MSG_OOB)) return tcp_prot.recvmsg(sk, msg, len, nonblock, flags, addr_len); if (unlikely(flags & MSG_PEEK)) return peekmsg(sk, msg, len, nonblock, flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && sk->sk_state == TCP_ESTABLISHED) sk_busy_loop(sk, nonblock); lock_sock(sk); csk = rcu_dereference_sk_user_data(sk); if (is_tls_rx(csk)) return chtls_pt_recvmsg(sk, msg, len, nonblock, flags, addr_len); timeo = sock_rcvtimeo(sk, nonblock); target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) chtls_cleanup_rbuf(sk, copied); do { struct sk_buff *skb; u32 offset; if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) { if (copied) break; if (signal_pending(current)) { copied = timeo ? 
sock_intr_errno(timeo) : -EAGAIN; break; } } skb = skb_peek(&sk->sk_receive_queue); if (skb) goto found_ok_skb; if (csk->wr_credits && skb_queue_len(&csk->txq) && chtls_push_frames(csk, csk->wr_credits == csk->wr_max_credits)) sk->sk_write_space(sk); if (copied >= target && !READ_ONCE(sk->sk_backlog.tail)) break; if (copied) { if (sk->sk_err || sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) break; if (sk->sk_err) { copied = sock_error(sk); break; } if (sk->sk_shutdown & RCV_SHUTDOWN) break; if (sk->sk_state == TCP_CLOSE) { copied = -ENOTCONN; break; } if (!timeo) { copied = -EAGAIN; break; } if (signal_pending(current)) { copied = sock_intr_errno(timeo); break; } } if (READ_ONCE(sk->sk_backlog.tail)) { release_sock(sk); lock_sock(sk); chtls_cleanup_rbuf(sk, copied); continue; } if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); sk_wait_data(sk, &timeo, NULL); continue; found_ok_skb: if (!skb->len) { chtls_kfree_skb(sk, skb); if (!copied && !timeo) { copied = -EAGAIN; break; } if (copied < target) continue; break; } offset = tp->copied_seq - ULP_SKB_CB(skb)->seq; avail = skb->len - offset; if (len < avail) avail = len; if (unlikely(tp->urg_data)) { u32 urg_offset = tp->urg_seq - tp->copied_seq; if (urg_offset < avail) { if (urg_offset) { avail = urg_offset; } else if (!sock_flag(sk, SOCK_URGINLINE)) { tp->copied_seq++; offset++; avail--; if (!avail) goto skip_copy; } } } if (likely(!(flags & MSG_TRUNC))) { if (skb_copy_datagram_msg(skb, offset, msg, avail)) { if (!copied) { copied = -EFAULT; break; } } } tp->copied_seq += avail; copied += avail; len -= avail; skip_copy: if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) tp->urg_data = 0; if (avail + offset >= skb->len) { chtls_free_skb(sk, skb); buffers_freed++; if (copied >= target && !skb_peek(&sk->sk_receive_queue)) break; } } while (len > 0); if (buffers_freed) chtls_cleanup_rbuf(sk, copied); release_sock(sk); 
return copied; }