/*
 * Copyright (c) 2018 Chelsio Communications, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Written by: Atul Gupta (atul.gupta@chelsio.com)
 */
#include <linux/module.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/tcp.h>
#include <linux/sched/signal.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/if_vlan.h>
/*
 * State transitions and actions for close. Note that if we are in SYN_SENT
 * we remain in that state as we cannot control a connection while it's in
 * SYN_SENT; such connections are allowed to establish and are then aborted.
 */
static unsigned char new_state[16] = {
        /* current state:     new state:      action: */
        /* (Invalid)       */ TCP_CLOSE,
        /* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
        /* TCP_SYN_SENT    */ TCP_SYN_SENT,
        /* TCP_SYN_RECV    */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
        /* TCP_FIN_WAIT1   */ TCP_FIN_WAIT1,
        /* TCP_FIN_WAIT2   */ TCP_FIN_WAIT2,
        /* TCP_TIME_WAIT   */ TCP_CLOSE,
        /* TCP_CLOSE       */ TCP_CLOSE,
        /* TCP_CLOSE_WAIT  */ TCP_LAST_ACK | TCP_ACTION_FIN,
        /* TCP_LAST_ACK    */ TCP_LAST_ACK,
        /* TCP_LISTEN      */ TCP_CLOSE,
        /* TCP_CLOSING     */ TCP_CLOSING,
};
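
/*
 * Allocate and initialize a chtls offload socket: set up its kref, TX and
 * TLS receive queues, and preallocate the skb cached for control messages
 * (txdata_skb_cache).
 */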
static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev)
        struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC);

        csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC);
        if (!csk->txdata_skb_cache) {

        kref_init(&csk->kref);
        skb_queue_head_init(&csk->txq);
        csk->wr_skb_head = NULL;
        csk->wr_skb_tail = NULL;
        csk->tlshws.txkey = -1;
        csk->tlshws.rxkey = -1;
        csk->tlshws.mfs = TLS_MFS;
        skb_queue_head_init(&csk->tlshws.sk_recv_queue);
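
/* kref release callback for a chtls_sock; runs once the last reference is dropped. */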
static void chtls_sock_release(struct kref *ref)
        struct chtls_sock *csk =
                container_of(ref, struct chtls_sock, kref);
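
/*
 * Pick the egress net_device for a listening IPv4 socket: the first adapter
 * port unless the socket is bound to a specific local address, and the real
 * device underneath any VLAN.
 */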
static struct net_device *chtls_ipv4_netdev(struct chtls_dev *cdev,
        struct net_device *ndev = cdev->ports[0];

        if (likely(!inet_sk(sk)->inet_rcv_saddr))

        ndev = ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr);

        if (is_vlan_dev(ndev))
                return vlan_dev_real_dev(ndev);
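
/*
 * Mirror the TCP options negotiated for this offloaded connection (MSS,
 * timestamps, window scaling) from the hardware-supplied option word into
 * the kernel's tcp_sock.
 */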
static void assign_rxopt(struct sock *sk, unsigned int opt)
        const struct chtls_dev *cdev;
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(sk);

        tp->tcp_header_len = sizeof(struct tcphdr);
        tp->rx_opt.mss_clamp = cdev->mtus[TCPOPT_MSS_G(opt)] - 40;
        tp->mss_cache = tp->rx_opt.mss_clamp;
        tp->rx_opt.tstamp_ok = TCPOPT_TSTAMP_G(opt);
        tp->rx_opt.snd_wscale = TCPOPT_SACK_G(opt);
        tp->rx_opt.wscale_ok = TCPOPT_WSCALE_OK_G(opt);
        SND_WSCALE(tp) = TCPOPT_SND_WSCALE_G(opt);
        if (!tp->rx_opt.wscale_ok)
                tp->rx_opt.rcv_wscale = 0;
        if (tp->rx_opt.tstamp_ok) {
                tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
                tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED;
        } else if (csk->opt2 & TSTAMPS_EN_F) {
                csk->opt2 &= ~TSTAMPS_EN_F;
                csk->mtu_idx = TCPOPT_MSS_G(opt);

static void chtls_purge_receive_queue(struct sock *sk)
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                skb_dst_set(skb, (void *)NULL);

static void chtls_purge_write_queue(struct sock *sk)
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

        while ((skb = __skb_dequeue(&csk->txq))) {
                sk->sk_wmem_queued -= skb->truesize;

static void chtls_purge_recv_queue(struct sock *sk)
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct chtls_hws *tlsk = &csk->tlshws;

        while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) {
                skb_dst_set(skb, NULL);

static void abort_arp_failure(void *handle, struct sk_buff *skb)
        struct cpl_abort_req *req = cplhdr(skb);
        struct chtls_dev *cdev;

        cdev = (struct chtls_dev *)handle;
        req->cmd = CPL_ABORT_NO_RST;
        cxgb4_ofld_send(cdev->lldi->ports[0], skb);
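
/*
 * Return an skb for a control message: reuse the socket's cached control
 * skb when it is neither shared nor cloned, otherwise allocate a fresh one
 * (GFP_KERNEL | __GFP_NOFAIL, so the allocation cannot fail).
 */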
static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len)
        if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) {
                refcount_add(2, &skb->users);

        skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
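
/* Build a CPL_ABORT_REQ work request for the connection and send or defer it. */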
static void chtls_send_abort(struct sock *sk, int mode, struct sk_buff *skb)
        struct cpl_abort_req *req;
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(sk);

        skb = alloc_ctrl_skb(csk->txdata_skb_cache, sizeof(*req));
        req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
        INIT_TP_WR_CPL(req, CPL_ABORT_REQ, csk->tid);
        skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
        req->rsvd0 = htonl(tp->snd_nxt);
        req->rsvd1 = !csk_flag_nochk(csk, CSK_TX_DATA_SENT);
        t4_set_arp_err_handler(skb, csk->cdev, abort_arp_failure);
        send_or_defer(sk, tp, skb, mode == CPL_ABORT_SEND_RST);

static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb)
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

        if (unlikely(csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) ||

        if (sk->sk_state == TCP_SYN_RECV)
                csk_set_flag(csk, CSK_RST_ABORTED);

        if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
                struct tcp_sock *tp = tcp_sk(sk);

                if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
                        WARN_ONCE(1, "send tx flowc error");
                csk_set_flag(csk, CSK_TX_DATA_SENT);

        csk_set_flag(csk, CSK_ABORT_RPL_PENDING);
        chtls_purge_write_queue(sk);

        csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
        if (sk->sk_state != TCP_SYN_RECV)
                chtls_send_abort(sk, mode, skb);

static void release_tcp_port(struct sock *sk)
        if (inet_csk(sk)->icsk_bind_hash)

static void tcp_uncork(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);

        if (tp->nonagle & TCP_NAGLE_CORK) {
                tp->nonagle &= ~TCP_NAGLE_CORK;
                chtls_tcp_push(sk, 0);
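
/*
 * Queue a CPL_CLOSE_CON_REQ work request on the socket's TX queue and, once
 * past SYN_SENT, push the pending frames to the adapter.
 */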
static void chtls_close_conn(struct sock *sk)
        struct cpl_close_con_req *req;
        struct chtls_sock *csk;

        len = roundup(sizeof(struct cpl_close_con_req), 16);
        csk = rcu_dereference_sk_user_data(sk);

        skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
        req = (struct cpl_close_con_req *)__skb_put(skb, len);

        req->wr.wr_hi = htonl(FW_WR_OP_V(FW_TP_WR) |
                              FW_WR_IMMDLEN_V(sizeof(*req) -
        req->wr.wr_mid = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)) |
                               FW_WR_FLOWID_V(tid));
        OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));

        skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
        if (sk->sk_state != TCP_SYN_SENT)
                chtls_push_frames(csk, 1);

/*
 * Perform a state transition during close and return the actions indicated
 * for the transition. Do not make this function inline, the main reason
 * it exists at all is to avoid multiple inlining of tcp_set_state.
 */
static int make_close_transition(struct sock *sk)
        int next = (int)new_state[sk->sk_state];

        tcp_set_state(sk, next & TCP_STATE_MASK);
        return next & TCP_ACTION_FIN;
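
/*
 * Close an offloaded connection.  Receive queues are purged first; the
 * connection is then either reset immediately (unread data, SYN_SENT or a
 * zero-linger socket) or closed gracefully via a CPL close request, after
 * which sk_stream_wait_close() waits for the close to complete.
 */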
void chtls_close(struct sock *sk, long timeout)
        int data_lost, prev_state;
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(sk);

        sk->sk_shutdown |= SHUTDOWN_MASK;

        data_lost = skb_queue_len(&sk->sk_receive_queue);
        data_lost |= skb_queue_len(&csk->tlshws.sk_recv_queue);
        chtls_purge_recv_queue(sk);
        chtls_purge_receive_queue(sk);

        if (sk->sk_state == TCP_CLOSE) {
        } else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
                chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
                release_tcp_port(sk);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                sk->sk_prot->disconnect(sk, 0);
        } else if (make_close_transition(sk)) {
                chtls_close_conn(sk);

        sk_stream_wait_close(sk, timeout);

        prev_state = sk->sk_state;

        if (prev_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)

        if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
            !csk_flag(sk, CSK_ABORT_SHUTDOWN)) {
                skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
                chtls_send_reset(sk, CPL_ABORT_SEND_RST, skb);

        if (sk->sk_state == TCP_CLOSE)
                inet_csk_destroy_sock(sk);
/*
 * Wait until a socket enters one of the given states.
 */
static int wait_for_states(struct sock *sk, unsigned int states)
        DECLARE_WAITQUEUE(wait, current);
        struct socket_wq _sk_wq;

        /*
         * We want this to work even when there's no associated struct socket.
         * In that case we provide a temporary wait_queue_head_t.
         */
        init_waitqueue_head(&_sk_wq.wait);
        _sk_wq.fasync_list = NULL;
        init_rcu_head_on_stack(&_sk_wq.rcu);
        RCU_INIT_POINTER(sk->sk_wq, &_sk_wq);

        add_wait_queue(sk_sleep(sk), &wait);
        while (!sk_in_state(sk, states)) {
                if (!current_timeo) {
                if (signal_pending(current)) {
                        err = sock_intr_errno(current_timeo);
                set_current_state(TASK_UNINTERRUPTIBLE);
                if (!sk_in_state(sk, states))
                        current_timeo = schedule_timeout(current_timeo);
                __set_current_state(TASK_RUNNING);
        remove_wait_queue(sk_sleep(sk), &wait);

        if (rcu_dereference(sk->sk_wq) == &_sk_wq)
int chtls_disconnect(struct sock *sk, int flags)
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(sk);
        chtls_purge_recv_queue(sk);
        chtls_purge_receive_queue(sk);
        chtls_purge_write_queue(sk);

        if (sk->sk_state != TCP_CLOSE) {
                sk->sk_err = ECONNRESET;
                chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
                err = wait_for_states(sk, TCPF_CLOSE);

        chtls_purge_recv_queue(sk);
        chtls_purge_receive_queue(sk);
        tp->max_window = 0xFFFF << (tp->rx_opt.snd_wscale);
        return tcp_disconnect(sk, flags);

#define SHUTDOWN_ELIGIBLE_STATE (TCPF_ESTABLISHED | \
                                 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)
void chtls_shutdown(struct sock *sk, int how)
        if ((how & SEND_SHUTDOWN) &&
            sk_in_state(sk, SHUTDOWN_ELIGIBLE_STATE) &&
            make_close_transition(sk))
                chtls_close_conn(sk);

void chtls_destroy_sock(struct sock *sk)
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(sk);
        chtls_purge_recv_queue(sk);
        csk->ulp_mode = ULP_MODE_NONE;
        chtls_purge_write_queue(sk);
        kref_put(&csk->kref, chtls_sock_release);
        sk->sk_prot = &tcp_prot;
        sk->sk_prot->destroy(sk);

static void reset_listen_child(struct sock *child)
        struct chtls_sock *csk = rcu_dereference_sk_user_data(child);

        skb = alloc_ctrl_skb(csk->txdata_skb_cache,
                             sizeof(struct cpl_abort_req));
        chtls_send_reset(child, CPL_ABORT_SEND_RST, skb);
        INC_ORPHAN_COUNT(child);
        if (child->sk_state == TCP_CLOSE)
                inet_csk_destroy_sock(child);

static void chtls_disconnect_acceptq(struct sock *listen_sk)
        struct request_sock **pprev;

        pprev = ACCEPT_QUEUE(listen_sk);
                struct request_sock *req = *pprev;

                if (req->rsk_ops == &chtls_rsk_ops) {
                        struct sock *child = req->sk;

                        *pprev = req->dl_next;
                        sk_acceptq_removed(listen_sk);
                        release_tcp_port(child);
                        reset_listen_child(child);
                        bh_unlock_sock(child);
                pprev = &req->dl_next;

static int listen_hashfn(const struct sock *sk)
        return ((unsigned long)sk >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
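
/*
 * Record a listening socket in the per-device listen hash table, keyed by
 * the hash of the socket pointer computed above.
 */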
static struct listen_info *listen_hash_add(struct chtls_dev *cdev,
        struct listen_info *p = kmalloc(sizeof(*p), GFP_KERNEL);
        int key = listen_hashfn(sk);

        spin_lock(&cdev->listen_lock);
        p->next = cdev->listen_hash_tab[key];
        cdev->listen_hash_tab[key] = p;
        spin_unlock(&cdev->listen_lock);

static int listen_hash_find(struct chtls_dev *cdev,
        struct listen_info *p;

        key = listen_hashfn(sk);

        spin_lock(&cdev->listen_lock);
        for (p = cdev->listen_hash_tab[key]; p; p = p->next)
        spin_unlock(&cdev->listen_lock);

static int listen_hash_del(struct chtls_dev *cdev,
        struct listen_info *p, **prev;

        key = listen_hashfn(sk);
        prev = &cdev->listen_hash_tab[key];

        spin_lock(&cdev->listen_lock);
        for (p = *prev; p; prev = &p->next, p = p->next)
        spin_unlock(&cdev->listen_lock);

static void cleanup_syn_rcv_conn(struct sock *child, struct sock *parent)
        struct request_sock *req;
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(child);
        req = csk->passive_reap_next;

        reqsk_queue_removed(&inet_csk(parent)->icsk_accept_queue, req);
        __skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
        chtls_reqsk_free(req);
        csk->passive_reap_next = NULL;

static void chtls_reset_synq(struct listen_ctx *listen_ctx)
        struct sock *listen_sk = listen_ctx->lsk;

        while (!skb_queue_empty(&listen_ctx->synq)) {
                struct chtls_sock *csk =
                        container_of((struct synq *)__skb_dequeue
                                (&listen_ctx->synq), struct chtls_sock, synq);
                struct sock *child = csk->sk;

                cleanup_syn_rcv_conn(child, listen_sk);
                release_tcp_port(child);
                reset_listen_child(child);
                bh_unlock_sock(child);
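
/*
 * Start hardware listening for an IPv4 socket: resolve the egress port,
 * allocate a server TID (stid), record the listener in the hash table and
 * ask the adapter to create the hardware server.
 */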
int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk)
        struct net_device *ndev;
        struct listen_ctx *ctx;
        struct adapter *adap;
        struct port_info *pi;

        if (sk->sk_family != PF_INET)

        ndev = chtls_ipv4_netdev(cdev, sk);

        pi = netdev_priv(ndev);

        if (!(adap->flags & FULL_INIT_DONE))

        if (listen_hash_find(cdev, sk) >= 0)   /* already have it */

        ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);

        __module_get(THIS_MODULE);

        ctx->state = T4_LISTEN_START_PENDING;
        skb_queue_head_init(&ctx->synq);

        stid = cxgb4_alloc_stid(cdev->tids, sk->sk_family, ctx);

        if (!listen_hash_add(cdev, sk, stid))

        ret = cxgb4_create_server(ndev, stid,
                                  inet_sk(sk)->inet_rcv_saddr,
                                  inet_sk(sk)->inet_sport, 0,
                                  cdev->lldi->rxq_ids[0]);
        ret = net_xmit_errno(ret);

        listen_hash_del(cdev, sk);
        cxgb4_free_stid(cdev->tids, stid, sk->sk_family);

        module_put(THIS_MODULE);

void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk)
        struct listen_ctx *listen_ctx;

        stid = listen_hash_del(cdev, sk);

        listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
        chtls_reset_synq(listen_ctx);

        cxgb4_remove_server(cdev->lldi->ports[0], stid,
                            cdev->lldi->rxq_ids[0], 0);
        chtls_disconnect_acceptq(sk);

static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_pass_open_rpl *rpl = cplhdr(skb) + RSS_HDR;
        unsigned int stid = GET_TID(rpl);
        struct listen_ctx *listen_ctx;

        listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
                return CPL_RET_BUF_DONE;

        if (listen_ctx->state == T4_LISTEN_START_PENDING) {
                listen_ctx->state = T4_LISTEN_STARTED;
                return CPL_RET_BUF_DONE;

        if (rpl->status != CPL_ERR_NONE) {
                pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
                return CPL_RET_BUF_DONE;

        cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
        sock_put(listen_ctx->lsk);
        module_put(THIS_MODULE);

static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_close_listsvr_rpl *rpl = cplhdr(skb) + RSS_HDR;
        struct listen_ctx *listen_ctx;

        data = lookup_stid(cdev->tids, stid);
        listen_ctx = (struct listen_ctx *)data;

        if (rpl->status != CPL_ERR_NONE) {
                pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
                return CPL_RET_BUF_DONE;

        cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
        sock_put(listen_ctx->lsk);
        module_put(THIS_MODULE);
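
/*
 * Release the per-connection hardware and driver resources: the cached
 * control skb, the L2T entry and the connection TID.
 */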
static void chtls_release_resources(struct sock *sk)
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct chtls_dev *cdev = csk->cdev;
        unsigned int tid = csk->tid;
        struct tid_info *tids;

        kfree_skb(csk->txdata_skb_cache);
        csk->txdata_skb_cache = NULL;

        if (csk->l2t_entry) {
                cxgb4_l2t_release(csk->l2t_entry);
                csk->l2t_entry = NULL;

        cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family);

static void chtls_conn_done(struct sock *sk)
        if (sock_flag(sk, SOCK_DEAD))
                chtls_purge_receive_queue(sk);
        sk_wakeup_sleepers(sk, 0);

static void do_abort_syn_rcv(struct sock *child, struct sock *parent)
        /*
         * If the server is still open we clean up the child connection,
         * otherwise the server already did the clean up as it was purging
         * its SYN queue and the skb was just sitting in its backlog.
         */
        if (likely(parent->sk_state == TCP_LISTEN)) {
                cleanup_syn_rcv_conn(child, parent);
                /* Without the below call to sock_orphan,
                 * we leak the socket resource with syn_flood test
                 * as inet_csk_destroy_sock will not be called
                 * in tcp_done since SOCK_DEAD flag is not set.
                 * Kernel handles this differently where new socket is
                 * created only after 3 way handshake is done.
                 */
                percpu_counter_inc((child)->sk_prot->orphan_count);
                chtls_release_resources(child);
                chtls_conn_done(child);
        if (csk_flag(child, CSK_RST_ABORTED)) {
                chtls_release_resources(child);
                chtls_conn_done(child);

static void pass_open_abort(struct sock *child, struct sock *parent,
        do_abort_syn_rcv(child, parent);

static void bl_pass_open_abort(struct sock *lsk, struct sk_buff *skb)
        pass_open_abort(skb->sk, lsk, skb);
static void chtls_pass_open_arp_failure(struct sock *sk,
        const struct request_sock *oreq;
        struct chtls_sock *csk;
        struct chtls_dev *cdev;

        csk = rcu_dereference_sk_user_data(sk);

        /*
         * If the connection is being aborted due to the parent listening
         * socket going away there's nothing to do, the ABORT_REQ will close
         * the connection.
         */
        if (csk_flag(sk, CSK_ABORT_RPL_PENDING)) {

        oreq = csk->passive_reap_next;
        data = lookup_stid(cdev->tids, oreq->ts_recent);
        parent = ((struct listen_ctx *)data)->lsk;

        bh_lock_sock(parent);
        if (!sock_owned_by_user(parent)) {
                pass_open_abort(sk, parent, skb);

        BLOG_SKB_CB(skb)->backlog_rcv = bl_pass_open_abort;
        __sk_add_backlog(parent, skb);

        bh_unlock_sock(parent);

static void chtls_accept_rpl_arp_failure(void *handle,
        struct sock *sk = (struct sock *)handle;

        process_cpl_msg(chtls_pass_open_arp_failure, sk, skb);
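
/*
 * Choose the advertised MSS for a passive open from the peer's MSS option,
 * the path MTU and any user-imposed cap, and translate it into the adapter's
 * aligned MTU table.
 */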
static unsigned int chtls_select_mss(const struct chtls_sock *csk,
                                     struct cpl_pass_accept_req *req)
        struct chtls_dev *cdev;
        struct dst_entry *dst;
        unsigned int tcpoptsz;
        unsigned int iphdrsz;
        unsigned int mtu_idx;

        mss = ntohs(req->tcpopt.mss);
        dst = __sk_dst_get(sk);

        iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr);
        if (req->tcpopt.tstamp)
                tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4);

        tp->advmss = dst_metric_advmss(dst);
        if (USER_MSS(tp) && tp->advmss > USER_MSS(tp))
                tp->advmss = USER_MSS(tp);
        if (tp->advmss > pmtu - iphdrsz)
                tp->advmss = pmtu - iphdrsz;
        if (mss && tp->advmss > mss)

        tp->advmss = cxgb4_best_aligned_mtu(cdev->lldi->mtus,
                                            tp->advmss - tcpoptsz,
        tp->advmss -= iphdrsz;

        inet_csk(sk)->icsk_pmtu_cookie = pmtu;

static unsigned int select_rcv_wnd(struct chtls_sock *csk)
        wnd = tcp_full_space(sk);

        if (wnd < MIN_RCV_WND)

        rcvwnd = MAX_RCV_WND;

        csk_set_flag(csk, CSK_UPDATE_RCV_WND);
        return min(wnd, rcvwnd);

static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp)
        if (space > MAX_RCV_WND)
        if (win_clamp && win_clamp < space)

        while (wscale < 14 && (65535 << wscale) < space)
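
/*
 * Build and send the CPL_PASS_ACCEPT_RPL that completes a hardware passive
 * open, encoding the MSS index, window scale, ULP_MODE_TLS and the
 * timestamp/SACK/ECN/congestion-control options in opt0/opt2.
 */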
static void chtls_pass_accept_rpl(struct sk_buff *skb,
                                  struct cpl_pass_accept_req *req,
        struct cpl_t5_pass_accept_rpl *rpl5;
        struct cxgb4_lld_info *lldi;
        const struct tcphdr *tcph;
        const struct tcp_sock *tp;
        struct chtls_sock *csk;

        csk = sk->sk_user_data;
        lldi = csk->cdev->lldi;
        len = roundup(sizeof(*rpl5), 16);

        rpl5 = __skb_put_zero(skb, len);
        INIT_TP_WR(rpl5, tid);

        OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
        csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)),

        opt0 = TCAM_BYPASS_F |
               WND_SCALE_V((tp)->rx_opt.rcv_wscale) |
               MSS_IDX_V(csk->mtu_idx) |
               L2T_IDX_V(csk->l2t_entry->idx) |
               NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) |
               TX_CHAN_V(csk->tx_chan) |
               SMAC_SEL_V(csk->smac_idx) |
               DSCP_V(csk->tos >> 2) |
               ULP_MODE_V(ULP_MODE_TLS) |
               RCV_BUFSIZ_V(min(tp->rcv_wnd >> 10, RCV_BUFSIZ_M));

        opt2 = RX_CHANNEL_V(0) |
               RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid);

        if (!is_t5(lldi->adapter_type))
                opt2 |= RX_FC_DISABLE_F;
        if (req->tcpopt.tstamp)
                opt2 |= TSTAMPS_EN_F;
        if (req->tcpopt.sack)

        hlen = ntohl(req->hdr_len);
        tcph = (struct tcphdr *)((u8 *)(req + 1) +
               T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen));
        if (tcph->ece && tcph->cwr)
                opt2 |= CCTRL_ECN_V(1);
        opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO);
        opt2 |= T5_OPT_2_VALID_F;
        rpl5->opt0 = cpu_to_be64(opt0);
        rpl5->opt2 = cpu_to_be32(opt2);
        rpl5->iss = cpu_to_be32((prandom_u32() & ~7UL) - 1);
        set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->port_id);
        t4_set_arp_err_handler(skb, sk, chtls_accept_rpl_arp_failure);
        cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);

static void inet_inherit_port(struct inet_hashinfo *hash_info,
                              struct sock *lsk, struct sock *newsk)
        __inet_inherit_port(lsk, newsk);

static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb)
        if (skb->protocol) {

        BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
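
/*
 * Create the child socket for a hardware passive open: clone the listener,
 * resolve the route, neighbour and L2T entry, and fill in the chtls_sock
 * with the TX/RX queues, channel, SMAC index and receive window the adapter
 * will use for this connection.
 */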
static struct sock *chtls_recv_sock(struct sock *lsk,
                                    struct request_sock *oreq,
                                    const struct cpl_pass_accept_req *req,
                                    struct chtls_dev *cdev)
        const struct tcphdr *tcph;
        struct inet_sock *newinet;
        const struct iphdr *iph;
        struct net_device *ndev;
        struct chtls_sock *csk;
        struct dst_entry *dst;
        struct neighbour *n;
        struct tcp_sock *tp;

        iph = (const struct iphdr *)network_hdr;
        newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb);

        dst = inet_csk_route_child_sock(lsk, newsk, oreq);

        tcph = (struct tcphdr *)(iph + 1);
        n = dst_neigh_lookup(dst, &iph->saddr);

        port_id = cxgb4_port_idx(ndev);

        csk = chtls_sock_create(cdev);

        csk->l2t_entry = cxgb4_l2t_get(cdev->lldi->l2t, n, ndev, 0);
        if (!csk->l2t_entry)

        newsk->sk_user_data = csk;
        newsk->sk_backlog_rcv = chtls_backlog_rcv;

        newinet = inet_sk(newsk);

        newinet->inet_daddr = iph->saddr;
        newinet->inet_rcv_saddr = iph->daddr;
        newinet->inet_saddr = iph->daddr;

        oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
        sk_setup_caps(newsk, dst);

        csk->passive_reap_next = oreq;
        csk->tx_chan = cxgb4_port_chan(ndev);
        csk->port_id = port_id;
        csk->egress_dev = ndev;
        csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
        csk->ulp_mode = ULP_MODE_TLS;
        step = cdev->lldi->nrxq / cdev->lldi->nchan;
        csk->rss_qid = cdev->lldi->rxq_ids[port_id * step];
        rxq_idx = port_id * step;
        csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx :
        csk->sndbuf = newsk->sk_sndbuf;
        csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi->adapter_type,
                                         cxgb4_port_viid(ndev));
        tp->rcv_wnd = select_rcv_wnd(csk);
        RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
        inet_inherit_port(&tcp_hashinfo, lsk, newsk);
        csk_set_flag(csk, CSK_CONN_INLINE);
        bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */

        chtls_sock_release(&csk->kref);

        inet_csk_prepare_forced_close(newsk);

        chtls_reqsk_free(oreq);
/*
 * Populate a TID_RELEASE WR. The skb must be already properly sized.
 */
static void mk_tid_release(struct sk_buff *skb,
                           unsigned int chan, unsigned int tid)
        struct cpl_tid_release *req;

        len = roundup(sizeof(struct cpl_tid_release), 16);
        req = (struct cpl_tid_release *)__skb_put(skb, len);
        memset(req, 0, len);
        set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
        INIT_TP_WR_CPL(req, CPL_TID_RELEASE, tid);

static int chtls_get_module(struct sock *sk)
        struct inet_connection_sock *icsk = inet_csk(sk);

        if (!try_module_get(icsk->icsk_ulp_ops->owner))
static void chtls_pass_accept_request(struct sock *sk,
                                      struct sk_buff *skb)
        struct cpl_t5_pass_accept_rpl *rpl;
        struct cpl_pass_accept_req *req;
        struct listen_ctx *listen_ctx;
        struct request_sock *oreq;
        struct sk_buff *reply_skb;
        struct chtls_sock *csk;
        struct chtls_dev *cdev;
        struct tcphdr *tcph;

        req = cplhdr(skb) + RSS_HDR;
        cdev = BLOG_SKB_CB(skb)->cdev;
        newsk = lookup_tid(cdev->tids, tid);
        stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
                pr_info("tid (%d) already in use\n", tid);

        len = roundup(sizeof(*rpl), 16);
        reply_skb = alloc_skb(len, GFP_ATOMIC);
                cxgb4_remove_tid(cdev->tids, 0, tid, sk->sk_family);

        if (sk->sk_state != TCP_LISTEN)
        if (inet_csk_reqsk_queue_is_full(sk))
        if (sk_acceptq_is_full(sk))

        oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true);

        oreq->rsk_rcv_wnd = 0;
        oreq->rsk_window_clamp = 0;
        oreq->cookie_ts = 0;
        oreq->ts_recent = 0;

        eh = (struct ethhdr *)(req + 1);
        iph = (struct iphdr *)(eh + 1);
        if (iph->version != 0x4)

        network_hdr = (void *)(eh + 1);
        tcph = (struct tcphdr *)(iph + 1);

        tcp_rsk(oreq)->tfo_listener = false;
        tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq);
        chtls_set_req_port(oreq, tcph->source, tcph->dest);
        inet_rsk(oreq)->ecn_ok = 0;
        chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
        if (req->tcpopt.wsf <= 14) {
                inet_rsk(oreq)->wscale_ok = 1;
                inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
        inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if;

        newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);

        if (chtls_get_module(newsk))
        inet_csk_reqsk_queue_added(sk);
        reply_skb->sk = newsk;
        chtls_install_cpl_ops(newsk);
        cxgb4_insert_tid(cdev->tids, newsk, tid, newsk->sk_family);
        csk = rcu_dereference_sk_user_data(newsk);
        listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
        csk->listen_ctx = listen_ctx;
        __skb_queue_tail(&listen_ctx->synq, (struct sk_buff *)&csk->synq);
        chtls_pass_accept_rpl(reply_skb, req, tid);

        chtls_reqsk_free(oreq);

        mk_tid_release(reply_skb, 0, tid);
        cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);

/*
 * Handle a CPL_PASS_ACCEPT_REQ message.
 */
static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_pass_accept_req *req = cplhdr(skb) + RSS_HDR;
        struct listen_ctx *ctx;

        stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));

        data = lookup_stid(cdev->tids, stid);

        ctx = (struct listen_ctx *)data;

        if (unlikely(tid >= cdev->tids->ntids)) {
                pr_info("passive open TID %u too large\n", tid);

        BLOG_SKB_CB(skb)->cdev = cdev;
        process_cpl_msg(chtls_pass_accept_request, lsk, skb);
/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
        struct tcp_sock *tp = tcp_sk(sk);

        tp->pushed_seq = snd_isn;
        tp->write_seq = snd_isn;
        tp->snd_nxt = snd_isn;
        tp->snd_una = snd_isn;
        inet_sk(sk)->inet_id = tp->write_seq ^ jiffies;
        assign_rxopt(sk, opt);

        if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
                tp->rcv_wup -= tp->rcv_wnd - (RCV_BUFSIZ_M << 10);

        tcp_set_state(sk, TCP_ESTABLISHED);

static void chtls_abort_conn(struct sock *sk, struct sk_buff *skb)
        struct sk_buff *abort_skb;

        abort_skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
        chtls_send_reset(sk, CPL_ABORT_SEND_RST, abort_skb);

static struct sock *reap_list;
static DEFINE_SPINLOCK(reap_list_lock);

/*
 * Process the reap list.
 */
DECLARE_TASK_FUNC(process_reap_list, task_param)
        spin_lock_bh(&reap_list_lock);
                struct sock *sk = reap_list;
                struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

                reap_list = csk->passive_reap_next;
                csk->passive_reap_next = NULL;
                spin_unlock(&reap_list_lock);

                chtls_abort_conn(sk, NULL);

                if (sk->sk_state == TCP_CLOSE)
                        inet_csk_destroy_sock(sk);

                spin_lock(&reap_list_lock);
        spin_unlock_bh(&reap_list_lock);

static DECLARE_WORK(reap_task, process_reap_list);

static void add_to_reap_list(struct sock *sk)
        struct chtls_sock *csk = sk->sk_user_data;

        release_tcp_port(sk); /* release the port immediately */

        spin_lock(&reap_list_lock);
        csk->passive_reap_next = reap_list;
        if (!csk->passive_reap_next)
                schedule_work(&reap_task);
        spin_unlock(&reap_list_lock);

static void add_pass_open_to_parent(struct sock *child, struct sock *lsk,
                                    struct chtls_dev *cdev)
        struct request_sock *oreq;
        struct chtls_sock *csk;

        if (lsk->sk_state != TCP_LISTEN)

        csk = child->sk_user_data;
        oreq = csk->passive_reap_next;
        csk->passive_reap_next = NULL;

        reqsk_queue_removed(&inet_csk(lsk)->icsk_accept_queue, oreq);
        __skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);

        if (sk_acceptq_is_full(lsk)) {
                chtls_reqsk_free(oreq);
                add_to_reap_list(child);

        refcount_set(&oreq->rsk_refcnt, 1);
        inet_csk_reqsk_queue_add(lsk, oreq, child);
        lsk->sk_data_ready(lsk);

static void bl_add_pass_open_to_parent(struct sock *lsk, struct sk_buff *skb)
        struct sock *child = skb->sk;

        add_pass_open_to_parent(child, lsk, BLOG_SKB_CB(skb)->cdev);

static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_pass_establish *req = cplhdr(skb) + RSS_HDR;
        struct chtls_sock *csk;
        struct sock *lsk, *sk;

        hwtid = GET_TID(req);
        sk = lookup_tid(cdev->tids, hwtid);
                return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);

        if (unlikely(sock_owned_by_user(sk))) {

        csk = sk->sk_user_data;
        csk->wr_max_credits = 64;
        csk->wr_credits = 64;
        csk->wr_unacked = 0;
        make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt));
        stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
        sk->sk_state_change(sk);
        if (unlikely(sk->sk_socket))
                sk_wake_async(sk, 0, POLL_OUT);

        data = lookup_stid(cdev->tids, stid);
        lsk = ((struct listen_ctx *)data)->lsk;

        if (unlikely(skb_queue_empty(&csk->listen_ctx->synq))) {
                /* removed from synq */
                bh_unlock_sock(lsk);

        if (likely(!sock_owned_by_user(lsk))) {
                add_pass_open_to_parent(sk, lsk, cdev);

        BLOG_SKB_CB(skb)->cdev = cdev;
        BLOG_SKB_CB(skb)->backlog_rcv =
                                bl_add_pass_open_to_parent;
        __sk_add_backlog(lsk, skb);

        bh_unlock_sock(lsk);
/*
 * Handle receipt of an urgent pointer.
 */
static void handle_urg_ptr(struct sock *sk, u32 urg_seq)
        struct tcp_sock *tp = tcp_sk(sk);

        if (tp->urg_data && !after(urg_seq, tp->urg_seq))
                return; /* duplicate pointer */

        if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
            !sock_flag(sk, SOCK_URGINLINE) &&
            tp->copied_seq != tp->rcv_nxt) {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb && tp->copied_seq - ULP_SKB_CB(skb)->seq >= skb->len)
                        chtls_free_skb(sk, skb);

        tp->urg_data = TCP_URG_NOTYET;
        tp->urg_seq = urg_seq;

static void check_sk_callbacks(struct chtls_sock *csk)
        struct sock *sk = csk->sk;

        if (unlikely(sk->sk_user_data &&
                     !csk_flag_nochk(csk, CSK_CALLBACKS_CHKD)))
                csk_set_flag(csk, CSK_CALLBACKS_CHKD);

/*
 * Handles Rx data that arrives in a state where the socket isn't accepting
 * new data.
 */
static void handle_excess_rx(struct sock *sk, struct sk_buff *skb)
        if (!csk_flag(sk, CSK_ABORT_SHUTDOWN))
                chtls_abort_conn(sk, skb);
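
/*
 * Deliver a plain (non-TLS) CPL_RX_DATA payload to the socket: record the
 * sequence number and PUSH flag, handle urgent data, advance rcv_nxt and
 * queue the skb on sk_receive_queue before waking the reader.
 */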
static void chtls_recv_data(struct sock *sk, struct sk_buff *skb)
        struct cpl_rx_data *hdr = cplhdr(skb) + RSS_HDR;
        struct chtls_sock *csk;
        struct tcp_sock *tp;

        csk = rcu_dereference_sk_user_data(sk);

        if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
                handle_excess_rx(sk, skb);

        ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
        ULP_SKB_CB(skb)->psh = hdr->psh;
        skb_ulp_mode(skb) = ULP_MODE_NONE;

        skb_reset_transport_header(skb);
        __skb_pull(skb, sizeof(*hdr) + RSS_HDR);
        __skb_trim(skb, ntohs(hdr->len));

        if (unlikely(hdr->urg))
                handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg));
        if (unlikely(tp->urg_data == TCP_URG_NOTYET &&
                     tp->urg_seq - tp->rcv_nxt < skb->len))
                tp->urg_data = TCP_URG_VALID |
                               skb->data[tp->urg_seq - tp->rcv_nxt];

        if (unlikely(hdr->dack_mode != csk->delack_mode)) {
                csk->delack_mode = hdr->dack_mode;
                csk->delack_seq = tp->rcv_nxt;

        tcp_hdr(skb)->fin = 0;
        tp->rcv_nxt += skb->len;

        __skb_queue_tail(&sk->sk_receive_queue, skb);

        if (!sock_flag(sk, SOCK_DEAD)) {
                check_sk_callbacks(csk);
                sk->sk_data_ready(sk);

static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_rx_data *req = cplhdr(skb) + RSS_HDR;
        unsigned int hwtid = GET_TID(req);

        sk = lookup_tid(cdev->tids, hwtid);
        if (unlikely(!sk)) {
                pr_err("can't find conn. for hwtid %u.\n", hwtid);

        skb_dst_set(skb, NULL);
        process_cpl_msg(chtls_recv_data, sk, skb);

static void chtls_recv_pdu(struct sock *sk, struct sk_buff *skb)
        struct cpl_tls_data *hdr = cplhdr(skb);
        struct chtls_sock *csk;
        struct chtls_hws *tlsk;
        struct tcp_sock *tp;

        csk = rcu_dereference_sk_user_data(sk);
        tlsk = &csk->tlshws;

        if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
                handle_excess_rx(sk, skb);

        ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
        ULP_SKB_CB(skb)->flags = 0;
        skb_ulp_mode(skb) = ULP_MODE_TLS;

        skb_reset_transport_header(skb);
        __skb_pull(skb, sizeof(*hdr));
                   CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd)));

        if (unlikely(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq -
                     tp->rcv_nxt < skb->len))
                tp->urg_data = TCP_URG_VALID |
                               skb->data[tp->urg_seq - tp->rcv_nxt];

        tcp_hdr(skb)->fin = 0;
        tlsk->pldlen = CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd));
        __skb_queue_tail(&tlsk->sk_recv_queue, skb);

static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_tls_data *req = cplhdr(skb);
        unsigned int hwtid = GET_TID(req);

        sk = lookup_tid(cdev->tids, hwtid);
        if (unlikely(!sk)) {
                pr_err("can't find conn. for hwtid %u.\n", hwtid);

        skb_dst_set(skb, NULL);
        process_cpl_msg(chtls_recv_pdu, sk, skb);

static void chtls_set_hdrlen(struct sk_buff *skb, unsigned int nlen)
        struct tlsrx_cmp_hdr *tls_cmp_hdr = cplhdr(skb);

        skb->hdr_len = ntohs((__force __be16)tls_cmp_hdr->length);
        tls_cmp_hdr->length = ntohs((__force __be16)nlen);
static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb)
        struct tlsrx_cmp_hdr *tls_hdr_pkt;
        struct cpl_rx_tls_cmp *cmp_cpl;
        struct sk_buff *skb_rec;
        struct chtls_sock *csk;
        struct chtls_hws *tlsk;
        struct tcp_sock *tp;

        cmp_cpl = cplhdr(skb);
        csk = rcu_dereference_sk_user_data(sk);
        tlsk = &csk->tlshws;

        ULP_SKB_CB(skb)->seq = ntohl(cmp_cpl->seq);
        ULP_SKB_CB(skb)->flags = 0;

        skb_reset_transport_header(skb);
        __skb_pull(skb, sizeof(*cmp_cpl));
        tls_hdr_pkt = (struct tlsrx_cmp_hdr *)skb->data;
        if (tls_hdr_pkt->res_to_mac_error & TLSRX_HDR_PKT_ERROR_M)
                tls_hdr_pkt->type = CONTENT_TYPE_ERROR;

        __skb_trim(skb, TLS_HEADER_LENGTH);
                   CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length));

        ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_HDR;
        skb_rec = __skb_dequeue(&tlsk->sk_recv_queue);
                __skb_queue_tail(&sk->sk_receive_queue, skb);

        chtls_set_hdrlen(skb, tlsk->pldlen);
        __skb_queue_tail(&sk->sk_receive_queue, skb);
        __skb_queue_tail(&sk->sk_receive_queue, skb_rec);

        if (!sock_flag(sk, SOCK_DEAD)) {
                check_sk_callbacks(csk);
                sk->sk_data_ready(sk);

static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_rx_tls_cmp *req = cplhdr(skb);
        unsigned int hwtid = GET_TID(req);

        sk = lookup_tid(cdev->tids, hwtid);
        if (unlikely(!sk)) {
                pr_err("can't find conn. for hwtid %u.\n", hwtid);

        skb_dst_set(skb, NULL);
        process_cpl_msg(chtls_rx_hdr, sk, skb);

static void chtls_timewait(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);

        tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
        tcp_time_wait(sk, TCP_TIME_WAIT, 0);

static void chtls_peer_close(struct sock *sk, struct sk_buff *skb)
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

        sk->sk_shutdown |= RCV_SHUTDOWN;
        sock_set_flag(sk, SOCK_DONE);

        switch (sk->sk_state) {
        case TCP_ESTABLISHED:
                tcp_set_state(sk, TCP_CLOSE_WAIT);
                tcp_set_state(sk, TCP_CLOSING);
                chtls_release_resources(sk);
                if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
                        chtls_conn_done(sk);
                pr_info("cpl_peer_close in bad state %d\n", sk->sk_state);

        if (!sock_flag(sk, SOCK_DEAD)) {
                sk->sk_state_change(sk);
                /* Do not send POLL_HUP for half duplex close. */
                if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
                    sk->sk_state == TCP_CLOSE)
                        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
                        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);

static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb)
        struct cpl_close_con_rpl *rpl = cplhdr(skb) + RSS_HDR;
        struct chtls_sock *csk;
        struct tcp_sock *tp;

        csk = rcu_dereference_sk_user_data(sk);

        tp->snd_una = ntohl(rpl->snd_nxt) - 1; /* exclude FIN */

        switch (sk->sk_state) {
                chtls_release_resources(sk);
                if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
                        chtls_conn_done(sk);
                chtls_release_resources(sk);
                chtls_conn_done(sk);
                tcp_set_state(sk, TCP_FIN_WAIT2);
                sk->sk_shutdown |= SEND_SHUTDOWN;

                if (!sock_flag(sk, SOCK_DEAD))
                        sk->sk_state_change(sk);
                else if (tcp_sk(sk)->linger2 < 0 &&
                         !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN))
                        chtls_abort_conn(sk, skb);
                pr_info("close_con_rpl in bad state %d\n", sk->sk_state);
static struct sk_buff *get_cpl_skb(struct sk_buff *skb,
                                   size_t len, gfp_t gfp)
        if (likely(!skb_is_nonlinear(skb) && !skb_cloned(skb))) {
                WARN_ONCE(skb->len < len, "skb alloc error");
                __skb_trim(skb, len);

        skb = alloc_skb(len, gfp);
                __skb_put(skb, len);

static void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid,
        struct cpl_abort_rpl *rpl = cplhdr(skb);

        INIT_TP_WR_CPL(rpl, CPL_ABORT_RPL, tid);

static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_abort_req_rss *req = cplhdr(skb);
        struct sk_buff *reply_skb;

        reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
                              GFP_KERNEL | __GFP_NOFAIL);
        __skb_put(reply_skb, sizeof(struct cpl_abort_rpl));
        set_abort_rpl_wr(reply_skb, GET_TID(req),
                         (req->status & CPL_ABORT_NO_RST));
        set_wr_txq(reply_skb, CPL_PRIORITY_DATA, req->status >> 1);
        cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);

static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
                           struct chtls_dev *cdev, int status, int queue)
        struct cpl_abort_req_rss *req = cplhdr(skb);
        struct sk_buff *reply_skb;
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(sk);

        reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),

                req->status = (queue << 1);
                send_defer_abort_rpl(cdev, skb);

        set_abort_rpl_wr(reply_skb, GET_TID(req), status);

        set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
        if (csk_conn_inline(csk)) {
                struct l2t_entry *e = csk->l2t_entry;

                if (e && sk->sk_state != TCP_SYN_RECV) {
                        cxgb4_l2t_send(csk->egress_dev, reply_skb, e);

        cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);

/*
 * Add an skb to the deferred skb queue for processing from process context.
 */
static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
                           defer_handler_t handler)
        DEFERRED_SKB_CB(skb)->handler = handler;
        spin_lock_bh(&cdev->deferq.lock);
        __skb_queue_tail(&cdev->deferq, skb);
        if (skb_queue_len(&cdev->deferq) == 1)
                schedule_work(&cdev->deferq_task);
        spin_unlock_bh(&cdev->deferq.lock);

static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
                                 struct chtls_dev *cdev,
                                 int status, int queue)
        struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
        struct sk_buff *reply_skb;
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(sk);

        reply_skb = get_cpl_skb(skb, sizeof(struct cpl_abort_rpl), gfp_any());

                req->status = (queue << 1) | status;
                t4_defer_reply(skb, cdev, send_defer_abort_rpl);

        set_abort_rpl_wr(reply_skb, tid, status);
        set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
        if (csk_conn_inline(csk)) {
                struct l2t_entry *e = csk->l2t_entry;

                if (e && sk->sk_state != TCP_SYN_RECV) {
                        cxgb4_l2t_send(csk->egress_dev, reply_skb, e);

        cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);

/*
 * This is run from a listener's backlog to abort a child connection in
 * SYN_RCV state (i.e., one on the listener's SYN queue).
 */
static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
        struct chtls_sock *csk;

        csk = rcu_dereference_sk_user_data(child);
        queue = csk->txq_idx;

        do_abort_syn_rcv(child, lsk);
        send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
                       CPL_ABORT_NO_RST, queue);
static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
        const struct request_sock *oreq;
        struct listen_ctx *listen_ctx;
        struct chtls_sock *csk;
        struct chtls_dev *cdev;

        csk = sk->sk_user_data;
        oreq = csk->passive_reap_next;

        ctx = lookup_stid(cdev->tids, oreq->ts_recent);

        listen_ctx = (struct listen_ctx *)ctx;
        psk = listen_ctx->lsk;

        if (!sock_owned_by_user(psk)) {
                int queue = csk->txq_idx;

                do_abort_syn_rcv(sk, psk);
                send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);

        BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;
        __sk_add_backlog(psk, skb);

        bh_unlock_sock(psk);

static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
        const struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
        struct chtls_sock *csk = sk->sk_user_data;
        int rst_status = CPL_ABORT_NO_RST;
        int queue = csk->txq_idx;

        if (is_neg_adv(req->status)) {
                if (sk->sk_state == TCP_SYN_RECV)
                        chtls_set_tcb_tflag(sk, 0, 0);

        csk_reset_flag(csk, CSK_ABORT_REQ_RCVD);

        if (!csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) &&
            !csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
                struct tcp_sock *tp = tcp_sk(sk);

                if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
                        WARN_ONCE(1, "send_tx_flowc error");
                csk_set_flag(csk, CSK_TX_DATA_SENT);

        csk_set_flag(csk, CSK_ABORT_SHUTDOWN);

        if (!csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
                sk->sk_err = ETIMEDOUT;

                if (!sock_flag(sk, SOCK_DEAD))
                        sk->sk_error_report(sk);

                if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))

                chtls_release_resources(sk);
                chtls_conn_done(sk);

        chtls_send_abort_rpl(sk, skb, csk->cdev, rst_status, queue);

static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb)
        struct cpl_abort_rpl_rss *rpl = cplhdr(skb) + RSS_HDR;
        struct chtls_sock *csk;
        struct chtls_dev *cdev;

        csk = rcu_dereference_sk_user_data(sk);

        if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
                csk_reset_flag(csk, CSK_ABORT_RPL_PENDING);
                if (!csk_flag_nochk(csk, CSK_ABORT_REQ_RCVD)) {
                        if (sk->sk_state == TCP_SYN_SENT) {
                                cxgb4_remove_tid(cdev->tids,
                        chtls_release_resources(sk);
                        chtls_conn_done(sk);

static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR;
        void (*fn)(struct sock *sk, struct sk_buff *skb);
        unsigned int hwtid = GET_TID(req);

        opcode = ((const struct rss_header *)cplhdr(skb))->opcode;

        sk = lookup_tid(cdev->tids, hwtid);

        case CPL_PEER_CLOSE:
                fn = chtls_peer_close;
        case CPL_CLOSE_CON_RPL:
                fn = chtls_close_con_rpl;
        case CPL_ABORT_REQ_RSS:
                fn = chtls_abort_req_rss;
        case CPL_ABORT_RPL_RSS:
                fn = chtls_abort_rpl_rss;

        process_cpl_msg(fn, sk, skb);
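
/* Pop the head of the socket's pending work-request list. */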
static struct sk_buff *dequeue_wr(struct sock *sk)
        struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
        struct sk_buff *skb = csk->wr_skb_head;

        /* Don't bother clearing the tail */
        csk->wr_skb_head = WR_SKB_CB(skb)->next_wr;
        WR_SKB_CB(skb)->next_wr = NULL;
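
/*
 * Handle a CPL_FW4_ACK TX completion: return the acknowledged work-request
 * credits, advance snd_una when the sequence number is valid, and push any
 * queued frames once credits are available again.
 */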
static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb)
        struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR;
        struct chtls_sock *csk = sk->sk_user_data;
        struct tcp_sock *tp = tcp_sk(sk);
        u32 credits = hdr->credits;

        snd_una = ntohl(hdr->snd_una);
        csk->wr_credits += credits;

        if (csk->wr_unacked > csk->wr_max_credits - csk->wr_credits)
                csk->wr_unacked = csk->wr_max_credits - csk->wr_credits;

                struct sk_buff *pskb = csk->wr_skb_head;

                if (unlikely(!pskb)) {
                        if (csk->wr_nondata)
                                csk->wr_nondata -= credits;

                csum = (__force u32)pskb->csum;
                if (unlikely(credits < csum)) {
                        pskb->csum = (__force __wsum)(csum - credits);

        if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) {
                if (unlikely(before(snd_una, tp->snd_una))) {

                if (tp->snd_una != snd_una) {
                        tp->snd_una = snd_una;
                        tp->rcv_tstamp = tcp_time_stamp(tp);
                        if (tp->snd_una == tp->snd_nxt &&
                            !csk_flag_nochk(csk, CSK_TX_FAILOVER))
                                csk_reset_flag(csk, CSK_TX_WAIT_IDLE);

        if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_CH) {
                unsigned int fclen16 = roundup(failover_flowc_wr_len, 16);

                csk->wr_credits -= fclen16;
                csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
                csk_reset_flag(csk, CSK_TX_FAILOVER);

        if (skb_queue_len(&csk->txq) && chtls_push_frames(csk, 0))
                sk->sk_write_space(sk);

static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb)
        struct cpl_fw4_ack *rpl = cplhdr(skb) + RSS_HDR;
        unsigned int hwtid = GET_TID(rpl);

        sk = lookup_tid(cdev->tids, hwtid);
        if (unlikely(!sk)) {
                pr_err("can't find conn. for hwtid %u.\n", hwtid);

        process_cpl_msg(chtls_rx_ack, sk, skb);

chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = {
        [CPL_PASS_OPEN_RPL]     = chtls_pass_open_rpl,
        [CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl,
        [CPL_PASS_ACCEPT_REQ]   = chtls_pass_accept_req,
        [CPL_PASS_ESTABLISH]    = chtls_pass_establish,
        [CPL_RX_DATA]           = chtls_rx_data,
        [CPL_TLS_DATA]          = chtls_rx_pdu,
        [CPL_RX_TLS_CMP]        = chtls_rx_cmp,
        [CPL_PEER_CLOSE]        = chtls_conn_cpl,
        [CPL_CLOSE_CON_RPL]     = chtls_conn_cpl,
        [CPL_ABORT_REQ_RSS]     = chtls_conn_cpl,
        [CPL_ABORT_RPL_RSS]     = chtls_conn_cpl,
        [CPL_FW4_ACK]           = chtls_wr_ack,