/*
 * Copyright (c) 2018 Chelsio Communications, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Written by: Atul Gupta (atul.gupta@chelsio.com)
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/sched/signal.h>
#include <linux/kallsyms.h>
#include <linux/kprobes.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/dst.h>

#include "chtls.h"
#include "chtls_cm.h"
/*
 * State transitions and actions for close.  Note that if we are in SYN_SENT
 * we remain in that state as we cannot control a connection while it's in
 * SYN_SENT; such connections are allowed to establish and are then aborted.
 */
static unsigned char new_state[16] = {
	/* current state:     new state:    action: */
	/* (Invalid)       */ TCP_CLOSE,
	/* TCP_ESTABLISHED */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
	/* TCP_SYN_SENT    */ TCP_SYN_SENT,
	/* TCP_SYN_RECV    */ TCP_FIN_WAIT1 | TCP_ACTION_FIN,
	/* TCP_FIN_WAIT1   */ TCP_FIN_WAIT1,
	/* TCP_FIN_WAIT2   */ TCP_FIN_WAIT2,
	/* TCP_TIME_WAIT   */ TCP_CLOSE,
	/* TCP_CLOSE       */ TCP_CLOSE,
	/* TCP_CLOSE_WAIT  */ TCP_LAST_ACK | TCP_ACTION_FIN,
	/* TCP_LAST_ACK    */ TCP_LAST_ACK,
	/* TCP_LISTEN      */ TCP_CLOSE,
	/* TCP_CLOSING     */ TCP_CLOSING,
};
static struct chtls_sock *chtls_sock_create(struct chtls_dev *cdev)
{
	struct chtls_sock *csk = kzalloc(sizeof(*csk), GFP_ATOMIC);

	if (!csk)
		return NULL;

	csk->txdata_skb_cache = alloc_skb(TXDATA_SKB_LEN, GFP_ATOMIC);
	if (!csk->txdata_skb_cache) {
		kfree(csk);
		return NULL;
	}

	kref_init(&csk->kref);
	csk->cdev = cdev;
	skb_queue_head_init(&csk->txq);
	csk->wr_skb_head = NULL;
	csk->wr_skb_tail = NULL;
	csk->tlshws.txkey = -1;
	csk->tlshws.rxkey = -1;
	csk->tlshws.mfs = TLS_MFS;
	skb_queue_head_init(&csk->tlshws.sk_recv_queue);
	return csk;
}
static void chtls_sock_release(struct kref *ref)
{
	struct chtls_sock *csk =
		container_of(ref, struct chtls_sock, kref);

	kfree(csk);
}
static struct net_device *chtls_ipv4_netdev(struct chtls_dev *cdev,
					    struct sock *sk)
{
	struct net_device *ndev = cdev->ports[0];

	if (likely(!inet_sk(sk)->inet_rcv_saddr))
		return ndev;

	ndev = ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr);
	if (!ndev)
		return NULL;

	if (is_vlan_dev(ndev))
		return vlan_dev_real_dev(ndev);
	return ndev;
}
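
/*
 * Derive the connection's TCP receive options (MSS, timestamps, window
 * scaling) from the hardware-supplied option word.  The TCPOPT_*_G()
 * accessors extract the individual fields; mss_clamp is reduced by 40
 * bytes to account for the fixed IPv4 and TCP headers.
 */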
static void assign_rxopt(struct sock *sk, unsigned int opt)
{
	const struct chtls_dev *cdev;
	struct chtls_sock *csk;
	struct tcp_sock *tp;

	csk = rcu_dereference_sk_user_data(sk);
	tp = tcp_sk(sk);
	cdev = csk->cdev;

	tp->tcp_header_len           = sizeof(struct tcphdr);
	tp->rx_opt.mss_clamp         = cdev->mtus[TCPOPT_MSS_G(opt)] - 40;
	tp->mss_cache                = tp->rx_opt.mss_clamp;
	tp->rx_opt.tstamp_ok         = TCPOPT_TSTAMP_G(opt);
	tp->rx_opt.snd_wscale        = TCPOPT_SACK_G(opt);
	tp->rx_opt.wscale_ok         = TCPOPT_WSCALE_OK_G(opt);
	SND_WSCALE(tp)               = TCPOPT_SND_WSCALE_G(opt);
	if (!tp->rx_opt.wscale_ok)
		tp->rx_opt.rcv_wscale = 0;
	if (tp->rx_opt.tstamp_ok) {
		tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
		tp->rx_opt.mss_clamp -= TCPOLEN_TSTAMP_ALIGNED;
	} else if (csk->opt2 & TSTAMPS_EN_F) {
		csk->opt2 &= ~TSTAMPS_EN_F;
		csk->mtu_idx = TCPOPT_MSS_G(opt);
	}
}
static void chtls_purge_receive_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		skb_dst_set(skb, (void *)NULL);
		kfree_skb(skb);
	}
}
static void chtls_purge_write_queue(struct sock *sk)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&csk->txq))) {
		sk->sk_wmem_queued -= skb->truesize;
		__kfree_skb(skb);
	}
}
static void chtls_purge_recv_queue(struct sock *sk)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_hws *tlsk = &csk->tlshws;
	struct sk_buff *skb;

	while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) {
		skb_dst_set(skb, NULL);
		kfree_skb(skb);
	}
}
static void abort_arp_failure(void *handle, struct sk_buff *skb)
{
	struct cpl_abort_req *req = cplhdr(skb);
	struct chtls_dev *cdev;

	cdev = (struct chtls_dev *)handle;
	req->cmd = CPL_ABORT_NO_RST;
	cxgb4_ofld_send(cdev->lldi->ports[0], skb);
}
static struct sk_buff *alloc_ctrl_skb(struct sk_buff *skb, int len)
{
	if (likely(skb && !skb_shared(skb) && !skb_cloned(skb))) {
		__skb_trim(skb, 0);
		refcount_add(2, &skb->users);
	} else {
		skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
	}
	return skb;
}
static void chtls_send_abort(struct sock *sk, int mode, struct sk_buff *skb)
{
	struct cpl_abort_req *req;
	struct chtls_sock *csk;
	struct tcp_sock *tp;

	csk = rcu_dereference_sk_user_data(sk);
	tp = tcp_sk(sk);

	if (!skb)
		skb = alloc_ctrl_skb(csk->txdata_skb_cache, sizeof(*req));

	req = (struct cpl_abort_req *)skb_put(skb, sizeof(*req));
	INIT_TP_WR_CPL(req, CPL_ABORT_REQ, csk->tid);
	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
	req->rsvd0 = htonl(tp->snd_nxt);
	req->rsvd1 = !csk_flag_nochk(csk, CSK_TX_DATA_SENT);
	req->cmd = mode;
	t4_set_arp_err_handler(skb, csk->cdev, abort_arp_failure);
	send_or_defer(sk, tp, skb, mode == CPL_ABORT_SEND_RST);
}
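
/*
 * Reset a connection.  Make sure a flow-control work request has gone out
 * first if no TX data was ever sent, mark the abort reply as pending, flush
 * the TX queue and then issue the ABORT_REQ (unless the connection is still
 * in SYN_RECV, in which case the abort is only recorded).
 */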
static void chtls_send_reset(struct sock *sk, int mode, struct sk_buff *skb)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

	if (unlikely(csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) ||
		     !csk->cdev)) {
		if (sk->sk_state == TCP_SYN_RECV)
			csk_set_flag(csk, CSK_RST_ABORTED);
		goto out;
	}

	if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
			WARN_ONCE(1, "send tx flowc error");
		csk_set_flag(csk, CSK_TX_DATA_SENT);
	}

	csk_set_flag(csk, CSK_ABORT_RPL_PENDING);
	chtls_purge_write_queue(sk);

	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);
	if (sk->sk_state != TCP_SYN_RECV)
		chtls_send_abort(sk, mode, skb);
	else
		goto out;

	return;
out:
	kfree_skb(skb);
}
static void release_tcp_port(struct sock *sk)
{
	if (inet_csk(sk)->icsk_bind_hash)
		inet_put_port(sk);
}
static void tcp_uncork(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (tp->nonagle & TCP_NAGLE_CORK) {
		tp->nonagle &= ~TCP_NAGLE_CORK;
		chtls_tcp_push(sk, 0);
	}
}
static void chtls_close_conn(struct sock *sk)
{
	struct cpl_close_con_req *req;
	struct chtls_sock *csk;
	struct sk_buff *skb;
	unsigned int tid;
	unsigned int len;

	len = roundup(sizeof(struct cpl_close_con_req), 16);
	csk = rcu_dereference_sk_user_data(sk);
	tid = csk->tid;

	skb = alloc_skb(len, GFP_KERNEL | __GFP_NOFAIL);
	req = (struct cpl_close_con_req *)__skb_put(skb, len);
	memset(req, 0, len);
	req->wr.wr_hi = htonl(FW_WR_OP_V(FW_TP_WR) |
			      FW_WR_IMMDLEN_V(sizeof(*req) -
					      sizeof(req->wr)));
	req->wr.wr_mid = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)) |
			       FW_WR_FLOWID_V(tid));

	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));

	skb_entail(sk, skb, ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
	if (sk->sk_state != TCP_SYN_SENT)
		chtls_push_frames(csk, 1);
}
/*
 * Perform a state transition during close and return the actions indicated
 * for the transition.  Do not make this function inline, the main reason
 * it exists at all is to avoid multiple inlining of tcp_set_state.
 */
static int make_close_transition(struct sock *sk)
{
	int next = (int)new_state[sk->sk_state];

	tcp_set_state(sk, next & TCP_STATE_MASK);
	return next & TCP_ACTION_FIN;
}
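
/*
 * Close an offloaded connection.  Depending on the socket state this either
 * resets the connection immediately (receive data was lost or we are still
 * in SYN_SENT), performs a non-lingering disconnect, or starts the FIN
 * handshake via chtls_close_conn().
 */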
void chtls_close(struct sock *sk, long timeout)
{
	int data_lost, prev_state;
	struct chtls_sock *csk;

	csk = rcu_dereference_sk_user_data(sk);

	lock_sock(sk);
	sk->sk_shutdown |= SHUTDOWN_MASK;

	data_lost = skb_queue_len(&sk->sk_receive_queue);
	data_lost |= skb_queue_len(&csk->tlshws.sk_recv_queue);
	chtls_purge_recv_queue(sk);
	chtls_purge_receive_queue(sk);

	if (sk->sk_state == TCP_CLOSE) {
		goto wait;
	} else if (data_lost || sk->sk_state == TCP_SYN_SENT) {
		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
		release_tcp_port(sk);
		goto unlock;
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		sk->sk_prot->disconnect(sk, 0);
	} else if (make_close_transition(sk)) {
		chtls_close_conn(sk);
	}

wait:
	if (timeout)
		sk_stream_wait_close(sk, timeout);

unlock:
	prev_state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);

	release_sock(sk);

	local_bh_disable();
	bh_lock_sock(sk);

	if (prev_state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
		goto out;

	if (sk->sk_state == TCP_FIN_WAIT2 && tcp_sk(sk)->linger2 < 0 &&
	    !csk_flag(sk, CSK_ABORT_SHUTDOWN)) {
		struct sk_buff *skb;

		skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
		if (skb)
			chtls_send_reset(sk, CPL_ABORT_SEND_RST, skb);
	}

	if (sk->sk_state == TCP_CLOSE)
		inet_csk_destroy_sock(sk);

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
/*
 * Wait until a socket enters one of the given states.
 */
static int wait_for_states(struct sock *sk, unsigned int states)
{
	DECLARE_WAITQUEUE(wait, current);
	struct socket_wq _sk_wq;
	long current_timeo;
	int err = 0;

	current_timeo = 200;

	/*
	 * We want this to work even when there's no associated struct socket.
	 * In that case we provide a temporary wait_queue_head_t.
	 */
	if (!sk->sk_wq) {
		init_waitqueue_head(&_sk_wq.wait);
		_sk_wq.fasync_list = NULL;
		init_rcu_head_on_stack(&_sk_wq.rcu);
		RCU_INIT_POINTER(sk->sk_wq, &_sk_wq);
	}

	add_wait_queue(sk_sleep(sk), &wait);
	while (!sk_in_state(sk, states)) {
		if (!current_timeo) {
			err = -EBUSY;
			break;
		}
		if (signal_pending(current)) {
			err = sock_intr_errno(current_timeo);
			break;
		}
		set_current_state(TASK_UNINTERRUPTIBLE);
		release_sock(sk);
		if (!sk_in_state(sk, states))
			current_timeo = schedule_timeout(current_timeo);
		__set_current_state(TASK_RUNNING);
		lock_sock(sk);
	}
	remove_wait_queue(sk_sleep(sk), &wait);

	if (rcu_dereference(sk->sk_wq) == &_sk_wq)
		sk->sk_wq = NULL;
	return err;
}
int chtls_disconnect(struct sock *sk, int flags)
{
	struct chtls_sock *csk;
	struct tcp_sock *tp;
	int err;

	tp = tcp_sk(sk);
	csk = rcu_dereference_sk_user_data(sk);
	chtls_purge_recv_queue(sk);
	chtls_purge_receive_queue(sk);
	chtls_purge_write_queue(sk);

	if (sk->sk_state != TCP_CLOSE) {
		sk->sk_err = ECONNRESET;
		chtls_send_reset(sk, CPL_ABORT_SEND_RST, NULL);
		err = wait_for_states(sk, TCPF_CLOSE);
		if (err)
			return err;
	}
	chtls_purge_recv_queue(sk);
	chtls_purge_receive_queue(sk);
	tp->max_window = 0xFFFF << (tp->rx_opt.snd_wscale);
	return tcp_disconnect(sk, flags);
}
#define SHUTDOWN_ELIGIBLE_STATE (TCPF_ESTABLISHED | \
				 TCPF_SYN_RECV | TCPF_CLOSE_WAIT)
void chtls_shutdown(struct sock *sk, int how)
{
	if ((how & SEND_SHUTDOWN) &&
	    sk_in_state(sk, SHUTDOWN_ELIGIBLE_STATE) &&
	    make_close_transition(sk))
		chtls_close_conn(sk);
}
void chtls_destroy_sock(struct sock *sk)
{
	struct chtls_sock *csk;

	csk = rcu_dereference_sk_user_data(sk);
	chtls_purge_recv_queue(sk);
	csk->ulp_mode = ULP_MODE_NONE;
	chtls_purge_write_queue(sk);
	kref_put(&csk->kref, chtls_sock_release);
	sk->sk_prot = &tcp_prot;
	sk->sk_prot->destroy(sk);
}
static void reset_listen_child(struct sock *child)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(child);
	struct sk_buff *skb;

	skb = alloc_ctrl_skb(csk->txdata_skb_cache,
			     sizeof(struct cpl_abort_req));

	chtls_send_reset(child, CPL_ABORT_SEND_RST, skb);
	sock_orphan(child);
	INC_ORPHAN_COUNT(child);
	if (child->sk_state == TCP_CLOSE)
		inet_csk_destroy_sock(child);
}
static void chtls_disconnect_acceptq(struct sock *listen_sk)
{
	struct request_sock **pprev;

	pprev = ACCEPT_QUEUE(listen_sk);
	while (*pprev) {
		struct request_sock *req = *pprev;

		if (req->rsk_ops == &chtls_rsk_ops) {
			struct sock *child = req->sk;

			*pprev = req->dl_next;
			sk_acceptq_removed(listen_sk);
			chtls_reqsk_free(req);
			sock_hold(child);
			local_bh_disable();
			bh_lock_sock(child);
			release_tcp_port(child);
			reset_listen_child(child);
			bh_unlock_sock(child);
			local_bh_enable();
			sock_put(child);
		} else {
			pprev = &req->dl_next;
		}
	}
}
static int listen_hashfn(const struct sock *sk)
{
	return ((unsigned long)sk >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
}
static struct listen_info *listen_hash_add(struct chtls_dev *cdev,
					   struct sock *sk,
					   unsigned int stid)
{
	struct listen_info *p = kmalloc(sizeof(*p), GFP_KERNEL);

	if (p) {
		int key = listen_hashfn(sk);

		p->sk = sk;
		p->stid = stid;
		spin_lock(&cdev->listen_lock);
		p->next = cdev->listen_hash_tab[key];
		cdev->listen_hash_tab[key] = p;
		spin_unlock(&cdev->listen_lock);
	}
	return p;
}
static int listen_hash_find(struct chtls_dev *cdev,
			    struct sock *sk)
{
	struct listen_info *p;
	int stid = -1;
	int key;

	key = listen_hashfn(sk);

	spin_lock(&cdev->listen_lock);
	for (p = cdev->listen_hash_tab[key]; p; p = p->next)
		if (p->sk == sk) {
			stid = p->stid;
			break;
		}
	spin_unlock(&cdev->listen_lock);
	return stid;
}
static int listen_hash_del(struct chtls_dev *cdev,
			   struct sock *sk)
{
	struct listen_info *p, **prev;
	int stid = -1;
	int key;

	key = listen_hashfn(sk);
	prev = &cdev->listen_hash_tab[key];

	spin_lock(&cdev->listen_lock);
	for (p = *prev; p; prev = &p->next, p = p->next)
		if (p->sk == sk) {
			stid = p->stid;
			*prev = p->next;
			kfree(p);
			break;
		}
	spin_unlock(&cdev->listen_lock);
	return stid;
}
static void cleanup_syn_rcv_conn(struct sock *child, struct sock *parent)
{
	struct request_sock *req;
	struct chtls_sock *csk;

	csk = rcu_dereference_sk_user_data(child);
	req = csk->passive_reap_next;

	reqsk_queue_removed(&inet_csk(parent)->icsk_accept_queue, req);
	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);
	chtls_reqsk_free(req);
	csk->passive_reap_next = NULL;
}
static void chtls_reset_synq(struct listen_ctx *listen_ctx)
{
	struct sock *listen_sk = listen_ctx->lsk;

	while (!skb_queue_empty(&listen_ctx->synq)) {
		struct chtls_sock *csk =
			container_of((struct synq *)__skb_dequeue
				(&listen_ctx->synq), struct chtls_sock, synq);
		struct sock *child = csk->sk;

		cleanup_syn_rcv_conn(child, listen_sk);
		sock_hold(child);
		local_bh_disable();
		bh_lock_sock(child);
		release_tcp_port(child);
		reset_listen_child(child);
		bh_unlock_sock(child);
		local_bh_enable();
		sock_put(child);
	}
}
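
/*
 * Start offloading a listening socket: allocate a server TID (stid) from the
 * LLD, remember the listener in the driver's listen hash table and ask the
 * adapter to create the hardware server.  Errors unwind in reverse order.
 */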
int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk)
{
	struct net_device *ndev;
	struct listen_ctx *ctx;
	struct adapter *adap;
	struct port_info *pi;
	int stid;
	int ret;

	if (sk->sk_family != PF_INET)
		return -EAGAIN;

	ndev = chtls_ipv4_netdev(cdev, sk);
	if (!ndev)
		return -EBADF;

	pi = netdev_priv(ndev);
	adap = pi->adapter;
	if (!(adap->flags & FULL_INIT_DONE))
		return -EBADF;

	if (listen_hash_find(cdev, sk) >= 0)   /* already have it */
		return -EADDRINUSE;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	__module_get(THIS_MODULE);
	ctx->lsk = sk;
	ctx->cdev = cdev;
	ctx->state = T4_LISTEN_START_PENDING;
	skb_queue_head_init(&ctx->synq);

	stid = cxgb4_alloc_stid(cdev->tids, sk->sk_family, ctx);
	if (stid < 0)
		goto free_ctx;

	sock_hold(sk);
	if (!listen_hash_add(cdev, sk, stid))
		goto free_stid;

	ret = cxgb4_create_server(ndev, stid,
				  inet_sk(sk)->inet_rcv_saddr,
				  inet_sk(sk)->inet_sport, 0,
				  cdev->lldi->rxq_ids[0]);
	if (ret > 0)
		ret = net_xmit_errno(ret);
	if (ret)
		goto del_hash;
	return 0;
del_hash:
	listen_hash_del(cdev, sk);
free_stid:
	cxgb4_free_stid(cdev->tids, stid, sk->sk_family);
	sock_put(sk);
free_ctx:
	kfree(ctx);
	module_put(THIS_MODULE);
	return -EBADF;
}
void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk)
{
	struct listen_ctx *listen_ctx;
	int stid;

	stid = listen_hash_del(cdev, sk);
	if (stid < 0)
		return;

	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
	chtls_reset_synq(listen_ctx);

	cxgb4_remove_server(cdev->lldi->ports[0], stid,
			    cdev->lldi->rxq_ids[0], 0);
	chtls_disconnect_acceptq(sk);
}
static int chtls_pass_open_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_pass_open_rpl *rpl = cplhdr(skb) + RSS_HDR;
	unsigned int stid = GET_TID(rpl);
	struct listen_ctx *listen_ctx;

	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
	if (!listen_ctx)
		return CPL_RET_BUF_DONE;

	if (listen_ctx->state == T4_LISTEN_START_PENDING) {
		listen_ctx->state = T4_LISTEN_STARTED;
		return CPL_RET_BUF_DONE;
	}

	if (rpl->status != CPL_ERR_NONE) {
		pr_info("Unexpected PASS_OPEN_RPL status %u for STID %u\n",
			rpl->status, stid);
		return CPL_RET_BUF_DONE;
	}
	cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
	sock_put(listen_ctx->lsk);
	kfree(listen_ctx);
	module_put(THIS_MODULE);

	return 0;
}
static int chtls_close_listsrv_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb) + RSS_HDR;
	struct listen_ctx *listen_ctx;
	unsigned int stid;
	void *data;

	stid = GET_TID(rpl);
	data = lookup_stid(cdev->tids, stid);
	listen_ctx = (struct listen_ctx *)data;

	if (rpl->status != CPL_ERR_NONE) {
		pr_info("Unexpected CLOSE_LISTSRV_RPL status %u for STID %u\n",
			rpl->status, stid);
		return CPL_RET_BUF_DONE;
	}

	cxgb4_free_stid(cdev->tids, stid, listen_ctx->lsk->sk_family);
	sock_put(listen_ctx->lsk);
	kfree(listen_ctx);
	module_put(THIS_MODULE);

	return 0;
}
static void chtls_purge_wr_queue(struct sock *sk)
{
	struct sk_buff *skb;

	while ((skb = dequeue_wr(sk)) != NULL)
		kfree_skb(skb);
}
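
/*
 * Release the hardware and driver resources held by an offloaded connection:
 * the cached control skb, any unacknowledged work requests, the L2T entry
 * and finally the hardware TID itself.
 */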
static void chtls_release_resources(struct sock *sk)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_dev *cdev = csk->cdev;
	unsigned int tid = csk->tid;
	struct tid_info *tids;

	if (!cdev)
		return;

	tids = cdev->tids;
	kfree_skb(csk->txdata_skb_cache);
	csk->txdata_skb_cache = NULL;

	if (csk->wr_credits != csk->wr_max_credits) {
		chtls_purge_wr_queue(sk);
		chtls_reset_wr_list(csk);
	}

	if (csk->l2t_entry) {
		cxgb4_l2t_release(csk->l2t_entry);
		csk->l2t_entry = NULL;
	}

	cxgb4_remove_tid(tids, csk->port_id, tid, sk->sk_family);
	sock_put(sk);
}
static void chtls_conn_done(struct sock *sk)
{
	if (sock_flag(sk, SOCK_DEAD))
		chtls_purge_receive_queue(sk);
	sk_wakeup_sleepers(sk, 0);
	tcp_done(sk);
}
static void do_abort_syn_rcv(struct sock *child, struct sock *parent)
{
	/*
	 * If the server is still open we clean up the child connection,
	 * otherwise the server already did the clean up as it was purging
	 * its SYN queue and the skb was just sitting in its backlog.
	 */
	if (likely(parent->sk_state == TCP_LISTEN)) {
		cleanup_syn_rcv_conn(child, parent);
		/* Without the below call to sock_orphan,
		 * we leak the socket resource with syn_flood test
		 * as inet_csk_destroy_sock will not be called
		 * in tcp_done since SOCK_DEAD flag is not set.
		 * Kernel handles this differently where new socket is
		 * created only after 3 way handshake is done.
		 */
		sock_orphan(child);
		percpu_counter_inc((child)->sk_prot->orphan_count);
		chtls_release_resources(child);
		chtls_conn_done(child);
	} else {
		if (csk_flag(child, CSK_RST_ABORTED)) {
			chtls_release_resources(child);
			chtls_conn_done(child);
		}
	}
}
static void pass_open_abort(struct sock *child, struct sock *parent,
			    struct sk_buff *skb)
{
	do_abort_syn_rcv(child, parent);
	kfree_skb(skb);
}

static void bl_pass_open_abort(struct sock *lsk, struct sk_buff *skb)
{
	pass_open_abort(skb->sk, lsk, skb);
}
static void chtls_pass_open_arp_failure(struct sock *sk,
					struct sk_buff *skb)
{
	const struct request_sock *oreq;
	struct chtls_sock *csk;
	struct chtls_dev *cdev;
	struct sock *parent;
	void *data;

	csk = rcu_dereference_sk_user_data(sk);
	cdev = csk->cdev;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (csk_flag(sk, CSK_ABORT_RPL_PENDING)) {
		kfree_skb(skb);
		return;
	}

	oreq = csk->passive_reap_next;
	data = lookup_stid(cdev->tids, oreq->ts_recent);
	parent = ((struct listen_ctx *)data)->lsk;

	bh_lock_sock(parent);
	if (!sock_owned_by_user(parent)) {
		pass_open_abort(sk, parent, skb);
	} else {
		BLOG_SKB_CB(skb)->backlog_rcv = bl_pass_open_abort;
		__sk_add_backlog(parent, skb);
	}
	bh_unlock_sock(parent);
}
static void chtls_accept_rpl_arp_failure(void *handle,
					 struct sk_buff *skb)
{
	struct sock *sk = (struct sock *)handle;

	process_cpl_msg(chtls_pass_open_arp_failure, sk, skb);
}
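
/*
 * Pick the MTU index to advertise to the adapter for a passive open,
 * clamping the advertised MSS against the peer's MSS option, the path MTU
 * and any user-configured limit, with room reserved for TCP timestamps.
 */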
static unsigned int chtls_select_mss(const struct chtls_sock *csk,
				     unsigned int pmtu,
				     struct cpl_pass_accept_req *req)
{
	struct chtls_dev *cdev;
	struct dst_entry *dst;
	unsigned int tcpoptsz;
	unsigned int iphdrsz;
	unsigned int mtu_idx;
	struct tcp_sock *tp;
	unsigned int mss;
	struct sock *sk;

	mss = ntohs(req->tcpopt.mss);
	sk = csk->sk;
	dst = __sk_dst_get(sk);
	cdev = csk->cdev;
	tp = tcp_sk(sk);
	tcpoptsz = 0;

	iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr);
	if (req->tcpopt.tstamp)
		tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4);

	tp->advmss = dst_metric_advmss(dst);
	if (USER_MSS(tp) && tp->advmss > USER_MSS(tp))
		tp->advmss = USER_MSS(tp);
	if (tp->advmss > pmtu - iphdrsz)
		tp->advmss = pmtu - iphdrsz;
	if (mss && tp->advmss > mss)
		tp->advmss = mss;

	tp->advmss = cxgb4_best_aligned_mtu(cdev->lldi->mtus,
					    iphdrsz + tcpoptsz,
					    tp->advmss - tcpoptsz,
					    8, &mtu_idx);
	tp->advmss -= iphdrsz;

	inet_csk(sk)->icsk_pmtu_cookie = pmtu;
	return mtu_idx;
}
static unsigned int select_rcv_wnd(struct chtls_sock *csk)
{
	unsigned int rcvwnd;
	unsigned int wnd;
	struct sock *sk;

	sk = csk->sk;
	wnd = tcp_full_space(sk);

	if (wnd < MIN_RCV_WND)
		wnd = MIN_RCV_WND;

	rcvwnd = MAX_RCV_WND;

	csk_set_flag(csk, CSK_UPDATE_RCV_WND);
	return min(wnd, rcvwnd);
}
static unsigned int select_rcv_wscale(int space, int wscale_ok, int win_clamp)
{
	int wscale = 0;

	if (space > MAX_RCV_WND)
		space = MAX_RCV_WND;
	if (win_clamp && win_clamp < space)
		space = win_clamp;

	if (wscale_ok) {
		while (wscale < 14 && (65535 << wscale) < space)
			wscale++;
	}
	return wscale;
}
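
/*
 * Build and send the CPL_PASS_ACCEPT_RPL that tells the adapter to complete
 * the passive open: opt0/opt2 encode the MSS index, window scale, L2T index,
 * ULP mode (TLS) and the RSS queue to use for the new connection.
 */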
static void chtls_pass_accept_rpl(struct sk_buff *skb,
				  struct cpl_pass_accept_req *req,
				  unsigned int tid)
{
	struct cpl_t5_pass_accept_rpl *rpl5;
	struct cxgb4_lld_info *lldi;
	const struct tcphdr *tcph;
	const struct tcp_sock *tp;
	struct chtls_sock *csk;
	unsigned int len;
	struct sock *sk;
	u32 opt2, hlen;
	u64 opt0;

	sk = skb->sk;
	csk = sk->sk_user_data;
	tp = tcp_sk(sk);
	lldi = csk->cdev->lldi;
	len = roundup(sizeof(*rpl5), 16);

	rpl5 = __skb_put_zero(skb, len);
	INIT_TP_WR(rpl5, tid);

	OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
						     tid));
	csk->mtu_idx = chtls_select_mss(csk, dst_mtu(__sk_dst_get(sk)),
					req);
	opt0 = TCAM_BYPASS_F |
	       WND_SCALE_V((tp)->rx_opt.rcv_wscale) |
	       MSS_IDX_V(csk->mtu_idx) |
	       L2T_IDX_V(csk->l2t_entry->idx) |
	       NAGLE_V(!(tp->nonagle & TCP_NAGLE_OFF)) |
	       TX_CHAN_V(csk->tx_chan) |
	       SMAC_SEL_V(csk->smac_idx) |
	       DSCP_V(csk->tos >> 2) |
	       ULP_MODE_V(ULP_MODE_TLS) |
	       RCV_BUFSIZ_V(min(tp->rcv_wnd >> 10, RCV_BUFSIZ_M));

	opt2 = RX_CHANNEL_V(0) |
	       RSS_QUEUE_VALID_F | RSS_QUEUE_V(csk->rss_qid);

	if (!is_t5(lldi->adapter_type))
		opt2 |= RX_FC_DISABLE_F;
	if (req->tcpopt.tstamp)
		opt2 |= TSTAMPS_EN_F;
	if (req->tcpopt.sack)
		opt2 |= SACK_EN_F;
	hlen = ntohl(req->hdr_len);

	tcph = (struct tcphdr *)((u8 *)(req + 1) +
			T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen));
	if (tcph->ece && tcph->cwr)
		opt2 |= CCTRL_ECN_V(1);
	opt2 |= CONG_CNTRL_V(CONG_ALG_NEWRENO);

	opt2 |= T5_OPT_2_VALID_F;
	rpl5->opt0 = cpu_to_be64(opt0);
	rpl5->opt2 = cpu_to_be32(opt2);
	rpl5->iss = cpu_to_be32((prandom_u32() & ~7UL) - 1);
	set_wr_txq(skb, CPL_PRIORITY_SETUP, csk->port_id);
	t4_set_arp_err_handler(skb, sk, chtls_accept_rpl_arp_failure);
	cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
}
static void inet_inherit_port(struct inet_hashinfo *hash_info,
			      struct sock *lsk, struct sock *newsk)
{
	__inet_inherit_port(lsk, newsk);
}
static int chtls_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol) {
		kfree_skb(skb);
		return 0;
	}
	BLOG_SKB_CB(skb)->backlog_rcv(sk, skb);
	return 0;
}
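
/*
 * Create the child socket for an incoming connection request: clone the
 * listener via tcp_create_openreq_child(), resolve the route and neighbour,
 * allocate the chtls_sock and L2T entry, and initialise the per-connection
 * TX/RX queue indices before handing the socket back to the caller.
 */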
static struct sock *chtls_recv_sock(struct sock *lsk,
				    struct request_sock *oreq,
				    void *network_hdr,
				    const struct cpl_pass_accept_req *req,
				    struct chtls_dev *cdev)
{
	const struct tcphdr *tcph;
	struct inet_sock *newinet;
	const struct iphdr *iph;
	struct net_device *ndev;
	struct chtls_sock *csk;
	struct dst_entry *dst;
	struct neighbour *n;
	struct tcp_sock *tp;
	struct sock *newsk;
	u16 port_id;
	int rxq_idx;
	int step;

	iph = (const struct iphdr *)network_hdr;
	newsk = tcp_create_openreq_child(lsk, oreq, cdev->askb);
	if (!newsk)
		goto free_oreq;

	dst = inet_csk_route_child_sock(lsk, newsk, oreq);
	if (!dst)
		goto free_sk;

	tcph = (struct tcphdr *)(iph + 1);
	n = dst_neigh_lookup(dst, &iph->saddr);
	if (!n)
		goto free_sk;

	ndev = n->dev;
	if (!ndev)
		goto free_dst;
	port_id = cxgb4_port_idx(ndev);

	csk = chtls_sock_create(cdev);
	if (!csk)
		goto free_dst;

	csk->l2t_entry = cxgb4_l2t_get(cdev->lldi->l2t, n, ndev, 0);
	if (!csk->l2t_entry)
		goto free_csk;

	newsk->sk_user_data = csk;
	newsk->sk_backlog_rcv = chtls_backlog_rcv;

	tp = tcp_sk(newsk);
	newinet = inet_sk(newsk);

	newinet->inet_daddr = iph->saddr;
	newinet->inet_rcv_saddr = iph->daddr;
	newinet->inet_saddr = iph->daddr;

	oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
	sk_setup_caps(newsk, dst);
	csk->sk = newsk;
	csk->passive_reap_next = oreq;
	csk->tx_chan = cxgb4_port_chan(ndev);
	csk->port_id = port_id;
	csk->egress_dev = ndev;
	csk->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
	csk->ulp_mode = ULP_MODE_TLS;
	step = cdev->lldi->nrxq / cdev->lldi->nchan;
	csk->rss_qid = cdev->lldi->rxq_ids[port_id * step];
	rxq_idx = port_id * step;
	csk->txq_idx = (rxq_idx < cdev->lldi->ntxq) ? rxq_idx :
			port_id * step;
	csk->sndbuf = newsk->sk_sndbuf;
	csk->smac_idx = cxgb4_tp_smt_idx(cdev->lldi->adapter_type,
					 cxgb4_port_viid(ndev));
	tp->rcv_wnd = select_rcv_wnd(csk);
	RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
					   WSCALE_OK(tp),
					   tp->window_clamp);
	inet_inherit_port(&tcp_hashinfo, lsk, newsk);
	csk_set_flag(csk, CSK_CONN_INLINE);
	bh_unlock_sock(newsk); /* tcp_create_openreq_child ->sk_clone_lock */

	return newsk;
free_csk:
	chtls_sock_release(&csk->kref);
free_dst:
	dst_release(dst);
free_sk:
	inet_csk_prepare_forced_close(newsk);
	tcp_done(newsk);
free_oreq:
	chtls_reqsk_free(oreq);
	return NULL;
}
/*
 * Populate a TID_RELEASE WR.  The skb must be already properly sized.
 */
static void mk_tid_release(struct sk_buff *skb,
			   unsigned int chan, unsigned int tid)
{
	struct cpl_tid_release *req;
	unsigned int len;

	len = roundup(sizeof(struct cpl_tid_release), 16);
	req = (struct cpl_tid_release *)__skb_put(skb, len);
	memset(req, 0, len);
	set_wr_txq(skb, CPL_PRIORITY_SETUP, chan);
	INIT_TP_WR_CPL(req, CPL_TID_RELEASE, tid);
}
static int chtls_get_module(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (!try_module_get(icsk->icsk_ulp_ops->owner))
		return -1;

	return 0;
}
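
/*
 * Process a CPL_PASS_ACCEPT_REQ for a listening socket: validate the request,
 * build a request_sock from the packet headers, create the child socket and
 * queue it on the listener's SYN queue, then send the accept reply.  Failure
 * paths fall through to a TID_RELEASE so the hardware TID is not leaked.
 */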
static void chtls_pass_accept_request(struct sock *sk,
				      struct sk_buff *skb)
{
	struct cpl_t5_pass_accept_rpl *rpl;
	struct cpl_pass_accept_req *req;
	struct listen_ctx *listen_ctx;
	struct request_sock *oreq;
	struct sk_buff *reply_skb;
	struct chtls_sock *csk;
	struct chtls_dev *cdev;
	struct tcphdr *tcph;
	struct sock *newsk;
	struct ethhdr *eh;
	struct iphdr *iph;
	void *network_hdr;
	unsigned int stid;
	unsigned int len;
	unsigned int tid;

	req = cplhdr(skb) + RSS_HDR;
	tid = GET_TID(req);
	cdev = BLOG_SKB_CB(skb)->cdev;
	newsk = lookup_tid(cdev->tids, tid);
	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
	if (newsk) {
		pr_info("tid (%d) already in use\n", tid);
		return;
	}

	len = roundup(sizeof(*rpl), 16);
	reply_skb = alloc_skb(len, GFP_ATOMIC);
	if (!reply_skb) {
		cxgb4_remove_tid(cdev->tids, 0, tid, sk->sk_family);
		kfree_skb(skb);
		return;
	}

	if (sk->sk_state != TCP_LISTEN)
		goto reject;

	if (inet_csk_reqsk_queue_is_full(sk))
		goto reject;

	if (sk_acceptq_is_full(sk))
		goto reject;

	oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true);
	if (!oreq)
		goto reject;

	oreq->rsk_rcv_wnd = 0;
	oreq->rsk_window_clamp = 0;
	oreq->cookie_ts = 0;
	oreq->ts_recent = 0;

	eh = (struct ethhdr *)(req + 1);
	iph = (struct iphdr *)(eh + 1);
	if (iph->version != 0x4)
		goto free_oreq;

	network_hdr = (void *)(eh + 1);
	tcph = (struct tcphdr *)(iph + 1);

	tcp_rsk(oreq)->tfo_listener = false;
	tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq);
	chtls_set_req_port(oreq, tcph->source, tcph->dest);
	inet_rsk(oreq)->ecn_ok = 0;
	chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
	if (req->tcpopt.wsf <= 14) {
		inet_rsk(oreq)->wscale_ok = 1;
		inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
	}
	inet_rsk(oreq)->ir_iif = sk->sk_bound_dev_if;

	newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);
	if (!newsk)
		goto reject;

	if (chtls_get_module(newsk))
		goto reject;
	inet_csk_reqsk_queue_added(sk);
	reply_skb->sk = newsk;
	chtls_install_cpl_ops(newsk);
	cxgb4_insert_tid(cdev->tids, newsk, tid, newsk->sk_family);
	csk = rcu_dereference_sk_user_data(newsk);
	listen_ctx = (struct listen_ctx *)lookup_stid(cdev->tids, stid);
	csk->listen_ctx = listen_ctx;
	__skb_queue_tail(&listen_ctx->synq, (struct sk_buff *)&csk->synq);
	chtls_pass_accept_rpl(reply_skb, req, tid);
	kfree_skb(skb);
	return;

free_oreq:
	chtls_reqsk_free(oreq);
reject:
	mk_tid_release(reply_skb, 0, tid);
	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
	kfree_skb(skb);
}
/*
 * Handle a CPL_PASS_ACCEPT_REQ message.
 */
static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_pass_accept_req *req = cplhdr(skb) + RSS_HDR;
	struct listen_ctx *ctx;
	unsigned int stid;
	unsigned int tid;
	struct sock *lsk;
	void *data;

	stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
	tid = GET_TID(req);

	data = lookup_stid(cdev->tids, stid);
	if (!data)
		return 1;

	ctx = (struct listen_ctx *)data;
	lsk = ctx->lsk;

	if (unlikely(tid >= cdev->tids->ntids)) {
		pr_info("passive open TID %u too large\n", tid);
		return 1;
	}

	BLOG_SKB_CB(skb)->cdev = cdev;
	process_cpl_msg(chtls_pass_accept_request, lsk, skb);
	return 0;
}
/*
 * Completes some final bits of initialization for just established connections
 * and changes their state to TCP_ESTABLISHED.
 *
 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1.
 */
static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->pushed_seq = snd_isn;
	tp->write_seq = snd_isn;
	tp->snd_nxt = snd_isn;
	tp->snd_una = snd_isn;
	inet_sk(sk)->inet_id = prandom_u32();
	assign_rxopt(sk, opt);

	if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
		tp->rcv_wup -= tp->rcv_wnd - (RCV_BUFSIZ_M << 10);

	tcp_set_state(sk, TCP_ESTABLISHED);
}
static void chtls_abort_conn(struct sock *sk, struct sk_buff *skb)
{
	struct sk_buff *abort_skb;

	abort_skb = alloc_skb(sizeof(struct cpl_abort_req), GFP_ATOMIC);
	if (abort_skb)
		chtls_send_reset(sk, CPL_ABORT_SEND_RST, abort_skb);
}
static struct sock *reap_list;
static DEFINE_SPINLOCK(reap_list_lock);

/*
 * Process the reap list.
 */
DECLARE_TASK_FUNC(process_reap_list, task_param)
{
	spin_lock_bh(&reap_list_lock);
	while (reap_list) {
		struct sock *sk = reap_list;
		struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

		reap_list = csk->passive_reap_next;
		csk->passive_reap_next = NULL;
		spin_unlock(&reap_list_lock);
		sock_hold(sk);

		bh_lock_sock(sk);
		chtls_abort_conn(sk, NULL);
		sock_orphan(sk);
		if (sk->sk_state == TCP_CLOSE)
			inet_csk_destroy_sock(sk);
		bh_unlock_sock(sk);
		sock_put(sk);

		spin_lock(&reap_list_lock);
	}
	spin_unlock_bh(&reap_list_lock);
}

static DECLARE_WORK(reap_task, process_reap_list);
static void add_to_reap_list(struct sock *sk)
{
	struct chtls_sock *csk = sk->sk_user_data;

	local_bh_disable();
	bh_lock_sock(sk);
	release_tcp_port(sk); /* release the port immediately */

	spin_lock(&reap_list_lock);
	csk->passive_reap_next = reap_list;
	reap_list = sk;
	if (!csk->passive_reap_next)
		schedule_work(&reap_task);
	spin_unlock(&reap_list_lock);
	bh_unlock_sock(sk);
	local_bh_enable();
}
static void add_pass_open_to_parent(struct sock *child, struct sock *lsk,
				    struct chtls_dev *cdev)
{
	struct request_sock *oreq;
	struct chtls_sock *csk;

	if (lsk->sk_state != TCP_LISTEN)
		return;

	csk = child->sk_user_data;
	oreq = csk->passive_reap_next;
	csk->passive_reap_next = NULL;

	reqsk_queue_removed(&inet_csk(lsk)->icsk_accept_queue, oreq);
	__skb_unlink((struct sk_buff *)&csk->synq, &csk->listen_ctx->synq);

	if (sk_acceptq_is_full(lsk)) {
		chtls_reqsk_free(oreq);
		add_to_reap_list(child);
	} else {
		refcount_set(&oreq->rsk_refcnt, 1);
		inet_csk_reqsk_queue_add(lsk, oreq, child);
		lsk->sk_data_ready(lsk);
	}
}
static void bl_add_pass_open_to_parent(struct sock *lsk, struct sk_buff *skb)
{
	struct sock *child = skb->sk;

	skb->sk = NULL;
	add_pass_open_to_parent(child, lsk, BLOG_SKB_CB(skb)->cdev);
	kfree_skb(skb);
}
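
/*
 * A CPL_PASS_ESTABLISH means the 3-way handshake completed in hardware.
 * Initialise the send sequence state, move the child to TCP_ESTABLISHED and
 * hand it to the parent listener's accept queue (or to the parent's backlog
 * if the listener is currently owned by user context).
 */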
static int chtls_pass_establish(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_pass_establish *req = cplhdr(skb) + RSS_HDR;
	struct chtls_sock *csk;
	struct sock *lsk, *sk;
	unsigned int hwtid;
	unsigned int stid;
	void *data;

	hwtid = GET_TID(req);
	sk = lookup_tid(cdev->tids, hwtid);
	if (!sk)
		return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);

	bh_lock_sock(sk);
	if (unlikely(sock_owned_by_user(sk))) {
		kfree_skb(skb);
	} else {
		csk = sk->sk_user_data;
		csk->wr_max_credits = 64;
		csk->wr_credits = 64;
		csk->wr_unacked = 0;
		make_established(sk, ntohl(req->snd_isn), ntohs(req->tcp_opt));
		stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
		sk->sk_state_change(sk);
		if (unlikely(sk->sk_socket))
			sk_wake_async(sk, 0, POLL_OUT);

		data = lookup_stid(cdev->tids, stid);
		lsk = ((struct listen_ctx *)data)->lsk;

		bh_lock_sock(lsk);
		if (unlikely(skb_queue_empty(&csk->listen_ctx->synq))) {
			/* removed from synq */
			bh_unlock_sock(lsk);
			kfree_skb(skb);
			goto unlock;
		}

		if (likely(!sock_owned_by_user(lsk))) {
			kfree_skb(skb);
			add_pass_open_to_parent(sk, lsk, cdev);
		} else {
			skb->sk = sk;
			BLOG_SKB_CB(skb)->cdev = cdev;
			BLOG_SKB_CB(skb)->backlog_rcv =
						bl_add_pass_open_to_parent;
			__sk_add_backlog(lsk, skb);
		}
		bh_unlock_sock(lsk);
	}
unlock:
	bh_unlock_sock(sk);
	return 0;
}
/*
 * Handle receipt of an urgent pointer.
 */
static void handle_urg_ptr(struct sock *sk, u32 urg_seq)
{
	struct tcp_sock *tp = tcp_sk(sk);

	urg_seq--;
	if (tp->urg_data && !after(urg_seq, tp->urg_seq))
		return;	/* duplicate pointer */

	sk_send_sigurg(sk);
	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
	    !sock_flag(sk, SOCK_URGINLINE) &&
	    tp->copied_seq != tp->rcv_nxt) {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb && tp->copied_seq - ULP_SKB_CB(skb)->seq >= skb->len)
			chtls_free_skb(sk, skb);
	}

	tp->urg_data = TCP_URG_NOTYET;
	tp->urg_seq = urg_seq;
}
static void check_sk_callbacks(struct chtls_sock *csk)
{
	struct sock *sk = csk->sk;

	if (unlikely(sk->sk_user_data &&
		     !csk_flag_nochk(csk, CSK_CALLBACKS_CHKD)))
		csk_set_flag(csk, CSK_CALLBACKS_CHKD);
}
/*
 * Handles Rx data that arrives in a state where the socket isn't accepting
 * new data.
 */
static void handle_excess_rx(struct sock *sk, struct sk_buff *skb)
{
	if (!csk_flag(sk, CSK_ABORT_SHUTDOWN))
		chtls_abort_conn(sk, skb);

	kfree_skb(skb);
}
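
/*
 * Plain (non-TLS) receive path: strip the CPL_RX_DATA header, record the
 * sequence number and any urgent data, advance rcv_nxt and queue the skb on
 * the socket receive queue.
 */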
static void chtls_recv_data(struct sock *sk, struct sk_buff *skb)
{
	struct cpl_rx_data *hdr = cplhdr(skb) + RSS_HDR;
	struct chtls_sock *csk;
	struct tcp_sock *tp;

	csk = rcu_dereference_sk_user_data(sk);
	tp = tcp_sk(sk);

	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
		handle_excess_rx(sk, skb);
		return;
	}

	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
	ULP_SKB_CB(skb)->psh = hdr->psh;
	skb_ulp_mode(skb) = ULP_MODE_NONE;

	skb_reset_transport_header(skb);
	__skb_pull(skb, sizeof(*hdr) + RSS_HDR);
	__skb_trim(skb, ntohs(hdr->len));

	if (unlikely(hdr->urg))
		handle_urg_ptr(sk, tp->rcv_nxt + ntohs(hdr->urg));
	if (unlikely(tp->urg_data == TCP_URG_NOTYET &&
		     tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID |
			       skb->data[tp->urg_seq - tp->rcv_nxt];

	if (unlikely(hdr->dack_mode != csk->delack_mode)) {
		csk->delack_mode = hdr->dack_mode;
		csk->delack_seq = tp->rcv_nxt;
	}

	tcp_hdr(skb)->fin = 0;
	tp->rcv_nxt += skb->len;

	__skb_queue_tail(&sk->sk_receive_queue, skb);

	if (!sock_flag(sk, SOCK_DEAD)) {
		check_sk_callbacks(csk);
		sk->sk_data_ready(sk);
	}
}
static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_rx_data *req = cplhdr(skb) + RSS_HDR;
	unsigned int hwtid = GET_TID(req);
	struct sock *sk;

	sk = lookup_tid(cdev->tids, hwtid);
	if (unlikely(!sk)) {
		pr_err("can't find conn. for hwtid %u.\n", hwtid);
		return -EINVAL;
	}
	skb_dst_set(skb, NULL);
	process_cpl_msg(chtls_recv_data, sk, skb);
	return 0;
}
static void chtls_recv_pdu(struct sock *sk, struct sk_buff *skb)
{
	struct cpl_tls_data *hdr = cplhdr(skb);
	struct chtls_sock *csk;
	struct chtls_hws *tlsk;
	struct tcp_sock *tp;

	csk = rcu_dereference_sk_user_data(sk);
	tlsk = &csk->tlshws;
	tp = tcp_sk(sk);

	if (unlikely(sk->sk_shutdown & RCV_SHUTDOWN)) {
		handle_excess_rx(sk, skb);
		return;
	}

	ULP_SKB_CB(skb)->seq = ntohl(hdr->seq);
	ULP_SKB_CB(skb)->flags = 0;
	skb_ulp_mode(skb) = ULP_MODE_TLS;

	skb_reset_transport_header(skb);
	__skb_pull(skb, sizeof(*hdr));
	__skb_trim(skb,
		   CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd)));

	if (unlikely(tp->urg_data == TCP_URG_NOTYET && tp->urg_seq -
		     tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID |
			       skb->data[tp->urg_seq - tp->rcv_nxt];

	tcp_hdr(skb)->fin = 0;
	tlsk->pldlen = CPL_TLS_DATA_LENGTH_G(ntohl(hdr->length_pkd));
	__skb_queue_tail(&tlsk->sk_recv_queue, skb);
}
static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_tls_data *req = cplhdr(skb);
	unsigned int hwtid = GET_TID(req);
	struct sock *sk;

	sk = lookup_tid(cdev->tids, hwtid);
	if (unlikely(!sk)) {
		pr_err("can't find conn. for hwtid %u.\n", hwtid);
		return -EINVAL;
	}
	skb_dst_set(skb, NULL);
	process_cpl_msg(chtls_recv_pdu, sk, skb);
	return 0;
}
static void chtls_set_hdrlen(struct sk_buff *skb, unsigned int nlen)
{
	struct tlsrx_cmp_hdr *tls_cmp_hdr = cplhdr(skb);

	skb->hdr_len = ntohs((__force __be16)tls_cmp_hdr->length);
	tls_cmp_hdr->length = ntohs((__force __be16)nlen);
}
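
/*
 * Completion for a received TLS record (CPL_RX_TLS_CMP): the skb carries the
 * reconstructed TLS header.  Pair it with the payload previously queued on
 * the per-connection sk_recv_queue and move both to the socket receive queue.
 */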
static void chtls_rx_hdr(struct sock *sk, struct sk_buff *skb)
{
	struct tlsrx_cmp_hdr *tls_hdr_pkt;
	struct cpl_rx_tls_cmp *cmp_cpl;
	struct sk_buff *skb_rec;
	struct chtls_sock *csk;
	struct chtls_hws *tlsk;
	struct tcp_sock *tp;

	cmp_cpl = cplhdr(skb);
	csk = rcu_dereference_sk_user_data(sk);
	tlsk = &csk->tlshws;
	tp = tcp_sk(sk);

	ULP_SKB_CB(skb)->seq = ntohl(cmp_cpl->seq);
	ULP_SKB_CB(skb)->flags = 0;

	skb_reset_transport_header(skb);
	__skb_pull(skb, sizeof(*cmp_cpl));
	tls_hdr_pkt = (struct tlsrx_cmp_hdr *)skb->data;
	if (tls_hdr_pkt->res_to_mac_error & TLSRX_HDR_PKT_ERROR_M)
		tls_hdr_pkt->type = CONTENT_TYPE_ERROR;

	__skb_trim(skb, TLS_HEADER_LENGTH);

	tp->rcv_nxt +=
		CPL_RX_TLS_CMP_PDULENGTH_G(ntohl(cmp_cpl->pdulength_length));

	ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_TLS_HDR;
	skb_rec = __skb_dequeue(&tlsk->sk_recv_queue);
	if (!skb_rec) {
		__skb_queue_tail(&sk->sk_receive_queue, skb);
	} else {
		chtls_set_hdrlen(skb, tlsk->pldlen);
		__skb_queue_tail(&sk->sk_receive_queue, skb);
		__skb_queue_tail(&sk->sk_receive_queue, skb_rec);
	}

	if (!sock_flag(sk, SOCK_DEAD)) {
		check_sk_callbacks(csk);
		sk->sk_data_ready(sk);
	}
}
static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_rx_tls_cmp *req = cplhdr(skb);
	unsigned int hwtid = GET_TID(req);
	struct sock *sk;

	sk = lookup_tid(cdev->tids, hwtid);
	if (unlikely(!sk)) {
		pr_err("can't find conn. for hwtid %u.\n", hwtid);
		return -EINVAL;
	}
	skb_dst_set(skb, NULL);
	process_cpl_msg(chtls_rx_hdr, sk, skb);

	return 0;
}
static void chtls_timewait(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	tp->rcv_nxt++;
	tp->rx_opt.ts_recent_stamp = ktime_get_seconds();
	tp->srtt_us = 0;
	tcp_time_wait(sk, TCP_TIME_WAIT, 0);
}
static void chtls_peer_close(struct sock *sk, struct sk_buff *skb)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);

	sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(sk, SOCK_DONE);

	switch (sk->sk_state) {
	case TCP_SYN_RECV:
	case TCP_ESTABLISHED:
		tcp_set_state(sk, TCP_CLOSE_WAIT);
		break;
	case TCP_FIN_WAIT1:
		tcp_set_state(sk, TCP_CLOSING);
		break;
	case TCP_FIN_WAIT2:
		chtls_release_resources(sk);
		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
			chtls_conn_done(sk);
		else
			chtls_timewait(sk);
		break;
	default:
		pr_info("cpl_peer_close in bad state %d\n", sk->sk_state);
	}

	if (!sock_flag(sk, SOCK_DEAD)) {
		sk->sk_state_change(sk);
		/* Do not send POLL_HUP for half duplex close. */

		if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
		    sk->sk_state == TCP_CLOSE)
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
		else
			sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
	}
	kfree_skb(skb);
}
static void chtls_close_con_rpl(struct sock *sk, struct sk_buff *skb)
{
	struct cpl_close_con_rpl *rpl = cplhdr(skb) + RSS_HDR;
	struct chtls_sock *csk;
	struct tcp_sock *tp;

	csk = rcu_dereference_sk_user_data(sk);
	tp = tcp_sk(sk);

	tp->snd_una = ntohl(rpl->snd_nxt) - 1;  /* exclude FIN */

	switch (sk->sk_state) {
	case TCP_CLOSING:
		chtls_release_resources(sk);
		if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING))
			chtls_conn_done(sk);
		else
			chtls_timewait(sk);
		break;
	case TCP_LAST_ACK:
		chtls_release_resources(sk);
		chtls_conn_done(sk);
		break;
	case TCP_FIN_WAIT1:
		tcp_set_state(sk, TCP_FIN_WAIT2);
		sk->sk_shutdown |= SEND_SHUTDOWN;

		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_state_change(sk);
		else if (tcp_sk(sk)->linger2 < 0 &&
			 !csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN))
			chtls_abort_conn(sk, skb);
		break;
	default:
		pr_info("close_con_rpl in bad state %d\n", sk->sk_state);
	}
	kfree_skb(skb);
}
static struct sk_buff *get_cpl_skb(struct sk_buff *skb,
				   size_t len, gfp_t gfp)
{
	if (likely(!skb_is_nonlinear(skb) && !skb_cloned(skb))) {
		WARN_ONCE(skb->len < len, "skb alloc error");
		__skb_trim(skb, len);
		skb_get(skb);
	} else {
		skb = alloc_skb(len, gfp);
		if (skb)
			__skb_put(skb, len);
	}
	return skb;
}
static void set_abort_rpl_wr(struct sk_buff *skb, unsigned int tid,
			     int cmd)
{
	struct cpl_abort_rpl *rpl = cplhdr(skb);

	INIT_TP_WR_CPL(rpl, CPL_ABORT_RPL, tid);
	rpl->cmd = cmd;
}
static void send_defer_abort_rpl(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_abort_req_rss *req = cplhdr(skb);
	struct sk_buff *reply_skb;

	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
			      GFP_KERNEL | __GFP_NOFAIL);
	__skb_put(reply_skb, sizeof(struct cpl_abort_rpl));
	set_abort_rpl_wr(reply_skb, GET_TID(req),
			 (req->status & CPL_ABORT_NO_RST));
	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, req->status >> 1);
	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
	kfree_skb(skb);
}
static void send_abort_rpl(struct sock *sk, struct sk_buff *skb,
			   struct chtls_dev *cdev, int status, int queue)
{
	struct cpl_abort_req_rss *req = cplhdr(skb);
	struct sk_buff *reply_skb;
	struct chtls_sock *csk;

	csk = rcu_dereference_sk_user_data(sk);

	reply_skb = alloc_skb(sizeof(struct cpl_abort_rpl),
			      GFP_KERNEL);

	if (!reply_skb) {
		req->status = (queue << 1);
		send_defer_abort_rpl(cdev, skb);
		return;
	}

	set_abort_rpl_wr(reply_skb, GET_TID(req), status);
	kfree_skb(skb);

	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
	if (csk_conn_inline(csk)) {
		struct l2t_entry *e = csk->l2t_entry;

		if (e && sk->sk_state != TCP_SYN_RECV) {
			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
			return;
		}
	}
	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
}
/*
 * Add an skb to the deferred skb queue for processing from process context.
 */
static void t4_defer_reply(struct sk_buff *skb, struct chtls_dev *cdev,
			   defer_handler_t handler)
{
	DEFERRED_SKB_CB(skb)->handler = handler;
	spin_lock_bh(&cdev->deferq.lock);
	__skb_queue_tail(&cdev->deferq, skb);
	if (skb_queue_len(&cdev->deferq) == 1)
		schedule_work(&cdev->deferq_task);
	spin_unlock_bh(&cdev->deferq.lock);
}
static void chtls_send_abort_rpl(struct sock *sk, struct sk_buff *skb,
				 struct chtls_dev *cdev,
				 int status, int queue)
{
	struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
	struct sk_buff *reply_skb;
	struct chtls_sock *csk;
	unsigned int tid;

	csk = rcu_dereference_sk_user_data(sk);
	tid = GET_TID(req);

	reply_skb = get_cpl_skb(skb, sizeof(struct cpl_abort_rpl), gfp_any());
	if (!reply_skb) {
		req->status = (queue << 1) | status;
		t4_defer_reply(skb, cdev, send_defer_abort_rpl);
		return;
	}

	set_abort_rpl_wr(reply_skb, tid, status);
	set_wr_txq(reply_skb, CPL_PRIORITY_DATA, queue);
	if (csk_conn_inline(csk)) {
		struct l2t_entry *e = csk->l2t_entry;

		if (e && sk->sk_state != TCP_SYN_RECV) {
			cxgb4_l2t_send(csk->egress_dev, reply_skb, e);
			return;
		}
	}
	cxgb4_ofld_send(cdev->lldi->ports[0], reply_skb);
	kfree_skb(skb);
}
/*
 * This is run from a listener's backlog to abort a child connection in
 * SYN_RCV state (i.e., one on the listener's SYN queue).
 */
static void bl_abort_syn_rcv(struct sock *lsk, struct sk_buff *skb)
{
	struct chtls_sock *csk;
	struct sock *child;
	int queue;

	child = skb->sk;
	csk = rcu_dereference_sk_user_data(child);
	queue = csk->txq_idx;

	skb->sk = NULL;
	do_abort_syn_rcv(child, lsk);
	send_abort_rpl(child, skb, BLOG_SKB_CB(skb)->cdev,
		       CPL_ABORT_NO_RST, queue);
}
static int abort_syn_rcv(struct sock *sk, struct sk_buff *skb)
{
	const struct request_sock *oreq;
	struct listen_ctx *listen_ctx;
	struct chtls_sock *csk;
	struct chtls_dev *cdev;
	struct sock *psk;
	void *ctx;

	csk = sk->sk_user_data;
	oreq = csk->passive_reap_next;
	cdev = csk->cdev;

	if (!oreq)
		return -1;

	ctx = lookup_stid(cdev->tids, oreq->ts_recent);
	if (!ctx)
		return -1;

	listen_ctx = (struct listen_ctx *)ctx;
	psk = listen_ctx->lsk;

	bh_lock_sock(psk);
	if (!sock_owned_by_user(psk)) {
		int queue = csk->txq_idx;

		do_abort_syn_rcv(sk, psk);
		send_abort_rpl(sk, skb, cdev, CPL_ABORT_NO_RST, queue);
	} else {
		skb->sk = sk;
		BLOG_SKB_CB(skb)->backlog_rcv = bl_abort_syn_rcv;
		__sk_add_backlog(psk, skb);
	}
	bh_unlock_sock(psk);
	return 0;
}
static void chtls_abort_req_rss(struct sock *sk, struct sk_buff *skb)
{
	const struct cpl_abort_req_rss *req = cplhdr(skb) + RSS_HDR;
	struct chtls_sock *csk = sk->sk_user_data;
	int rst_status = CPL_ABORT_NO_RST;
	int queue = csk->txq_idx;

	if (is_neg_adv(req->status)) {
		if (sk->sk_state == TCP_SYN_RECV)
			chtls_set_tcb_tflag(sk, 0, 0);

		kfree_skb(skb);
		return;
	}

	csk_reset_flag(csk, CSK_ABORT_REQ_RCVD);

	if (!csk_flag_nochk(csk, CSK_ABORT_SHUTDOWN) &&
	    !csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (send_tx_flowc_wr(sk, 0, tp->snd_nxt, tp->rcv_nxt) < 0)
			WARN_ONCE(1, "send_tx_flowc error");
		csk_set_flag(csk, CSK_TX_DATA_SENT);
	}

	csk_set_flag(csk, CSK_ABORT_SHUTDOWN);

	if (!csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
		sk->sk_err = ETIMEDOUT;

		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);

		if (sk->sk_state == TCP_SYN_RECV && !abort_syn_rcv(sk, skb))
			return;

		chtls_release_resources(sk);
		chtls_conn_done(sk);
	}

	chtls_send_abort_rpl(sk, skb, csk->cdev, rst_status, queue);
}
static void chtls_abort_rpl_rss(struct sock *sk, struct sk_buff *skb)
{
	struct cpl_abort_rpl_rss *rpl = cplhdr(skb) + RSS_HDR;
	struct chtls_sock *csk;
	struct chtls_dev *cdev;

	csk = rcu_dereference_sk_user_data(sk);
	cdev = csk->cdev;

	if (csk_flag_nochk(csk, CSK_ABORT_RPL_PENDING)) {
		csk_reset_flag(csk, CSK_ABORT_RPL_PENDING);
		if (!csk_flag_nochk(csk, CSK_ABORT_REQ_RCVD)) {
			if (sk->sk_state == TCP_SYN_SENT) {
				cxgb4_remove_tid(cdev->tids,
						 csk->port_id,
						 GET_TID(rpl),
						 sk->sk_family);
				sock_put(sk);
			}
			chtls_release_resources(sk);
			chtls_conn_done(sk);
		}
	}
	kfree_skb(skb);
}
static int chtls_conn_cpl(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_peer_close *req = cplhdr(skb) + RSS_HDR;
	void (*fn)(struct sock *sk, struct sk_buff *skb);
	unsigned int hwtid = GET_TID(req);
	struct sock *sk;
	u8 opcode;

	opcode = ((const struct rss_header *)cplhdr(skb))->opcode;

	sk = lookup_tid(cdev->tids, hwtid);
	if (!sk)
		goto rel_skb;

	switch (opcode) {
	case CPL_PEER_CLOSE:
		fn = chtls_peer_close;
		break;
	case CPL_CLOSE_CON_RPL:
		fn = chtls_close_con_rpl;
		break;
	case CPL_ABORT_REQ_RSS:
		fn = chtls_abort_req_rss;
		break;
	case CPL_ABORT_RPL_RSS:
		fn = chtls_abort_rpl_rss;
		break;
	default:
		goto rel_skb;
	}

	process_cpl_msg(fn, sk, skb);
	return 0;

rel_skb:
	kfree_skb(skb);
	return 0;
}
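
/*
 * Handle a CPL_FW4_ACK TX credit return: replenish work-request credits,
 * free the work requests that are now fully acknowledged, advance snd_una
 * when the ACK carries a valid sequence number, and restart transmission
 * if data is still queued.
 */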
static void chtls_rx_ack(struct sock *sk, struct sk_buff *skb)
{
	struct cpl_fw4_ack *hdr = cplhdr(skb) + RSS_HDR;
	struct chtls_sock *csk = sk->sk_user_data;
	struct tcp_sock *tp = tcp_sk(sk);
	u32 credits = hdr->credits;
	u32 snd_una;

	snd_una = ntohl(hdr->snd_una);
	csk->wr_credits += credits;

	if (csk->wr_unacked > csk->wr_max_credits - csk->wr_credits)
		csk->wr_unacked = csk->wr_max_credits - csk->wr_credits;

	while (credits) {
		struct sk_buff *pskb = csk->wr_skb_head;
		u32 csum;

		if (unlikely(!pskb)) {
			if (csk->wr_nondata)
				csk->wr_nondata -= credits;
			break;
		}
		csum = (__force u32)pskb->csum;
		if (unlikely(credits < csum)) {
			pskb->csum = (__force __wsum)(csum - credits);
			break;
		}
		dequeue_wr(sk);
		credits -= csum;
		kfree_skb(pskb);
	}
	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_SEQVAL) {
		if (unlikely(before(snd_una, tp->snd_una))) {
			kfree_skb(skb);
			return;
		}

		if (tp->snd_una != snd_una) {
			tp->snd_una = snd_una;
			tp->rcv_tstamp = tcp_time_stamp(tp);
			if (tp->snd_una == tp->snd_nxt &&
			    !csk_flag_nochk(csk, CSK_TX_FAILOVER))
				csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
		}
	}

	if (hdr->seq_vld & CPL_FW4_ACK_FLAGS_CH) {
		unsigned int fclen16 = roundup(failover_flowc_wr_len, 16);

		csk->wr_credits -= fclen16;
		csk_reset_flag(csk, CSK_TX_WAIT_IDLE);
		csk_reset_flag(csk, CSK_TX_FAILOVER);
	}
	if (skb_queue_len(&csk->txq) && chtls_push_frames(csk, 0))
		sk->sk_write_space(sk);

	kfree_skb(skb);
}
static int chtls_wr_ack(struct chtls_dev *cdev, struct sk_buff *skb)
{
	struct cpl_fw4_ack *rpl = cplhdr(skb) + RSS_HDR;
	unsigned int hwtid = GET_TID(rpl);
	struct sock *sk;

	sk = lookup_tid(cdev->tids, hwtid);
	if (unlikely(!sk)) {
		pr_err("can't find conn. for hwtid %u.\n", hwtid);
		return -EINVAL;
	}
	process_cpl_msg(chtls_rx_ack, sk, skb);

	return 0;
}
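
/*
 * Dispatch table mapping CPL opcodes to the handlers above; the driver's
 * CPL receive path indexes this table by opcode to steer incoming messages.
 */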
chtls_handler_func chtls_handlers[NUM_CPL_CMDS] = {
	[CPL_PASS_OPEN_RPL]     = chtls_pass_open_rpl,
	[CPL_CLOSE_LISTSRV_RPL] = chtls_close_listsrv_rpl,
	[CPL_PASS_ACCEPT_REQ]   = chtls_pass_accept_req,
	[CPL_PASS_ESTABLISH]    = chtls_pass_establish,
	[CPL_RX_DATA]           = chtls_rx_data,
	[CPL_TLS_DATA]          = chtls_rx_pdu,
	[CPL_RX_TLS_CMP]        = chtls_rx_cmp,
	[CPL_PEER_CLOSE]        = chtls_conn_cpl,
	[CPL_CLOSE_CON_RPL]     = chtls_conn_cpl,
	[CPL_ABORT_REQ_RSS]     = chtls_conn_cpl,
	[CPL_ABORT_RPL_RSS]     = chtls_conn_cpl,
	[CPL_FW4_ACK]           = chtls_wr_ack,
};