/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

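/* close all pending, not yet accepted connections of a listen socket */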
static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

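/* wait up to SMC_CLOSE_WAIT_TX_PENDS_TIME until all pending CDC
 * transmissions of this connection have completed, unless a signal
 * arrives first
 */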
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

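/* wake up a socket-closing task blocked in smc_close_stream_wait() */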
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

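/* announce the local shutdown of the write side to the peer */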
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

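/* announce the final close to the peer; data not yet received by the
 * application turns the close into an abort
 */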
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

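/* announce an abnormal termination to the peer */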
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	smc->sk.sk_state_change(&smc->sk);
}

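/* check whether a close indication (normal close or abort) has already
 * been sent to the peer
 */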
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

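/* perform the close state transitions for an active (local) close;
 * called with the socket lock held
 */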
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

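/* handle a peer_conn_abort indication received from the peer */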
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(&smc->sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(&smc->conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(&smc->sk);
}

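/* delayed worker scheduled from smc_close_passive_work(): unhash the
 * socket and drop the sock reference held for the delayed put
 */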
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

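/* perform the close state transitions for shutdown(SHUT_WR);
 * sends a close_wr notification to the peer
 */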
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}