/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

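/* Wait - bounded by SMC_CLOSE_WAIT_TX_PENDS_TIME - until the CDC send
 * path has no more pending messages for this connection, or until a
 * signal arrives.
 */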
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

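/* Called from the transmit path once prepared sends have drained;
 * wakes up a closing process blocked in smc_close_stream_wait().
 */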
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

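/* Announce a local shutdown(SHUT_WR) to the peer: set peer_done_writing
 * and push it out with the next CDC message.
 */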
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

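/* Announce the final close to the peer. If unread data would be
 * discarded, signal an abort instead of an orderly close.
 */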
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

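/* Signal abnormal connection termination to the peer. */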
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	smc->sk.sk_state_change(&smc->sk);
}

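/* True once any kind of closing - orderly or abort - has been sent
 * to the peer.
 */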
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

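/* Perform the active (locally initiated) close. Drives sk_state from its
 * current value towards SMC_CLOSED, sending any close or abort indication
 * still owed to the peer via smc_close_final()/smc_close_abort().
 * Expects the socket lock to be held; it is dropped and re-acquired
 * around blocking cleanup work. Returns 0 or the error of the CDC send.
 */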
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER) &&
	    !(current->flags & PF_EXITING))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		cancel_work_sync(&conn->tx_work);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

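/* A peer_conn_abort has been received for this connection: answer with
 * an abort of our own where a close is still owed, and move the socket
 * towards SMC_PROCESSABORT or SMC_CLOSED.
 */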
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(&smc->sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(&smc->conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(&smc->sk);
}

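/* Delayed work, scheduled SMC_CLOSE_SOCK_PUT_DELAY after the connection
 * has been freed: unhash the socket and drop the final reference.
 */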
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

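/* Handle shutdown(SHUT_WR): flush prepared sends, announce
 * peer_done_writing via smc_close_wr(), and advance sk_state.
 */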
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}