// SPDX-License-Identifier: GPL-2.0-only
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

static const char * const scif_ep_states[] = {
        "Unbound",
        "Bound",
        "Listening",
        "Closed",
        "Connected",
        "Connecting",
        "Mapping",
        "Closing",
        "Close Listening",
        "Disconnected",
        "Zombie"};

enum conn_async_state {
        ASYNC_CONN_IDLE = 1,    /* ep setup for async connect */
        ASYNC_CONN_INPROGRESS,  /* async connect in progress */
        ASYNC_CONN_FLUSH_WORK   /* async work flush in progress */
};

/*
 * File operations for anonymous inode file associated with a SCIF endpoint,
 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
 * poll API in the kernel and these take in a struct file *. Since a struct
 * file is not available to kernel mode SCIF, it uses an anonymous file for
 * this purpose.
 */
const struct file_operations scif_anon_fops = {
        .owner = THIS_MODULE,
};

scif_epd_t scif_open(void)
{
        struct scif_endpt *ep;
        int err;

        might_sleep();
        ep = kzalloc(sizeof(*ep), GFP_KERNEL);
        if (!ep)
                goto err_ep_alloc;

        ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
        if (!ep->qp_info.qp)
                goto err_qp_alloc;

        err = scif_anon_inode_getfile(ep);
        if (err)
                goto err_anon_inode;

        spin_lock_init(&ep->lock);
        mutex_init(&ep->sendlock);
        mutex_init(&ep->recvlock);

        scif_rma_ep_init(ep);
        ep->state = SCIFEP_UNBOUND;
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI open: ep %p success\n", ep);
        return (scif_epd_t)ep;

err_anon_inode:
        kfree(ep->qp_info.qp);
err_qp_alloc:
        kfree(ep);
err_ep_alloc:
        return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);

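/*
 * Example (illustrative sketch, not part of the original file): a kernel
 * mode client pairs scif_open() with scif_close(). The only failure mode
 * visible here is a NULL return when allocation fails.
 *
 *      scif_epd_t epd;
 *
 *      epd = scif_open();
 *      if (!epd)
 *              return -ENOMEM;
 *      ... bind, connect/accept, send/recv using epd ...
 *      scif_close(epd);
 */
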
/**
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @epd: The end point returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
        struct scifmsg msg;
        struct scif_endpt *fep = NULL;
        struct scif_endpt *tmpep;
        struct list_head *pos, *tmpq;
        int err;

        /*
         * Wake up any threads blocked in send()/recv() before closing
         * out the connection. Grabbing and releasing the send/recv lock
         * will ensure that any blocked senders/receivers have exited for
         * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
         * close. Ring 3 endpoints are not affected since close will not
         * be called while there are IOCTLs executing.
         */
        wake_up_interruptible(&ep->sendwq);
        wake_up_interruptible(&ep->recvwq);
        mutex_lock(&ep->sendlock);
        mutex_unlock(&ep->sendlock);
        mutex_lock(&ep->recvlock);
        mutex_unlock(&ep->recvlock);

        /* Remove from the connected list */
        mutex_lock(&scif_info.connlock);
        list_for_each_safe(pos, tmpq, &scif_info.connected) {
                tmpep = list_entry(pos, struct scif_endpt, list);
                if (tmpep == ep) {
                        list_del(pos);
                        fep = tmpep;
                        spin_lock(&ep->lock);
                        break;
                }
        }

        if (!fep) {
                /*
                 * The other side has completed the disconnect before
                 * the end point can be removed from the list. Therefore
                 * the ep lock is not locked, traverse the disconnected
                 * list to find the endpoint and release the conn lock.
                 */
                list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep) {
                                list_del(pos);
                                break;
                        }
                }
                mutex_unlock(&scif_info.connlock);
                return NULL;
        }

        init_completion(&ep->discon);
        msg.uop = SCIF_DISCNCT;
        msg.src = ep->port;
        msg.dst = ep->peer;
        msg.payload[0] = (u64)ep;
        msg.payload[1] = ep->remote_ep;

        err = scif_nodeqp_send(ep->remote_dev, &msg);
        spin_unlock(&ep->lock);
        mutex_unlock(&scif_info.connlock);

        if (!err)
                /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
                wait_for_completion_timeout(&ep->discon,
                                            SCIF_NODE_ALIVE_TIMEOUT);
        return ep;
}

int scif_close(scif_epd_t epd)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scif_endpt *tmpep;
        struct list_head *pos, *tmpq;
        enum scif_epd_state oldstate;
        bool flush_conn;

        dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
                ep, scif_ep_states[ep->state]);
        might_sleep();
        spin_lock(&ep->lock);
        flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
        spin_unlock(&ep->lock);

        if (flush_conn)
                flush_work(&scif_info.conn_work);

        spin_lock(&ep->lock);
        oldstate = ep->state;

        ep->state = SCIFEP_CLOSING;

        switch (oldstate) {
        case SCIFEP_ZOMBIE:
                dev_err(scif_info.mdev.this_device,
                        "SCIFAPI close: zombie state unexpected\n");
                fallthrough;
        case SCIFEP_DISCONNECTED:
                spin_unlock(&ep->lock);
                scif_unregister_all_windows(epd);
                /* Remove from the disconnected list */
                mutex_lock(&scif_info.connlock);
                list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep) {
                                list_del(pos);
                                break;
                        }
                }
                mutex_unlock(&scif_info.connlock);
                break;
        case SCIFEP_UNBOUND:
        case SCIFEP_BOUND:
        case SCIFEP_CONNECTING:
                spin_unlock(&ep->lock);
                break;
        case SCIFEP_MAPPING:
        case SCIFEP_CONNECTED:
        case SCIFEP_CLOSING:
                spin_unlock(&ep->lock);
                scif_unregister_all_windows(epd);
                scif_disconnect_ep(ep);
                break;
        case SCIFEP_LISTENING:
        case SCIFEP_CLLISTEN:
        {
                struct scif_conreq *conreq;
                struct scifmsg msg;
                struct scif_endpt *aep;

                spin_unlock(&ep->lock);
                mutex_lock(&scif_info.eplock);

                /* remove from listen list */
                list_for_each_safe(pos, tmpq, &scif_info.listen) {
                        tmpep = list_entry(pos, struct scif_endpt, list);
                        if (tmpep == ep)
                                list_del(pos);
                }
                /* Remove any dangling accepts */
                while (ep->acceptcnt) {
                        aep = list_first_entry(&ep->li_accept,
                                               struct scif_endpt, liacceptlist);
                        list_del(&aep->liacceptlist);
                        scif_put_port(aep->port.port);
                        list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
                                tmpep = list_entry(pos, struct scif_endpt,
                                                   miacceptlist);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        mutex_unlock(&scif_info.eplock);
                        mutex_lock(&scif_info.connlock);
                        list_for_each_safe(pos, tmpq, &scif_info.connected) {
                                tmpep = list_entry(pos,
                                                   struct scif_endpt, list);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
                                tmpep = list_entry(pos,
                                                   struct scif_endpt, list);
                                if (tmpep == aep) {
                                        list_del(pos);
                                        break;
                                }
                        }
                        mutex_unlock(&scif_info.connlock);
                        scif_teardown_ep(aep);
                        mutex_lock(&scif_info.eplock);
                        scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
                        ep->acceptcnt--;
                }

                spin_lock(&ep->lock);
                mutex_unlock(&scif_info.eplock);

                /* Remove and reject any pending connection requests. */
                while (ep->conreqcnt) {
                        conreq = list_first_entry(&ep->conlist,
                                                  struct scif_conreq, list);
                        list_del(&conreq->list);

                        msg.uop = SCIF_CNCT_REJ;
                        msg.dst.node = conreq->msg.src.node;
                        msg.dst.port = conreq->msg.src.port;
                        msg.payload[0] = conreq->msg.payload[0];
                        msg.payload[1] = conreq->msg.payload[1];
                        /*
                         * No error handling on purpose for scif_nodeqp_send().
                         * If the remote node is lost we still want to free the
                         * connection requests on the self node.
                         */
                        scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
                                         &msg);
                        ep->conreqcnt--;
                        kfree(conreq);
                }

                spin_unlock(&ep->lock);
                /* If a kSCIF accept is waiting wake it up */
                wake_up_interruptible(&ep->conwq);
                break;
        }
        }
        scif_put_port(ep->port.port);
        scif_anon_inode_fput(ep);
        scif_teardown_ep(ep);
        scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
        return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
 * accept new connections.
 * @epd: The end point returned from scif_open()
 */
int __scif_flush(scif_epd_t epd)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;

        switch (ep->state) {
        case SCIFEP_LISTENING:
        {
                ep->state = SCIFEP_CLLISTEN;

                /* If an accept is waiting wake it up */
                wake_up_interruptible(&ep->conwq);
                break;
        }
        default:
                break;
        }
        return 0;
}

int scif_bind(scif_epd_t epd, u16 pn)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret = 0;
        int tmp;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI bind: ep %p %s requested port number %d\n",
                ep, scif_ep_states[ep->state], pn);
        if (pn) {
                /*
                 * Similar to IETF RFC 1700, SCIF ports below
                 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
                 * processes or by processes executed by privileged users.
                 */
                if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
                        ret = -EACCES;
                        goto scif_bind_admin_exit;
                }
        }

        spin_lock(&ep->lock);
        if (ep->state == SCIFEP_BOUND) {
                ret = -EINVAL;
                goto scif_bind_exit;
        } else if (ep->state != SCIFEP_UNBOUND) {
                ret = -EISCONN;
                goto scif_bind_exit;
        }

        if (pn) {
                tmp = scif_rsrv_port(pn);
                if (tmp != pn) {
                        ret = -EINVAL;
                        goto scif_bind_exit;
                }
        } else {
                ret = scif_get_new_port();
                if (ret < 0)
                        goto scif_bind_exit;
                pn = ret;
        }

        ep->state = SCIFEP_BOUND;
        ep->port.node = scif_info.nodeid;
        ep->port.port = pn;
        ep->conn_async_state = ASYNC_CONN_IDLE;
        ret = pn;
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
        spin_unlock(&ep->lock);
scif_bind_admin_exit:
        return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);

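/*
 * Example (illustrative sketch, not part of the original file): binding an
 * endpoint. A port number of 0 asks SCIF to reserve any free port; the
 * bound port number is returned on success. Ports below SCIF_ADMIN_PORT_END
 * additionally require CAP_SYS_ADMIN.
 *
 *      int port;
 *
 *      port = scif_bind(epd, 0);
 *      if (port < 0)
 *              return port;
 */
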
int scif_listen(scif_epd_t epd, int backlog)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
        spin_lock(&ep->lock);
        switch (ep->state) {
        case SCIFEP_ZOMBIE:
        case SCIFEP_CLOSING:
        case SCIFEP_CLLISTEN:
        case SCIFEP_UNBOUND:
        case SCIFEP_DISCONNECTED:
                spin_unlock(&ep->lock);
                return -EINVAL;
        case SCIFEP_LISTENING:
        case SCIFEP_CONNECTED:
        case SCIFEP_CONNECTING:
        case SCIFEP_MAPPING:
                spin_unlock(&ep->lock);
                return -EISCONN;
        case SCIFEP_BOUND:
                break;
        }

        ep->state = SCIFEP_LISTENING;
        ep->backlog = backlog;

        ep->conreqcnt = 0;
        ep->acceptcnt = 0;
        INIT_LIST_HEAD(&ep->conlist);
        init_waitqueue_head(&ep->conwq);
        INIT_LIST_HEAD(&ep->li_accept);
        spin_unlock(&ep->lock);

        /*
         * Listen status is complete so delete the qp information not needed
         * on a listen before placing on the list of listening ep's
         */
        scif_teardown_ep(ep);
        ep->qp_info.qp = NULL;

        mutex_lock(&scif_info.eplock);
        list_add_tail(&ep->list, &scif_info.listen);
        mutex_unlock(&scif_info.eplock);
        return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);

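/*
 * Example (illustrative sketch, not part of the original file): a minimal
 * passive side setup. The port number 2048 and backlog of 16 are arbitrary
 * illustrative values.
 *
 *      scif_epd_t lepd;
 *      int err;
 *
 *      lepd = scif_open();
 *      if (!lepd)
 *              return -ENOMEM;
 *      err = scif_bind(lepd, 2048);
 *      if (err < 0)
 *              goto close;
 *      err = scif_listen(lepd, 16);
 *      if (err)
 *              goto close;
 *      return 0;
 * close:
 *      scif_close(lepd);
 *      return err;
 */
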
/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *	connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 *	allocates the local qp for the endpoint ring buffer and then sends
 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *	a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *	message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *	the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *	connecting endpoint is woken up as part of handling
 *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 *	the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *	in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 ************************************************************************
 */

static int scif_conn_func(struct scif_endpt *ep)
{
        int err = 0;
        struct scifmsg msg;
        struct device *spdev;

        err = scif_reserve_dma_chan(ep);
        if (err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d err %d\n", __func__, __LINE__, err);
                ep->state = SCIFEP_BOUND;
                goto connect_error_simple;
        }
        /* Initiate the first part of the endpoint QP setup */
        err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
                                    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
        if (err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s err %d qp_offset 0x%llx\n",
                        __func__, err, ep->qp_info.qp_offset);
                ep->state = SCIFEP_BOUND;
                goto connect_error_simple;
        }

        spdev = scif_get_peer_dev(ep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto cleanup_qp;
        }
        /* Format connect message and send it */
        msg.src = ep->port;
        msg.dst = ep->conn_port;
        msg.uop = SCIF_CNCT_REQ;
        msg.payload[0] = (u64)ep;
        msg.payload[1] = ep->qp_info.qp_offset;
        err = _scif_nodeqp_send(ep->remote_dev, &msg);
        if (err)
                goto connect_error_dec;
        scif_put_peer_dev(spdev);
        /*
         * Wait for the remote node to respond with SCIF_CNCT_GNT or
         * SCIF_CNCT_REJ message.
         */
        err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
                                 SCIF_NODE_ALIVE_TIMEOUT);
        if (!err) {
                dev_err(&ep->remote_dev->sdev->dev,
                        "%s %d timeout\n", __func__, __LINE__);
                ep->state = SCIFEP_BOUND;
        }
        spdev = scif_get_peer_dev(ep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto cleanup_qp;
        }
        if (ep->state == SCIFEP_MAPPING) {
                err = scif_setup_qp_connect_response(ep->remote_dev,
                                                     ep->qp_info.qp,
                                                     ep->qp_info.gnt_pld);
                /*
                 * If the resources to map the queue are not available then
                 * we need to tell the other side to terminate the accept.
                 */
                if (err) {
                        dev_err(&ep->remote_dev->sdev->dev,
                                "%s %d err %d\n", __func__, __LINE__, err);
                        msg.uop = SCIF_CNCT_GNTNACK;
                        msg.payload[0] = ep->remote_ep;
                        _scif_nodeqp_send(ep->remote_dev, &msg);
                        ep->state = SCIFEP_BOUND;
                        goto connect_error_dec;
                }

                msg.uop = SCIF_CNCT_GNTACK;
                msg.payload[0] = ep->remote_ep;
                err = _scif_nodeqp_send(ep->remote_dev, &msg);
                if (err) {
                        ep->state = SCIFEP_BOUND;
                        goto connect_error_dec;
                }
                ep->state = SCIFEP_CONNECTED;
                mutex_lock(&scif_info.connlock);
                list_add_tail(&ep->list, &scif_info.connected);
                mutex_unlock(&scif_info.connlock);
                dev_dbg(&ep->remote_dev->sdev->dev,
                        "SCIFAPI connect: ep %p connected\n", ep);
        } else if (ep->state == SCIFEP_BOUND) {
                dev_dbg(&ep->remote_dev->sdev->dev,
                        "SCIFAPI connect: ep %p connection refused\n", ep);
                err = -ECONNREFUSED;
                goto connect_error_dec;
        }
        scif_put_peer_dev(spdev);
        return err;
connect_error_dec:
        scif_put_peer_dev(spdev);
cleanup_qp:
        scif_cleanup_ep_qp(ep);
connect_error_simple:
        return err;
}

/*
 * Workqueue handler for servicing non-blocking SCIF connect
 */
void scif_conn_handler(struct work_struct *work)
{
        struct scif_endpt *ep;

        do {
                ep = NULL;
                spin_lock(&scif_info.nb_connect_lock);
                if (!list_empty(&scif_info.nb_connect_list)) {
                        ep = list_first_entry(&scif_info.nb_connect_list,
                                              struct scif_endpt, conn_list);
                        list_del(&ep->conn_list);
                }
                spin_unlock(&scif_info.nb_connect_lock);
                if (ep) {
                        ep->conn_err = scif_conn_func(ep);
                        wake_up_interruptible(&ep->conn_pend_wq);
                }
        } while (ep);
}

int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        struct scif_dev *remote_dev;
        struct device *spdev;

        dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
                scif_ep_states[ep->state]);

        if (!scif_dev || dst->node > scif_info.maxid)
                return -ENODEV;

        might_sleep();

        remote_dev = &scif_dev[dst->node];
        spdev = scif_get_peer_dev(remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                return err;
        }

        spin_lock(&ep->lock);
        switch (ep->state) {
        case SCIFEP_ZOMBIE:
        case SCIFEP_CLOSING:
                err = -EINVAL;
                break;
        case SCIFEP_DISCONNECTED:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                else
                        err = -EINVAL;
                break;
        case SCIFEP_LISTENING:
        case SCIFEP_CLLISTEN:
                err = -EOPNOTSUPP;
                break;
        case SCIFEP_CONNECTING:
        case SCIFEP_MAPPING:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        err = -EINPROGRESS;
                else
                        err = -EISCONN;
                break;
        case SCIFEP_CONNECTED:
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                else
                        err = -EISCONN;
                break;
        case SCIFEP_UNBOUND:
                err = scif_get_new_port();
                if (err < 0)
                        break;
                ep->port.port = err;
                ep->port.node = scif_info.nodeid;
                ep->conn_async_state = ASYNC_CONN_IDLE;
                fallthrough;
        case SCIFEP_BOUND:
                /*
                 * If a non-blocking connect has been already initiated
                 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
                 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
                 * SCIFEP_BOUND due to an error in the connection process
                 * (e.g., connection refused). If conn_async_state is
                 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
                 * so that the error status can be collected. If the state is
                 * already ASYNC_CONN_FLUSH_WORK - then set the error to
                 * EINPROGRESS since some other thread is waiting to collect
                 * the error.
                 */
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                        ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
                } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
                        err = -EINPROGRESS;
                } else {
                        ep->conn_port = *dst;
                        init_waitqueue_head(&ep->sendwq);
                        init_waitqueue_head(&ep->recvwq);
                        init_waitqueue_head(&ep->conwq);
                        ep->conn_async_state = 0;

                        if (unlikely(non_block))
                                ep->conn_async_state = ASYNC_CONN_INPROGRESS;
                }
                break;
        }

        if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
                goto connect_simple_unlock1;

        ep->state = SCIFEP_CONNECTING;
        ep->remote_dev = &scif_dev[dst->node];
        ep->qp_info.qp->magic = SCIFEP_MAGIC;
        if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                init_waitqueue_head(&ep->conn_pend_wq);
                spin_lock(&scif_info.nb_connect_lock);
                list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
                spin_unlock(&scif_info.nb_connect_lock);
                err = -EINPROGRESS;
                schedule_work(&scif_info.conn_work);
        }
connect_simple_unlock1:
        spin_unlock(&ep->lock);
        scif_put_peer_dev(spdev);
        if (err) {
                return err;
        } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
                flush_work(&scif_info.conn_work);
                err = ep->conn_err;
                spin_lock(&ep->lock);
                ep->conn_async_state = ASYNC_CONN_IDLE;
                spin_unlock(&ep->lock);
        } else {
                err = scif_conn_func(ep);
        }
        return err;
}

int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
        return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);

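/*
 * Example (illustrative sketch, not part of the original file): a blocking
 * connect to a hypothetical peer at node 1, port 2048. scif_connect()
 * returns a negative errno on failure.
 *
 *      struct scif_port_id dst = { .node = 1, .port = 2048 };
 *      scif_epd_t epd;
 *      int err;
 *
 *      epd = scif_open();
 *      if (!epd)
 *              return -ENOMEM;
 *      err = scif_connect(epd, &dst);
 *      if (err < 0) {
 *              scif_close(epd);
 *              return err;
 *      }
 */
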
/**
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node. Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion zero will be returned and the peer information
 * will be filled in.
 *
 * If the end point is not in the listening state -EINVAL will be returned.
 *
 * If during the connection sequence resource allocation fails, -ENOMEM
 * will be returned.
 *
 * If the function is called with the ASYNC flag set and no connection requests
 * are pending it will return -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal. If so, -EINTR will be returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
                scif_epd_t *newepd, int flags)
{
        struct scif_endpt *lep = (struct scif_endpt *)epd;
        struct scif_endpt *cep;
        struct scif_conreq *conreq;
        struct scifmsg msg;
        int err;
        struct device *spdev;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

        if (flags & ~SCIF_ACCEPT_SYNC)
                return -EINVAL;

        if (!peer || !newepd)
                return -EINVAL;

        might_sleep();
        spin_lock(&lep->lock);
        if (lep->state != SCIFEP_LISTENING) {
                spin_unlock(&lep->lock);
                return -EINVAL;
        }

        if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
                /* No connection request present and we do not want to wait */
                spin_unlock(&lep->lock);
                return -EAGAIN;
        }

        lep->files = current->files;
retry_connection:
        spin_unlock(&lep->lock);
        /* Wait for the remote node to send us a SCIF_CNCT_REQ */
        err = wait_event_interruptible(lep->conwq,
                                       (lep->conreqcnt ||
                                       (lep->state != SCIFEP_LISTENING)));
        if (err)
                return err;

        if (lep->state != SCIFEP_LISTENING)
                return -EINTR;

        spin_lock(&lep->lock);

        if (!lep->conreqcnt)
                goto retry_connection;

        /* Get the first connect request off the list */
        conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
        list_del(&conreq->list);
        lep->conreqcnt--;
        spin_unlock(&lep->lock);

        /* Fill in the peer information */
        peer->node = conreq->msg.src.node;
        peer->port = conreq->msg.src.port;

        cep = kzalloc(sizeof(*cep), GFP_KERNEL);
        if (!cep) {
                err = -ENOMEM;
                goto scif_accept_error_epalloc;
        }
        spin_lock_init(&cep->lock);
        mutex_init(&cep->sendlock);
        mutex_init(&cep->recvlock);
        cep->state = SCIFEP_CONNECTING;
        cep->remote_dev = &scif_dev[peer->node];
        cep->remote_ep = conreq->msg.payload[0];

        scif_rma_ep_init(cep);

        err = scif_reserve_dma_chan(cep);
        if (err) {
                dev_err(scif_info.mdev.this_device,
                        "%s %d err %d\n", __func__, __LINE__, err);
                goto scif_accept_error_qpalloc;
        }

        cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
        if (!cep->qp_info.qp) {
                err = -ENOMEM;
                goto scif_accept_error_qpalloc;
        }

        err = scif_anon_inode_getfile(cep);
        if (err)
                goto scif_accept_error_anon_inode;

        cep->qp_info.qp->magic = SCIFEP_MAGIC;
        spdev = scif_get_peer_dev(cep->remote_dev);
        if (IS_ERR(spdev)) {
                err = PTR_ERR(spdev);
                goto scif_accept_error_map;
        }
        err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
                                   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
                                   cep->remote_dev);
        if (err) {
                dev_dbg(&cep->remote_dev->sdev->dev,
                        "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
                        lep, cep, err, cep->qp_info.qp_offset);
                scif_put_peer_dev(spdev);
                goto scif_accept_error_map;
        }

        cep->port.node = lep->port.node;
        cep->port.port = lep->port.port;
        cep->peer.node = peer->node;
        cep->peer.port = peer->port;
        init_waitqueue_head(&cep->sendwq);
        init_waitqueue_head(&cep->recvwq);
        init_waitqueue_head(&cep->conwq);

        msg.uop = SCIF_CNCT_GNT;
        msg.src = cep->port;
        msg.payload[0] = cep->remote_ep;
        msg.payload[1] = cep->qp_info.qp_offset;
        msg.payload[2] = (u64)cep;

        err = _scif_nodeqp_send(cep->remote_dev, &msg);
        scif_put_peer_dev(spdev);
        if (err)
                goto scif_accept_error_map;
retry:
        /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
        err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
                                 SCIF_NODE_ACCEPT_TIMEOUT);
        if (!err && scifdev_alive(cep))
                goto retry;
        err = !err ? -ENODEV : 0;
        if (err)
                goto scif_accept_error_map;
        kfree(conreq);

        spin_lock(&cep->lock);

        if (cep->state == SCIFEP_CLOSING) {
                /*
                 * Remote failed to allocate resources and NAKed the grant.
                 * There is at this point nothing referencing the new end point.
                 */
                spin_unlock(&cep->lock);
                scif_teardown_ep(cep);
                kfree(cep);

                /* If call with sync flag then go back and wait. */
                if (flags & SCIF_ACCEPT_SYNC) {
                        spin_lock(&lep->lock);
                        goto retry_connection;
                }
                return -EAGAIN;
        }

        scif_get_port(cep->port.port);
        *newepd = (scif_epd_t)cep;
        spin_unlock(&cep->lock);
        return 0;
scif_accept_error_map:
        scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
        scif_teardown_ep(cep);
scif_accept_error_qpalloc:
        kfree(cep);
scif_accept_error_epalloc:
        msg.uop = SCIF_CNCT_REJ;
        msg.dst.node = conreq->msg.src.node;
        msg.dst.port = conreq->msg.src.port;
        msg.payload[0] = conreq->msg.payload[0];
        msg.payload[1] = conreq->msg.payload[1];
        scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
        kfree(conreq);
        return err;
}
EXPORT_SYMBOL_GPL(scif_accept);

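/*
 * Example (illustrative sketch, not part of the original file): accepting
 * one connection on a listening endpoint, blocking until a request arrives.
 * On success newepd is a connected endpoint and peer identifies the remote
 * node and port.
 *
 *      struct scif_port_id peer;
 *      scif_epd_t newepd;
 *      int err;
 *
 *      err = scif_accept(lepd, &peer, &newepd, SCIF_ACCEPT_SYNC);
 *      if (err)
 *              return err;
 */
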
/**
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Length to send or receive
 * @flags: blocking or non blocking
 *
 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
        int ret = -EINVAL;

        if (len < 0)
                goto err_ret;
        if (flags && (!(flags & SCIF_RECV_BLOCK)))
                goto err_ret;
        ret = 0;
err_ret:
        return ret;
}

static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scifmsg notif_msg;
        int curr_xfer_len = 0, sent_len = 0, write_count;
        int ret = 0;
        struct scif_qp *qp = ep->qp_info.qp;

        if (flags & SCIF_SEND_BLOCK)
                might_sleep();

        spin_lock(&ep->lock);
        while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
                write_count = scif_rb_space(&qp->outbound_q);
                if (write_count) {
                        /* Best effort to send as much data as possible */
                        curr_xfer_len = min(len - sent_len, write_count);
                        ret = scif_rb_write(&qp->outbound_q, msg,
                                            curr_xfer_len);
                        if (ret < 0)
                                break;
                        /* Success. Update write pointer */
                        scif_rb_commit(&qp->outbound_q);
                        /*
                         * Send a notification to the peer about the
                         * produced data message.
                         */
                        notif_msg.src = ep->port;
                        notif_msg.uop = SCIF_CLIENT_SENT;
                        notif_msg.payload[0] = ep->remote_ep;
                        ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
                        if (ret)
                                break;
                        sent_len += curr_xfer_len;
                        msg = msg + curr_xfer_len;
                        continue;
                }
                curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
                /* Not enough RB space. return for the Non Blocking case */
                if (!(flags & SCIF_SEND_BLOCK))
                        break;

                spin_unlock(&ep->lock);
                /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
                ret =
                wait_event_interruptible(ep->sendwq,
                                         (SCIFEP_CONNECTED != ep->state) ||
                                         (scif_rb_space(&qp->outbound_q) >=
                                         curr_xfer_len));
                spin_lock(&ep->lock);
                if (ret)
                        break;
        }
        if (sent_len)
                ret = sent_len;
        else if (!ret && SCIFEP_CONNECTED != ep->state)
                ret = SCIFEP_DISCONNECTED == ep->state ?
                        -ECONNRESET : -ENOTCONN;
        spin_unlock(&ep->lock);
        return ret;
}

static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
        int read_size;
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        struct scifmsg notif_msg;
        int curr_recv_len = 0, remaining_len = len, read_count;
        int ret = 0;
        struct scif_qp *qp = ep->qp_info.qp;

        if (flags & SCIF_RECV_BLOCK)
                might_sleep();
        spin_lock(&ep->lock);
        while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
                                 SCIFEP_DISCONNECTED == ep->state)) {
                read_count = scif_rb_count(&qp->inbound_q, remaining_len);
                if (read_count) {
                        /*
                         * Best effort to recv as much data as there
                         * are bytes to read in the RB particularly
                         * important for the Non Blocking case.
                         */
                        curr_recv_len = min(remaining_len, read_count);
                        read_size = scif_rb_get_next(&qp->inbound_q,
                                                     msg, curr_recv_len);
                        if (ep->state == SCIFEP_CONNECTED) {
                                /*
                                 * Update the read pointer only if the endpoint
                                 * is still connected else the read pointer
                                 * might no longer exist since the peer has
                                 * freed up the queue.
                                 */
                                scif_rb_update_read_ptr(&qp->inbound_q);
                                /*
                                 * Send a notification to the peer about the
                                 * consumed data message only if the EP is in
                                 * SCIFEP_CONNECTED state.
                                 */
                                notif_msg.src = ep->port;
                                notif_msg.uop = SCIF_CLIENT_RCVD;
                                notif_msg.payload[0] = ep->remote_ep;
                                ret = _scif_nodeqp_send(ep->remote_dev,
                                                        &notif_msg);
                                if (ret)
                                        break;
                        }
                        remaining_len -= curr_recv_len;
                        msg = msg + curr_recv_len;
                        continue;
                }
                /*
                 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
                 * we will keep looping forever.
                 */
                if (ep->state == SCIFEP_DISCONNECTED)
                        break;
                /*
                 * Return in the Non Blocking case if there is no data
                 * to read in this iteration.
                 */
                if (!(flags & SCIF_RECV_BLOCK))
                        break;
                curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
                spin_unlock(&ep->lock);
                /*
                 * Wait for a SCIF_CLIENT_SEND message in the blocking case
                 * or until other side disconnects.
                 */
                ret =
                wait_event_interruptible(ep->recvwq,
                                         SCIFEP_CONNECTED != ep->state ||
                                         scif_rb_count(&qp->inbound_q,
                                                       curr_recv_len)
                                         >= curr_recv_len);
                spin_lock(&ep->lock);
                if (ret)
                        break;
        }
        if (len - remaining_len)
                ret = len - remaining_len;
        else if (!ret && ep->state != SCIFEP_CONNECTED)
                ret = ep->state == SCIFEP_DISCONNECTED ?
                        -ECONNRESET : -ENOTCONN;
        spin_unlock(&ep->lock);
        return ret;
}

/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        int sent_len = 0;
        char *tmp;
        int loop_len;
        int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        err = scif_msg_param_check(epd, len, flags);
        if (err)
                goto send_err;

        tmp = kmalloc(chunk_len, GFP_KERNEL);
        if (!tmp) {
                err = -ENOMEM;
                goto send_err;
        }
        /*
         * Grabbing the lock before breaking up the transfer in
         * multiple chunks is required to ensure that messages do
         * not get fragmented and reordered.
         */
        mutex_lock(&ep->sendlock);
        while (sent_len != len) {
                loop_len = len - sent_len;
                loop_len = min(chunk_len, loop_len);
                if (copy_from_user(tmp, msg, loop_len)) {
                        err = -EFAULT;
                        goto send_free_err;
                }
                err = _scif_send(epd, tmp, loop_len, flags);
                if (err < 0)
                        goto send_free_err;
                sent_len += err;
                msg = msg + err;
                if (err != loop_len)
                        goto send_free_err;
        }
send_free_err:
        mutex_unlock(&ep->sendlock);
        kfree(tmp);
send_err:
        return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int err = 0;
        int recv_len = 0;
        char *tmp;
        int loop_len;
        int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        err = scif_msg_param_check(epd, len, flags);
        if (err)
                goto recv_err;

        tmp = kmalloc(chunk_len, GFP_KERNEL);
        if (!tmp) {
                err = -ENOMEM;
                goto recv_err;
        }
        /*
         * Grabbing the lock before breaking up the transfer in
         * multiple chunks is required to ensure that messages do
         * not get fragmented and reordered.
         */
        mutex_lock(&ep->recvlock);
        while (recv_len != len) {
                loop_len = len - recv_len;
                loop_len = min(chunk_len, loop_len);
                err = _scif_recv(epd, tmp, loop_len, flags);
                if (err < 0)
                        goto recv_free_err;
                if (copy_to_user(msg, tmp, err)) {
                        err = -EFAULT;
                        goto recv_free_err;
                }
                recv_len += err;
                msg = msg + err;
                if (err != loop_len)
                        goto recv_free_err;
        }
recv_free_err:
        mutex_unlock(&ep->recvlock);
        kfree(tmp);
recv_err:
        return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        ret = scif_msg_param_check(epd, len, flags);
        if (ret)
                return ret;
        if (!ep->remote_dev)
                return -ENOTCONN;
        /*
         * Grab the mutex lock in the blocking case only
         * to ensure messages do not get fragmented/reordered.
         * The non blocking mode is protected using spin locks
         * in _scif_send().
         */
        if (flags & SCIF_SEND_BLOCK)
                mutex_lock(&ep->sendlock);

        ret = _scif_send(epd, msg, len, flags);

        if (flags & SCIF_SEND_BLOCK)
                mutex_unlock(&ep->sendlock);
        return ret;
}
EXPORT_SYMBOL_GPL(scif_send);

/**
 * scif_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the kernel mode only and is
 * a wrapper for _scif_recv().
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
        struct scif_endpt *ep = (struct scif_endpt *)epd;
        int ret;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
        if (!len)
                return 0;

        ret = scif_msg_param_check(epd, len, flags);
        if (ret)
                return ret;
        /*
         * Grab the mutex lock in the blocking case only
         * to ensure messages do not get fragmented/reordered.
         * The non blocking mode is protected using spin locks
         * in _scif_recv().
         */
        if (flags & SCIF_RECV_BLOCK)
                mutex_lock(&ep->recvlock);

        ret = _scif_recv(epd, msg, len, flags);

        if (flags & SCIF_RECV_BLOCK)
                mutex_unlock(&ep->recvlock);
        return ret;
}
EXPORT_SYMBOL_GPL(scif_recv);

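/*
 * Example (illustrative sketch, not part of the original file): a blocking
 * round trip on a connected endpoint. Both calls return the number of
 * bytes transferred or a negative errno.
 *
 *      char buf[64];
 *      int ret;
 *
 *      ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
 *      if (ret < 0)
 *              return ret;
 *      ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
 *      if (ret < 0)
 *              return ret;
 */
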
static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
                                   poll_table *p, struct scif_endpt *ep)
{
        /*
         * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
         * and regrab it afterwards. Because the endpoint state might have
         * changed while the lock was given up, the state must be checked
         * again after re-acquiring the lock. The code in __scif_pollfd(..)
         * does this.
         */
        spin_unlock(&ep->lock);
        poll_wait(f, wq, p);
        spin_lock(&ep->lock);
}

static __poll_t
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
        __poll_t mask = 0;

        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);

        spin_lock(&ep->lock);

        /* Endpoint is waiting for a non-blocking connect to complete */
        if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
                if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
                        if (ep->state == SCIFEP_CONNECTED ||
                            ep->state == SCIFEP_DISCONNECTED ||
                            ep->conn_err)
                                mask |= EPOLLOUT;
                        goto exit;
                }
        }

        /* Endpoint is listening for incoming connection requests */
        if (ep->state == SCIFEP_LISTENING) {
                _scif_poll_wait(f, &ep->conwq, wait, ep);
                if (ep->state == SCIFEP_LISTENING) {
                        if (ep->conreqcnt)
                                mask |= EPOLLIN;
                        goto exit;
                }
        }

        /* Endpoint is connected or disconnected */
        if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
                if (poll_requested_events(wait) & EPOLLIN)
                        _scif_poll_wait(f, &ep->recvwq, wait, ep);
                if (poll_requested_events(wait) & EPOLLOUT)
                        _scif_poll_wait(f, &ep->sendwq, wait, ep);
                if (ep->state == SCIFEP_CONNECTED ||
                    ep->state == SCIFEP_DISCONNECTED) {
                        /* Data can be read without blocking */
                        if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
                                mask |= EPOLLIN;
                        /* Data can be written without blocking */
                        if (scif_rb_space(&ep->qp_info.qp->outbound_q))
                                mask |= EPOLLOUT;
                        /* Return EPOLLHUP if endpoint is disconnected */
                        if (ep->state == SCIFEP_DISCONNECTED)
                                mask |= EPOLLHUP;
                        goto exit;
                }
        }

        /* Return EPOLLERR if the endpoint is in none of the above states */
        mask |= EPOLLERR;
exit:
        spin_unlock(&ep->lock);
        return mask;
}

/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *	  and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs; a negative value implies an
 *		   infinite timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
        struct poll_wqueues table;
        poll_table *pt;
        int i, count = 0, timed_out = timeout_msecs == 0;
        __poll_t mask;
        u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
                : msecs_to_jiffies(timeout_msecs);

        poll_initwait(&table);
        pt = &table.pt;
        while (1) {
                for (i = 0; i < nfds; i++) {
                        pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
                        mask = __scif_pollfd(ufds[i].epd->anon,
                                             pt, ufds[i].epd);
                        mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
                        if (mask) {
                                count++;
                                pt->_qproc = NULL;
                        }
                        ufds[i].revents = mask;
                }
                pt->_qproc = NULL;
                if (!count) {
                        count = table.error;
                        if (signal_pending(current))
                                count = -EINTR;
                }
                if (count || timed_out)
                        break;

                if (!schedule_timeout_interruptible(timeout))
                        timed_out = 1;
        }
        poll_freewait(&table);
        return count;
}
EXPORT_SYMBOL_GPL(scif_poll);

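/*
 * Example (illustrative sketch, not part of the original file): polling two
 * endpoints for readiness to read with a one second timeout. epd0/epd1 are
 * hypothetical connected endpoints.
 *
 *      struct scif_pollepd polls[2] = {
 *              { .epd = epd0, .events = EPOLLIN },
 *              { .epd = epd1, .events = EPOLLIN },
 *      };
 *      int ready;
 *
 *      ready = scif_poll(polls, 2, 1000);
 *      if (ready > 0 && (polls[0].revents & EPOLLIN))
 *              ... epd0 can be read without blocking ...
 */
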
int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
        int online = 0;
        int offset = 0;
        int node;

        if (!scif_is_mgmt_node())
                scif_get_node_info();

        *self = scif_info.nodeid;
        mutex_lock(&scif_info.conflock);
        len = min_t(int, len, scif_info.total);
        for (node = 0; node <= scif_info.maxid; node++) {
                if (_scifdev_alive(&scif_dev[node])) {
                        online++;
                        if (offset < len)
                                nodes[offset++] = node;
                }
        }
        dev_dbg(scif_info.mdev.this_device,
                "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
                scif_info.total, online, offset);
        mutex_unlock(&scif_info.conflock);

        return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);

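/*
 * Example (illustrative sketch, not part of the original file): discovering
 * the online SCIF nodes. The array size of 32 is an arbitrary upper bound;
 * the return value is the total number of online nodes, which may exceed
 * the number of ids actually written to the array.
 *
 *      u16 nodes[32], self;
 *      int online;
 *
 *      online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
 */
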
static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
{
        struct scif_client *client =
                container_of(si, struct scif_client, si);
        struct scif_peer_dev *spdev =
                container_of(dev, struct scif_peer_dev, dev);

        if (client->probe)
                client->probe(spdev);
        return 0;
}

static void scif_remove_client_dev(struct device *dev,
                                   struct subsys_interface *si)
{
        struct scif_client *client =
                container_of(si, struct scif_client, si);
        struct scif_peer_dev *spdev =
                container_of(dev, struct scif_peer_dev, dev);

        if (client->remove)
                client->remove(spdev);
}

void scif_client_unregister(struct scif_client *client)
{
        subsys_interface_unregister(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);

int scif_client_register(struct scif_client *client)
{
        struct subsys_interface *si = &client->si;

        si->name = client->name;
        si->subsys = &scif_peer_bus;
        si->add_dev = scif_add_client_dev;
        si->remove_dev = scif_remove_client_dev;

        return subsys_interface_register(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_register);
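
/*
 * Example (illustrative sketch, not part of the original file): a minimal
 * SCIF client. The probe/remove callbacks run as peer devices come and go;
 * all names here are hypothetical.
 *
 *      static void my_probe(struct scif_peer_dev *spdev)
 *      {
 *              dev_info(&spdev->dev, "peer node %d online\n", spdev->dnode);
 *      }
 *
 *      static void my_remove(struct scif_peer_dev *spdev)
 *      {
 *              dev_info(&spdev->dev, "peer node %d offline\n", spdev->dnode);
 *      }
 *
 *      static struct scif_client my_client = {
 *              .name = "my_scif_client",
 *              .probe = my_probe,
 *              .remove = my_remove,
 *      };
 *
 *      err = scif_client_register(&my_client);
 *      if (err)
 *              return err;
 */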