/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
18 #include <linux/scif.h>
19 #include "scif_main.h"
22 static const char * const scif_ep_states
[] = {
/* Progress of a non-blocking (asynchronous) connect on an endpoint. */
enum conn_async_state {
	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress */
};
41 * File operations for anonymous inode file associated with a SCIF endpoint,
42 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
43 * poll API in the kernel and these take in a struct file *. Since a struct
44 * file is not available to kernel mode SCIF, it uses an anonymous file for
47 const struct file_operations scif_anon_fops
= {
51 scif_epd_t
scif_open(void)
53 struct scif_endpt
*ep
;
57 ep
= kzalloc(sizeof(*ep
), GFP_KERNEL
);
61 ep
->qp_info
.qp
= kzalloc(sizeof(*ep
->qp_info
.qp
), GFP_KERNEL
);
65 err
= scif_anon_inode_getfile(ep
);
69 spin_lock_init(&ep
->lock
);
70 mutex_init(&ep
->sendlock
);
71 mutex_init(&ep
->recvlock
);
74 ep
->state
= SCIFEP_UNBOUND
;
75 dev_dbg(scif_info
.mdev
.this_device
,
76 "SCIFAPI open: ep %p success\n", ep
);
80 kfree(ep
->qp_info
.qp
);
86 EXPORT_SYMBOL_GPL(scif_open
);
89 * scif_disconnect_ep - Disconnects the endpoint if found
90 * @epd: The end point returned from scif_open()
92 static struct scif_endpt
*scif_disconnect_ep(struct scif_endpt
*ep
)
95 struct scif_endpt
*fep
= NULL
;
96 struct scif_endpt
*tmpep
;
97 struct list_head
*pos
, *tmpq
;
101 * Wake up any threads blocked in send()/recv() before closing
102 * out the connection. Grabbing and releasing the send/recv lock
103 * will ensure that any blocked senders/receivers have exited for
104 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
105 * close. Ring 3 endpoints are not affected since close will not
106 * be called while there are IOCTLs executing.
108 wake_up_interruptible(&ep
->sendwq
);
109 wake_up_interruptible(&ep
->recvwq
);
110 mutex_lock(&ep
->sendlock
);
111 mutex_unlock(&ep
->sendlock
);
112 mutex_lock(&ep
->recvlock
);
113 mutex_unlock(&ep
->recvlock
);
115 /* Remove from the connected list */
116 mutex_lock(&scif_info
.connlock
);
117 list_for_each_safe(pos
, tmpq
, &scif_info
.connected
) {
118 tmpep
= list_entry(pos
, struct scif_endpt
, list
);
122 spin_lock(&ep
->lock
);
129 * The other side has completed the disconnect before
130 * the end point can be removed from the list. Therefore
131 * the ep lock is not locked, traverse the disconnected
132 * list to find the endpoint and release the conn lock.
134 list_for_each_safe(pos
, tmpq
, &scif_info
.disconnected
) {
135 tmpep
= list_entry(pos
, struct scif_endpt
, list
);
141 mutex_unlock(&scif_info
.connlock
);
145 init_completion(&ep
->discon
);
146 msg
.uop
= SCIF_DISCNCT
;
149 msg
.payload
[0] = (u64
)ep
;
150 msg
.payload
[1] = ep
->remote_ep
;
152 err
= scif_nodeqp_send(ep
->remote_dev
, &msg
);
153 spin_unlock(&ep
->lock
);
154 mutex_unlock(&scif_info
.connlock
);
157 /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
158 wait_for_completion_timeout(&ep
->discon
,
159 SCIF_NODE_ALIVE_TIMEOUT
);
163 int scif_close(scif_epd_t epd
)
165 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
166 struct scif_endpt
*tmpep
;
167 struct list_head
*pos
, *tmpq
;
168 enum scif_epd_state oldstate
;
171 dev_dbg(scif_info
.mdev
.this_device
, "SCIFAPI close: ep %p %s\n",
172 ep
, scif_ep_states
[ep
->state
]);
174 spin_lock(&ep
->lock
);
175 flush_conn
= (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
);
176 spin_unlock(&ep
->lock
);
179 flush_work(&scif_info
.conn_work
);
181 spin_lock(&ep
->lock
);
182 oldstate
= ep
->state
;
184 ep
->state
= SCIFEP_CLOSING
;
188 dev_err(scif_info
.mdev
.this_device
,
189 "SCIFAPI close: zombie state unexpected\n");
190 case SCIFEP_DISCONNECTED
:
191 spin_unlock(&ep
->lock
);
192 scif_unregister_all_windows(epd
);
193 /* Remove from the disconnected list */
194 mutex_lock(&scif_info
.connlock
);
195 list_for_each_safe(pos
, tmpq
, &scif_info
.disconnected
) {
196 tmpep
= list_entry(pos
, struct scif_endpt
, list
);
202 mutex_unlock(&scif_info
.connlock
);
206 case SCIFEP_CONNECTING
:
207 spin_unlock(&ep
->lock
);
210 case SCIFEP_CONNECTED
:
213 spin_unlock(&ep
->lock
);
214 scif_unregister_all_windows(epd
);
215 scif_disconnect_ep(ep
);
218 case SCIFEP_LISTENING
:
219 case SCIFEP_CLLISTEN
:
221 struct scif_conreq
*conreq
;
223 struct scif_endpt
*aep
;
225 spin_unlock(&ep
->lock
);
226 mutex_lock(&scif_info
.eplock
);
228 /* remove from listen list */
229 list_for_each_safe(pos
, tmpq
, &scif_info
.listen
) {
230 tmpep
= list_entry(pos
, struct scif_endpt
, list
);
234 /* Remove any dangling accepts */
235 while (ep
->acceptcnt
) {
236 aep
= list_first_entry(&ep
->li_accept
,
237 struct scif_endpt
, liacceptlist
);
238 list_del(&aep
->liacceptlist
);
239 scif_put_port(aep
->port
.port
);
240 list_for_each_safe(pos
, tmpq
, &scif_info
.uaccept
) {
241 tmpep
= list_entry(pos
, struct scif_endpt
,
248 mutex_unlock(&scif_info
.eplock
);
249 mutex_lock(&scif_info
.connlock
);
250 list_for_each_safe(pos
, tmpq
, &scif_info
.connected
) {
251 tmpep
= list_entry(pos
,
252 struct scif_endpt
, list
);
258 list_for_each_safe(pos
, tmpq
, &scif_info
.disconnected
) {
259 tmpep
= list_entry(pos
,
260 struct scif_endpt
, list
);
266 mutex_unlock(&scif_info
.connlock
);
267 scif_teardown_ep(aep
);
268 mutex_lock(&scif_info
.eplock
);
269 scif_add_epd_to_zombie_list(aep
, SCIF_EPLOCK_HELD
);
273 spin_lock(&ep
->lock
);
274 mutex_unlock(&scif_info
.eplock
);
276 /* Remove and reject any pending connection requests. */
277 while (ep
->conreqcnt
) {
278 conreq
= list_first_entry(&ep
->conlist
,
279 struct scif_conreq
, list
);
280 list_del(&conreq
->list
);
282 msg
.uop
= SCIF_CNCT_REJ
;
283 msg
.dst
.node
= conreq
->msg
.src
.node
;
284 msg
.dst
.port
= conreq
->msg
.src
.port
;
285 msg
.payload
[0] = conreq
->msg
.payload
[0];
286 msg
.payload
[1] = conreq
->msg
.payload
[1];
288 * No Error Handling on purpose for scif_nodeqp_send().
289 * If the remote node is lost we still want free the
290 * connection requests on the self node.
292 scif_nodeqp_send(&scif_dev
[conreq
->msg
.src
.node
],
298 spin_unlock(&ep
->lock
);
299 /* If a kSCIF accept is waiting wake it up */
300 wake_up_interruptible(&ep
->conwq
);
304 scif_put_port(ep
->port
.port
);
305 scif_anon_inode_fput(ep
);
306 scif_teardown_ep(ep
);
307 scif_add_epd_to_zombie_list(ep
, !SCIF_EPLOCK_HELD
);
310 EXPORT_SYMBOL_GPL(scif_close
);
313 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
314 * accept new connections.
315 * @epd: The end point returned from scif_open()
317 int __scif_flush(scif_epd_t epd
)
319 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
322 case SCIFEP_LISTENING
:
324 ep
->state
= SCIFEP_CLLISTEN
;
326 /* If an accept is waiting wake it up */
327 wake_up_interruptible(&ep
->conwq
);
336 int scif_bind(scif_epd_t epd
, u16 pn
)
338 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
342 dev_dbg(scif_info
.mdev
.this_device
,
343 "SCIFAPI bind: ep %p %s requested port number %d\n",
344 ep
, scif_ep_states
[ep
->state
], pn
);
347 * Similar to IETF RFC 1700, SCIF ports below
348 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
349 * processes or by processes executed by privileged users.
351 if (pn
< SCIF_ADMIN_PORT_END
&& !capable(CAP_SYS_ADMIN
)) {
353 goto scif_bind_admin_exit
;
357 spin_lock(&ep
->lock
);
358 if (ep
->state
== SCIFEP_BOUND
) {
361 } else if (ep
->state
!= SCIFEP_UNBOUND
) {
367 tmp
= scif_rsrv_port(pn
);
373 pn
= scif_get_new_port();
380 ep
->state
= SCIFEP_BOUND
;
381 ep
->port
.node
= scif_info
.nodeid
;
383 ep
->conn_async_state
= ASYNC_CONN_IDLE
;
385 dev_dbg(scif_info
.mdev
.this_device
,
386 "SCIFAPI bind: bound to port number %d\n", pn
);
388 spin_unlock(&ep
->lock
);
389 scif_bind_admin_exit
:
392 EXPORT_SYMBOL_GPL(scif_bind
);
394 int scif_listen(scif_epd_t epd
, int backlog
)
396 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
398 dev_dbg(scif_info
.mdev
.this_device
,
399 "SCIFAPI listen: ep %p %s\n", ep
, scif_ep_states
[ep
->state
]);
400 spin_lock(&ep
->lock
);
404 case SCIFEP_CLLISTEN
:
406 case SCIFEP_DISCONNECTED
:
407 spin_unlock(&ep
->lock
);
409 case SCIFEP_LISTENING
:
410 case SCIFEP_CONNECTED
:
411 case SCIFEP_CONNECTING
:
413 spin_unlock(&ep
->lock
);
419 ep
->state
= SCIFEP_LISTENING
;
420 ep
->backlog
= backlog
;
424 INIT_LIST_HEAD(&ep
->conlist
);
425 init_waitqueue_head(&ep
->conwq
);
426 INIT_LIST_HEAD(&ep
->li_accept
);
427 spin_unlock(&ep
->lock
);
430 * Listen status is complete so delete the qp information not needed
431 * on a listen before placing on the list of listening ep's
433 scif_teardown_ep(ep
);
434 ep
->qp_info
.qp
= NULL
;
436 mutex_lock(&scif_info
.eplock
);
437 list_add_tail(&ep
->list
, &scif_info
.listen
);
438 mutex_unlock(&scif_info
.eplock
);
441 EXPORT_SYMBOL_GPL(scif_listen
);
/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *	connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 *	allocates the local qp for the endpoint ring buffer and then sends
 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *	a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *	message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *	the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *	connecting endpoint is woken up as part of handling
 *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 *	the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *	in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
471 static int scif_conn_func(struct scif_endpt
*ep
)
475 struct device
*spdev
;
477 err
= scif_reserve_dma_chan(ep
);
479 dev_err(&ep
->remote_dev
->sdev
->dev
,
480 "%s %d err %d\n", __func__
, __LINE__
, err
);
481 ep
->state
= SCIFEP_BOUND
;
482 goto connect_error_simple
;
484 /* Initiate the first part of the endpoint QP setup */
485 err
= scif_setup_qp_connect(ep
->qp_info
.qp
, &ep
->qp_info
.qp_offset
,
486 SCIF_ENDPT_QP_SIZE
, ep
->remote_dev
);
488 dev_err(&ep
->remote_dev
->sdev
->dev
,
489 "%s err %d qp_offset 0x%llx\n",
490 __func__
, err
, ep
->qp_info
.qp_offset
);
491 ep
->state
= SCIFEP_BOUND
;
492 goto connect_error_simple
;
495 spdev
= scif_get_peer_dev(ep
->remote_dev
);
497 err
= PTR_ERR(spdev
);
500 /* Format connect message and send it */
502 msg
.dst
= ep
->conn_port
;
503 msg
.uop
= SCIF_CNCT_REQ
;
504 msg
.payload
[0] = (u64
)ep
;
505 msg
.payload
[1] = ep
->qp_info
.qp_offset
;
506 err
= _scif_nodeqp_send(ep
->remote_dev
, &msg
);
508 goto connect_error_dec
;
509 scif_put_peer_dev(spdev
);
511 * Wait for the remote node to respond with SCIF_CNCT_GNT or
512 * SCIF_CNCT_REJ message.
514 err
= wait_event_timeout(ep
->conwq
, ep
->state
!= SCIFEP_CONNECTING
,
515 SCIF_NODE_ALIVE_TIMEOUT
);
517 dev_err(&ep
->remote_dev
->sdev
->dev
,
518 "%s %d timeout\n", __func__
, __LINE__
);
519 ep
->state
= SCIFEP_BOUND
;
521 spdev
= scif_get_peer_dev(ep
->remote_dev
);
523 err
= PTR_ERR(spdev
);
526 if (ep
->state
== SCIFEP_MAPPING
) {
527 err
= scif_setup_qp_connect_response(ep
->remote_dev
,
529 ep
->qp_info
.gnt_pld
);
531 * If the resource to map the queue are not available then
532 * we need to tell the other side to terminate the accept
535 dev_err(&ep
->remote_dev
->sdev
->dev
,
536 "%s %d err %d\n", __func__
, __LINE__
, err
);
537 msg
.uop
= SCIF_CNCT_GNTNACK
;
538 msg
.payload
[0] = ep
->remote_ep
;
539 _scif_nodeqp_send(ep
->remote_dev
, &msg
);
540 ep
->state
= SCIFEP_BOUND
;
541 goto connect_error_dec
;
544 msg
.uop
= SCIF_CNCT_GNTACK
;
545 msg
.payload
[0] = ep
->remote_ep
;
546 err
= _scif_nodeqp_send(ep
->remote_dev
, &msg
);
548 ep
->state
= SCIFEP_BOUND
;
549 goto connect_error_dec
;
551 ep
->state
= SCIFEP_CONNECTED
;
552 mutex_lock(&scif_info
.connlock
);
553 list_add_tail(&ep
->list
, &scif_info
.connected
);
554 mutex_unlock(&scif_info
.connlock
);
555 dev_dbg(&ep
->remote_dev
->sdev
->dev
,
556 "SCIFAPI connect: ep %p connected\n", ep
);
557 } else if (ep
->state
== SCIFEP_BOUND
) {
558 dev_dbg(&ep
->remote_dev
->sdev
->dev
,
559 "SCIFAPI connect: ep %p connection refused\n", ep
);
561 goto connect_error_dec
;
563 scif_put_peer_dev(spdev
);
566 scif_put_peer_dev(spdev
);
568 scif_cleanup_ep_qp(ep
);
569 connect_error_simple
:
576 * Workqueue handler for servicing non-blocking SCIF connect
579 void scif_conn_handler(struct work_struct
*work
)
581 struct scif_endpt
*ep
;
585 spin_lock(&scif_info
.nb_connect_lock
);
586 if (!list_empty(&scif_info
.nb_connect_list
)) {
587 ep
= list_first_entry(&scif_info
.nb_connect_list
,
588 struct scif_endpt
, conn_list
);
589 list_del(&ep
->conn_list
);
591 spin_unlock(&scif_info
.nb_connect_lock
);
593 ep
->conn_err
= scif_conn_func(ep
);
594 wake_up_interruptible(&ep
->conn_pend_wq
);
599 int __scif_connect(scif_epd_t epd
, struct scif_port_id
*dst
, bool non_block
)
601 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
603 struct scif_dev
*remote_dev
;
604 struct device
*spdev
;
606 dev_dbg(scif_info
.mdev
.this_device
, "SCIFAPI connect: ep %p %s\n", ep
,
607 scif_ep_states
[ep
->state
]);
609 if (!scif_dev
|| dst
->node
> scif_info
.maxid
)
614 remote_dev
= &scif_dev
[dst
->node
];
615 spdev
= scif_get_peer_dev(remote_dev
);
617 err
= PTR_ERR(spdev
);
621 spin_lock(&ep
->lock
);
627 case SCIFEP_DISCONNECTED
:
628 if (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
)
629 ep
->conn_async_state
= ASYNC_CONN_FLUSH_WORK
;
633 case SCIFEP_LISTENING
:
634 case SCIFEP_CLLISTEN
:
637 case SCIFEP_CONNECTING
:
639 if (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
)
644 case SCIFEP_CONNECTED
:
645 if (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
)
646 ep
->conn_async_state
= ASYNC_CONN_FLUSH_WORK
;
651 ep
->port
.port
= scif_get_new_port();
652 if (!ep
->port
.port
) {
655 ep
->port
.node
= scif_info
.nodeid
;
656 ep
->conn_async_state
= ASYNC_CONN_IDLE
;
661 * If a non-blocking connect has been already initiated
662 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
663 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
664 * SCIF_BOUND due an error in the connection process
665 * (e.g., connection refused) If conn_async_state is
666 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
667 * so that the error status can be collected. If the state is
668 * already ASYNC_CONN_FLUSH_WORK - then set the error to
669 * EINPROGRESS since some other thread is waiting to collect
672 if (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
) {
673 ep
->conn_async_state
= ASYNC_CONN_FLUSH_WORK
;
674 } else if (ep
->conn_async_state
== ASYNC_CONN_FLUSH_WORK
) {
677 ep
->conn_port
= *dst
;
678 init_waitqueue_head(&ep
->sendwq
);
679 init_waitqueue_head(&ep
->recvwq
);
680 init_waitqueue_head(&ep
->conwq
);
681 ep
->conn_async_state
= 0;
683 if (unlikely(non_block
))
684 ep
->conn_async_state
= ASYNC_CONN_INPROGRESS
;
689 if (err
|| ep
->conn_async_state
== ASYNC_CONN_FLUSH_WORK
)
690 goto connect_simple_unlock1
;
692 ep
->state
= SCIFEP_CONNECTING
;
693 ep
->remote_dev
= &scif_dev
[dst
->node
];
694 ep
->qp_info
.qp
->magic
= SCIFEP_MAGIC
;
695 if (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
) {
696 init_waitqueue_head(&ep
->conn_pend_wq
);
697 spin_lock(&scif_info
.nb_connect_lock
);
698 list_add_tail(&ep
->conn_list
, &scif_info
.nb_connect_list
);
699 spin_unlock(&scif_info
.nb_connect_lock
);
701 schedule_work(&scif_info
.conn_work
);
703 connect_simple_unlock1
:
704 spin_unlock(&ep
->lock
);
705 scif_put_peer_dev(spdev
);
708 } else if (ep
->conn_async_state
== ASYNC_CONN_FLUSH_WORK
) {
709 flush_work(&scif_info
.conn_work
);
711 spin_lock(&ep
->lock
);
712 ep
->conn_async_state
= ASYNC_CONN_IDLE
;
713 spin_unlock(&ep
->lock
);
715 err
= scif_conn_func(ep
);
720 int scif_connect(scif_epd_t epd
, struct scif_port_id
*dst
)
722 return __scif_connect(epd
, dst
, false);
724 EXPORT_SYMBOL_GPL(scif_connect
);
727 * scif_accept() - Accept a connection request from the remote node
729 * The function accepts a connection request from the remote node. Successful
730 * complete is indicate by a new end point being created and passed back
731 * to the caller for future reference.
733 * Upon successful complete a zero will be returned and the peer information
736 * If the end point is not in the listening state -EINVAL will be returned.
738 * If during the connection sequence resource allocation fails the -ENOMEM
741 * If the function is called with the ASYNC flag set and no connection requests
742 * are pending it will return -EAGAIN.
744 * If the remote side is not sending any connection requests the caller may
745 * terminate this function with a signal. If so a -EINTR will be returned.
747 int scif_accept(scif_epd_t epd
, struct scif_port_id
*peer
,
748 scif_epd_t
*newepd
, int flags
)
750 struct scif_endpt
*lep
= (struct scif_endpt
*)epd
;
751 struct scif_endpt
*cep
;
752 struct scif_conreq
*conreq
;
755 struct device
*spdev
;
757 dev_dbg(scif_info
.mdev
.this_device
,
758 "SCIFAPI accept: ep %p %s\n", lep
, scif_ep_states
[lep
->state
]);
760 if (flags
& ~SCIF_ACCEPT_SYNC
)
763 if (!peer
|| !newepd
)
767 spin_lock(&lep
->lock
);
768 if (lep
->state
!= SCIFEP_LISTENING
) {
769 spin_unlock(&lep
->lock
);
773 if (!lep
->conreqcnt
&& !(flags
& SCIF_ACCEPT_SYNC
)) {
774 /* No connection request present and we do not want to wait */
775 spin_unlock(&lep
->lock
);
779 lep
->files
= current
->files
;
781 spin_unlock(&lep
->lock
);
782 /* Wait for the remote node to send us a SCIF_CNCT_REQ */
783 err
= wait_event_interruptible(lep
->conwq
,
785 (lep
->state
!= SCIFEP_LISTENING
)));
789 if (lep
->state
!= SCIFEP_LISTENING
)
792 spin_lock(&lep
->lock
);
795 goto retry_connection
;
797 /* Get the first connect request off the list */
798 conreq
= list_first_entry(&lep
->conlist
, struct scif_conreq
, list
);
799 list_del(&conreq
->list
);
801 spin_unlock(&lep
->lock
);
803 /* Fill in the peer information */
804 peer
->node
= conreq
->msg
.src
.node
;
805 peer
->port
= conreq
->msg
.src
.port
;
807 cep
= kzalloc(sizeof(*cep
), GFP_KERNEL
);
810 goto scif_accept_error_epalloc
;
812 spin_lock_init(&cep
->lock
);
813 mutex_init(&cep
->sendlock
);
814 mutex_init(&cep
->recvlock
);
815 cep
->state
= SCIFEP_CONNECTING
;
816 cep
->remote_dev
= &scif_dev
[peer
->node
];
817 cep
->remote_ep
= conreq
->msg
.payload
[0];
819 scif_rma_ep_init(cep
);
821 err
= scif_reserve_dma_chan(cep
);
823 dev_err(scif_info
.mdev
.this_device
,
824 "%s %d err %d\n", __func__
, __LINE__
, err
);
825 goto scif_accept_error_qpalloc
;
828 cep
->qp_info
.qp
= kzalloc(sizeof(*cep
->qp_info
.qp
), GFP_KERNEL
);
829 if (!cep
->qp_info
.qp
) {
831 goto scif_accept_error_qpalloc
;
834 err
= scif_anon_inode_getfile(cep
);
836 goto scif_accept_error_anon_inode
;
838 cep
->qp_info
.qp
->magic
= SCIFEP_MAGIC
;
839 spdev
= scif_get_peer_dev(cep
->remote_dev
);
841 err
= PTR_ERR(spdev
);
842 goto scif_accept_error_map
;
844 err
= scif_setup_qp_accept(cep
->qp_info
.qp
, &cep
->qp_info
.qp_offset
,
845 conreq
->msg
.payload
[1], SCIF_ENDPT_QP_SIZE
,
848 dev_dbg(&cep
->remote_dev
->sdev
->dev
,
849 "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
850 lep
, cep
, err
, cep
->qp_info
.qp_offset
);
851 scif_put_peer_dev(spdev
);
852 goto scif_accept_error_map
;
855 cep
->port
.node
= lep
->port
.node
;
856 cep
->port
.port
= lep
->port
.port
;
857 cep
->peer
.node
= peer
->node
;
858 cep
->peer
.port
= peer
->port
;
859 init_waitqueue_head(&cep
->sendwq
);
860 init_waitqueue_head(&cep
->recvwq
);
861 init_waitqueue_head(&cep
->conwq
);
863 msg
.uop
= SCIF_CNCT_GNT
;
865 msg
.payload
[0] = cep
->remote_ep
;
866 msg
.payload
[1] = cep
->qp_info
.qp_offset
;
867 msg
.payload
[2] = (u64
)cep
;
869 err
= _scif_nodeqp_send(cep
->remote_dev
, &msg
);
870 scif_put_peer_dev(spdev
);
872 goto scif_accept_error_map
;
874 /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
875 err
= wait_event_timeout(cep
->conwq
, cep
->state
!= SCIFEP_CONNECTING
,
876 SCIF_NODE_ACCEPT_TIMEOUT
);
877 if (!err
&& scifdev_alive(cep
))
879 err
= !err
? -ENODEV
: 0;
881 goto scif_accept_error_map
;
884 spin_lock(&cep
->lock
);
886 if (cep
->state
== SCIFEP_CLOSING
) {
888 * Remote failed to allocate resources and NAKed the grant.
889 * There is at this point nothing referencing the new end point.
891 spin_unlock(&cep
->lock
);
892 scif_teardown_ep(cep
);
895 /* If call with sync flag then go back and wait. */
896 if (flags
& SCIF_ACCEPT_SYNC
) {
897 spin_lock(&lep
->lock
);
898 goto retry_connection
;
903 scif_get_port(cep
->port
.port
);
904 *newepd
= (scif_epd_t
)cep
;
905 spin_unlock(&cep
->lock
);
907 scif_accept_error_map
:
908 scif_anon_inode_fput(cep
);
909 scif_accept_error_anon_inode
:
910 scif_teardown_ep(cep
);
911 scif_accept_error_qpalloc
:
913 scif_accept_error_epalloc
:
914 msg
.uop
= SCIF_CNCT_REJ
;
915 msg
.dst
.node
= conreq
->msg
.src
.node
;
916 msg
.dst
.port
= conreq
->msg
.src
.port
;
917 msg
.payload
[0] = conreq
->msg
.payload
[0];
918 msg
.payload
[1] = conreq
->msg
.payload
[1];
919 scif_nodeqp_send(&scif_dev
[conreq
->msg
.src
.node
], &msg
);
923 EXPORT_SYMBOL_GPL(scif_accept
);
926 * scif_msg_param_check:
927 * @epd: The end point returned from scif_open()
928 * @len: Length to receive
929 * @flags: blocking or non blocking
931 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
933 static inline int scif_msg_param_check(scif_epd_t epd
, int len
, int flags
)
939 if (flags
&& (!(flags
& SCIF_RECV_BLOCK
)))
946 static int _scif_send(scif_epd_t epd
, void *msg
, int len
, int flags
)
948 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
949 struct scifmsg notif_msg
;
950 int curr_xfer_len
= 0, sent_len
= 0, write_count
;
952 struct scif_qp
*qp
= ep
->qp_info
.qp
;
954 if (flags
& SCIF_SEND_BLOCK
)
957 spin_lock(&ep
->lock
);
958 while (sent_len
!= len
&& SCIFEP_CONNECTED
== ep
->state
) {
959 write_count
= scif_rb_space(&qp
->outbound_q
);
961 /* Best effort to send as much data as possible */
962 curr_xfer_len
= min(len
- sent_len
, write_count
);
963 ret
= scif_rb_write(&qp
->outbound_q
, msg
,
967 /* Success. Update write pointer */
968 scif_rb_commit(&qp
->outbound_q
);
970 * Send a notification to the peer about the
971 * produced data message.
973 notif_msg
.src
= ep
->port
;
974 notif_msg
.uop
= SCIF_CLIENT_SENT
;
975 notif_msg
.payload
[0] = ep
->remote_ep
;
976 ret
= _scif_nodeqp_send(ep
->remote_dev
, ¬if_msg
);
979 sent_len
+= curr_xfer_len
;
980 msg
= msg
+ curr_xfer_len
;
983 curr_xfer_len
= min(len
- sent_len
, SCIF_ENDPT_QP_SIZE
- 1);
984 /* Not enough RB space. return for the Non Blocking case */
985 if (!(flags
& SCIF_SEND_BLOCK
))
988 spin_unlock(&ep
->lock
);
989 /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
991 wait_event_interruptible(ep
->sendwq
,
992 (SCIFEP_CONNECTED
!= ep
->state
) ||
993 (scif_rb_space(&qp
->outbound_q
) >=
995 spin_lock(&ep
->lock
);
1001 else if (!ret
&& SCIFEP_CONNECTED
!= ep
->state
)
1002 ret
= SCIFEP_DISCONNECTED
== ep
->state
?
1003 -ECONNRESET
: -ENOTCONN
;
1004 spin_unlock(&ep
->lock
);
1008 static int _scif_recv(scif_epd_t epd
, void *msg
, int len
, int flags
)
1011 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
1012 struct scifmsg notif_msg
;
1013 int curr_recv_len
= 0, remaining_len
= len
, read_count
;
1015 struct scif_qp
*qp
= ep
->qp_info
.qp
;
1017 if (flags
& SCIF_RECV_BLOCK
)
1019 spin_lock(&ep
->lock
);
1020 while (remaining_len
&& (SCIFEP_CONNECTED
== ep
->state
||
1021 SCIFEP_DISCONNECTED
== ep
->state
)) {
1022 read_count
= scif_rb_count(&qp
->inbound_q
, remaining_len
);
1025 * Best effort to recv as much data as there
1026 * are bytes to read in the RB particularly
1027 * important for the Non Blocking case.
1029 curr_recv_len
= min(remaining_len
, read_count
);
1030 read_size
= scif_rb_get_next(&qp
->inbound_q
,
1031 msg
, curr_recv_len
);
1032 if (ep
->state
== SCIFEP_CONNECTED
) {
1034 * Update the read pointer only if the endpoint
1035 * is still connected else the read pointer
1036 * might no longer exist since the peer has
1039 scif_rb_update_read_ptr(&qp
->inbound_q
);
1041 * Send a notification to the peer about the
1042 * consumed data message only if the EP is in
1043 * SCIFEP_CONNECTED state.
1045 notif_msg
.src
= ep
->port
;
1046 notif_msg
.uop
= SCIF_CLIENT_RCVD
;
1047 notif_msg
.payload
[0] = ep
->remote_ep
;
1048 ret
= _scif_nodeqp_send(ep
->remote_dev
,
1053 remaining_len
-= curr_recv_len
;
1054 msg
= msg
+ curr_recv_len
;
1058 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
1059 * we will keep looping forever.
1061 if (ep
->state
== SCIFEP_DISCONNECTED
)
1064 * Return in the Non Blocking case if there is no data
1065 * to read in this iteration.
1067 if (!(flags
& SCIF_RECV_BLOCK
))
1069 curr_recv_len
= min(remaining_len
, SCIF_ENDPT_QP_SIZE
- 1);
1070 spin_unlock(&ep
->lock
);
1072 * Wait for a SCIF_CLIENT_SEND message in the blocking case
1073 * or until other side disconnects.
1076 wait_event_interruptible(ep
->recvwq
,
1077 SCIFEP_CONNECTED
!= ep
->state
||
1078 scif_rb_count(&qp
->inbound_q
,
1081 spin_lock(&ep
->lock
);
1085 if (len
- remaining_len
)
1086 ret
= len
- remaining_len
;
1087 else if (!ret
&& ep
->state
!= SCIFEP_CONNECTED
)
1088 ret
= ep
->state
== SCIFEP_DISCONNECTED
?
1089 -ECONNRESET
: -ENOTCONN
;
1090 spin_unlock(&ep
->lock
);
1095 * scif_user_send() - Send data to connection queue
1096 * @epd: The end point returned from scif_open()
1097 * @msg: Address to place data
1098 * @len: Length to receive
1099 * @flags: blocking or non blocking
1101 * This function is called from the driver IOCTL entry point
1102 * only and is a wrapper for _scif_send().
1104 int scif_user_send(scif_epd_t epd
, void __user
*msg
, int len
, int flags
)
1106 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
1111 int chunk_len
= min(len
, (1 << (MAX_ORDER
+ PAGE_SHIFT
- 1)));
1113 dev_dbg(scif_info
.mdev
.this_device
,
1114 "SCIFAPI send (U): ep %p %s\n", ep
, scif_ep_states
[ep
->state
]);
1118 err
= scif_msg_param_check(epd
, len
, flags
);
1122 tmp
= kmalloc(chunk_len
, GFP_KERNEL
);
1128 * Grabbing the lock before breaking up the transfer in
1129 * multiple chunks is required to ensure that messages do
1130 * not get fragmented and reordered.
1132 mutex_lock(&ep
->sendlock
);
1133 while (sent_len
!= len
) {
1134 loop_len
= len
- sent_len
;
1135 loop_len
= min(chunk_len
, loop_len
);
1136 if (copy_from_user(tmp
, msg
, loop_len
)) {
1140 err
= _scif_send(epd
, tmp
, loop_len
, flags
);
1145 if (err
!= loop_len
)
1149 mutex_unlock(&ep
->sendlock
);
1152 return err
< 0 ? err
: sent_len
;
1156 * scif_user_recv() - Receive data from connection queue
1157 * @epd: The end point returned from scif_open()
1158 * @msg: Address to place data
1159 * @len: Length to receive
1160 * @flags: blocking or non blocking
1162 * This function is called from the driver IOCTL entry point
1163 * only and is a wrapper for _scif_recv().
1165 int scif_user_recv(scif_epd_t epd
, void __user
*msg
, int len
, int flags
)
1167 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
1172 int chunk_len
= min(len
, (1 << (MAX_ORDER
+ PAGE_SHIFT
- 1)));
1174 dev_dbg(scif_info
.mdev
.this_device
,
1175 "SCIFAPI recv (U): ep %p %s\n", ep
, scif_ep_states
[ep
->state
]);
1179 err
= scif_msg_param_check(epd
, len
, flags
);
1183 tmp
= kmalloc(chunk_len
, GFP_KERNEL
);
1189 * Grabbing the lock before breaking up the transfer in
1190 * multiple chunks is required to ensure that messages do
1191 * not get fragmented and reordered.
1193 mutex_lock(&ep
->recvlock
);
1194 while (recv_len
!= len
) {
1195 loop_len
= len
- recv_len
;
1196 loop_len
= min(chunk_len
, loop_len
);
1197 err
= _scif_recv(epd
, tmp
, loop_len
, flags
);
1200 if (copy_to_user(msg
, tmp
, err
)) {
1206 if (err
!= loop_len
)
1210 mutex_unlock(&ep
->recvlock
);
1213 return err
< 0 ? err
: recv_len
;
1217 * scif_send() - Send data to connection queue
1218 * @epd: The end point returned from scif_open()
1219 * @msg: Address to place data
1220 * @len: Length to receive
1221 * @flags: blocking or non blocking
1223 * This function is called from the kernel mode only and is
1224 * a wrapper for _scif_send().
1226 int scif_send(scif_epd_t epd
, void *msg
, int len
, int flags
)
1228 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
1231 dev_dbg(scif_info
.mdev
.this_device
,
1232 "SCIFAPI send (K): ep %p %s\n", ep
, scif_ep_states
[ep
->state
]);
1236 ret
= scif_msg_param_check(epd
, len
, flags
);
1239 if (!ep
->remote_dev
)
1242 * Grab the mutex lock in the blocking case only
1243 * to ensure messages do not get fragmented/reordered.
1244 * The non blocking mode is protected using spin locks
1247 if (flags
& SCIF_SEND_BLOCK
)
1248 mutex_lock(&ep
->sendlock
);
1250 ret
= _scif_send(epd
, msg
, len
, flags
);
1252 if (flags
& SCIF_SEND_BLOCK
)
1253 mutex_unlock(&ep
->sendlock
);
1256 EXPORT_SYMBOL_GPL(scif_send
);
1259 * scif_recv() - Receive data from connection queue
1260 * @epd: The end point returned from scif_open()
1261 * @msg: Address to place data
1262 * @len: Length to receive
1263 * @flags: blocking or non blocking
1265 * This function is called from the kernel mode only and is
1266 * a wrapper for _scif_recv().
1268 int scif_recv(scif_epd_t epd
, void *msg
, int len
, int flags
)
1270 struct scif_endpt
*ep
= (struct scif_endpt
*)epd
;
1273 dev_dbg(scif_info
.mdev
.this_device
,
1274 "SCIFAPI recv (K): ep %p %s\n", ep
, scif_ep_states
[ep
->state
]);
1278 ret
= scif_msg_param_check(epd
, len
, flags
);
1282 * Grab the mutex lock in the blocking case only
1283 * to ensure messages do not get fragmented/reordered.
1284 * The non blocking mode is protected using spin locks
1287 if (flags
& SCIF_RECV_BLOCK
)
1288 mutex_lock(&ep
->recvlock
);
1290 ret
= _scif_recv(epd
, msg
, len
, flags
);
1292 if (flags
& SCIF_RECV_BLOCK
)
1293 mutex_unlock(&ep
->recvlock
);
1297 EXPORT_SYMBOL_GPL(scif_recv
);
1299 static inline void _scif_poll_wait(struct file
*f
, wait_queue_head_t
*wq
,
1300 poll_table
*p
, struct scif_endpt
*ep
)
1303 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1304 * and regrab it afterwards. Because the endpoint state might have
1305 * changed while the lock was given up, the state must be checked
1306 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1309 spin_unlock(&ep
->lock
);
1310 poll_wait(f
, wq
, p
);
1311 spin_lock(&ep
->lock
);
1315 __scif_pollfd(struct file
*f
, poll_table
*wait
, struct scif_endpt
*ep
)
1317 unsigned int mask
= 0;
1319 dev_dbg(scif_info
.mdev
.this_device
,
1320 "SCIFAPI pollfd: ep %p %s\n", ep
, scif_ep_states
[ep
->state
]);
1322 spin_lock(&ep
->lock
);
1324 /* Endpoint is waiting for a non-blocking connect to complete */
1325 if (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
) {
1326 _scif_poll_wait(f
, &ep
->conn_pend_wq
, wait
, ep
);
1327 if (ep
->conn_async_state
== ASYNC_CONN_INPROGRESS
) {
1328 if (ep
->state
== SCIFEP_CONNECTED
||
1329 ep
->state
== SCIFEP_DISCONNECTED
||
1336 /* Endpoint is listening for incoming connection requests */
1337 if (ep
->state
== SCIFEP_LISTENING
) {
1338 _scif_poll_wait(f
, &ep
->conwq
, wait
, ep
);
1339 if (ep
->state
== SCIFEP_LISTENING
) {
1346 /* Endpoint is connected or disconnected */
1347 if (ep
->state
== SCIFEP_CONNECTED
|| ep
->state
== SCIFEP_DISCONNECTED
) {
1348 if (poll_requested_events(wait
) & POLLIN
)
1349 _scif_poll_wait(f
, &ep
->recvwq
, wait
, ep
);
1350 if (poll_requested_events(wait
) & POLLOUT
)
1351 _scif_poll_wait(f
, &ep
->sendwq
, wait
, ep
);
1352 if (ep
->state
== SCIFEP_CONNECTED
||
1353 ep
->state
== SCIFEP_DISCONNECTED
) {
1354 /* Data can be read without blocking */
1355 if (scif_rb_count(&ep
->qp_info
.qp
->inbound_q
, 1))
1357 /* Data can be written without blocking */
1358 if (scif_rb_space(&ep
->qp_info
.qp
->outbound_q
))
1360 /* Return POLLHUP if endpoint is disconnected */
1361 if (ep
->state
== SCIFEP_DISCONNECTED
)
1367 /* Return POLLERR if the endpoint is in none of the above states */
1370 spin_unlock(&ep
->lock
);
1375 * scif_poll() - Kernel mode SCIF poll
1376 * @ufds: Array of scif_pollepd structures containing the end points
1377 * and events to poll on
1378 * @nfds: Size of the ufds array
1379 * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
1381 * The code flow in this function is based on do_poll(..) in select.c
1383 * Returns the number of endpoints which have pending events or 0 in
1384 * the event of a timeout. If a signal is used for wake up, -EINTR is
1388 scif_poll(struct scif_pollepd
*ufds
, unsigned int nfds
, long timeout_msecs
)
1390 struct poll_wqueues table
;
1392 int i
, mask
, count
= 0, timed_out
= timeout_msecs
== 0;
1393 u64 timeout
= timeout_msecs
< 0 ? MAX_SCHEDULE_TIMEOUT
1394 : msecs_to_jiffies(timeout_msecs
);
1396 poll_initwait(&table
);
1399 for (i
= 0; i
< nfds
; i
++) {
1400 pt
->_key
= ufds
[i
].events
| POLLERR
| POLLHUP
;
1401 mask
= __scif_pollfd(ufds
[i
].epd
->anon
,
1403 mask
&= ufds
[i
].events
| POLLERR
| POLLHUP
;
1408 ufds
[i
].revents
= mask
;
1412 count
= table
.error
;
1413 if (signal_pending(current
))
1416 if (count
|| timed_out
)
1419 if (!schedule_timeout_interruptible(timeout
))
1422 poll_freewait(&table
);
1425 EXPORT_SYMBOL_GPL(scif_poll
);
1427 int scif_get_node_ids(u16
*nodes
, int len
, u16
*self
)
1433 if (!scif_is_mgmt_node())
1434 scif_get_node_info();
1436 *self
= scif_info
.nodeid
;
1437 mutex_lock(&scif_info
.conflock
);
1438 len
= min_t(int, len
, scif_info
.total
);
1439 for (node
= 0; node
<= scif_info
.maxid
; node
++) {
1440 if (_scifdev_alive(&scif_dev
[node
])) {
1443 nodes
[offset
++] = node
;
1446 dev_dbg(scif_info
.mdev
.this_device
,
1447 "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
1448 scif_info
.total
, online
, offset
);
1449 mutex_unlock(&scif_info
.conflock
);
1453 EXPORT_SYMBOL_GPL(scif_get_node_ids
);
1455 static int scif_add_client_dev(struct device
*dev
, struct subsys_interface
*si
)
1457 struct scif_client
*client
=
1458 container_of(si
, struct scif_client
, si
);
1459 struct scif_peer_dev
*spdev
=
1460 container_of(dev
, struct scif_peer_dev
, dev
);
1463 client
->probe(spdev
);
1467 static void scif_remove_client_dev(struct device
*dev
,
1468 struct subsys_interface
*si
)
1470 struct scif_client
*client
=
1471 container_of(si
, struct scif_client
, si
);
1472 struct scif_peer_dev
*spdev
=
1473 container_of(dev
, struct scif_peer_dev
, dev
);
1476 client
->remove(spdev
);
1479 void scif_client_unregister(struct scif_client
*client
)
1481 subsys_interface_unregister(&client
->si
);
1483 EXPORT_SYMBOL_GPL(scif_client_unregister
);
1485 int scif_client_register(struct scif_client
*client
)
1487 struct subsys_interface
*si
= &client
->si
;
1489 si
->name
= client
->name
;
1490 si
->subsys
= &scif_peer_bus
;
1491 si
->add_dev
= scif_add_client_dev
;
1492 si
->remove_dev
= scif_remove_client_dev
;
1494 return subsys_interface_register(&client
->si
);
1496 EXPORT_SYMBOL_GPL(scif_client_register
);