2 * Unix Domain Sockets Implementation (PF_UNIX, PF_LOCAL)
3 * This code handles requests generated by operations on /dev/uds
5 * The interface to UNIX domain sockets is similar to the interface to network
6 * sockets. There is a character device (/dev/uds) and this server is a
7 * 'driver' for that device.
/*
 * Forward declarations for the static functions of this driver.
 * uds_close, uds_write, uds_ioctl, uds_cancel and uds_select are installed
 * as callbacks in the uds_tab table; uds_perform_write is the helper that
 * uds_write, uds_select and uds_unsuspend call to do the actual write.
 */
12 static ssize_t
uds_perform_write(devminor_t
, endpoint_t
, cp_grant_id_t
, size_t,
15 static int uds_open(devminor_t
, int, endpoint_t
);
16 static int uds_close(devminor_t
);
17 static ssize_t
uds_read(devminor_t
, u64_t
, endpoint_t
, cp_grant_id_t
, size_t,
19 static ssize_t
uds_write(devminor_t
, u64_t
, endpoint_t
, cp_grant_id_t
, size_t,
21 static int uds_ioctl(devminor_t
, unsigned long, endpoint_t
, cp_grant_id_t
, int,
22 endpoint_t
, cdev_id_t
);
23 static int uds_cancel(devminor_t
, endpoint_t
, cdev_id_t
);
24 static int uds_select(devminor_t
, unsigned int, endpoint_t
);
/*
 * Character driver callback table. A pointer to this table is handed to
 * chardriver_task(), which is how the handlers below get invoked.
 */
26 static struct chardriver uds_tab
= {
28 .cdr_close
= uds_close
,
30 .cdr_write
= uds_write
,
31 .cdr_ioctl
= uds_ioctl
,
32 .cdr_cancel
= uds_cancel
,
33 .cdr_select
= uds_select
/*
 * One uds_fd_t slot per socket, NR_FDS slots in total. The handlers in this
 * file index it with the device minor number, and with peer/backlog values
 * that are themselves indices into this table.
 */
36 /* File Descriptor Table */
37 uds_fd_t uds_fd_table
[NR_FDS
];
/*
 * Termination bookkeeping. While it is greater than zero, uds_close()
 * decrements it and calls chardriver_terminate() when it reaches zero;
 * uds_signal() terminates immediately when it is zero.
 * NOTE(review): presumably the number of sockets still open at SIGTERM time;
 * the statement that sets it is not visible here - confirm.
 */
39 static unsigned int uds_exit_left
;
/*
 * Open handler: set up a new socket in a free slot of uds_fd_table.
 *
 * Searches slots 1..NR_FDS-1 for one in state UDS_FREE, maps a UDS_BUF-byte
 * anonymous private buffer for it, initializes the slot's fields, and
 * returns CDEV_CLONED | minor. The orig_minor parameter is marked UNUSED;
 * user_endpt is stored as the slot's owner.
 */
42 uds_open(devminor_t
UNUSED(orig_minor
), int access
,
43 endpoint_t user_endpt
)
49 dprintf(("UDS: uds_open() from %d\n", user_endpt
));
/* NOTE(review): "from the" / "the /dev/uds" below repeats the word "the". */
52 * Find a slot in the descriptor table for the new descriptor.
53 * The index of the descriptor in the table will be returned.
54 * Subsequent calls to read/write/close/ioctl/etc will use this
55 * minor number. The minor number must be different from the
56 * the /dev/uds device's minor number (0).
/* The search starts at 1 so that slot 0 is never handed out. */
58 for (minor
= 1; minor
< NR_FDS
; minor
++)
59 if (uds_fd_table
[minor
].state
== UDS_FREE
)
/* NOTE(review): "ringer buffer" below should read "ring buffer". */
66 * Allocate memory for the ringer buffer. In order to save on memory
67 * in the common case, the buffer is allocated only when the socket is
68 * in use. We use mmap instead of malloc to allow the memory to be
69 * actually freed later.
/* The matching munmap() of this buffer is in uds_close(). */
71 if ((buf
= mmap(NULL
, UDS_BUF
, PROT_READ
| PROT_WRITE
,
72 MAP_ANON
| MAP_PRIVATE
, -1, 0)) == MAP_FAILED
)
76 * Allocate the socket, and set its initial parameters.
78 uds_fd_table
[minor
].state
= UDS_INUSE
;
79 uds_fd_table
[minor
].owner
= user_endpt
;
/* No select request is pending on a fresh socket. */
80 uds_fd_table
[minor
].sel_endpt
= NONE
;
81 uds_fd_table
[minor
].sel_ops
= 0;
/* Empty ring buffer: tail position 0, zero bytes stored. */
82 uds_fd_table
[minor
].buf
= buf
;
83 uds_fd_table
[minor
].pos
= 0;
84 uds_fd_table
[minor
].size
= 0;
/* Both directions are enabled; the read/write paths test these bits. */
85 uds_fd_table
[minor
].mode
= UDS_R
| UDS_W
;
/* NOTE(review): -1 presumably means "socket type not chosen yet" - confirm. */
86 uds_fd_table
[minor
].type
= -1;
/* -1 marks an unused backlog entry (uds_close/uds_select test != -1). */
88 for (i
= 0; i
< UDS_SOMAXCONN
; i
++)
89 uds_fd_table
[minor
].backlog
[i
] = -1;
90 uds_fd_table
[minor
].backlog_size
= UDS_SOMAXCONN
;
/* Clear the ancillary data, then set each of its fds entries to -1. */
92 memset(&uds_fd_table
[minor
].ancillary_data
, '\0',
93 sizeof(struct ancillary
));
94 for (i
= 0; i
< OPEN_MAX
; i
++)
95 uds_fd_table
[minor
].ancillary_data
.fds
[i
] = -1;
/* Not listening; peer and child are set to -1. */
97 uds_fd_table
[minor
].listening
= 0;
98 uds_fd_table
[minor
].peer
= -1;
99 uds_fd_table
[minor
].child
= -1;
/* All three socket addresses start out zeroed. */
101 memset(&uds_fd_table
[minor
].addr
, '\0', sizeof(struct sockaddr_un
));
102 memset(&uds_fd_table
[minor
].source
, '\0', sizeof(struct sockaddr_un
));
103 memset(&uds_fd_table
[minor
].target
, '\0', sizeof(struct sockaddr_un
));
105 uds_fd_table
[minor
].suspended
= UDS_NOT_SUSPENDED
;
/* The chosen slot index is returned, OR-ed with the CDEV_CLONED flag. */
107 return CDEV_CLONED
| minor
;
/*
 * Reset socket 'minor' after its connection has gone away.
 *
 * Clears the peer link, stores ECONNRESET as the socket's pending error,
 * resumes a suspended call if there is one, and replies to a pending select
 * request with all saved operations before clearing them.
 */
111 uds_reset(devminor_t minor
)
113 /* Disconnect the socket from its peer. */
114 uds_fd_table
[minor
].peer
= -1;
116 /* Set an error to pass to the caller. */
117 uds_fd_table
[minor
].err
= ECONNRESET
;
119 /* If a process was blocked on I/O, revive it. */
120 if (uds_fd_table
[minor
].suspended
!= UDS_NOT_SUSPENDED
)
121 uds_unsuspend(minor
);
123 /* All of the peer's calls will fail immediately now. */
/* Every saved select operation is reported at once, then forgotten. */
124 if (uds_fd_table
[minor
].sel_ops
!= 0) {
125 chardriver_reply_select(uds_fd_table
[minor
].sel_endpt
, minor
,
126 uds_fd_table
[minor
].sel_ops
);
127 uds_fd_table
[minor
].sel_ops
= 0;
/*
 * Close handler: tear down socket 'minor' and release its slot.
 *
 * Returns ENXIO when minor is outside 0..NR_FDS-1. Depending on the socket's
 * situation it is removed from a server's backlog (still connecting),
 * disconnected from its peer (connected), or, when listening, every socket
 * in its backlog is reset. Afterwards pending ancillary file descriptors are
 * cleared, the ring buffer is unmapped and the slot is zeroed.
 */
132 uds_close(devminor_t minor
)
136 dprintf(("UDS: uds_close(%d)\n", minor
));
138 if (minor
< 0 || minor
>= NR_FDS
) return ENXIO
;
140 if (uds_fd_table
[minor
].state
!= UDS_INUSE
)
143 peer
= uds_fd_table
[minor
].peer
;
/*
 * We point at a peer that does not point back: the peer is the server we
 * are still connecting to.
 */
144 if (peer
!= -1 && uds_fd_table
[peer
].peer
== -1) {
145 /* Connecting socket: clear from server's backlog. */
146 if (!uds_fd_table
[peer
].listening
)
147 panic("connecting socket attached to non-server");
149 for (i
= 0; i
< uds_fd_table
[peer
].backlog_size
; i
++) {
150 if (uds_fd_table
[peer
].backlog
[i
] == minor
) {
151 uds_fd_table
[peer
].backlog
[i
] = -1;
155 } else if (peer
!= -1) {
156 /* Connected socket: disconnect it. */
158 } else if (uds_fd_table
[minor
].listening
) {
159 /* Listening socket: disconnect all sockets in the backlog. */
160 for (i
= 0; i
< uds_fd_table
[minor
].backlog_size
; i
++)
161 if (uds_fd_table
[minor
].backlog
[i
] != -1)
162 uds_reset(uds_fd_table
[minor
].backlog
[i
]);
/* Only call uds_clear_fds() when ancillary descriptors are pending. */
165 if (uds_fd_table
[minor
].ancillary_data
.nfiledes
> 0)
166 uds_clear_fds(minor
, &uds_fd_table
[minor
].ancillary_data
);
168 /* Release the memory for the ring buffer. */
169 munmap(uds_fd_table
[minor
].buf
, UDS_BUF
);
171 /* Set the socket back to its original UDS_FREE state. */
172 memset(&uds_fd_table
[minor
], '\0', sizeof(uds_fd_t
));
174 /* If terminating, and this was the last open socket, exit now. */
175 if (uds_exit_left
> 0) {
176 if (--uds_exit_left
== 0)
177 chardriver_terminate();
/*
 * Select handler: work out which of the requested operations are ready on
 * socket 'minor'.
 *
 * Returns ENXIO when minor is outside 0..NR_FDS-1. Readiness is probed by
 * calling uds_perform_read()/uds_perform_write() for one byte with the
 * 'pretend' argument set to 1 and without a grant. Operations may also be
 * saved in sel_ops/sel_endpt for a later notification.
 */
184 uds_select(devminor_t minor
, unsigned int ops
, endpoint_t endpt
)
186 unsigned int ready_ops
;
189 dprintf(("UDS: uds_select(%d)\n", minor
));
191 if (minor
< 0 || minor
>= NR_FDS
) return ENXIO
;
193 if (uds_fd_table
[minor
].state
!= UDS_INUSE
)
/* Split the request into the notify flag and the operation bits proper. */
196 watch
= (ops
& CDEV_NOTIFY
);
197 ops
&= (CDEV_OP_RD
| CDEV_OP_WR
| CDEV_OP_ERR
);
201 /* Check if there is data available to read. */
202 if (ops
& CDEV_OP_RD
) {
203 bytes
= uds_perform_read(minor
, NONE
, GRANT_INVALID
, 1, 1);
205 ready_ops
|= CDEV_OP_RD
; /* data available */
206 } else if (uds_fd_table
[minor
].listening
== 1) {
207 /* Check for pending connections. */
208 for (i
= 0; i
< uds_fd_table
[minor
].backlog_size
; i
++)
210 if (uds_fd_table
[minor
].backlog
[i
] != -1) {
211 ready_ops
|= CDEV_OP_RD
;
/* Any result other than EDONTREPLY is reported as readable here. */
215 } else if (bytes
!= EDONTREPLY
) {
216 ready_ops
|= CDEV_OP_RD
; /* error */
220 /* Check if we can write without blocking. */
221 if (ops
& CDEV_OP_WR
) {
222 bytes
= uds_perform_write(minor
, NONE
, GRANT_INVALID
, 1, 1);
/* Writable unless the probe returned 0 or EDONTREPLY. */
223 if (bytes
!= 0 && bytes
!= EDONTREPLY
)
224 ready_ops
|= CDEV_OP_WR
;
228 * If not all requested ops were ready, and the caller requests to be
229 * notified about changes, we add the remaining ops to the saved set.
233 uds_fd_table
[minor
].sel_endpt
= endpt
;
234 uds_fd_table
[minor
].sel_ops
|= ops
;
/*
 * Read up to 'size' bytes from the ring buffer of socket 'minor' into the
 * caller's memory grant.
 *
 * minor   - index of the socket in uds_fd_table
 * endpt   - endpoint passed to sys_safecopyto() as the copy destination
 * grant   - grant passed to sys_safecopyto()
 * size    - requested byte count; clamped to the bytes stored in the buffer
 * pretend - uds_select() passes 1 (with no grant) to probe readiness, while
 *           uds_read() passes 0
 *
 * Returns the number of bytes read. Callers also compare the result against
 * EDONTREPLY, which they treat as "the call must suspend".
 */
241 uds_perform_read(devminor_t minor
, endpoint_t endpt
, cp_grant_id_t grant
,
242 size_t size
, int pretend
)
247 dprintf(("UDS: uds_perform_read(%d)\n", minor
));
249 peer
= uds_fd_table
[minor
].peer
;
251 /* Skip reads of zero bytes. */
255 /* Check if the socket isn't shut down for reads. */
256 if (!(uds_fd_table
[minor
].mode
& UDS_R
))
/* Nothing buffered: decide between an error, end-of-data and suspending. */
259 if (uds_fd_table
[minor
].size
== 0) {
262 * We're not connected. That's only a problem when this
263 * socket is connection oriented.
265 if (uds_fd_table
[minor
].type
== SOCK_STREAM
||
266 uds_fd_table
[minor
].type
== SOCK_SEQPACKET
) {
/* A pending ECONNRESET (set by uds_reset) is cleared once it is seen here. */
267 if (uds_fd_table
[minor
].err
== ECONNRESET
) {
269 uds_fd_table
[minor
].err
= 0;
276 /* Check if process is reading from a closed pipe. */
277 if (peer
!= -1 && !(uds_fd_table
[peer
].mode
& UDS_W
) &&
278 uds_fd_table
[minor
].size
== 0)
285 uds_fd_table
[peer
].suspended
== UDS_SUSPENDED_WRITE
)
286 panic("writer blocked on empty socket");
288 dprintf(("UDS: suspending read request on %d\n", minor
));
290 /* Process is reading from an empty pipe. Suspend it. */
294 /* How much can we get from the ring buffer? */
295 if (size
> uds_fd_table
[minor
].size
)
296 size
= uds_fd_table
[minor
].size
;
301 /* Get the data from the tail of the ring buffer. */
302 pos
= uds_fd_table
[minor
].pos
;
/* Bytes between the tail and the physical end of the buffer. */
304 subsize
= UDS_BUF
- pos
;
/* First copy: from the tail position, to offset 0 of the grant. */
308 if ((r
= sys_safecopyto(endpt
, grant
, 0,
309 (vir_bytes
) &uds_fd_table
[minor
].buf
[pos
], subsize
)) != OK
)
/* Wrapped data: copy the remainder from the start of the buffer. */
312 if (subsize
< size
) {
313 if ((r
= sys_safecopyto(endpt
, grant
, subsize
,
314 (vir_bytes
) uds_fd_table
[minor
].buf
,
315 size
- subsize
)) != OK
)
319 /* Advance the buffer tail. */
320 uds_fd_table
[minor
].pos
= (pos
+ size
) % UDS_BUF
;
321 uds_fd_table
[minor
].size
-= size
;
323 /* Reset position if the buffer is empty (it may save a copy call). */
324 if (uds_fd_table
[minor
].size
== 0)
325 uds_fd_table
[minor
].pos
= 0;
327 /* See if we can wake up a blocked writer. */
328 if (peer
!= -1 && uds_fd_table
[peer
].suspended
== UDS_SUSPENDED_WRITE
)
331 /* See if we can satisfy an ongoing select. */
332 if (peer
!= -1 && (uds_fd_table
[peer
].sel_ops
& CDEV_OP_WR
) &&
333 uds_fd_table
[minor
].size
< UDS_BUF
) {
334 /* A write on the peer is possible now. */
335 chardriver_reply_select(uds_fd_table
[peer
].sel_endpt
, peer
,
/* The write-select has been answered, so drop it from the saved set. */
337 uds_fd_table
[peer
].sel_ops
&= ~CDEV_OP_WR
;
340 return size
; /* number of bytes read */
/*
 * Write up to 'size' bytes from the caller's memory grant into the ring
 * buffer of the receiving socket.
 *
 * minor   - index of the writing socket in uds_fd_table
 * endpt   - endpoint passed to sys_safecopyfrom() as the copy source
 * grant   - grant passed to sys_safecopyfrom()
 * size    - requested byte count; clamped to the free space in the buffer
 * pretend - uds_select() passes 1 (with no grant) to probe writability,
 *           while uds_write() passes 0
 *
 * For SOCK_STREAM and SOCK_SEQPACKET the receiver is the connected peer; for
 * other sockets the table is searched for a SOCK_DGRAM socket bound to this
 * socket's target address. Returns the number of bytes written. Callers
 * also compare the result against EDONTREPLY, which they treat as "the call
 * must suspend".
 */
344 uds_perform_write(devminor_t minor
, endpoint_t endpt
, cp_grant_id_t grant
,
345 size_t size
, int pretend
)
350 dprintf(("UDS: uds_perform_write(%d)\n", minor
));
352 /* Skip writes of zero bytes. */
356 /* Check if the socket isn't shut down for writes. */
357 if (!(uds_fd_table
[minor
].mode
& UDS_W
))
360 /* Datagram messages must fit in the buffer entirely. */
361 if (size
> UDS_BUF
&& uds_fd_table
[minor
].type
!= SOCK_STREAM
)
364 if (uds_fd_table
[minor
].type
== SOCK_STREAM
||
365 uds_fd_table
[minor
].type
== SOCK_SEQPACKET
) {
367 * If we're writing to a connection-oriented socket, then it
368 * needs a peer to write to. For disconnected sockets, writing
369 * is an error; for connecting sockets, writes should suspend.
371 peer
= uds_fd_table
[minor
].peer
;
/* A pending ECONNRESET (set by uds_reset) is cleared once it is seen here. */
374 if (uds_fd_table
[minor
].err
== ECONNRESET
) {
376 uds_fd_table
[minor
].err
= 0;
380 } else if (uds_fd_table
[peer
].peer
== -1) /* connecting */
382 } else /* uds_fd_table[minor].type == SOCK_DGRAM */ {
385 /* Locate the "peer" we want to write to. */
386 for (i
= 0; i
< NR_FDS
; i
++) {
388 * Look for a SOCK_DGRAM socket that is bound on
389 * the target address.
/* Match: datagram type, AF_UNIX family, and path equal to our target path. */
391 if (uds_fd_table
[i
].type
== SOCK_DGRAM
&&
392 uds_fd_table
[i
].addr
.sun_family
== AF_UNIX
&&
393 !strncmp(uds_fd_table
[minor
].target
.sun_path
,
394 uds_fd_table
[i
].addr
.sun_path
,
395 sizeof(uds_fd_table
[i
].addr
.sun_path
))) {
405 /* Check if we write to a closed pipe. */
406 if (!(uds_fd_table
[peer
].mode
& UDS_R
))
410 * We have to preserve the boundary for DGRAM. If there's already a
411 * packet waiting, discard it silently and pretend it was written.
413 if (uds_fd_table
[minor
].type
== SOCK_DGRAM
&&
414 uds_fd_table
[peer
].size
> 0)
418 * Check if the ring buffer is already full, and if the SEQPACKET
419 * message wouldn't write to an empty buffer.
421 if (uds_fd_table
[peer
].size
== UDS_BUF
||
422 (uds_fd_table
[minor
].type
== SOCK_SEQPACKET
&&
423 uds_fd_table
[peer
].size
> 0)) {
427 if (uds_fd_table
[peer
].suspended
== UDS_SUSPENDED_READ
)
428 panic("reader blocked on full socket");
430 dprintf(("UDS: suspending write request on %d\n", minor
));
/*
 * NOTE(review): the comment text below matches the one in the read path;
 * in this branch the process is writing to a buffer that cannot accept data.
 */
432 /* Process is reading from an empty pipe. Suspend it. */
436 /* How much can we add to the ring buffer? */
437 if (size
> UDS_BUF
- uds_fd_table
[peer
].size
)
438 size
= UDS_BUF
- uds_fd_table
[peer
].size
;
443 /* Put the data at the head of the ring buffer. */
444 pos
= (uds_fd_table
[peer
].pos
+ uds_fd_table
[peer
].size
) % UDS_BUF
;
/* Bytes between the head and the physical end of the buffer. */
446 subsize
= UDS_BUF
- pos
;
/* First copy: from offset 0 of the grant, to the head position. */
450 if ((r
= sys_safecopyfrom(endpt
, grant
, 0,
451 (vir_bytes
) &uds_fd_table
[peer
].buf
[pos
], subsize
)) != OK
)
/* Wrapped data: the remainder goes to the start of the buffer. */
454 if (subsize
< size
) {
455 if ((r
= sys_safecopyfrom(endpt
, grant
, subsize
,
456 (vir_bytes
) uds_fd_table
[peer
].buf
, size
- subsize
)) != OK
)
460 /* Advance the buffer head. */
461 uds_fd_table
[peer
].size
+= size
;
463 /* Fill in the source address to be returned by recvfrom, recvmsg. */
464 if (uds_fd_table
[minor
].type
== SOCK_DGRAM
)
465 memcpy(&uds_fd_table
[peer
].source
, &uds_fd_table
[minor
].addr
,
466 sizeof(struct sockaddr_un
));
468 /* See if we can wake up a blocked reader. */
469 if (uds_fd_table
[peer
].suspended
== UDS_SUSPENDED_READ
)
472 /* See if we can satisfy an ongoing select. */
473 if ((uds_fd_table
[peer
].sel_ops
& CDEV_OP_RD
) &&
474 uds_fd_table
[peer
].size
> 0) {
475 /* A read on the peer is possible now. */
476 chardriver_reply_select(uds_fd_table
[peer
].sel_endpt
, peer
,
/* The read-select has been answered, so drop it from the saved set. */
478 uds_fd_table
[peer
].sel_ops
&= ~CDEV_OP_RD
;
481 return size
; /* number of bytes written */
/*
 * Read handler: validate 'minor' and attempt the read through
 * uds_perform_read() with pretend = 0.
 *
 * Returns ENXIO when minor is outside 0..NR_FDS-1. When the attempt yields
 * EDONTREPLY, the request (endpoint, grant, size, id) is saved in the
 * socket's susp_* fields and the socket is marked UDS_SUSPENDED_READ; if
 * flags contains CDEV_NONBLOCK, that suspension is cancelled at once through
 * uds_cancel().
 */
485 uds_read(devminor_t minor
, u64_t position
, endpoint_t endpt
,
486 cp_grant_id_t grant
, size_t size
, int flags
, cdev_id_t id
)
490 dprintf(("UDS: uds_read(%d)\n", minor
));
492 if (minor
< 0 || minor
>= NR_FDS
) return ENXIO
;
494 if (uds_fd_table
[minor
].state
!= UDS_INUSE
)
497 rc
= uds_perform_read(minor
, endpt
, grant
, size
, 0);
499 /* If the call couldn't complete, suspend the caller. */
500 if (rc
== EDONTREPLY
) {
/* Saved so that uds_unsuspend() can retry with the same arguments. */
501 uds_fd_table
[minor
].suspended
= UDS_SUSPENDED_READ
;
502 uds_fd_table
[minor
].susp_endpt
= endpt
;
503 uds_fd_table
[minor
].susp_grant
= grant
;
504 uds_fd_table
[minor
].susp_size
= size
;
505 uds_fd_table
[minor
].susp_id
= id
;
507 /* If the call wasn't supposed to block, cancel immediately. */
508 if (flags
& CDEV_NONBLOCK
) {
509 uds_cancel(minor
, endpt
, id
);
/*
 * Write handler: validate 'minor' and attempt the write through
 * uds_perform_write() with pretend = 0.
 *
 * Returns ENXIO when minor is outside 0..NR_FDS-1. When the attempt yields
 * EDONTREPLY, the request (endpoint, grant, size, id) is saved in the
 * socket's susp_* fields and the socket is marked UDS_SUSPENDED_WRITE; if
 * flags contains CDEV_NONBLOCK, that suspension is cancelled at once through
 * uds_cancel().
 */
519 uds_write(devminor_t minor
, u64_t position
, endpoint_t endpt
,
520 cp_grant_id_t grant
, size_t size
, int flags
, cdev_id_t id
)
524 dprintf(("UDS: uds_write(%d)\n", minor
));
526 if (minor
< 0 || minor
>= NR_FDS
) return ENXIO
;
528 if (uds_fd_table
[minor
].state
!= UDS_INUSE
)
531 rc
= uds_perform_write(minor
, endpt
, grant
, size
, 0);
533 /* If the call couldn't complete, suspend the caller. */
534 if (rc
== EDONTREPLY
) {
/* Saved so that uds_unsuspend() can retry with the same arguments. */
535 uds_fd_table
[minor
].suspended
= UDS_SUSPENDED_WRITE
;
536 uds_fd_table
[minor
].susp_endpt
= endpt
;
537 uds_fd_table
[minor
].susp_grant
= grant
;
538 uds_fd_table
[minor
].susp_size
= size
;
539 uds_fd_table
[minor
].susp_id
= id
;
541 /* If the call wasn't supposed to block, cancel immediately. */
542 if (flags
& CDEV_NONBLOCK
) {
543 uds_cancel(minor
, endpt
, id
);
/*
 * Ioctl handler: validate 'minor', record user_endpt as the socket's owner
 * and pass the request on to uds_do_ioctl().
 *
 * Returns ENXIO when minor is outside 0..NR_FDS-1. When uds_do_ioctl()
 * yields EDONTREPLY, the request (endpoint, grant, id) is saved in the
 * socket's susp_* fields; the suspension type itself must already have been
 * set by the ioctl code, otherwise this function panics. If flags contains
 * CDEV_NONBLOCK, the suspension is cancelled at once through uds_cancel().
 */
553 uds_ioctl(devminor_t minor
, unsigned long request
, endpoint_t endpt
,
554 cp_grant_id_t grant
, int flags
, endpoint_t user_endpt
, cdev_id_t id
)
558 dprintf(("UDS: uds_ioctl(%d, %lu)\n", minor
, request
));
560 if (minor
< 0 || minor
>= NR_FDS
) return ENXIO
;
562 if (uds_fd_table
[minor
].state
!= UDS_INUSE
)
565 /* Update the owner endpoint. */
566 uds_fd_table
[minor
].owner
= user_endpt
;
568 /* Let the UDS ioctl subsystem handle the actual request. */
569 rc
= uds_do_ioctl(minor
, request
, endpt
, grant
);
571 /* If the call couldn't complete, suspend the caller. */
572 if (rc
== EDONTREPLY
) {
573 /* The suspension type is already set by the IOCTL handler. */
/* 's' keeps the suspension type from before uds_cancel() resets it below. */
574 if ((s
= uds_fd_table
[minor
].suspended
) == UDS_NOT_SUSPENDED
)
575 panic("IOCTL did not actually suspend?");
576 uds_fd_table
[minor
].susp_endpt
= endpt
;
577 uds_fd_table
[minor
].susp_grant
= grant
;
578 uds_fd_table
[minor
].susp_size
= 0; /* irrelevant */
579 uds_fd_table
[minor
].susp_id
= id
;
581 /* If the call wasn't supposed to block, cancel immediately. */
582 if (flags
& CDEV_NONBLOCK
) {
583 uds_cancel(minor
, endpt
, id
);
/* A cancelled connect is singled out; its handling is not visible here. */
584 if (s
== UDS_SUSPENDED_CONNECT
)
/*
 * Resume the call that is suspended on socket 'minor'.
 *
 * Suspended reads and writes are retried through uds_perform_read() and
 * uds_perform_write() with the saved endpoint and grant. For connect and
 * accept there is nothing to redo. Any other suspension type causes a panic.
 * The result is sent with chardriver_reply_task() and the socket is marked
 * UDS_NOT_SUSPENDED.
 */
595 uds_unsuspend(devminor_t minor
)
600 fdp
= &uds_fd_table
[minor
];
602 switch (fdp
->suspended
) {
603 case UDS_SUSPENDED_READ
:
604 r
= uds_perform_read(minor
, fdp
->susp_endpt
, fdp
->susp_grant
,
612 case UDS_SUSPENDED_WRITE
:
613 r
= uds_perform_write(minor
, fdp
->susp_endpt
, fdp
->susp_grant
,
621 case UDS_SUSPENDED_CONNECT
:
622 case UDS_SUSPENDED_ACCEPT
:
624 * In both cases, the caller already set up the connection.
625 * The only thing to do here is unblock.
633 panic("unknown suspension type %d", fdp
->suspended
);
/* Reply to the saved endpoint/request id with the result in 'r'. */
636 chardriver_reply_task(fdp
->susp_endpt
, fdp
->susp_id
, r
);
638 fdp
->suspended
= UDS_NOT_SUSPENDED
;
/*
 * Cancel handler: abort the call that is suspended on socket 'minor'.
 *
 * Returns EDONTREPLY when minor is outside 0..NR_FDS-1 or when the cancel
 * request does not match a call this socket is suspended on. Otherwise the
 * per-type cleanup is done, the socket is marked UDS_NOT_SUSPENDED and
 * EINTR is returned as the reply to the original request. Also called
 * directly by uds_read/uds_write/uds_ioctl for non-blocking requests.
 */
642 uds_cancel(devminor_t minor
, endpoint_t endpt
, cdev_id_t id
)
647 dprintf(("UDS: uds_cancel(%d)\n", minor
));
649 if (minor
< 0 || minor
>= NR_FDS
) return EDONTREPLY
;
651 fdp
= &uds_fd_table
[minor
];
653 if (fdp
->state
!= UDS_INUSE
) {
654 printf("UDS: cancel request for closed minor %d\n", minor
);
658 /* Make sure the cancel request is for a request we're hanging on. */
659 if (fdp
->suspended
== UDS_NOT_SUSPENDED
|| fdp
->susp_endpt
!= endpt
||
661 return EDONTREPLY
; /* this happens. */
664 * The system call was cancelled, so the socket is not suspended
667 switch (fdp
->suspended
) {
668 case UDS_SUSPENDED_ACCEPT
:
669 /* A partial accept() only sets the server's child. */
/* Clear every slot's 'child' link that points at this socket. */
670 for (i
= 0; i
< NR_FDS
; i
++)
671 if (uds_fd_table
[i
].child
== minor
)
672 uds_fd_table
[i
].child
= -1;
676 case UDS_SUSPENDED_CONNECT
:
677 /* Connect requests should continue asynchronously. */
680 case UDS_SUSPENDED_READ
:
681 case UDS_SUSPENDED_WRITE
:
682 /* Nothing more to do. */
686 panic("unknown suspension type %d", fdp
->suspended
);
689 fdp
->suspended
= UDS_NOT_SUSPENDED
;
691 return EINTR
; /* reply to the original request */
/*
 * Initialization callback (registered through sef_setcb_init_fresh()).
 * Zeroes all NR_FDS entries of uds_fd_table; both parameters are marked
 * UNUSED.
 */
695 * Initialize the server.
698 uds_init(int UNUSED(type
), sef_init_info_t
*UNUSED(info
))
700 /* Setting everything to NULL implicitly sets the state to UDS_FREE. */
701 memset(uds_fd_table
, '\0', sizeof(uds_fd_t
) * NR_FDS
);
/*
 * Signal callback (registered through sef_setcb_signal_handler()).
 * Every signal other than SIGTERM is ignored. On SIGTERM the descriptor
 * table is scanned for sockets in state UDS_INUSE, and the driver
 * terminates right away when uds_exit_left is zero; otherwise termination
 * is left to uds_close().
 */
709 uds_signal(int signo
)
713 /* Only check for termination signal, ignore anything else. */
714 if (signo
!= SIGTERM
) return;
716 /* Only exit once all sockets have been closed. */
/*
 * NOTE(review): presumably each in-use socket found here is counted in
 * uds_exit_left; the loop body is not visible - confirm.
 */
718 for (i
= 0; i
< NR_FDS
; i
++)
719 if (uds_fd_table
[i
].state
== UDS_INUSE
)
722 if (uds_exit_left
== 0)
723 chardriver_terminate();
729 /* Register init callbacks. */
730 sef_setcb_init_fresh(uds_init
);
732 /* No live update support for now. */
734 /* Register signal callbacks. */
735 sef_setcb_signal_handler(uds_signal
);
737 /* Let SEF perform startup. */
742 * The UNIX domain sockets driver.
749 chardriver_task(&uds_tab
);