/*	$NetBSD: dmover_io.c,v 1.36 2009/12/09 21:32:58 dsl Exp $	*/

/*-
 * Copyright (c) 2002, 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Jason R. Thorpe for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed for the NetBSD Project by
 *	Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * dmover_io.c: Support for user-space access to dmover-api
 *
 * This interface is quite simple:
 *
 *	1.  The user opens /dev/dmover, which is a cloning device.  This
 *	    allocates internal state for the session.
 *
 *	2.  The user does a DMIO_SETFUNC to select the data movement
 *	    function.  This actually creates the dmover session.
 *
 *	3.  The user writes request messages to its dmover handle.
 *
 *	4.  The user reads request responses from its dmover handle.
 *
 *	5.  The user closes the file descriptor and the session is
 *	    torn down.
 */
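/*
 * For illustration only, a minimal user-space consumer of this
 * interface might look like the sketch below.  The "xor" function
 * name and the omitted error handling are assumptions, not part of
 * this driver:
 *
 *	int fd = open("/dev/dmover", O_RDWR);
 *
 *	struct dmio_setfunc dsf;
 *	memset(&dsf, 0, sizeof(dsf));
 *	strlcpy(dsf.dsf_name, "xor", sizeof(dsf.dsf_name));
 *	ioctl(fd, DMIO_SETFUNC, &dsf);
 *
 *	struct dmio_usrreq req;
 *	memset(&req, 0, sizeof(req));
 *	req.req_id = 1;
 *	(point req.req_outbuf and req.req_inbuf[] at user iovecs)
 *	write(fd, &req, sizeof(req));
 *
 *	struct dmio_usrresp resp;
 *	read(fd, &resp, sizeof(resp));
 *	(resp.resp_id identifies the completed request)
 *
 *	close(fd);
 */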
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: dmover_io.c,v 1.36 2009/12/09 21:32:58 dsl Exp $");

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/conf.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/poll.h>
#include <sys/malloc.h>
#include <sys/simplelock.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/select.h>
#include <sys/systm.h>
#include <sys/workqueue.h>
#include <sys/once.h>
#include <sys/stat.h>
#include <sys/kauth.h>

#include <uvm/uvm_extern.h>

#include <dev/dmover/dmovervar.h>
#include <dev/dmover/dmover_io.h>
struct dmio_usrreq_state {
	union {
		struct work u_work;
		TAILQ_ENTRY(dmio_usrreq_state) u_q;
	} dus_u;
#define	dus_q		dus_u.u_q
#define	dus_work	dus_u.u_work
	struct uio dus_uio_out;
	struct uio *dus_uio_in;
	struct dmover_request *dus_req;
	uint32_t dus_id;
	struct vmspace *dus_vmspace;
};
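/*
 * A dmio_usrreq_state sits on ds_pending or ds_complete linked through
 * dus_q; only after dmio_usrreq_fini() hands it to the cleaner
 * workqueue is the same storage reused as dus_work.  The two uses
 * never overlap, which is what makes the dus_u union safe.
 */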
struct dmio_state {
	struct dmover_session *ds_session;
	TAILQ_HEAD(, dmio_usrreq_state) ds_pending;
	TAILQ_HEAD(, dmio_usrreq_state) ds_complete;
	struct selinfo ds_selq;
	volatile int ds_flags;
	u_int ds_nreqs;
	struct simplelock ds_slock;
	struct timespec ds_atime;
	struct timespec ds_mtime;
	struct timespec ds_btime;
};
static ONCE_DECL(dmio_cleaner_control);
static struct workqueue *dmio_cleaner;
static int dmio_cleaner_init(void);
static void dmio_usrreq_fini1(struct work *wk, void *);
#define	DMIO_STATE_SEL		0x0001
#define	DMIO_STATE_DEAD		0x0002
#define	DMIO_STATE_LARVAL	0x0004
#define	DMIO_STATE_READ_WAIT	0x0008
#define	DMIO_STATE_WRITE_WAIT	0x0010

#define	DMIO_NREQS_MAX		64	/* XXX pulled out of a hat */
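/*
 * DMIO_STATE_READ_WAIT and DMIO_STATE_WRITE_WAIT track sleepers in
 * dmio_read() and dmio_write(); the matching wakeup(9) channels are
 * &ds->ds_complete and &ds->ds_nreqs.  DMIO_STATE_SEL is set by
 * dmio_poll() when it calls selrecord() and cleared by whoever issues
 * the selnotify().
 */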
struct pool dmio_state_pool;
struct pool dmio_usrreq_state_pool;

void	dmoverioattach(int);

dev_type_open(dmoverioopen);
const struct cdevsw dmoverio_cdevsw = {
	dmoverioopen, noclose, noread, nowrite, noioctl,
	nostop, notty, nopoll, nommap, nokqfilter,
	D_OTHER
};
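/*
 * Only d_open above points at real code; every subsequent operation on
 * the cloned descriptor is dispatched through dmio_fileops (defined
 * toward the end of this file), which is the usual shape of a cloning
 * pseudo-device.
 */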
/*
 * dmoverioattach:
 *
 *	Pseudo-device attach routine.
 */
void
dmoverioattach(int count)
{

	pool_init(&dmio_state_pool, sizeof(struct dmio_state),
	    0, 0, 0, "dmiostate", NULL, IPL_SOFTCLOCK);
	pool_init(&dmio_usrreq_state_pool, sizeof(struct dmio_usrreq_state),
	    0, 0, 0, "dmiourstate", NULL, IPL_SOFTCLOCK);
}
/*
 * dmio_cleaner_init:
 *
 *	Create cleaner thread.
 */
static int
dmio_cleaner_init(void)
{

	return workqueue_create(&dmio_cleaner, "dmioclean", dmio_usrreq_fini1,
	    NULL, PWAIT, IPL_SOFTCLOCK, 0);
}
/*
 * dmio_usrreq_init:
 *
 *	Build a request structure.
 */
static int
dmio_usrreq_init(struct file *fp, struct dmio_usrreq_state *dus,
    struct dmio_usrreq *req, struct dmover_request *dreq)
{
	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
	struct dmover_session *dses = ds->ds_session;
	struct uio *uio_out = &dus->dus_uio_out;
	struct uio *uio_in;
	dmio_buffer inbuf;
	size_t len;
	int i, error;
	u_int j;

	/* XXX How should malloc interact w/ FNONBLOCK? */

	error = RUN_ONCE(&dmio_cleaner_control, dmio_cleaner_init);
	if (error) {
		return error;
	}

	error = proc_vmspace_getref(curproc, &dus->dus_vmspace);
	if (error) {
		return error;
	}
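	/*
	 * Validate and wire up the output buffer, if one is supplied:
	 * reject iovec counts above IOV_MAX, copy the user's iovec
	 * array in, and make sure the total length fits in ssize_t.
	 */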
	if (req->req_outbuf.dmbuf_iovcnt != 0) {
		if (req->req_outbuf.dmbuf_iovcnt > IOV_MAX) {
			/* Drop the vmspace reference taken above. */
			uvmspace_free(dus->dus_vmspace);
			return (EINVAL);
		}
		len = sizeof(struct iovec) * req->req_outbuf.dmbuf_iovcnt;
		uio_out->uio_iov = malloc(len, M_TEMP, M_WAITOK);
		error = copyin(req->req_outbuf.dmbuf_iov, uio_out->uio_iov,
		    len);
		if (error) {
			free(uio_out->uio_iov, M_TEMP);
			uvmspace_free(dus->dus_vmspace);
			return (error);
		}

		for (j = 0, len = 0; j < req->req_outbuf.dmbuf_iovcnt; j++) {
			len += uio_out->uio_iov[j].iov_len;
			if (len > SSIZE_MAX) {
				free(uio_out->uio_iov, M_TEMP);
				uvmspace_free(dus->dus_vmspace);
				return (EINVAL);
			}
		}

		uio_out->uio_iovcnt = req->req_outbuf.dmbuf_iovcnt;
		uio_out->uio_resid = len;
		uio_out->uio_rw = UIO_READ;
		uio_out->uio_vmspace = dus->dus_vmspace;

		dreq->dreq_outbuf_type = DMOVER_BUF_UIO;
		dreq->dreq_outbuf.dmbuf_uio = uio_out;
	} else {
		uio_out->uio_iov = NULL;
		uio_out = NULL;
		dreq->dreq_outbuf_type = DMOVER_BUF_NONE;
	}
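	/*
	 * The immediate value always travels with the request.  Each of
	 * the session's inputs then gets the same validation as the
	 * output buffer, plus the requirement that its total length
	 * match the output length exactly.
	 */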
	memcpy(dreq->dreq_immediate, req->req_immediate,
	    sizeof(dreq->dreq_immediate));

	if (dses->dses_ninputs == 0) {
		/* No inputs; all done. */
		return (0);
	}

	dreq->dreq_inbuf_type = DMOVER_BUF_UIO;

	dus->dus_uio_in = malloc(sizeof(struct uio) * dses->dses_ninputs,
	    M_TEMP, M_WAITOK);
	memset(dus->dus_uio_in, 0, sizeof(struct uio) * dses->dses_ninputs);

	for (i = 0; i < dses->dses_ninputs; i++) {
		uio_in = &dus->dus_uio_in[i];

		error = copyin(&req->req_inbuf[i], &inbuf, sizeof(inbuf));
		if (error)
			goto bad;

		if (inbuf.dmbuf_iovcnt > IOV_MAX) {
			error = EINVAL;
			goto bad;
		}
		len = sizeof(struct iovec) * inbuf.dmbuf_iovcnt;
		if (len == 0) {
			error = EINVAL;
			goto bad;
		}
		uio_in->uio_iov = malloc(len, M_TEMP, M_WAITOK);

		error = copyin(inbuf.dmbuf_iov, uio_in->uio_iov, len);
		if (error) {
			free(uio_in->uio_iov, M_TEMP);
			goto bad;
		}

		for (j = 0, len = 0; j < inbuf.dmbuf_iovcnt; j++) {
			len += uio_in->uio_iov[j].iov_len;
			if (len > SSIZE_MAX) {
				free(uio_in->uio_iov, M_TEMP);
				error = EINVAL;
				goto bad;
			}
		}

		if (uio_out != NULL && len != uio_out->uio_resid) {
			/* Input length must match the output length. */
			free(uio_in->uio_iov, M_TEMP);
			error = EINVAL;
			goto bad;
		}

		uio_in->uio_iovcnt = inbuf.dmbuf_iovcnt;
		uio_in->uio_resid = len;
		uio_in->uio_rw = UIO_WRITE;
		uio_in->uio_vmspace = dus->dus_vmspace;

		dreq->dreq_inbuf[i].dmbuf_uio = uio_in;
	}

	return (0);

 bad:
	for (--i; i >= 0; i--) {
		uio_in = &dus->dus_uio_in[i];
		free(uio_in->uio_iov, M_TEMP);
	}
	free(dus->dus_uio_in, M_TEMP);
	if (uio_out != NULL)
		free(uio_out->uio_iov, M_TEMP);
	uvmspace_free(dus->dus_vmspace);
	return (error);
}
/*
 * dmio_usrreq_fini:
 *
 *	Tear down a request.  Must be called at splsoftclock().
 */
static void
dmio_usrreq_fini(struct dmio_state *ds, struct dmio_usrreq_state *dus)
{
	struct dmover_session *dses = ds->ds_session;
	struct uio *uio_out = &dus->dus_uio_out;
	struct uio *uio_in;
	int i;

	if (uio_out->uio_iov != NULL)
		free(uio_out->uio_iov, M_TEMP);

	if (dses->dses_ninputs) {
		for (i = 0; i < dses->dses_ninputs; i++) {
			uio_in = &dus->dus_uio_in[i];
			free(uio_in->uio_iov, M_TEMP);
		}
		free(dus->dus_uio_in, M_TEMP);
	}

	workqueue_enqueue(dmio_cleaner, &dus->dus_work, NULL);
}
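/*
 * The final stage of teardown runs on the dmio_cleaner workqueue
 * rather than in dmio_usrreq_fini() itself: the latter may be entered
 * at splsoftclock() (e.g. from the completion callback), where calling
 * uvmspace_free() would not be safe.
 */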
static void
dmio_usrreq_fini1(struct work *wk, void *dummy)
{
	struct dmio_usrreq_state *dus = (void *)wk;
	int s;

	KASSERT(wk == &dus->dus_work);

	uvmspace_free(dus->dus_vmspace);
	s = splsoftclock();
	pool_put(&dmio_usrreq_state_pool, dus);
	splx(s);
}
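/*
 * Reads and writes on a dmover handle transfer whole records only: a
 * read returns one struct dmio_usrresp per completed request and a
 * write submits one struct dmio_usrreq per record, so partial-record
 * sizes are rejected with EINVAL up front.
 */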
/*
 * dmio_read:
 *
 *	Read file op.
 */
static int
dmio_read(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
	struct dmio_usrreq_state *dus;
	struct dmover_request *dreq;
	struct dmio_usrresp resp;
	int s, error = 0, progress = 0;

	if ((uio->uio_resid % sizeof(resp)) != 0)
		return (EINVAL);

	if (ds->ds_session == NULL)
		return (ENXIO);

	getnanotime(&ds->ds_atime);
	s = splsoftclock();
	simple_lock(&ds->ds_slock);

	while (uio->uio_resid != 0) {

		dus = TAILQ_FIRST(&ds->ds_complete);
		if (dus == NULL) {
			if (fp->f_flag & FNONBLOCK) {
				error = progress ? 0 : EWOULDBLOCK;
				break;
			}
			ds->ds_flags |= DMIO_STATE_READ_WAIT;
			error = ltsleep(&ds->ds_complete,
			    PRIBIO | PCATCH, "dmvrrd", 0,
			    &ds->ds_slock);
			if (error)
				break;
			continue;
		}

		/* Have a completed request. */
		TAILQ_REMOVE(&ds->ds_complete, dus, dus_q);
		ds->ds_nreqs--;
		if (ds->ds_flags & DMIO_STATE_WRITE_WAIT) {
			ds->ds_flags &= ~DMIO_STATE_WRITE_WAIT;
			wakeup(&ds->ds_nreqs);
		}
		if (ds->ds_flags & DMIO_STATE_SEL) {
			ds->ds_flags &= ~DMIO_STATE_SEL;
			/* A request slot just freed up; notify writers. */
			selnotify(&ds->ds_selq, POLLOUT | POLLWRNORM, 0);
		}

		simple_unlock(&ds->ds_slock);

		dreq = dus->dus_req;
		resp.resp_id = dus->dus_id;
		if (dreq->dreq_flags & DMOVER_REQ_ERROR)
			resp.resp_error = dreq->dreq_error;
		else {
			resp.resp_error = 0;
			memcpy(resp.resp_immediate, dreq->dreq_immediate,
			    sizeof(resp.resp_immediate));
		}

		dmio_usrreq_fini(ds, dus);

		splx(s);

		progress = 1;

		dmover_request_free(dreq);

		error = uiomove(&resp, sizeof(resp), uio);
		if (error)
			return (error);

		s = splsoftclock();
		simple_lock(&ds->ds_slock);
	}

	simple_unlock(&ds->ds_slock);
	splx(s);

	return (error);
}
/*
 * dmio_usrreq_done:
 *
 *	Dmover completion callback.
 */
static void
dmio_usrreq_done(struct dmover_request *dreq)
{
	struct dmio_usrreq_state *dus = dreq->dreq_cookie;
	struct dmio_state *ds = dreq->dreq_session->dses_cookie;

	/* We're already at splsoftclock(). */

	simple_lock(&ds->ds_slock);
	TAILQ_REMOVE(&ds->ds_pending, dus, dus_q);
	if (ds->ds_flags & DMIO_STATE_DEAD) {
		ds->ds_nreqs--;
		dmio_usrreq_fini(ds, dus);
		dmover_request_free(dreq);
		if (ds->ds_nreqs == 0) {
			simple_unlock(&ds->ds_slock);
			seldestroy(&ds->ds_selq);
			pool_put(&dmio_state_pool, ds);
			return;
		}
	} else {
		TAILQ_INSERT_TAIL(&ds->ds_complete, dus, dus_q);
		if (ds->ds_flags & DMIO_STATE_READ_WAIT) {
			ds->ds_flags &= ~DMIO_STATE_READ_WAIT;
			wakeup(&ds->ds_complete);
		}
		if (ds->ds_flags & DMIO_STATE_SEL) {
			ds->ds_flags &= ~DMIO_STATE_SEL;
			/* A response is now readable; notify readers. */
			selnotify(&ds->ds_selq, POLLIN | POLLRDNORM, 0);
		}
	}

	simple_unlock(&ds->ds_slock);
}
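/*
 * Note the ordering in the submit path below: dmio_usrreq_done() takes
 * ds_slock and removes the request from ds_pending, so dmio_write()
 * must insert the request on ds_pending and drop the lock before
 * calling dmover_process().
 */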
/*
 * dmio_write:
 *
 *	Write file op.
 */
static int
dmio_write(struct file *fp, off_t *offp, struct uio *uio,
    kauth_cred_t cred, int flags)
{
	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
	struct dmio_usrreq_state *dus;
	struct dmover_request *dreq;
	struct dmio_usrreq req;
	int error = 0, s, progress = 0;

	if ((uio->uio_resid % sizeof(req)) != 0)
		return (EINVAL);

	if (ds->ds_session == NULL)
		return (ENXIO);

	getnanotime(&ds->ds_mtime);
	s = splsoftclock();
	simple_lock(&ds->ds_slock);

	while (uio->uio_resid != 0) {

		if (ds->ds_nreqs == DMIO_NREQS_MAX) {
			if (fp->f_flag & FNONBLOCK) {
				error = progress ? 0 : EWOULDBLOCK;
				break;
			}
			ds->ds_flags |= DMIO_STATE_WRITE_WAIT;
			error = ltsleep(&ds->ds_nreqs, PRIBIO | PCATCH,
			    "dmiowr", 0, &ds->ds_slock);
			if (error)
				break;
			continue;
		}

		ds->ds_nreqs++;

		simple_unlock(&ds->ds_slock);
		splx(s);

		progress = 1;

		error = uiomove(&req, sizeof(req), uio);
		if (error) {
			s = splsoftclock();
			simple_lock(&ds->ds_slock);
			ds->ds_nreqs--;
			break;
		}

		/* XXX How should this interact with FNONBLOCK? */
		dreq = dmover_request_alloc(ds->ds_session, NULL);
		if (dreq == NULL) {
			/* XXX */
			s = splsoftclock();
			simple_lock(&ds->ds_slock);
			ds->ds_nreqs--;
			error = ENOMEM;
			break;
		}

		s = splsoftclock();
		dus = pool_get(&dmio_usrreq_state_pool, PR_WAITOK);
		splx(s);

		error = dmio_usrreq_init(fp, dus, &req, dreq);
		if (error) {
			dmover_request_free(dreq);
			s = splsoftclock();
			pool_put(&dmio_usrreq_state_pool, dus);
			simple_lock(&ds->ds_slock);
			ds->ds_nreqs--;
			break;
		}

		dreq->dreq_callback = dmio_usrreq_done;
		dreq->dreq_cookie = dus;

		dus->dus_req = dreq;
		dus->dus_id = req.req_id;

		s = splsoftclock();
		simple_lock(&ds->ds_slock);

		TAILQ_INSERT_TAIL(&ds->ds_pending, dus, dus_q);

		simple_unlock(&ds->ds_slock);
		splx(s);

		dmover_process(dreq);

		s = splsoftclock();
		simple_lock(&ds->ds_slock);
	}

	simple_unlock(&ds->ds_slock);
	splx(s);

	return (error);
}
/*
 * dmio_stat:
 *
 *	Stat file op.
 */
static int
dmio_stat(struct file *fp, struct stat *st)
{
	struct dmio_state *ds = fp->f_data;

	/* Note: zero the whole struct, not just a pointer's worth. */
	(void)memset(st, 0, sizeof(*st));

	KERNEL_LOCK(1, NULL);
	st->st_dev = makedev(cdevsw_lookup_major(&dmoverio_cdevsw), 0);
	st->st_atimespec = ds->ds_atime;
	st->st_mtimespec = ds->ds_mtime;
	st->st_ctimespec = st->st_birthtimespec = ds->ds_btime;
	st->st_uid = kauth_cred_geteuid(fp->f_cred);
	st->st_gid = kauth_cred_getegid(fp->f_cred);
	KERNEL_UNLOCK_ONE(NULL);

	return 0;
}
/*
 * dmio_ioctl:
 *
 *	Ioctl file op.
 */
static int
dmio_ioctl(struct file *fp, u_long cmd, void *data)
{
	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
	int error, s;

	switch (cmd) {
	case FIONBIO:
	case FIOASYNC:
		return (0);

	case DMIO_SETFUNC:
	    {
		struct dmio_setfunc *dsf = data;
		struct dmover_session *dses;

		s = splsoftclock();
		simple_lock(&ds->ds_slock);

		if (ds->ds_session != NULL ||
		    (ds->ds_flags & DMIO_STATE_LARVAL) != 0) {
			simple_unlock(&ds->ds_slock);
			splx(s);
			return (EBUSY);
		}

		ds->ds_flags |= DMIO_STATE_LARVAL;

		simple_unlock(&ds->ds_slock);
		splx(s);

		dsf->dsf_name[DMIO_MAX_FUNCNAME - 1] = '\0';
		error = dmover_session_create(dsf->dsf_name, &dses);

		s = splsoftclock();
		simple_lock(&ds->ds_slock);

		if (error == 0) {
			dses->dses_cookie = ds;
			ds->ds_session = dses;
		}
		ds->ds_flags &= ~DMIO_STATE_LARVAL;

		simple_unlock(&ds->ds_slock);
		splx(s);
		break;
	    }

	default:
		error = ENOTTY;
	}

	return (error);
}
/*
 * dmio_poll:
 *
 *	Poll file op.
 */
static int
dmio_poll(struct file *fp, int events)
{
	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
	int s, revents = 0;

	if ((events & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) == 0)
		return (revents);

	s = splsoftclock();
	simple_lock(&ds->ds_slock);

	if (ds->ds_flags & DMIO_STATE_DEAD) {
		/* EOF */
		revents |= events & (POLLIN | POLLRDNORM |
		    POLLOUT | POLLWRNORM);
		goto out;
	}

	/* We can read if there are completed requests. */
	if (events & (POLLIN | POLLRDNORM))
		if (TAILQ_EMPTY(&ds->ds_complete) == 0)
			revents |= events & (POLLIN | POLLRDNORM);

	/*
	 * We can write if there are fewer than DMIO_NREQS_MAX requests
	 * already in the queue.
	 */
	if (events & (POLLOUT | POLLWRNORM))
		if (ds->ds_nreqs < DMIO_NREQS_MAX)
			revents |= events & (POLLOUT | POLLWRNORM);

	if (revents == 0) {
		selrecord(curlwp, &ds->ds_selq);
		ds->ds_flags |= DMIO_STATE_SEL;
	}

 out:
	simple_unlock(&ds->ds_slock);
	splx(s);

	return (revents);
}
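/*
 * Close must be careful about requests still in flight: the state is
 * only marked DMIO_STATE_DEAD here, and if ds_nreqs is non-zero the
 * final seldestroy()/pool_put() is deferred to dmio_usrreq_done(),
 * which performs it when the last pending request completes.
 */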
/*
 * dmio_close:
 *
 *	Close file op.
 */
static int
dmio_close(struct file *fp)
{
	struct dmio_state *ds = (struct dmio_state *) fp->f_data;
	struct dmio_usrreq_state *dus;
	struct dmover_session *dses;
	int s;

	s = splsoftclock();
	simple_lock(&ds->ds_slock);

	ds->ds_flags |= DMIO_STATE_DEAD;

	/* Garbage-collect all the responses on the queue. */
	while ((dus = TAILQ_FIRST(&ds->ds_complete)) != NULL) {
		TAILQ_REMOVE(&ds->ds_complete, dus, dus_q);
		ds->ds_nreqs--;
		dmover_request_free(dus->dus_req);
		dmio_usrreq_fini(ds, dus);
	}

	/*
	 * If there are any requests pending, we have to wait for
	 * them.  Don't free the dmio_state in this case.
	 */
	if (ds->ds_nreqs == 0) {
		dses = ds->ds_session;
		simple_unlock(&ds->ds_slock);
		seldestroy(&ds->ds_selq);
		pool_put(&dmio_state_pool, ds);
	} else {
		dses = NULL;
		simple_unlock(&ds->ds_slock);
	}

	splx(s);

	fp->f_data = NULL;

	if (dses != NULL)
		dmover_session_destroy(dses);

	return (0);
}
static const struct fileops dmio_fileops = {
	.fo_read = dmio_read,
	.fo_write = dmio_write,
	.fo_ioctl = dmio_ioctl,
	.fo_fcntl = fnullop_fcntl,
	.fo_poll = dmio_poll,
	.fo_stat = dmio_stat,
	.fo_close = dmio_close,
	.fo_kqfilter = fnullop_kqfilter,
	.fo_restart = fnullop_restart,
};
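/*
 * The fnullop_* entries are the standard "no-op" fileops stubs; event
 * notification on a dmover handle is available through dmio_poll()
 * only.
 */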
/*
 * dmoverioopen:
 *
 *	Device switch open routine.
 */
int
dmoverioopen(dev_t dev, int flag, int mode, struct lwp *l)
{
	struct dmio_state *ds;
	struct file *fp;
	int error, fd, s;

	/* falloc() will use the descriptor for us. */
	if ((error = fd_allocfile(&fp, &fd)) != 0)
		return (error);

	s = splsoftclock();
	ds = pool_get(&dmio_state_pool, PR_WAITOK);
	splx(s);

	memset(ds, 0, sizeof(*ds));
	simple_lock_init(&ds->ds_slock);
	TAILQ_INIT(&ds->ds_pending);
	TAILQ_INIT(&ds->ds_complete);
	selinit(&ds->ds_selq);

	/* Set the timestamps only after the state has been zeroed. */
	getnanotime(&ds->ds_btime);
	ds->ds_atime = ds->ds_mtime = ds->ds_btime;

	return fd_clone(fp, fd, flag, &dmio_fileops, ds);
}