/*	$NetBSD: coda_psdev.c,v 1.45.12.2 2008/12/30 18:50:25 christos Exp $	*/

/*
 *             Coda: an Experimental Distributed File System
 *
 *           Copyright (c) 1987-1998 Carnegie Mellon University
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation, and
 * that credit is given to Carnegie Mellon University in all documents
 * and publicity pertaining to direct or indirect use of this code or its
 * derivatives.
 *
 * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
 * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
 * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.  CARNEGIE MELLON
 * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
 * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
 * ANY DERIVATIVE WORK.
 *
 * Carnegie Mellon encourages users of this software to return any
 * improvements or extensions that they make, and to grant Carnegie
 * Mellon the rights to redistribute these changes without encumbrance.
 *
 *	@(#) coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:26:45 rvb Exp $
 */

/*
 * Mach Operating System
 * Copyright (c) 1989 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */

/*
 * This code was written for the Coda file system at Carnegie Mellon
 * University.  Contributors include David Steere, James Kistler, and
 * M. Satyanarayanan.
 */
/* These routines define the pseudo device for communication between
 * Coda's Venus and Minicache in Mach 2.6.  They used to be in cfs_subr.c,
 * but I moved them to make it easier to port the Minicache without
 * porting coda. -- DCS 10/12/94
 *
 * Following code depends on file-system CODA.
 */

/* These routines are the device entry points for Venus. */
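/*
 * Message flow through this device (as implemented below): coda_call()
 * wraps a request in a vmsg, appends it to vc_requests and pokes Venus
 * through selnotify().  Venus reads the request with vc_nb_read(), which
 * parks the message on vc_replies, services it in user space, and writes
 * the answer back with vc_nb_write(), which matches it by its uniquifier
 * and wakes the sleeping caller.
 *
 * A minimal sketch of the user-space side (assuming a device node named
 * /dev/cfs0; illustrative only, not part of this file):
 *
 *	int fd = open("/dev/cfs0", O_RDWR);
 *	union inputArgs req;
 *	union outputArgs rep;
 *	// wait for an upcall (poll/kqueue), then fetch it
 *	read(fd, &req, sizeof(req));
 *	// ... service the request, build a reply whose header carries
 *	// the same opcode and uniquifier ...
 *	write(fd, &rep, replen);
 */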
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: coda_psdev.c,v 1.45.12.2 2008/12/30 18:50:25 christos Exp $");

extern int coda_nc_initialized;		/* Set if cache has been initialized */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/select.h>
#include <sys/conf.h>
#include <sys/atomic.h>

#include <miscfs/syncfs/syncfs.h>

#include <coda/coda.h>
#include <coda/cnode.h>
#include <coda/coda_namecache.h>
#include <coda/coda_io.h>
int coda_psdev_print_entry = 0;
int outstanding_upcalls = 0;
int coda_call_sleep = PZERO - 1;
int coda_pcatch = PCATCH;
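/*
 * coda_call_sleep is the tsleep() priority used while waiting for Venus;
 * coda_pcatch adds PCATCH so that a posted signal can end that sleep
 * early (see coda_call() below).  outstanding_upcalls counts replies
 * still owed to sleeping callers while the device is being closed, so
 * vc_nb_close() can wait for them to drain.
 */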
#define ENTRY if (coda_psdev_print_entry) myprintf(("Entered %s\n", __func__))
void vcodaattach(int n);

dev_type_open(vc_nb_open);
dev_type_close(vc_nb_close);
dev_type_read(vc_nb_read);
dev_type_write(vc_nb_write);
dev_type_ioctl(vc_nb_ioctl);
dev_type_poll(vc_nb_poll);
dev_type_kqfilter(vc_nb_kqfilter);
const struct cdevsw vcoda_cdevsw = {
    vc_nb_open, vc_nb_close, vc_nb_read, vc_nb_write, vc_nb_ioctl,
    nostop, notty, vc_nb_poll, nommap, vc_nb_kqfilter, D_OTHER,
};
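/*
 * Each minor number of the Coda control device selects one slot of
 * coda_mnttbl[]; the embedded vcomm structure there carries the request
 * and reply queues shared with the Venus instance that has that minor
 * open.
 */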
struct vmsg {
    TAILQ_ENTRY(vmsg) vm_chain;
    void	*vm_data;
    u_short	 vm_flags;
    u_short	 vm_inSize;	/* Size is at most 5000 bytes */
    u_short	 vm_outSize;
    u_short	 vm_opcode;	/* copied from data to save ptr lookup */
    int		 vm_unique;
    void	*vm_sleep;	/* Not used by Mach. */
};
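/*
 * Lifecycle of a vmsg: it sits on vc_requests from coda_call() until
 * Venus reads it, then (unless it is a CODA_SIGNAL) on vc_replies until
 * Venus writes the answer back.  vm_flags records how far it got
 * (VM_READ, VM_WRITE) and vm_sleep is the address the caller sleeps on.
 */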
/* vcodaattach: do nothing */
void
vcodaattach(int n)
{
}

/*
 * These functions are written for NetBSD.
 */
int
vc_nb_open(dev_t dev, int flag, int mode,
    struct lwp *l)
{
    struct vcomm *vcp;

    ENTRY;

    if (minor(dev) >= NVCODA)
	return(ENXIO);

    if (!coda_nc_initialized)
	coda_nc_init();

    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;
    /* ... */

    selinit(&vcp->vc_selproc);
    TAILQ_INIT(&vcp->vc_requests);
    TAILQ_INIT(&vcp->vc_replies);
    /* ... */

    coda_mnttbl[minor(dev)].mi_vfsp = NULL;
    coda_mnttbl[minor(dev)].mi_rootvp = NULL;

    return(0);
}
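/*
 * vc_nb_close: Venus is going away.  Wake every caller still waiting for
 * a reply, free queued signal messages, and auto-unmount any file system
 * mounted through this device so that later operations fail cleanly.
 */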
int
vc_nb_close(dev_t dev, int flag, int mode, struct lwp *l)
{
    struct vcomm *vcp;
    struct vmsg *vmp;
    struct coda_mntinfo *mi;
    int err;

    ENTRY;

    if (minor(dev) >= NVCODA)
	return(ENXIO);

    mi = &coda_mnttbl[minor(dev)];
    vcp = &(mi->mi_vcomm);

    if (!VC_OPEN(vcp))
	panic("vcclose: not open");

    /* prevent future operations on this vfs from succeeding by auto-
     * unmounting any vfs mounted via this device.  This frees user or
     * sysadm from having to remember where all mount points are located.
     * Put this before WAKEUPs to avoid queuing new messages between
     * the WAKEUP and the unmount (which can happen if we're unlucky)
     */
    if (!mi->mi_rootvp) {
	/* just a simple open/close w no mount */
	/* ... */
	return(0);
    }

    /* Let unmount know this is for real */
    atomic_inc_uint(&mi->mi_vfsp->mnt_refcnt);
    VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
    coda_unmounting(mi->mi_vfsp);

    /* Wakeup clients so they can return. */
    while ((vmp = TAILQ_FIRST(&vcp->vc_requests)) != NULL) {
	TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);

	/* Free signal request messages and don't wakeup cause
	   no one is waiting. */
	if (vmp->vm_opcode == CODA_SIGNAL) {
	    CODA_FREE(vmp->vm_data, VC_IN_NO_DATA);
	    CODA_FREE(vmp, sizeof(struct vmsg));
	    continue;
	}
	outstanding_upcalls++;
	wakeup(&vmp->vm_sleep);
    }

    while ((vmp = TAILQ_FIRST(&vcp->vc_replies)) != NULL) {
	TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);

	outstanding_upcalls++;
	wakeup(&vmp->vm_sleep);
    }

    if (outstanding_upcalls) {
#ifdef	CODA_VERBOSE
	printf("presleep: outstanding_upcalls = %d\n", outstanding_upcalls);
	(void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
	printf("postsleep: outstanding_upcalls = %d\n", outstanding_upcalls);
#else
	(void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
#endif
    }

    err = dounmount(mi->mi_vfsp, flag, l);
    if (err)
	myprintf(("Error %d unmounting vfs in vcclose(%llu)\n",
	    err, (unsigned long long)minor(dev)));

    seldestroy(&vcp->vc_selproc);
    /* ... */
    return(0);
}
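/*
 * vc_nb_read: Venus fetches the next pending upcall.  The message is
 * copied out, removed from vc_requests and, unless it is a CODA_SIGNAL
 * (which nobody waits for), parked on vc_replies until the reply comes
 * back through vc_nb_write.
 */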
int
vc_nb_read(dev_t dev, struct uio *uiop, int flag)
{
    struct vcomm *vcp;
    struct vmsg *vmp;
    int error = 0;

    ENTRY;

    if (minor(dev) >= NVCODA)
	return(ENXIO);

    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;

    /* Get message at head of request queue. */
    vmp = TAILQ_FIRST(&vcp->vc_requests);
    if (vmp == NULL)
	return(0);	/* Nothing to read */

    /* Move the input args into userspace */
    uiop->uio_rw = UIO_READ;
    error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop);
    if (error)
	myprintf(("vcread: error (%d) on uiomove\n", error));

    TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);

    /* If request was a signal, free up the message and don't
       enqueue it in the reply queue. */
    if (vmp->vm_opcode == CODA_SIGNAL) {
	myprintf(("vcread: signal msg (%d, %d)\n",
	    vmp->vm_opcode, vmp->vm_unique));
	CODA_FREE(vmp->vm_data, VC_IN_NO_DATA);
	CODA_FREE(vmp, sizeof(struct vmsg));
	return(error);
    }

    vmp->vm_flags |= VM_READ;
    TAILQ_INSERT_TAIL(&vcp->vc_replies, vmp, vm_chain);

    return(error);
}
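/*
 * vc_nb_write: Venus hands back either an unsolicited downcall (cache
 * purges and the like, dispatched to handleDownCall()) or the reply to
 * an earlier upcall, which is matched against vc_replies by its
 * uniquifier, copied into the caller's buffer, and the caller woken.
 */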
int
vc_nb_write(dev_t dev, struct uio *uiop, int flag)
{
    struct vcomm *vcp;
    struct vmsg *vmp;
    struct coda_out_hdr *out;
    u_long seq;
    u_long opcode;
    int tbuf[2];
    int error = 0;

    ENTRY;

    if (minor(dev) >= NVCODA)
	return(ENXIO);

    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;

    /* Peek at the opcode, unique without transferring the data. */
    uiop->uio_rw = UIO_WRITE;
    error = uiomove(tbuf, sizeof(int) * 2, uiop);
    if (error) {
	myprintf(("vcwrite: error (%d) on uiomove\n", error));
	return(EINVAL);
    }

    opcode = tbuf[0];
    seq = tbuf[1];

    myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));

    if (DOWNCALL(opcode)) {
	union outputArgs pbuf;

	/* get the rest of the data. */
	uiop->uio_rw = UIO_WRITE;
	error = uiomove(&pbuf.coda_purgeuser.oh.result,
	    sizeof(pbuf) - (sizeof(int)*2), uiop);
	if (error) {
	    myprintf(("vcwrite: error (%d) on uiomove (Op %ld seq %ld)\n",
		error, opcode, seq));
	    return(EINVAL);
	}

	return handleDownCall(opcode, &pbuf);
    }

    /* Look for the message on the (waiting for) reply queue. */
    TAILQ_FOREACH(vmp, &vcp->vc_replies, vm_chain) {
	if (vmp->vm_unique == seq) break;
    }

    if (vmp == NULL) {
	myprintf(("vcwrite: msg (%ld, %ld) not found\n", opcode, seq));
	return(ESRCH);
    }

    /* Remove the message from the reply queue */
    TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);

    /* move data into response buffer. */
    out = (struct coda_out_hdr *)vmp->vm_data;
    /* Don't need to copy opcode and uniquifier. */

    /* get the rest of the data. */
    if (vmp->vm_outSize < uiop->uio_resid) {
	myprintf(("vcwrite: more data than asked for (%d < %lu)\n",
	    vmp->vm_outSize, (unsigned long) uiop->uio_resid));
	wakeup(&vmp->vm_sleep);		/* Notify caller of the error. */
	return(EINVAL);
    }

    tbuf[0] = uiop->uio_resid;		/* Save this value. */
    uiop->uio_rw = UIO_WRITE;
    error = uiomove(&out->result, vmp->vm_outSize - (sizeof(int) * 2), uiop);
    if (error) {
	myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n",
	    error, opcode, seq));
	return(EINVAL);
    }

    /* I don't think these are used, but just in case. */
    /* XXX - aren't these two already correct? -bnoble */
    out->opcode = opcode;
    out->unique = seq;
    vmp->vm_outSize = tbuf[0];		/* Amount of data transferred? */
    vmp->vm_flags |= VM_WRITE;
    wakeup(&vmp->vm_sleep);

    return(0);
}
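/*
 * vc_nb_ioctl: control operations on the Coda device: resizing the name
 * cache, gathering its statistics, and checking that the kernel and
 * Venus agree on coda_kernel_version.
 */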
int
vc_nb_ioctl(dev_t dev, u_long cmd, void *addr, int flag,
    struct lwp *l)
{
    ENTRY;

    switch (cmd) {
    case CODARESIZE: {
	struct coda_resize *data = (struct coda_resize *)addr;
	return(coda_nc_resize(data->hashsize, data->heapsize, IS_DOWNCALL));
    }
    case CODASTATS:
	coda_nc_gather_stats();
	return(0);
    case CIOC_KERNEL_VERSION:
	switch (*(u_int *)addr) {
	case 0:
	    *(u_int *)addr = coda_kernel_version;
	    return 0;
	case 1:
	case 2:
	    if (coda_kernel_version != *(u_int *)addr)
		return ENOENT;
	    return 0;
	default:
	    return ENOENT;
	}
    default:
	return(EINVAL);
    }
}
int
vc_nb_poll(dev_t dev, int events, struct lwp *l)
{
    struct vcomm *vcp;
    int event_msk;

    ENTRY;

    if (minor(dev) >= NVCODA)
	return(ENXIO);

    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;

    event_msk = events & (POLLIN|POLLRDNORM);
    if (!event_msk)
	return(0);

    if (!TAILQ_EMPTY(&vcp->vc_requests))
	return(events & (POLLIN|POLLRDNORM));

    selrecord(l, &(vcp->vc_selproc));

    return(0);
}
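/*
 * kqueue read filter: the event fires when a request is waiting on
 * vc_requests; kn_data reports the size of that message.
 */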
static void
filt_vc_nb_detach(struct knote *kn)
{
    struct vcomm *vcp = kn->kn_hook;

    SLIST_REMOVE(&vcp->vc_selproc.sel_klist, kn, knote, kn_selnext);
}
static int
filt_vc_nb_read(struct knote *kn, long hint)
{
    struct vcomm *vcp = kn->kn_hook;
    struct vmsg *vmp;

    vmp = TAILQ_FIRST(&vcp->vc_requests);
    if (vmp == NULL)
	return (0);

    kn->kn_data = vmp->vm_inSize;
    return (1);
}
static const struct filterops vc_nb_read_filtops =
	{ 1, NULL, filt_vc_nb_detach, filt_vc_nb_read };
int
vc_nb_kqfilter(dev_t dev, struct knote *kn)
{
    struct vcomm *vcp;
    struct klist *klist;

    ENTRY;

    if (minor(dev) >= NVCODA)
	return(ENXIO);

    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;

    switch (kn->kn_filter) {
    case EVFILT_READ:
	klist = &vcp->vc_selproc.sel_klist;
	kn->kn_fop = &vc_nb_read_filtops;
	break;

    default:
	return (EINVAL);
    }

    kn->kn_hook = vcp;
    SLIST_INSERT_HEAD(klist, kn, kn_selnext);

    return (0);
}
struct coda_clstat coda_clstat;

/*
 * Key question: whether to sleep interruptibly or uninterruptibly when
 * waiting for Venus.  The former seems better (cause you can ^C a
 * job), but then GNU-EMACS completion breaks.  Use tsleep with no
 * timeout, and no longjmp happens.  But, when sleeping
 * "uninterruptibly", we don't get told if it returns abnormally
 * (e.g. kill -9).
 */
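/*
 * coda_call: perform an upcall to Venus.  The request in 'buffer' is
 * wrapped in a vmsg, queued on vc_requests, and the caller sleeps on
 * vm_sleep until vc_nb_write() delivers the reply, or until the sleep is
 * interrupted, in which case the request is withdrawn or a CODA_SIGNAL
 * message is sent so Venus can abandon the operation.
 *
 * Sketch of a typical use (illustrative, not from this file): a vnode
 * operation formats a union inputArgs, then
 *
 *	error = coda_call(mi, insize, &outsize, (void *)inp);
 *
 * and on success reads its results back from the same buffer.
 */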
int
coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize,
    void *buffer)
{
    struct vcomm *vcp;
    struct vmsg *vmp;
    int error;
    int i;
    struct lwp *l = curlwp;
    struct proc *p = l->l_proc;
    sigset_t psig_omask;

    psig_omask = l->l_sigmask;	/* XXXSA */

    if (mntinfo == NULL) {
	/* Unlikely, but could be a race condition with a dying warden */
	return ENODEV;
    }

    vcp = &(mntinfo->mi_vcomm);

    coda_clstat.ncalls++;
    coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;

    if (!VC_OPEN(vcp))
	return(ENODEV);

    CODA_ALLOC(vmp, struct vmsg *, sizeof(struct vmsg));
    /* Format the request message. */
    vmp->vm_data = buffer;
    vmp->vm_flags = 0;
    vmp->vm_inSize = inSize;
    vmp->vm_outSize
	= *outSize ? *outSize : inSize;	/* |buffer| >= inSize */
    vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
    vmp->vm_unique = ++vcp->vc_seq;
    myprintf(("Doing a call for %d.%d\n",
	vmp->vm_opcode, vmp->vm_unique));

    /* Fill in the common input args. */
    ((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;

    /* Append msg to request queue and poke Venus. */
    TAILQ_INSERT_TAIL(&vcp->vc_requests, vmp, vm_chain);
    selnotify(&(vcp->vc_selproc), 0, 0);
    /* We can be interrupted while we wait for Venus to process
     * our request.  If the interrupt occurs before Venus has read
     * the request, we dequeue and return.  If it occurs after the
     * read but before the reply, we dequeue, send a signal
     * message, and return.  If it occurs after the reply we ignore
     * it.  In no case do we want to restart the syscall.  If it
     * was interrupted by a venus shutdown (vcclose), return ENODEV.
     */

    /* Ignore return, We have to check anyway */
#ifdef	CTL_C
    /* This is work in progress.  Setting coda_pcatch lets tsleep reawaken
       on a ^c or ^z.  The problem is that emacs sets certain interrupts
       as SA_RESTART.  This means that we should exit sleep, handle the
       "signal" and then go to sleep again.  Mostly this is done by letting
       the syscall complete and be restarted.  We are not idempotent and
       can not do this.  A better solution is necessary.
     */
    i = 0;
    do {
	error = tsleep(&vmp->vm_sleep, (coda_call_sleep|coda_pcatch),
	    "coda_call", hz*2);
	if (error == 0)
	    break;
	mutex_enter(p->p_lock);
	if (error == EWOULDBLOCK) {
	    printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
	} else if (sigispending(l, SIGIO)) {
	    sigaddset(&l->l_sigmask, SIGIO);
	    printf("coda_call: tsleep returns %d SIGIO, cnt %d\n", error, i);
	} else if (sigispending(l, SIGALRM)) {
	    sigaddset(&l->l_sigmask, SIGALRM);
	    printf("coda_call: tsleep returns %d SIGALRM, cnt %d\n", error, i);
	} else {
	    sigset_t tmp;
	    tmp = p->p_sigpend.sp_set;	/* array assignment */
	    sigminusset(&l->l_sigmask, &tmp);

	    printf("coda_call: tsleep returns %d, cnt %d\n", error, i);
	    printf("coda_call: siglist = %x.%x.%x.%x, sigmask = %x.%x.%x.%x, mask %x.%x.%x.%x\n",
		p->p_sigpend.sp_set.__bits[0], p->p_sigpend.sp_set.__bits[1],
		p->p_sigpend.sp_set.__bits[2], p->p_sigpend.sp_set.__bits[3],
		l->l_sigmask.__bits[0], l->l_sigmask.__bits[1],
		l->l_sigmask.__bits[2], l->l_sigmask.__bits[3],
		tmp.__bits[0], tmp.__bits[1], tmp.__bits[2], tmp.__bits[3]);
	    mutex_exit(p->p_lock);
	    break;
#ifdef	notyet
	    sigminusset(&l->l_sigmask, &p->p_sigpend.sp_set);
	    printf("coda_call: siglist = %x.%x.%x.%x, sigmask = %x.%x.%x.%x\n",
		p->p_sigpend.sp_set.__bits[0], p->p_sigpend.sp_set.__bits[1],
		p->p_sigpend.sp_set.__bits[2], p->p_sigpend.sp_set.__bits[3],
		l->l_sigmask.__bits[0], l->l_sigmask.__bits[1],
		l->l_sigmask.__bits[2], l->l_sigmask.__bits[3]);
#endif
	}
	mutex_exit(p->p_lock);
    } while (error && i++ < 128 && VC_OPEN(vcp));
    l->l_sigmask = psig_omask;	/* XXXSA */
#else
    (void) tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);
#endif
    if (VC_OPEN(vcp)) {		/* Venus is still alive */
	/* Op went through, interrupt or not... */
	if (vmp->vm_flags & VM_WRITE) {
	    error = 0;
	    *outSize = vmp->vm_outSize;
	}

	else if (!(vmp->vm_flags & VM_READ)) {
	    /* Interrupted before venus read it. */
	    myprintf(("interrupted before read: op = %d.%d, flags = %x\n",
		vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));

	    TAILQ_REMOVE(&vcp->vc_requests, vmp, vm_chain);
	    error = EINTR;
	}

	else {
	    /* (!(vmp->vm_flags & VM_WRITE)) means interrupted after
	       upcall started */
	    /* Interrupted after start of upcall, send venus a signal */
	    struct coda_in_hdr *dog;
	    struct vmsg *svmp;

	    myprintf(("Sending Venus a signal: op = %d.%d, flags = %x\n",
		vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));

	    TAILQ_REMOVE(&vcp->vc_replies, vmp, vm_chain);
	    error = EINTR;

	    CODA_ALLOC(svmp, struct vmsg *, sizeof (struct vmsg));
	    CODA_ALLOC((svmp->vm_data), char *, sizeof (struct coda_in_hdr));
	    dog = (struct coda_in_hdr *)svmp->vm_data;

	    dog->opcode = svmp->vm_opcode = CODA_SIGNAL;
	    dog->unique = svmp->vm_unique = vmp->vm_unique;
	    svmp->vm_inSize = sizeof (struct coda_in_hdr);
/*??? rvb */ svmp->vm_outSize = sizeof (struct coda_in_hdr);

	    myprintf(("coda_call: enqueing signal msg (%d, %d)\n",
		svmp->vm_opcode, svmp->vm_unique));

	    /* insert at head of queue */
	    TAILQ_INSERT_HEAD(&vcp->vc_requests, svmp, vm_chain);
	    selnotify(&(vcp->vc_selproc), 0, 0);
	}
    }

    else {	/* If venus died (!VC_OPEN(vcp)) */
	myprintf(("vcclose woke op %d.%d flags %d\n",
	    vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));

	error = ENODEV;
    }

    CODA_FREE(vmp, sizeof(struct vmsg));
    if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))
	wakeup(&outstanding_upcalls);

    if (!error)
	error = ((struct coda_out_hdr *)buffer)->result;

    return(error);
}