Merge 1.8.0~pre4 packaging into master
[pkg-k5-afs_openafs.git] / src / afs / HPUX / osi_vnodeops.c
blobbce2441da47e23b5fc7fdc53ca71b1a161a75ea0
1 /*
2 * Copyright 2000, International Business Machines Corporation and others.
3 * All Rights Reserved.
4 *
5 * This software has been released under the terms of the IBM Public
6 * License. For details, see the LICENSE file in the top-level source
7 * directory or online at http://www.openafs.org/dl/license10.html
8 */
10 /* This is a placeholder for routines unique to the port of AFS to hp-ux*/
12 #include <afsconfig.h>
13 #include "afs/param.h"
16 #include "afs/sysincludes.h" /* Standard vendor system headers */
17 #include "afsincludes.h" /* Afs-based standard headers */
18 #include "afs/afs_stats.h" /* statistics stuff */
20 #include <sys/uio.h>
21 #include <sys/vfs.h>
22 #include <sys/mount.h>
23 #include <sys/vnode.h>
24 #include <sys/pathname.h>
/*
 * Forward declarations of symbols defined outside the visible part of this
 * file: the Afs_vfsops table and the afs_* routines that are referenced
 * further down (for example in the Afs_vnodeops and afs_fileops tables).
 * Every function is declared with an empty, old-style parameter list, so
 * the compiler does not check the arguments of calls made through these
 * declarations.
 */
26 extern struct vfsops Afs_vfsops;
27 extern int afs_hp_strategy();
28 extern int afs_bmap(), afs_badop(), afs_noop(), afs_lockf();
29 extern int afs_pagein();
30 extern int afs_pageout();
31 extern int afs_ioctl();
32 extern int afs_prealloc();
33 extern int afs_mapdbd();
34 extern int afs_mmap();
35 extern int afs_cachelimit();
36 extern int afs_vm_checkpage();
37 extern int afs_vm_fscontiguous();
38 extern int afs_vm_stopio();
39 extern int afs_read_ahead();
40 extern int afs_unmap();
41 extern int afs_release();
42 extern int afs_swapfs_len();
43 extern int afs_readdir2();
44 extern int afs_readdir();
45 extern int afs_readdir3();
46 extern int afs_pathconf();
47 extern int afs_close();
/*
 * vtoblksz(vp) yields vfs_bsize of the filesystem that vnode vp belongs to.
 *
 * When AFS_HPUX110_ENV is defined the vmemp_ macros are replaced by stubs:
 * vmemp_lockx() and vmemp_unlockx() expand to nothing, and vmemp_returnx(a)
 * becomes a plain return(a).
 */
49 #define vtoblksz(vp) ((vp)->v_vfsp->vfs_bsize)
51 #if defined(AFS_HPUX110_ENV)
52 /* We no longer need to lock on the VM Empire,
53 * or at least that is what is claimed.
54 * so we will noopt the vmemp_ routines
55 * This needs to be looked at closer.
57 #define vmemp_lockx()
58 #undef vmemp_returnx
59 #define vmemp_returnx(a) return(a)
60 #define vmemp_unlockx()
61 #endif
63 #if !defined(AFS_HPUX110_ENV)
65 * Copy an mbuf to the contiguous area pointed to by cp.
66 * Skip <off> bytes and copy <len> bytes.
67 * Returns the number of bytes not transferred.
68 * The mbuf is NOT changed.
70 int
71 m_cpytoc(m, off, len, cp)
72 struct mbuf *m;
73 int off, len;
74 caddr_t cp;
76 int ml;
78 if (m == NULL || off < 0 || len < 0 || cp == NULL)
79 osi_Panic("m_cpytoc");
80 while (off && m)
81 if (m->m_len <= off) {
82 off -= m->m_len;
83 m = m->m_next;
84 continue;
85 } else
86 break;
87 if (m == NULL)
88 return (len);
90 ml = MIN(len, m->m_len - off);
91 memcpy(cp, mtod(m, caddr_t) + off, (u_int) ml);
92 cp += ml;
93 len -= ml;
94 m = m->m_next;
96 while (len && m) {
97 ml = m->m_len;
98 memcpy(cp, mtod(m, caddr_t), (u_int) ml);
99 cp += ml;
100 len -= ml;
101 m = m->m_next;
104 return (len);
106 #endif
109 * Note that the standard Sun vnode interface doesn't have a vop_lockf(), so this code is
110 * totally new. This came about because HP-UX has lockf() implemented as
111 * a system call while Sun has it implemented as a library (apparently).
112 * To handle this, we have to translate the lockf() request into an
113 * fcntl() looking request, and then translate the results back if necessary.
114 * We call mp_afs_lockctl() directly.
116 afs_lockf(vp, flag, len, cred, fp, LB, UB)
117 struct vnode *vp;
118 int flag;
119 afs_ucred_t *cred;
120 struct file *fp;
121 k_off_t len, LB, UB;
123 /*for now, just pretend it works */
124 struct k_flock flock;
125 int cmd, code;
128 * Create a flock structure and translate the lockf request
129 * into an appropriate looking fcntl() type request for afs_lockctl()
131 flock.l_whence = 0;
132 flock.l_len = len;
133 flock.l_start = fp->f_offset;
134 /* convert negative lengths to positive */
135 if (flock.l_len < 0) {
136 flock.l_start += flock.l_len;
137 flock.l_len = -(flock.l_len);
140 * Adjust values to look like fcntl() requests.
141 * All locks are write locks, only F_LOCK requests
142 * are blocking. F_TEST has to be translated into
143 * a get lock and then back again.
145 flock.l_type = F_WRLCK;
146 cmd = F_SETLK;
147 switch (flag) {
148 case F_ULOCK:
149 flock.l_type = F_UNLCK;
150 break;
151 case F_LOCK:
152 cmd = F_SETLKW;
153 break;
154 case F_TEST:
155 cmd = F_GETLK;
156 break;
158 u.u_error = mp_afs_lockctl(vp, &flock, cmd, fp->f_cred);
159 if (u.u_error) {
160 return (u.u_error); /* some other error code */
163 * if request is F_TEST, and GETLK changed
164 * the lock type to ULOCK, then return 0, else
165 * set errno to EACCESS and return.
167 if (flag == F_TEST && flock.l_type != F_UNLCK) {
168 u.u_error = EACCES;
169 return (u.u_error);
171 return (0);
175 #if defined(AFS_HPUX1122_ENV)
176 #include "machine/vm/vmparam.h"
177 #else
178 #include "../machine/vmparam.h" /* For KERNELSPACE */
179 #endif
180 #include "h/debug.h"
181 #include "h/types.h"
182 #if !defined(AFS_HPUX1123_ENV)
183 /* 11.23 is using 64 bit in many cases */
184 #define kern_daddr_t daddr_t
185 #endif
186 #include "h/param.h"
187 #include "h/vmmac.h"
188 #include "h/time.h"
189 #include "ufs/inode.h"
190 #include "ufs/fs.h"
191 #include "h/dbd.h"
192 #if defined(AFS_HPUX1123_ENV)
193 dbd_t *finddbd();
194 #endif /* AFS_HPUX1123_ENV */
195 #include "h/vfd.h"
196 #include "h/region.h"
197 #include "h/pregion.h"
198 #include "h/vmmeter.h"
199 #include "h/user.h"
200 #include "h/sysinfo.h"
201 #include "h/pfdat.h"
202 #if !defined(AFS_HPUX1123_ENV)
203 #include "h/tuneable.h"
204 #endif
205 #include "h/buf.h"
206 #include "netinet/in.h"
/*
 * One-entry cache of a fake struct buf.  afs_bread() takes the buffer from
 * here when the pointer is non-null, and afs_brelse() stores a released
 * fake buffer here when the pointer is null.
 */
208 /* a freelist of one */
209 struct buf *afs_bread_freebp = 0;
212 * Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
213 * Thus we can use fake bufs (ie not from the real buffer pool).
215 afs_bread(vp, lbn, bpp)
216 struct vnode *vp;
217 kern_daddr_t lbn;
218 struct buf **bpp;
220 int offset, fsbsize, error;
221 struct buf *bp;
222 struct iovec iov;
223 struct uio uio;
225 memset(&uio, 0, sizeof(uio));
226 memset(&iov, 0, sizeof(iov));
228 AFS_STATCNT(afs_bread);
229 fsbsize = vp->v_vfsp->vfs_bsize;
230 offset = lbn * fsbsize;
231 if (afs_bread_freebp) {
232 bp = afs_bread_freebp;
233 afs_bread_freebp = 0;
234 } else {
235 bp = (struct buf *)AFS_KALLOC(sizeof(*bp));
236 bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);
239 iov.iov_base = bp->b_un.b_addr;
240 iov.iov_len = fsbsize;
241 uio.afsio_iov = &iov;
242 uio.afsio_iovcnt = 1;
243 uio.afsio_seg = AFS_UIOSYS;
244 uio.afsio_offset = offset;
245 uio.afsio_resid = fsbsize;
246 uio.uio_fpflags = 0;
247 *bpp = 0;
249 error = afs_read(VTOAFS(vp), &uio, p_cred(u.u_procp), 0);
250 if (error) {
251 afs_bread_freebp = bp;
252 return error;
254 if (*bpp) {
255 afs_bread_freebp = bp;
256 } else {
257 *(struct buf **)&bp->b_vp = bp; /* mark as fake */
258 *bpp = bp;
260 return 0;
263 afs_brelse(vp, bp)
264 struct vnode *vp;
265 struct buf *bp;
267 AFS_STATCNT(afs_brelse);
269 if ((struct buf *)bp->b_vp != bp) { /* not fake */
270 ufs_brelse(bp->b_vp, bp);
271 } else if (afs_bread_freebp) {
272 AFS_KFREE(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
273 AFS_KFREE(bp, sizeof(*bp));
274 } else {
275 afs_bread_freebp = bp;
280 afs_bmap(avc, abn, anvp, anbn)
281 struct vcache *avc;
282 kern_daddr_t abn, *anbn;
283 struct vcache **anvp;
285 AFS_STATCNT(afs_bmap);
286 if (anvp)
287 *anvp = avc;
288 if (anbn)
289 *anbn = abn * (8192 / DEV_BSIZE); /* in 512 byte units */
290 return 0;
293 afs_inactive(avc, acred)
294 struct vcache *avc;
295 afs_ucred_t *acred;
297 struct vnode *vp = AFSTOV(avc);
298 ulong_t context;
299 lock_t *sv_lock;
300 if (afs_shuttingdown != AFS_RUNNING)
301 return;
304 * In Solaris and HPUX s800 and HP-UX10.0 they actually call us with
305 * v_count 1 on last reference!
307 MP_H_SPINLOCK_USAV(vn_h_sl_pool, vp, &sv_lock, &context);
308 if (avc->vrefCount < 1)
309 osi_Panic("afs_inactive : v_count < 1\n");
312 * If more than 1 don't unmap the vnode but do decrement the ref count
314 vp->v_count--;
315 if (vp->v_count > 0) {
316 MP_SPINUNLOCK_USAV(sv_lock, context);
317 return 0;
319 MP_SPINUNLOCK_USAV(sv_lock, context);
320 afs_InactiveVCache(avc, acred);
321 return 0;
326 mp_afs_open(struct vnode **avcp, int aflags, afs_ucred_t *acred)
328 int code;
330 AFS_GLOCK();
331 code = afs_open(avcp, aflags, acred);
332 AFS_GUNLOCK();
333 return (code);
337 mp_afs_close(struct vnode *avcp, int aflags, afs_ucred_t *acred)
339 int code;
341 AFS_GLOCK();
342 code = afs_close(avcp, aflags, acred);
343 AFS_GUNLOCK();
344 return (code);
348 mp_afs_rdwr(struct vnode *avcp, struct uio *uio, enum uio_rw arw,
349 int aio, afs_ucred_t *acred)
351 int code;
352 long save_resid;
354 AFS_GLOCK();
355 save_resid = uio->uio_resid;
356 code = afs_rdwr(avcp, uio, arw, aio, acred);
357 if (arw == UIO_WRITE && code == ENOSPC) {
358 /* HP clears code if any data written. */
359 uio->uio_resid = save_resid;
361 AFS_GUNLOCK();
362 return (code);
366 mp_afs_getattr(struct vnode *avcp, struct vattr *attrs,
367 afs_ucred_t *acred, enum vsync unused1)
369 int code;
371 AFS_GLOCK();
372 code = afs_getattr(avcp, attrs, acred);
373 AFS_GUNLOCK();
374 return (code);
378 mp_afs_setattr(struct vnode *avcp, struct vattr *attrs,
379 afs_ucred_t *acred, int unused1)
381 int code;
383 AFS_GLOCK();
384 code = afs_setattr(avcp, attrs, acred);
385 AFS_GUNLOCK();
386 return (code);
390 mp_afs_access(struct vnode *avcp, int mode, afs_ucred_t *acred)
392 int code;
394 AFS_GLOCK();
395 code = afs_access(avcp, mode, acred);
396 AFS_GUNLOCK();
397 return (code);
401 mp_afs_lookup(struct vnode *adp, char *aname,
402 struct vnode **avcp, afs_ucred_t *acred,
403 struct vnode *unused1)
405 int code;
407 AFS_GLOCK();
408 code = afs_lookup(adp, aname, avcp, acred);
409 AFS_GUNLOCK();
410 return (code);
414 mp_afs_create(struct vnode *adp, char *aname, struct vattr *attrs,
415 enum vcexcl aexcl, int amode, struct vnode **avcp,
416 afs_ucred_t *acred)
418 int code;
420 AFS_GLOCK();
421 code = afs_create(adp, aname, attrs, aexcl, amode, avcp, acred);
422 AFS_GUNLOCK();
423 return (code);
428 mp_afs_remove(struct vnode *adp, char *aname,
429 afs_ucred_t *acred)
431 int code;
433 AFS_GLOCK();
434 code = afs_remove(adp, aname, acred);
435 AFS_GUNLOCK();
436 return (code);
440 mp_afs_link(struct vnode *avc, struct vnode *adp,
441 char *aname, afs_ucred_t *acred)
443 int code;
445 AFS_GLOCK();
446 code = afs_link(avc, adp, aname, acred);
447 AFS_GUNLOCK();
448 return (code);
452 mp_afs_rename(struct vnode *aodp, char *aname1,
453 struct vnode *andp, char *aname2,
454 afs_ucred_t *acred)
456 int code;
458 AFS_GLOCK();
459 code = afs_rename(aodp, aname1, andp, aname2, acred);
460 AFS_GUNLOCK();
461 return (code);
465 mp_afs_mkdir(struct vnode *adp, char *aname, struct vattr *attrs,
466 struct vnode **avcp, afs_ucred_t *acred)
468 int code;
470 AFS_GLOCK();
471 code = afs_mkdir(adp, aname, attrs, avcp, acred);
472 AFS_GUNLOCK();
473 return (code);
478 mp_afs_rmdir(struct vnode *adp, char *aname, afs_ucred_t *acred)
480 int code;
482 AFS_GLOCK();
483 code = afs_rmdir(adp, aname, acred);
484 AFS_GUNLOCK();
485 return (code);
490 mp_afs_readdir(struct vnode *avc, struct uio *auio,
491 afs_ucred_t *acred)
493 int code;
495 AFS_GLOCK();
496 code = afs_readdir(avc, auio, acred);
497 AFS_GUNLOCK();
498 return (code);
502 mp_afs_symlink(struct vnode *adp, char *aname, struct vattr *attrs,
503 char *atargetName, afs_ucred_t *acred)
505 int code;
507 AFS_GLOCK();
508 code = afs_symlink(adp, aname, attrs, atargetName, NULL, acred);
509 AFS_GUNLOCK();
510 return (code);
515 mp_afs_readlink(struct vnode *avc, struct uio *auio,
516 afs_ucred_t *acred)
518 int code;
520 AFS_GLOCK();
521 code = afs_readlink(avc, auio, acred);
522 AFS_GUNLOCK();
523 return (code);
527 mp_afs_fsync(struct vnode *avc, afs_ucred_t *acred, int unused1)
529 int code;
531 AFS_GLOCK();
532 code = afs_fsync(avc, acred);
533 AFS_GUNLOCK();
534 return (code);
538 mp_afs_bread(struct vnode *avc, kern_daddr_t lbn, struct buf **bpp,
539 struct vattr *unused1, struct ucred *unused2)
541 int code;
543 AFS_GLOCK();
544 code = afs_bread(avc, lbn, bpp);
545 AFS_GUNLOCK();
546 return (code);
/*
 * Calls afs_brelse() with AFS_GLOCK held and returns its result.
 * Afs_vnodeops installs this wrapper only when AFS_NONFSTRANS is defined;
 * otherwise that slot holds afs_noop.
 */
int
mp_afs_brelse(struct vnode *avc, struct buf *bp)
{
    int code;

    AFS_GLOCK();
    code = afs_brelse(avc, bp);
    AFS_GUNLOCK();
    return (code);
}
562 mp_afs_inactive(struct vnode *avc, afs_ucred_t *acred)
564 int code;
566 AFS_GLOCK();
567 code = afs_inactive(avc, acred);
568 AFS_GUNLOCK();
569 return (code);
573 mp_afs_lockctl(struct vnode *avc, struct flock *af, int cmd,
574 afs_ucred_t *acred, struct file *unused1, off_t unused2,
575 off_t unused3)
577 int code;
579 AFS_GLOCK();
580 code = afs_lockctl(avc, af, cmd, acred);
581 AFS_GUNLOCK();
582 return (code);
/*
 * Fid operation installed in Afs_vnodeops: calls afs_fid() with AFS_GLOCK
 * held and returns its result.
 */
int
mp_afs_fid(struct vnode *avc, struct fid **fidpp)
{
    int code;

    AFS_GLOCK();
    code = afs_fid(avc, fidpp);
    AFS_GUNLOCK();
    return (code);
}
597 mp_afs_readdir2(struct vnode *avc, struct uio *auio,
598 afs_ucred_t *acred)
600 int code;
602 AFS_GLOCK();
603 code = afs_readdir2(avc, auio, acred);
604 AFS_GUNLOCK();
605 return (code);
/*
 * Vnode operations table for AFS on HP-UX, followed by afs_ops, a pointer
 * to that table.  The initializer is positional, so the order of the
 * entries must match the field order of struct vnodeops.
 *
 * Many slots hold the mp_afs_* wrappers defined above, which hold
 * AFS_GLOCK around the underlying afs_* routine.  The other slots point
 * directly at afs_* routines, at afs_noop or afs_badop, or are NULL.  The
 * two slots after afs_hp_strategy are afs_noop unless AFS_NONFSTRANS is
 * defined, in which case they are mp_afs_bread and mp_afs_brelse.
 *
 * NOTE(review): the slot between mp_afs_rmdir and mp_afs_symlink holds
 * afs_readdir rather than the mp_afs_readdir wrapper, and afs_pathconf
 * fills two consecutive slots - confirm both are intended.
 */
609 struct vnodeops Afs_vnodeops = {
610 mp_afs_open,
611 mp_afs_close,
612 mp_afs_rdwr,
613 afs_ioctl,
614 afs_noop,
615 mp_afs_getattr,
616 mp_afs_setattr,
617 mp_afs_access,
618 mp_afs_lookup,
619 mp_afs_create,
620 mp_afs_remove,
621 mp_afs_link,
622 mp_afs_rename,
623 mp_afs_mkdir,
624 mp_afs_rmdir,
625 afs_readdir,
626 mp_afs_symlink,
627 mp_afs_readlink,
628 mp_afs_fsync,
629 mp_afs_inactive,
630 afs_bmap,
631 afs_hp_strategy,
632 #if !defined(AFS_NONFSTRANS)
633 /* on HPUX102 the nfs translator calls afs_bread but does
634 * not call afs_brelse. Hence we see a memory leak. If the
635 * VOP_BREAD() call fails, then nfs does VOP_RDWR() to get
636 * the same data : this is the path we follow now. */
637 afs_noop,
638 afs_noop,
639 #else
640 mp_afs_bread,
641 mp_afs_brelse,
642 #endif
643 afs_badop, /* pathsend */
644 afs_noop, /* setacl */
645 afs_noop, /* getacl */
646 afs_pathconf,
647 afs_pathconf,
648 mp_afs_lockctl,
649 afs_lockf, /* lockf */
650 mp_afs_fid,
651 afs_noop, /*fsctl */
652 afs_badop,
653 afs_pagein,
654 afs_pageout,
655 NULL,
656 NULL,
657 afs_prealloc,
658 afs_mapdbd,
659 afs_mmap,
660 afs_cachelimit,
661 afs_vm_checkpage,
662 afs_vm_fscontiguous,
663 afs_vm_stopio,
664 afs_read_ahead,
665 afs_release,
666 afs_unmap,
667 afs_swapfs_len,
668 mp_afs_readdir2,
669 afs_readdir3,
672 struct vnodeops *afs_ops = &Afs_vnodeops;
674 /* vnode file operations, and our own */
/*
 * afs_fileops: a struct fileops initialized positionally with the generic
 * vno_rw, vno_ioctl and vno_select routines followed by afs_close.
 * afs_closex and vno_close are declared here but are not referenced by
 * the initializer.
 */
675 extern int vno_rw();
676 extern int vno_ioctl();
677 extern int vno_select();
678 extern int afs_closex();
679 extern int vno_close();
680 struct fileops afs_fileops = {
681 vno_rw,
682 vno_ioctl,
683 vno_select,
684 afs_close,
/* Repeats the vtoblksz() definition found earlier in this file with the same replacement text. */
687 #define vtoblksz(vp) ((vp)->v_vfsp->vfs_bsize)
690 ********************************************************************
691 ****
692 **** afspgin_setup_io_ranges ()
693 **** similar to: nfspgin_setup_io_ranges ()
694 ********************************************************************
/*
 * afspgin_setup_io_ranges: decide which page ranges a page-in will read
 * and record them in vm_info through the VM_SET_IO_* macros.
 *
 *   vm_info    - per-fault VM state
 *   bpages     - pages per filesystem block (afs_pagein passes
 *                btop(bsize))
 *   isize      - file size in bytes (afs_pagein passes va.va_size)
 *   startindex - page index of the fault
 *
 * The upper page limit is clipped in turn by the end of the current
 * block, vm_reset_maxpage(), the file size, startindex + maxpagein and
 * vm_maxpage().  A limit of zero returns 0 at once.  The dbd type and
 * start block are copied out of vm_info and its dbd and vfd pointers are
 * cleared before any expansion is attempted.
 *
 * max_num_io == 1: a single range is built by expanding upward with
 * expand_faultin_up() and then downward with expand_faultin_down().
 *
 * max_num_io > 1: starting from vm_minpage(vm_info, 0), up to max_num_io
 * ranges are built, each expanded upward and limited to the end of its
 * block.  If the walk stops before multio_maxpage, VM_MULT_IO_FAILURE()
 * and VM_REINIT_FAULT_DBDVFD() are invoked and 0 is returned; otherwise
 * count is set to maxpagein.
 *
 * Before returning, the start index of range 0 is stored with
 * VM_SET_STARTINDX().  Returns count, or 0 in the two cases above
 * (afs_pagein treats 0 as a request to retry).
 */
696 pgcnt_t
697 afspgin_setup_io_ranges(vfspage_t * vm_info, pgcnt_t bpages, k_off_t isize,
698 pgcnt_t startindex)
700 pgcnt_t file_offset = VM_FILE_OFFSET(vm_info);
701 pgcnt_t minpage; /* first page to bring in */
702 pgcnt_t maxpage; /* one past last page to bring in */
703 pgcnt_t maxpagein;
704 pgcnt_t multio_maxpage;
705 kern_daddr_t start_blk;
706 dbd_t *dbd;
707 expnd_flags_t up_reason, down_reason;
708 int count = 1;
709 int indx = 0;
710 int max_num_io;
711 int dbdtype;
712 preg_t *prp;
714 VM_GET_IO_INFO(vm_info, maxpagein, max_num_io);
717 * We do not go past the end of the current pregion nor past the end
718 * of the current file.
721 maxpage = startindex + (bpages - (startindex + file_offset) % bpages);
722 maxpage = vm_reset_maxpage(vm_info, maxpage);
723 maxpage = MIN(maxpage, (pgcnt_t) btorp(isize) - file_offset);
724 maxpage = MIN(maxpage, startindex + maxpagein);
725 multio_maxpage = maxpage = vm_maxpage(vm_info, maxpage);
727 if (!maxpage)
728 return (0);
730 VASSERT(maxpage >= startindex);
733 * Expanding the fault will create calls to FINDENTRY() for new
734 * pages, which will obsolete "dbd", so copy what it points to
735 * and clear it to prevent using stale data.
738 prp = VM_PRP(vm_info);
739 dbdtype = DBD_TYPE(vm_info);
740 start_blk = DBD_DATA(vm_info);
741 vm_info->dbd = NULL;
742 vm_info->vfd = NULL;
743 VASSERT(dbdtype != DBD_NONE);
745 if (max_num_io == 1) {
747 * We need to set up one I/O: First we attempt to expand the
748 * I/O forward. Then we expand the I/O backwards.
750 count =
751 expand_faultin_up(vm_info, dbdtype, (int)bpages, maxpage, count,
752 startindex, start_blk, &up_reason);
753 maxpage = startindex + count;
754 VASSERT(maxpage <= startindex + maxpagein);
755 minpage = startindex - (startindex + file_offset) % bpages;
756 minpage = MAX(minpage, maxpage - maxpagein);
757 VASSERT(startindex >= VM_BASE_OFFSET(vm_info));
758 minpage = vm_minpage(vm_info, minpage);
759 VASSERT(minpage <= startindex);
760 count =
761 expand_faultin_down(vm_info, dbdtype, (int)bpages, minpage, count,
762 &startindex, &start_blk, &down_reason);
763 VM_SET_IO_STARTINDX(vm_info, 0, startindex);
764 VM_SET_IO_STARTBLK(vm_info, 0, start_blk);
765 VM_SET_IO_COUNT(vm_info, 0, count);
766 VM_SET_NUM_IO(vm_info, 1);
769 if (max_num_io > 1) {
771 * We need to set up multiple I/O information; beginning
772 * with the startindex, we will expand upwards. The expansion
773 * could stop for one of 2 reasons; we take the appropriate
774 * action in each of these cases:
775 * o VM reasons: abort setting up the multiple I/O
776 * information and return to our caller indicating
777 * that "retry" is required.
778 * o pagelimit: set up the next I/O info [we may have
779 * reached multio_maxpage at this point].
780 * Note that expansion involves no more than a block at a time;
781 * hence it could never stop due to "discontiguous block"
782 * reason.
784 startindex = minpage = vm_minpage(vm_info, 0);
785 for (indx = 0; (indx < max_num_io) && (startindex < multio_maxpage);
786 indx++, startindex += count) {
787 dbd = FINDDBD(prp->p_reg, startindex);
788 start_blk = dbd->dbd_data;
789 maxpage =
790 startindex + (bpages - (startindex + file_offset) % bpages);
791 maxpage = min(maxpage, multio_maxpage);
792 count =
793 expand_faultin_up(vm_info, dbdtype, bpages, maxpage,
794 1 /* count */ ,
795 startindex, start_blk, &up_reason);
796 VM_SET_IO_STARTINDX(vm_info, indx, startindex);
797 VM_SET_IO_STARTBLK(vm_info, indx, start_blk);
798 VM_SET_IO_COUNT(vm_info, indx, count);
799 if (up_reason & VM_REASONS)
800 break;
801 VASSERT(!(up_reason & NONCONTIGUOUS_BLOCK));
802 VASSERT(up_reason & PAGELIMIT);
804 if (startindex < multio_maxpage) {
805 VM_MULT_IO_FAILURE(vm_info);
806 VM_REINIT_FAULT_DBDVFD(vm_info);
807 return (0); /* retry */
809 count = maxpagein;
810 VM_SET_NUM_IO(vm_info, indx);
814 * Tell VM where the I/O intends to start. This may be different
815 * from the faulting point.
818 VM_SET_STARTINDX(vm_info, VM_GET_IO_STARTINDX(vm_info, 0));
820 return (count);
825 ********************************************************************
826 ****
827 **** afspgin_blkflsh ()
828 **** similar to: nfspgin_blkflsh ()
829 ********************************************************************
831 retval_t
832 afspgin_blkflsh(vfspage_t * vm_info, struct vnode * devvp, pgcnt_t * num_4k)
834 int flush_reslt = 0;
835 pgcnt_t count = *num_4k;
836 pgcnt_t page_count;
837 int indx = 0;
838 int num_io = VM_GET_NUM_IO(vm_info);
841 * On this blkflush() we don't want to purge the buffer cache and we do
842 * want to wait, so the flags are '0'.
845 for (indx = 0; indx < num_io; indx++) {
846 flush_reslt =
847 blkflush(devvp, (kern_daddr_t) VM_GET_IO_STARTBLK(vm_info, indx),
848 ptob(VM_GET_IO_COUNT(vm_info, indx)), 0,
849 VM_REGION(vm_info));
850 if (flush_reslt) {
851 vm_lock(vm_info);
852 if (vm_page_now_valid(vm_info, &page_count)) {
853 vm_release_memory(vm_info);
854 vm_release_structs(vm_info);
855 *num_4k = page_count;
856 return (VM_PAGE_PRESENT);
858 return (VM_RETRY);
861 return (VM_DONE);
865 ********************************************************************
866 ****
867 **** afspgin_io ()
868 **** similar to: nfspgin_io ()
869 ********************************************************************
/*
 * afspgin_io: issue the synchronous page-in reads for the I/O ranges
 * recorded in vm_info.
 *
 * The section guarded by notdef is NFS-style read-ahead code that is
 * marked as not used in AFS.  In the remaining path the routine calls
 * vm_unlock(), then for each of the VM_GET_NUM_IO() ranges calls
 * syncpageio() with B_READ on devvp, stores the result in io[i].error,
 * ORs it into the return value and advances virt_addr by the byte length
 * of that range.  It also sets prp->p_nextfault to startindex + count.
 *
 * Returns the OR of all syncpageio() results (0 when every call returned
 * 0).  bpages and maxpagein are only referenced inside the notdef section.
 *
 * NOTE(review): no return type is visible on the definition line; the
 * caller stores the result in an int.
 */
872 afspgin_io(vfspage_t * vm_info, struct vnode *devvp, pgcnt_t bpages,
873 pgcnt_t maxpagein, pgcnt_t count)
875 int i;
876 int error = 0;
877 caddr_t vaddr = VM_ADDR(vm_info);
878 caddr_t virt_addr = VM_MAPPED_ADDR(vm_info);
879 pagein_info_t *io = VM_PAGEIN_INFO(vm_info);
880 preg_t *prp = VM_PRP(vm_info);
881 int wrt = VM_WRT(vm_info);
882 space_t space = VM_SPACE(vm_info);
883 int num_io = VM_GET_NUM_IO(vm_info);
885 #ifdef notdef /* Not used in AFS */
887 * With VM_READ_AHEAD_ALLOWED() macro, check if read-ahead should
888 * be used in this case.
890 * Unlike UFS, NFS does not start the faulting page I/O
891 * asynchronously. Why? Asynchronous requests are handled by the
892 * biod's. It doesn't make sense to queue up the faulting request
893 * behind other asynchrnous requests. This is not true for UFS
894 * where the asynchrnous request is immediately handled.
897 if ((VM_READ_AHEAD_ALLOWED(vm_info)) && (nfs_read_ahead_on)
898 && (NFS_DO_READ_AHEAD) && (should_do_read_ahead(prp, vaddr))) {
900 pgcnt_t max_rhead_io;
901 caddr_t rhead_vaddr;
902 pgcnt_t total_rheads_allowed;
905 * Determine the maximum amount of read-ahead I/O.
907 total_rheads_allowed = maxpagein - count;
910 * If the count is less than a block, raise it to one.
912 if (total_rheads_allowed < bpages)
913 total_rheads_allowed = bpages;
915 max_rhead_io = total_rheads_allowed;
916 rhead_vaddr = VM_MAPPED_ADDR(vm_info) + (count * NBPG);
917 error =
918 nfs_read_ahead(vm_info->vp, prp, wrt, space, rhead_vaddr,
919 &max_rhead_io);
922 * Set the next fault location. If read_ahead launches any
923 * I/O it will adjust it accordingly.
925 vm_info->prp->p_nextfault = vm_info->startindex + count;
928 * Now perform the faulting I/O synchronously.
930 vm_unlock(vm_info);
932 error =
933 syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, 0),
934 VM_MAPPED_SPACE(vm_info), VM_MAPPED_ADDR(vm_info),
935 (int)ptob(count), B_READ, devvp,
936 B_vfs_pagein | B_pagebf, VM_REGION(vm_info));
937 } else
938 #endif
940 virt_addr = VM_MAPPED_ADDR(vm_info);
941 vm_unlock(vm_info);
942 for (i = 0; i < num_io; i++) {
944 * REVISIT -- investigate doing asyncpageio().
946 error |= (io[i].error =
947 syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, i),
948 VM_MAPPED_SPACE(vm_info), virt_addr,
949 (int)ptob(VM_GET_IO_COUNT(vm_info, i)),
950 B_READ, devvp, B_vfs_pagein | B_pagebf,
951 VM_REGION(vm_info)));
952 virt_addr += ptob(VM_GET_IO_COUNT(vm_info, i));
955 * Set the next fault location. If read_ahead launches any
956 * I/O it will adjust it accordingly.
958 vm_info->prp->p_nextfault = vm_info->startindex + count;
961 return (error);
965 ********************************************************************
966 ****
967 **** afspgin_update_dbd ()
968 **** similar to: nfspgin_update_dbd ()
969 ********************************************************************
971 void
972 afspgin_update_dbd(vfspage_t * vm_info, int bsize)
974 k_off_t off;
975 pgcnt_t count = bsize / NBPG;
976 k_off_t rem;
977 pgcnt_t m;
978 pgcnt_t pgindx;
979 kern_daddr_t blkno;
980 int num_io = VM_GET_NUM_IO(vm_info);
981 int i;
983 for (i = 0; i < num_io; i++) {
985 pgindx = VM_GET_IO_STARTINDX(vm_info, i);
986 off = vnodindx(VM_REGION(vm_info), pgindx);
987 rem = off % bsize;
988 blkno = VM_GET_IO_STARTBLK(vm_info, i);
990 VASSERT(bsize % NBPG == 0);
991 VASSERT(rem % NBPG == 0);
993 pgindx -= (pgcnt_t) btop(rem);
994 blkno -= (kern_daddr_t) btodb(rem);
997 * This region could start in mid-block. If so, pgindx
998 * could be less than 0, so we adjust pgindx and blkno back
999 * up so that pgindx is 0.
1002 if (pgindx < 0) {
1003 pgcnt_t prem;
1004 prem = 0 - pgindx;
1005 pgindx = 0;
1006 count -= prem;
1007 blkno += btodb(ptob(prem));
1010 for (m = 0; m < count && pgindx < VM_REGION_SIZE(vm_info);
1011 m++, pgindx++, blkno += btodb(NBPG)) {
1013 * Note: since this only changes one block, it
1014 * assumes only one block was faulted in. Currently
1015 * this is always true for remote files, and we only
1016 * get here for remote files, so everything is ok.
1018 vm_mark_dbd(vm_info, pgindx, blkno);
/*
 * afs_pagein: page-fault handler that brings pages of an AFS file into
 * the faulting region.
 *
 * Parameters (old-style definition; no return type is spelled out on the
 * definition line):
 *   vp             - overwritten with VM_GET_PAGEIN_VNODE(vm_info) before
 *                    it is used
 *   prp, space, vaddr, wrt - passed to vm_pagein_init(); wrt also selects
 *                    the hole-allocation step for shared objects
 *   ret_startindex - in: page index of the fault; out: index at which the
 *                    I/O actually started (written on success only)
 *
 * Flow, as visible in the body:
 *   1. vm_pagein_init(); a nonzero result releases the structs and
 *      returns 1.
 *   2. The block size comes from vp->v_vfsp->vfs_bsize; the routine
 *      panics unless it is positive and a multiple of DEV_BSIZE.
 *   3. retry: VOP_GETATTR supplies the file size.  A getattr failure marks
 *      the object zombie and returns 0; a fault at or beyond the file size
 *      truncates the region and returns -SIGBUS.
 *   4. vm_wait_for_memory() is called; if the page became valid in the
 *      meantime its page count is returned, and a nonzero
 *      vm_no_io_required() result is returned as well.
 *   5. afspgin_setup_io_ranges() and afspgin_blkflsh() prepare the I/O; a
 *      zero count or VM_RETRY jumps back to retry.
 *   6. vm_prepare_io() and then afspgin_io() perform the read.  An I/O
 *      error or a zombie object goes to backout and returns -SIGBUS.
 *   7. For a shared object faulted for write, vm_alloc_hole() is called
 *      and the dbds are updated later through afspgin_update_dbd().
 *   8. On success the v_exfod counter is increased by count,
 *      *ret_startindex is set, leftover memory is released and count is
 *      returned.
 *
 * NOTE(review): the VM_PAGE_PRESENT exit uses a plain return instead of
 * vmemp_returnx(); confirm that is acceptable on builds where the vmemp_
 * macros are not stubbed out.
 * NOTE(review): the OSDEBUG assertion refers to dbd, which is not declared
 * in this function.
 * NOTE(review): start_blk, ok_dbd_limit, nvaddr, nspace, flush_start_blk,
 * flush_end_blk, i and j are declared but never referenced below.
 */
1024 afs_pagein(vp, prp, wrt, space, vaddr, ret_startindex)
1025 struct vnode *vp;
1026 preg_t *prp;
1027 int wrt;
1028 space_t space;
1029 caddr_t vaddr;
1030 pgcnt_t *ret_startindex;
1032 pgcnt_t startindex;
1033 pgcnt_t pgindx = *ret_startindex;
1034 pgcnt_t maxpagein;
1035 struct vnode *devvp;
1036 pgcnt_t count;
1037 kern_daddr_t start_blk = 0;
1038 int bsize;
1039 int error;
1040 k_off_t isize;
1041 int shared; /* writable memory mapped file */
1042 retval_t retval = 0;
1043 pgcnt_t ok_dbd_limit = 0; /* last dbd that we can trust */
1044 pgcnt_t bpages; /* number of pages per block */
1045 pgcnt_t page_count;
1046 vfspage_t *vm_info = NULL;
1047 int done;
1049 struct vattr va;
1051 caddr_t nvaddr;
1052 space_t nspace;
1053 int change_to_fstore = 0; /* need to change dbds to DBD_FSTORE */
1054 int flush_start_blk = 0;
1055 int flush_end_blk = 0;
1057 int i, j;
1059 AFS_STATCNT(afs_pagein);
1060 vmemp_lockx(); /* lock down VM empire */
1062 /* Initialize the VM info structure */
1063 done =
1064 vm_pagein_init(&vm_info, prp, pgindx, space, vaddr, wrt, 0,
1065 LGPG_ENABLE);
1067 /* Check to see if we slept and the page was faulted in. */
1068 if (done) {
1069 vm_release_structs(vm_info);
1070 vmemp_returnx(1);
1073 vp = VM_GET_PAGEIN_VNODE(vm_info);
1074 VASSERT(vp != NULL);
1075 shared = VM_SHARED_OBJECT(vm_info);
1076 VASSERT(DBD_TYPE(vm_info) != DBD_NONE);
1079 * Get the devvp and block size for this vnode type
1081 devvp = vp;
1082 bsize = vp->v_vfsp->vfs_bsize;
1083 if (bsize <= 0 || (bsize & (DEV_BSIZE - 1)))
1084 osi_Panic("afs_pagein: bsize is zero or not a multiple of DEV_BSIZE");
1086 bpages = (pgcnt_t) btop(bsize);
1087 VASSERT(bpages > 0);
1088 VM_SET_FS_MAX_PAGES(vm_info, bpages);
1090 /* this trace cannot be here because the afs_global lock might not be
1091 * held at this point. We hold the vm global lock throughout
1092 * this procedure ( and not the AFS global lock )
1093 * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEIN, ICL_TYPE_POINTER, (afs_int32) vp,
1094 * ICL_TYPE_LONG, DBD_TYPE(vm_info), ICL_TYPE_LONG, bpages,
1095 * ICL_TYPE_LONG, shared);
1097 /* Come here if we have to release the region lock before
1098 * locking pages. This can happen in memreserve() and
1099 * blkflush().
1101 retry:
1103 * For remote files like ours, we want to check to see if the file has shrunk.
1104 * If so, we should invalidate any pages past the end. In the name
1105 * of efficiency, we only do this if the page we want to fault is
1106 * past the end of the file.
1109 if (VOP_GETATTR(vp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
1110 VM_ZOMBIE_OBJECT(vm_info);
1111 vm_release_memory(vm_info);
1112 vm_release_structs(vm_info);
1113 vmemp_returnx(0);
1115 isize = va.va_size;
1116 if (vnodindx(VM_REGION(vm_info), pgindx) >= isize) {
1118 * The file has shrunk and someone is trying to access a
1119 * page past the end of the object. Shrink the object back
1120 * to its currrent size, send a SIGBUS to the faulting
1121 * process and return.
1123 * We must release the region lock before calling mtrunc(),
1124 * since mtrunc() locks all the regions that are using this
1125 * file.
1127 vm_release_memory(vm_info);
1128 vm_truncate_region(vm_info, isize);
1129 vm_release_structs(vm_info);
1130 vmemp_returnx(-SIGBUS);
1134 maxpagein = vm_pick_maxpagein(vm_info);
1135 if (vm_wait_for_memory(vm_info, maxpagein, 1)) {
1136 /* Check to see if we should continue faulting. */
1137 if (vm_page_now_valid(vm_info, &page_count)) {
1138 vm_release_memory(vm_info);
1139 vm_release_structs(vm_info);
1140 vmemp_returnx(page_count);
1143 if (count = vm_no_io_required(vm_info)) {
1144 /* Release any excess memory. */
1145 vm_release_memory(vm_info);
1146 vm_release_structs(vm_info);
1147 vmemp_returnx(count);
1149 #ifdef OSDEBUG
1151 * We should never have DBD_HOLE pages in a non-MMF region.
1153 if (!shared)
1154 VASSERT(dbd->dbd_type != DBD_HOLE);
1155 #endif
1156 VASSERT(DBD_TYPE(vm_info) != DBD_NONE);
1158 startindex = *ret_startindex;
1161 * If the page we want is in memory already, take it
1163 if (VM_MEMORY_RESERVED(vm_info) < maxpagein) {
1164 /* pick up the rest of memory now. */
1165 if (vm_wait_for_memory(vm_info, maxpagein, 0)) {
1166 if (vm_page_now_valid(vm_info, &page_count)) {
1167 vm_release_memory(vm_info);
1168 vm_release_structs(vm_info);
1169 vmemp_returnx(page_count);
1171 goto retry;
1175 if (!
1176 (count =
1177 afspgin_setup_io_ranges(vm_info, bpages, isize, startindex))) {
1178 goto retry;
1181 startindex = VM_GET_STARTINDX(vm_info);
1183 VASSERT(maxpagein >= count);
1186 * Release the memory we won't need.
1188 if (count < maxpagein) {
1189 vm_release_excess_memory(vm_info,
1190 (VM_MEMORY_RESERVED(vm_info) - count));
1193 retval = afspgin_blkflsh(vm_info, devvp, &count);
1195 if (retval == VM_RETRY) {
1196 goto retry;
1199 if (retval == VM_PAGE_PRESENT)
1200 return (count);
1202 #if 0
1204 * The definition of krusage_cntr_t is in h/kmetric.h, which
1205 * is not shipped. Since it's just statistics, we punt and do
1206 * not update it. If it's a problem we'll need to get HP to export
1207 * an interface that we can use to increment the counter.
1210 /* It's a real fault, not a reclaim */
1212 krusage_cntr_t *temp;
1213 temp = kt_cntrp(u.u_kthreadp);
1214 temp->krc_majflt++;
1216 #endif
1219 * Tell VM where the I/O intends to start. This may be different
1220 * from the faulting point.
1224 * vm_prepare_io will fill the region with pages and release the
1225 * region lock.
1227 vm_prepare_io(vm_info, &count);
1230 * Count may have been adjusted, check to make sure it's non-zero.
1232 if (count == 0) {
1233 if (vm_retry(vm_info)) {
1234 goto retry;
1238 * Release resources and retry the fault. Release any excess
1239 * memory.
1242 vm_release_memory(vm_info);
1243 vm_release_structs(vm_info);
1244 vmemp_returnx(0);
1247 error = afspgin_io(vm_info, devvp, bpages, maxpagein, count);
1249 if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
1250 retval = -SIGBUS;
1251 VM_ZOMBIE_OBJECT(vm_info);
1252 goto backout;
1255 * For a writable memory mapped file that is remote we must
1256 * detect potential holes in the file and force allocation of
1257 * disk space on the remote system. Unfortunately, there is
1258 * no easy way to do this, so this gets a little ugly.
1260 if (shared && wrt) {
1262 * See if The user wants to write to this page. Write some
1263 * minimal amount of data back to the remote file to
1264 * force allocation of file space. We only need to
1265 * write a small amount, since holes are always at
1266 * least one filesystem block in size.
1268 error = vm_alloc_hole(vm_info);
1271 * If some sort of I/O error occurred we generate a
1272 * SIGBUS for the process that caused the write,
1273 * undo our page locks, etc and return.
1275 if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
1276 VM_ZOMBIE_OBJECT(vm_info);
1277 retval = -SIGBUS;
1278 goto backout;
1282 * Change these dbds to DBD_FSTORE. We cannot do it here,
1283 * since the region must be locked, and it is not locked
1284 * at the moment. We cannot lock the region yet, as we
1285 * first have to release the page locks.
1287 change_to_fstore = 1;
1290 vm_finish_io(vm_info, count);
1293 * Acquire the lock before we play around with changing the vfd's.
1295 vm_lock(vm_info);
1297 if (change_to_fstore)
1298 afspgin_update_dbd(vm_info, bsize);
1300 #if defined(AFS_HPUX110_ENV)
1301 getppdp()->cnt.v_exfod += count;
1302 #else
1303 mpproc_info[getprocindex()].cnt.v_exfod += count;
1304 #endif
1305 vmemp_unlockx(); /* free up VM empire */
1306 *ret_startindex = startindex;
1309 * In case we have any excess memory...
1311 if (VM_MEMORY_RESERVED(vm_info))
1312 vm_release_memory(vm_info);
1313 vm_release_structs(vm_info);
1315 return count;
1317 backout:
1319 vm_finish_io_failed(vm_info, count);
1321 vm_lock(vm_info);
1323 vm_undo_validation(vm_info, count);
1326 * In case we have any excess memory...
1328 if (VM_MEMORY_RESERVED(vm_info))
1329 vm_release_memory(vm_info);
1330 vm_release_structs(vm_info);
1332 vmemp_unlockx(); /* free up VM empire */
1333 return retval;
/*
 * afs_pageout: write dirty pages of region `prp` in the page range
 * [start, end] to the backing vnode.
 *
 *   vp     - not used; the vnode paged to is taken from the VM info
 *            structure via VM_GET_PAGEOUT_VNODE().
 *   prp    - the pregion being paged out.
 *   start  - first page index to consider.
 *   end    - last page index to consider (inclusive).
 *   flags  - PAGEOUT_* flags; PAGEOUT_FREE, PAGEOUT_VHAND and
 *            PAGEOUT_HARD are decoded below, and the whole word is
 *            handed to vm_pageout_init().
 *
 * Always returns 0.  The VM empire lock (vmemp_lockx) is taken on entry
 * and dropped before returning; the AFS global lock is not taken here.
 */
afs_pageout(vp, prp, start, end, flags)
     struct vnode *vp;          /* not used */
     preg_t *prp;
     pgcnt_t start;
     pgcnt_t end;
     int flags;
{
    struct vnode *filevp;
    struct vnode *devvp;
    pgcnt_t i;
    int steal;
    int vhand;
    int hard;                   /* assigned below but never read in this function */
    int *piocnt;                /* wakeup counter used if PAGEOUT_WAIT (unused here) */
    struct ucred *old_cred;
    vfspage_t vm_info;
    fsdata_t args;

    int inode_changed = 0;
    int file_is_remote;
    struct inode *ip;           /* only set, and only used, when the file is VUFS */

    AFS_STATCNT(afs_pageout);

    steal = (flags & PAGEOUT_FREE);
    vhand = (flags & PAGEOUT_VHAND);
    hard = (flags & PAGEOUT_HARD);

    vmemp_lockx();

    /*  Initialize the VM info structure.  */
    vm_pageout_init(&vm_info, prp, start, end, 0, 0, 0, flags);

    /*
     * If the region is marked "don't swap", then don't steal any pages
     * from it.  We can, however, write dirty pages out to disk (only if
     * PAGEOUT_FREE is not set).
     */
    if (vm_no_pageout(&vm_info)) {
        vmemp_unlockx();
        return (0);
    }

    /*
     * If caller wants to wait until the I/O is complete.
     */
    vm_setup_wait_for_io(&vm_info);

    filevp = VM_GET_PAGEOUT_VNODE(&vm_info);    /* always page out to back store */
    VASSERT(filevp != NULL);

    memset((caddr_t) & args, 0, sizeof(fsdata_t));
    args.remote_down = 0;       /* assume remote file servers are up */
    args.remote = 1;            /* we are remote */
    args.bsize = 0;             /* filled up later by afs_vm_checkpage() */

    if (filevp->v_fstype == VUFS) {
        ip = VTOI(filevp);
        devvp = ip->i_devvp;
        file_is_remote = 0;
    } else {
        file_is_remote = 1;
        devvp = filevp;

        /*
         * If we are vhand(), and this is an NFS file, we need to
         * see if the NFS server is "down".  If so, we decide
         * if we will try to talk to it again, or defer pageouts
         * of dirty NFS pages until a future time.
         */
#ifdef  notdef
        if (vhand && filevp->v_fstype == VNFS && vtomi(filevp)->mi_down
            && vtomi(filevp)->mi_hard) {
            extern afs_int32 vhand_nfs_retry;
            /*
             * If there is still time left on our timer, we will
             * not talk to this server right now.
             */
            if (vhand_nfs_retry > 0)
                args.remote_down = 1;
        }
#endif
    }

    /*
     * Initialize args.  We set bsize to 0 to tell vfs_vfdcheck() that
     * it must get the file size and other attributes if it comes across
     * a dirty page.
     */
    vm_info.fs_data = (caddr_t) & args;

    /* this trace cannot be here because the afs_global lock might not be
     * held at this point. We hold the vm global lock throughout
     * this procedure ( and not the AFS global lock )
     * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEOUT, ICL_TYPE_POINTER, (afs_int32) filevp,
     * ICL_TYPE_LONG, start, ICL_TYPE_LONG, end, ICL_TYPE_LONG, flags);
     */

    i = start;

    while (i <= end) {
        struct buf *bp;
        k_off_t start;          /* NOTE: shadows the `start' parameter inside this loop */
        pgcnt_t npages;
        k_off_t nbytes;
        int error;

        extern int pageiodone();
        space_t nspace;         /* unused */
        caddr_t nvaddr;         /* unused */

        /*
         * Ask the VM system to find the next run of pages.
         */
        vm_find_next_range(&vm_info, i, end);

        /*
         * It's possible that the remote file shrunk in size.  Check the flags
         * to see if the request was beyond the end of the file.  If it was,
         * truncate the region to the file size and continue.  We could be on a
         * run so after truncation continue, there may be some I/O to write
         * out.
         */
        if (VM_FS_FLAGS(&vm_info) & PAGEOUT_TRUNCATE) {
            pgcnt_t pglen = (pgcnt_t) btorp(args.isize);

            /*
             * This page is past the end of the file.  Unlock this page
             * (region_trunc will throw it away) and then call
             * region_trunc() to invalidate all pages past the new end of
             * the file.
             */
            region_trunc(VM_REGION(&vm_info), pglen, pglen + 1);

            /*
             * remove the truncation flag.
             */
            VM_UNSETFS_FLAGS(&vm_info, PAGEOUT_TRUNCATE);
        }

        if (VM_NO_PAGEOUT_RUN(&vm_info))
            break;

        /*
         * We have a run of dirty pages [args.start...args.end].
         */
        VASSERT(filevp->v_fstype != VCDFS);
        VASSERT((filevp->v_vfsp->vfs_flag & VFS_RDONLY) == 0);
        VASSERT(VM_GET_NUM_IO(&vm_info) == 1);

        /*
         * We will be doing an I/O on the region, let the VM system know.
         */
        (void)vm_up_physio_count(&vm_info);

        /*
         * Okay, get set to perform the I/O.
         */
        inode_changed = 1;
        npages =
            (VM_END_PAGEOUT_INDX(&vm_info) + 1) -
            VM_START_PAGEOUT_INDX(&vm_info);

        /*
         * Allocate and initialize an I/O buffer.
         */
        bp = bswalloc();
        vm_init_bp(&vm_info, bp);       /* Let the VM system initialize */

        /* Identify this buffer for KI */
        bp->b_bptype = B_vfs_pageout | B_pagebf;

        if (steal)
            bp->b_flags = B_CALL | B_BUSY | B_PAGEOUT;  /* steal pages */
        else
            bp->b_flags = B_CALL | B_BUSY;      /* keep pages */

        /*
         * If we are vhand paging over NFS, we will wait for the I/O
         * to complete.
         */
        if (vhand && filevp->v_fstype == VNFS) {
            bp->b_flags &= ~B_CALL;
        } else {
            bp->b_iodone = (int (*)())pageiodone;
        }

        /*
         * Make sure we do not write past the end of the file.
         */
        nbytes = ptob(npages);
        start = vnodindx(VM_REGION(&vm_info), vm_info.start);
        if (start + nbytes > args.isize) {
#ifdef OSDEBUG
            /*
             * The amount we are off better not be bigger than a
             * filesystem block.
             */
            if (start + nbytes - args.isize >= args.bsize) {
                osi_Panic("afs_pageout: remainder too large");
            }
#endif
            /*
             * Reset the size of the I/O as necessary.  For remote
             * files, we set the size to the exact number of bytes to
             * the end of the file.  For local files, we round this up
             * to the nearest DEV_BSIZE chunk since disk I/O must always
             * be in multiples of DEV_BSIZE.  In this case, we do not
             * bother to zero out the data past the "real" end of the
             * file, this is done when the data is read (either through
             * mmap() or by normal file system access).
             */
            if (file_is_remote)
                nbytes = args.isize - start;
            else
                nbytes = roundup(args.isize - start, DEV_BSIZE);
        }

        /*
         * Now get ready to perform the I/O
         */
        if (!vm_protect_pageout(&vm_info, npages)) {
            /* Could not protect the run: undo, release the buffer, stop. */
            VASSERT(vhand);
            vm_undo_invalidation(&vm_info, vm_info.start, vm_info.end);
            vm_finish_io_failed(&vm_info, npages);
            bswfree(bp);
            break;
        }
        /*
         * If this is an NFS write by vhand(), we will not be calling
         * pageiodone().  asyncpageio() increments parolemem for us
         * if bp->b_iodone is pageiodone, so we must do it manually
         * if pageiodone() will not be called automatically.
         */
        if (!(bp->b_flags & B_CALL) && steal) {
            ulong_t context;

            SPINLOCK_USAV(pfdat_lock, context);
            parolemem += btorp(nbytes);
            SPINUNLOCK_USAV(pfdat_lock, context);
        }
        blkflush(devvp, VM_START_PAGEOUT_BLK(&vm_info), (long)nbytes,
                 (BX_NOBUFWAIT | BX_PURGE), VM_REGION(&vm_info));

        /*
         * If vhand is the one paging things out, and this is an NFS
         * file, we need to temporarily become a different user so
         * that we are not trying to page over NFS as root.  We use
         * the user credentials associated with the writable file
         * pointer that is in the pseudo-vas for this MMF.
         *
         * NOTE: we are currently using "va_rss" to store the ucred
         *       value in the vas (this should be fixed in 10.0).
         */
        old_cred = kt_cred(u.u_kthreadp);
        if (vhand) {
#if defined(AFS_HPUX1123_ENV)
            /*
             * DEE - 1123 does not have the vas.h, and it looks
             * we should never be called with a NFS type file anyway.
             * so where did this come from? Was it copied from NFS?
             * I assume it was, so we will add an assert for now
             * and see if the code runs at all.
             */
            VASSERT(filevp->v_fstype != VNFS);
#else
            set_kt_cred(u.u_kthreadp, filevp->v_vas->va_cred);

            /*
             * If root was the one who opened the mmf for write,
             * va_cred will be NULL.  So reset kt_cred(u.u_kthreadp) to what it
             * was.  We will page out as root, but that is the
             * correct thing to do in this case anyway.
             */
            if (kt_cred(u.u_kthreadp) == NULL)
                set_kt_cred(u.u_kthreadp, old_cred);
#endif
        }

        /*
         * Really do the I/O.
         */
        error =
            asyncpageio(bp, VM_START_PAGEOUT_BLK(&vm_info),
                        VM_MAPPED_SPACE(&vm_info), VM_MAPPED_ADDR(&vm_info),
                        (int)nbytes, B_WRITE, devvp);

        VASSERT(error == 0);

#ifdef  notdef
        /*
         * If we are vhand paging over NFS we want to wait for the
         * I/O to complete and take the appropriate actions if an
         * error is encountered.
         *
         * NOTE(review): this disabled block refers to `fsdata', which is
         * not declared in this function (the local is named `args').
         */
        if (vhand) {
            if (waitforpageio(bp) && nfs_mi_harddown(filevp)) {
                /*
                 * The server is down, ignore this failure, and
                 * try again later. (rfscall() has set our retry
                 * timer).
                 */
                fsdata.remote_down = 1;
                pageiocleanup(bp, 0);

                /*
                 * vm_vfdcheck() has cleared the valid bit on the
                 * vfds for these pages.  We must go back and set the
                 * valid bit, as the pages are really not gone.
                 *
                 * NOTE: we can do this because we still hold (and have
                 * not released) the region lock.
                 */
                if (steal)
                    vm_undo_invalidation(&vm_info, vm_info.start,
                                         vm_info.end);
            } else {
                /*
                 * The I/O succeeded, or we had an error that we do
                 * not want to defer until later.  Call pageiodone()
                 * to handle things.
                 */
                pageiodone(bp);
            }
        }
#endif

        /*
         * And restore our credentials to what they were.
         */
        set_kt_cred(u.u_kthreadp, old_cred);

        /*
         * If we reserved memory in vfs_vfdcheck(), (only for NFS) we
         * can now unreserve it.
         */
        if (vm_info.vm_flags & PAGEOUT_RESERVED) {
            vm_info.vm_flags &= ~PAGEOUT_RESERVED;
            vm_release_malloc_memory();
        }

        /*
         * Update statistics
         */
        if (steal) {
            if (flags & PF_DEACT) {
#if defined(AFS_HPUX110_ENV)
                getppdp()->cnt.v_pswpout += npages;
#else
                mpproc_info[getprocindex()].cnt.v_pswpout += npages;
#endif
/*              sar_bswapout += ptod(npages);*/
            } else if (vhand) {
#if defined(AFS_HPUX110_ENV)
                getppdp()->cnt.v_pgout++;
                getppdp()->cnt.v_pgpgout += npages;
#else
                mpproc_info[getprocindex()].cnt.v_pgout++;
                mpproc_info[getprocindex()].cnt.v_pgpgout += npages;
#endif
            }
        }

        /*
         * If time and patience have delivered enough
         * pages, then quit now while we are ahead.
         */
        if (VM_STOP_PAGING(&vm_info))
            break;

        /* Resume the scan just past the run that was written. */
        i = VM_END_PAGEOUT_INDX(&vm_info) - VM_BASE_OFFSET(&vm_info) + 1;
    }

    vm_finish_pageout(&vm_info);        /* update vhand's stealscan */

    vmemp_unlockx();

    /*
     * If we wanted to wait for the I/O to complete, sleep on piocnt.
     * We must decrement it by one first, and then make sure that it
     * is non-zero before going to sleep.
     */
    vm_wait_for_io(&vm_info);

    /* `ip' is valid here because !file_is_remote implies the VUFS branch ran. */
    if (inode_changed && !file_is_remote) {
        imark(ip, IUPD | ICHG);
        iupdat(ip, 0, 0);
    }
    return 0;
}
1729 afs_mapdbd(filevp, offset, bn, flags, hole, startidx, endidx)
1730 struct vnode *filevp;
1731 off_t offset;
1732 kern_daddr_t *bn; /* Block number. */
1733 int flags; /* B_READ or B_WRITE */
1734 int *hole; /* To be used for read-ahead. */
1735 pgcnt_t *startidx; /* To be used for read-ahead. */
1736 pgcnt_t *endidx; /* To be used for read-ahead. */
1738 kern_daddr_t lbn, local_bn;
1739 int on;
1740 int err;
1741 long bsize = vtoblksz(filevp) & ~(DEV_BSIZE - 1);
1743 if (startidx)
1744 *startidx = (pgcnt_t) (offset / NBPG);
1745 if (endidx)
1746 *endidx = (pgcnt_t) (offset / NBPG);
1747 if (hole)
1748 *hole = 0; /* Can't have holes. */
1749 if (bsize <= 0)
1750 osi_Panic("afs_mapdbd: zero size");
1752 lbn = (kern_daddr_t) (offset / bsize);
1753 on = offset % bsize;
1755 err = VOP_BMAP(filevp, lbn, NULL, &local_bn, flags);
1756 VASSERT(err == 0);
1759 * We can never get a bn less than zero on remote files.
1761 VASSERT(local_bn >= 0);
1763 local_bn = local_bn + btodb(on);
1764 *bn = local_bn;
1766 return (0);
1770 * Return values:
1771 * 1: The blocks are contiguous.
1772 * 0: The blocks are not contiguous.
1775 afs_vm_fscontiguous(vp, args, cur_data)
1776 struct vnode *vp;
1777 vfspage_t *args;
1778 u_int cur_data;
1780 if (cur_data == (VM_END_PAGEOUT_BLK(args) + btodb(NBPG))) {
1781 return (1);
1782 } else {
1783 return (0);
1788 * Return values:
1789 * 1: Stop, this page is the last in the block.
1790 * 0: Continue on
1791 * Terminate requests at filesystem block boundaries
1793 afs_vm_stopio(vp, args)
1794 struct vnode *vp;
1795 vfspage_t *args;
1797 fsdata_t *fsdata = (fsdata_t *) args->fs_data;
1799 #if defined(AFS_HPUX1123_ENV)
1800 uint64_t tmpdb;
1801 tmpdb = VM_END_PAGEOUT_BLK(args);
1803 if ((dbtob(tmpdb) + NBPG) % (fsdata->bsize) == 0)
1804 #else
1805 if ((dbtob(VM_END_PAGEOUT_BLK(args)) + NBPG) % (fsdata->bsize) == 0)
1806 #endif /* AFS_HPUX1123_ENV */
1808 return (1);
1809 } else {
1810 return (0);
1815 * afs_vm_checkpage is called by the VM while collecting a run of
1816 * pages on a pageout. afs_vm_checkpage() is called for each page
1817 * VM wants to write to disk.
1819 afs_vm_checkpage(vp, args, pgindx, cur_data)
1820 struct vnode *vp;
1821 vfspage_t *args;
1822 pgcnt_t pgindx;
1823 int cur_data;
1825 fsdata_t *fsdata = (fsdata_t *) args->fs_data;
1827 if (fsdata->remote_down) { /* never happens for AFS */
1829 * The remote system is down.
1831 VASSERT(args->run == 0);
1832 return 1;
1835 * A dirty page. If we have not yet determined the file size and
1836 * other attributes that we need to write out pages (the block
1837 * size and ok_dbd_limit), get that information now.
1839 if (fsdata->bsize == 0) {
1840 k_off_t isize;
1841 long bsize;
1842 struct vattr va;
1843 struct vnode *filevp;
1845 * Get the various attributes about the file. Store them
1846 * in args for the next time around.
1848 filevp = args->vp;
1850 bsize = vtoblksz(filevp);
1851 args->maxpgs = (pgcnt_t) btop(bsize);
1853 if (VOP_GETATTR(filevp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
1855 * The VOP_GETATTR() failed.
1856 * we are vhand, and this is a hard mount, we will
1857 * skip dirty pages for a while and try again later.
1859 if (args->vm_flags & PAGEOUT_VHAND) {
1860 VASSERT(args->run == 0);
1861 return 1;
1864 * This is a "soft" mount, or some other error was
1865 * returned from the server. Mark this region
1866 * as a zombie, and free this dirty page.
1868 VM_ZOMBIE_OBJECT(args);
1871 * The caller will see r_zomb and remove the page
1872 * appropriately.
1874 return (1);
1876 isize = va.va_size;
1877 fsdata->isize = isize;
1878 fsdata->bsize = bsize;
1879 fsdata->remote = 1;
1882 * See if the file has shrunk (this could have happened
1883 * asynchronously because of NFS or DUX). If so, invalidate
1884 * all of the pages past the end of the file. This is only
1885 * needed for remote files, as local files are truncated
1886 * synchronously.
1889 if (vnodindx(VM_REGION(args), pgindx) > fsdata->isize) {
1891 * This page is past the end of the file. Unlock this page
1892 * (region_trunc will throw it away) and then call region_trunc()
1893 * to invalidate all pages past the new end of the file.
1895 VM_SETFS_FLAGS(args, PAGEOUT_TRUNCATE);
1896 return (1);
1898 #ifdef notdef
1899 if ((args->vm_flags & PAGEOUT_VHAND)
1900 && (!(args->vm_flags & PAGEOUT_RESERVED))
1901 && (!(VM_IS_ZOMBIE(args)))) {
1902 VASSERT(args->run == 0);
1903 if (vm_reserve_malloc_memory(NFS_PAGEOUT_MEM)) {
1905 * Got enough memory to pageout. Mark the fact that we did
1906 * a sysprocmemreserve(), so that we can sysprocmemunreserve() it
1907 * later (in remote_pageout()).
1909 args->vm_flags |= PAGEOUT_RESERVED;
1910 } else {
1912 * We do not have enough memory to do this pageout. By
1913 * definition, we do not yet have a run, so we just unlock
1914 * this page and tell foreach_valid() to continue scanning.
1915 * If we come across another dirty page, we will try to
1916 * reserve memory again. That is okay, in fact some memory
1917 * may have freed up (as earlier pageouts complete under
1918 * interrupt).
1920 return 1;
1923 #endif
1924 return (0);
1927 afs_swapfs_len(bp)
1928 struct buf *bp;
1930 long fs_bsize;
1931 long max_size;
1932 long bnrem;
1934 fs_bsize = vtoblksz(bp->b_vp);
1936 * Check to see if we are starting mid block. If so, then
1937 * we must return the remainder of the block or less depending
1938 * on the length.
1940 bnrem = bp->b_offset % fs_bsize;
1941 if (bnrem) {
1942 max_size = fs_bsize - bnrem;
1943 } else {
1944 max_size = fs_bsize;
1947 if (bp->b_bcount > max_size) {
1948 return (max_size);
1949 } else {
1950 return (bp->b_bcount);
1954 afs_mmap(vp, off, size_bytes, access)
1955 struct vnode *vp;
1956 u_int off;
1957 #if defined(AFS_HPUX1111_ENV)
1958 u_long size_bytes;
1959 #else
1960 u_int size_bytes;
1961 #endif
1962 int access;
1964 long bsize = vtoblksz(vp);
1966 if (bsize % NBPG != 0) {
1967 return (EINVAL);
1970 return (0);
1973 afs_cachelimit(vp, len, location)
1974 struct vnode *vp;
1975 k_off_t len;
1976 int *location;
1979 * Disk addresses are logical, not physical, so fragments are
1980 * transparent.
1982 *location = btorp(len) + 1;
/*
 * afs_release: nothing to do for this vnode operation; always
 * returns 0.
 */
int
afs_release(vp)
     struct vnode *vp;
{
    return 0;
}
1992 afs_unmap(vp, off, size_bytes, access)
1993 struct vnode *vp;
1994 u_int off;
1995 #if defined(AFS_HPUX1111_ENV)
1996 u_long size_bytes;
1997 #else
1998 u_int size_bytes;
1999 #endif
2000 int access;
2002 return 0;
2006 afs_read_ahead(vp, prp, wrt, space, vaddr, rhead_cnt)
2007 struct vnode *vp;
2008 preg_t *prp;
2009 int wrt;
2010 space_t space;
2011 caddr_t vaddr;
2012 pgcnt_t *rhead_cnt;
2014 printf("afs_read_ahead returning 0 \n");
2015 return 0;
/*
 * afs_prealloc: preallocation is not supported; a message is logged
 * and ENOSPC is returned unconditionally.
 */
int
afs_prealloc(vp, size, ignore_minfree, reserved)
     struct vnode *vp;
     /* DEE on 11.22 following is off_t */
     size_t size;
     int ignore_minfree;
     int reserved;
{
    printf("afs_prealloc returning ENOSPC\n");

    return (ENOSPC);
}
2031 afs_ioctl(vp, com, data, flag, cred)
2032 struct vnode *vp;
2033 int com;
2034 caddr_t data;
2035 int flag;
2036 struct ucred *cred;
2038 int error;
2039 struct afs_ioctl afsioctl, *ai;
2041 AFS_STATCNT(afs_ioctl);
2043 /* The call must be a VICEIOCTL call */
2044 if (((com >> 8) & 0xff) == 'V') {
2045 #ifdef notdef
2046 /* AFS_COPYIN returns error 14. Copy data in instead */
2047 AFS_COPYIN(data, (caddr_t) & afsioctl, sizeof(afsioctl), error);
2048 if (error)
2049 return (error);
2050 #endif
2051 ai = (struct afs_ioctl *)data;
2052 afsioctl.in = ai->in;
2053 afsioctl.out = ai->out;
2054 afsioctl.in_size = ai->in_size;
2055 afsioctl.out_size = ai->out_size;
2056 error = HandleIoctl(VTOAFS(vp), com, &afsioctl);
2057 return (error);
2059 return (ENOTTY);
/*
 * roundtoint(x): round x up to a multiple of the alignment unit
 *                (sizeof(long) on 11.11 and later, sizeof(int) otherwise).
 * reclen(dp):    rounded record length for a directory entry `dp',
 *                computed from its d_namlen plus one byte and the fixed
 *                leading fields.
 */
#if defined(AFS_HPUX1111_ENV)
/* looks like even if appl is 32 bit, we need to round to 8 bytes */
/* This had no effect, it must not be being used */

#define roundtoint(x)   (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
#define reclen(dp)      roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
                                sizeof(u_int) + 2 * sizeof(u_short)))
#else

#define roundtoint(x)   (((x) + (sizeof(int) - 1)) & ~(sizeof(int) - 1))
#define reclen(dp)      roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
                                2 * sizeof(u_short)))
#endif
2077 afs_readdir(vp, uiop, cred)
2078 struct vnode *vp;
2079 struct uio *uiop;
2080 struct ucred *cred;
2082 struct uio auio;
2083 struct iovec aiov;
2084 caddr_t ibuf, obuf, ibufend, obufend;
2085 struct __dirent32 *idp;
2086 struct dirent *odp;
2087 int count, outcount;
2088 dir_off_t offset;
2089 uint64_t tmp_offset;
2091 memset(&auio, 0, sizeof(auio));
2092 memset(&aiov, 0, sizeof(aiov));
2094 count = uiop->uio_resid;
2095 /* Allocate temporary space for format conversion */
2096 ibuf = kmem_alloc(2 * count); /* overkill - fix later */
2097 obuf = kmem_alloc(count + sizeof(struct dirent));
2098 aiov.iov_base = ibuf;
2099 aiov.iov_len = count;
2100 auio.uio_iov = &aiov;
2101 auio.uio_iovcnt = 1;
2102 offset = auio.uio_offset = uiop->uio_offset;
2103 auio.uio_seg = UIOSEG_KERNEL;
2104 auio.uio_resid = count;
2105 auio.uio_fpflags = 0;
2107 u.u_error = mp_afs_readdir2(vp, &auio, cred);
2108 if (u.u_error)
2109 goto out;
2111 /* Convert entries from __dirent32 to dirent format */
2113 for (idp = (struct __dirent32 *)ibuf, odp =
2114 (struct dirent *)obuf, ibufend =
2115 ibuf + (count - auio.uio_resid), obufend = obuf + count;
2116 (caddr_t) idp < ibufend;
2117 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
2118 (struct dirent *)((caddr_t) odp + odp->d_reclen)) {
2119 odp->d_ino = idp->__d_ino;
2120 odp->d_namlen = idp->__d_namlen;
2121 (void)strcpy(odp->d_name, idp->__d_name);
2122 odp->d_reclen = reclen(odp);
2123 if ((caddr_t) odp + odp->d_reclen > obufend)
2124 break;
2125 /* record offset *after* we're sure to use this entry */
2126 memcpy((char *)&tmp_offset, (char *)&idp->__d_off, sizeof tmp_offset);
2127 offset = tmp_offset;
2130 outcount = (caddr_t) odp - obuf;
2131 AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
2132 if (u.u_error)
2133 goto out;
2134 uiop->uio_offset = offset;
2135 out:
2136 kmem_free(ibuf, count);
2137 kmem_free(obuf, count + sizeof(struct dirent));
2138 return u.u_error;
/*
 * roundtolong(x):      round x up to a multiple of sizeof(long).
 * reclen_dirent64(dp): rounded record length for a __dirent64 entry
 *                      `dp', computed from its __d_namlen plus one byte
 *                      and the fixed leading fields.
 */
#define roundtolong(x)   (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
#define reclen_dirent64(dp)     roundtolong(((dp)->__d_namlen + 1 + (2*sizeof(u_long)) +\
                                2 * sizeof(u_short)))
2147 afs_readdir3(vp, uiop, cred)
2148 struct vnode *vp;
2149 struct uio *uiop;
2150 struct ucred *cred;
2152 struct uio auio;
2153 struct iovec aiov;
2154 caddr_t ibuf, obuf, ibufend, obufend;
2155 struct __dirent32 *idp;
2156 struct __dirent64 *odp;
2157 int count, outcount;
2158 dir_off_t offset;
2160 memset(&auio, 0, sizeof(auio));
2161 memset(&aiov, 0, sizeof(aiov));
2163 count = uiop->uio_resid;
2164 /* Allocate temporary space for format conversion */
2165 ibuf = kmem_alloc(2 * count); /* overkill - fix later */
2166 obuf = kmem_alloc(count + sizeof(struct __dirent64));
2167 aiov.iov_base = ibuf;
2168 aiov.iov_len = count;
2169 auio.uio_iov = &aiov;
2170 auio.uio_iovcnt = 1;
2171 offset = auio.uio_offset = uiop->uio_offset;
2172 auio.uio_seg = UIOSEG_KERNEL;
2173 auio.uio_resid = count;
2174 auio.uio_fpflags = 0;
2176 u.u_error = mp_afs_readdir2(vp, &auio, cred);
2177 if (u.u_error)
2178 goto out;
2180 /* Convert entries from __dirent32 to __dirent64 format */
2182 for (idp = (struct __dirent32 *)ibuf, odp =
2183 (struct __dirent64 *)obuf, ibufend =
2184 ibuf + (count - auio.uio_resid), obufend = obuf + count;
2185 (caddr_t) idp < ibufend;
2186 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
2187 (struct __dirent64 *)((caddr_t) odp + odp->__d_reclen)) {
2188 memcpy((char *)&odp->__d_off, (char *)&idp->__d_off,
2189 sizeof odp->__d_off);
2190 odp->__d_ino = idp->__d_ino;
2191 odp->__d_namlen = idp->__d_namlen;
2192 (void)strcpy(odp->__d_name, idp->__d_name);
2193 odp->__d_reclen = reclen_dirent64(odp);
2194 if ((caddr_t) odp + odp->__d_reclen > obufend)
2195 break;
2196 /* record offset *after* we're sure to use this entry */
2197 offset = odp->__d_off;
2200 outcount = (caddr_t) odp - obuf;
2201 AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
2202 if (u.u_error)
2203 goto out;
2204 uiop->uio_offset = offset;
2205 out:
2206 kmem_free(ibuf, count);
2207 kmem_free(obuf, count + sizeof(struct __dirent64));
2208 return u.u_error;
#define AFS_SV_SEMA_HASH 1
#define AFS_SV_SEMA_HASH_DEBUG 0

#if AFS_SV_SEMA_HASH
/* This portion of the code was originally used to implement
 * thread specific storage for the semaphore save area. However,
 * there were some spare fields in the proc structure, this is
 * now being used for the saving semaphores.  Hence, this portion of
 * the code is no longer used.
 *
 * NOTE(review): the paragraph above is historical; AFS_SV_SEMA_HASH is
 * defined to 1 just above and a later comment in this section says the
 * code was revived, so this code is compiled in.
 */

/* This portion of the code implements thread specific information.
 * The thread id is passed in as the key. The semaphore saved area
 * is hashed on this key.
 */

/* why is this hash table required ?
 * The AFS code is written in such a way that a GLOCK() is done in
 * one function and the GUNLOCK() is done in another function further
 * down the call chain. The GLOCK() call has to save the current
 * semaphore status before acquiring afs_global_sema. The GUNLOCK
 * has to release afs_global_sema and reacquire the semaphore status
 * that existed before the corresponding GLOCK. If GLOCK() and
 * GUNLOCK() were called in the same function, the GLOCK call could
 * have stored the saved semaphore status in a local variable and the
 * corresponding GUNLOCK() call could have restored the original
 * status from this local variable. But this is not the case with
 * AFS code. Hence, we have to implement a thread specific semaphore
 * save area. This is implemented as a hash table. The key is the
 * thread id.
 */

/* In order for multithreaded processes to work, the sv_sema structures
 * must be saved on a per-thread basis, not a per-process basis.  There
 * is no per-thread storage available to hijack in the OS per-thread
 * data structures (e.g. struct user) so we revive this code.
 * I removed the upper limit on the memory consumption since we don't
 * know how many threads there will be.  Now the code first checks the
 * freeList.  If that fails it then tries garbage collecting.  If that
 * doesn't free up anything then it allocs what it needs.
 */

#define ELEMENT         sv_sema_t       /* payload stored per thread */
#define KEY             tid_t           /* lookup key: the thread id */
/* Bucket index for a key; NOTE(review): assumes keys are non-negative. */
#define Hash(xx)        (  (xx) % sizeOfHashTable )
#define hashLockInit(xx) initsema(&xx,1, FILESYS_SEMA_PRI, FILESYS_SEMA_ORDER)
#define hashLock(xx)    MP_PSEMA(&xx)
#define hashUnlock(xx)  MP_VSEMA(&xx)

/* One hash entry: a save area, the key it belongs to, and a hold count. */
typedef struct elem {
    struct elem *next;          /* next entry in the bucket chain or free list */
    ELEMENT element;
    KEY key;
    int refCnt;                 /* entries with refCnt 0 may be garbage collected */
} Element;

/* One hash bucket: a lock and the chain of entries it protects. */
typedef struct bucket {
    sema_t lock;
    Element *element;
} Bucket;

static int sizeOfHashTable;     /* number of buckets, set by afsHash() */
static Bucket *hashTable;       /* the bucket array, allocated by afsHash() */

static int currentSize = 0;     /* running count of bytes in use by the table */
static Element *freeList;       /* free list */

#pragma align 64
static sema_t afsHashLock = { 0 };      /* global lock for hash table */

static void afsHashGarbageCollect();

/*
** The global lock protects the global data structures,
** e.g. freeList and currentSize.
** The bucket lock protects the link list hanging off that bucket.
** The lock hierarchy : one can obtain the bucket lock while holding
** the global lock, but not vice versa.
*/
2292 void
2293 afsHash(int nbuckets)
2294 { /* allocate the hash table */
2295 int i;
2297 #if AFS_SV_SEMA_HASH_DEBUG
2298 printf("afsHash: enter\n");
2299 #endif
2301 sizeOfHashTable = nbuckets;
2302 currentSize = nbuckets * sizeof(Bucket);
2304 if (hashTable)
2305 osi_Panic("afs: SEMA Hashtable already created\n");
2307 hashTable = (Bucket *) AFS_KALLOC(sizeOfHashTable * sizeof(Bucket));
2308 if (!hashTable)
2309 osi_Panic("afs: cannot create SEMA Hashtable\n");
2311 /* initialize the hash table and associated locks */
2312 memset(hashTable, 0, sizeOfHashTable * sizeof(Bucket));
2313 for (i = 0; i < sizeOfHashTable; i++)
2314 hashLockInit(hashTable[i].lock);
2315 hashLockInit(afsHashLock);
2317 #if AFS_SV_SEMA_HASH_DEBUG
2318 printf("afsHash: exit\n");
2319 #endif
2322 ELEMENT *
2323 afsHashInsertFind(KEY key)
2325 int index;
2326 Element *ptr;
2328 #if AFS_SV_SEMA_HASH_DEBUG
2329 printf("afsHashInsertFind: %d\n", key);
2330 #endif
2331 if (!hashTable)
2332 osi_Panic("afs: afsHashInsertFind: no hashTable\n");
2334 index = Hash(key); /* get bucket number */
2335 hashLock(hashTable[index].lock); /* lock this bucket */
2336 ptr = hashTable[index].element;
2338 /* if it is already there */
2339 while (ptr) {
2340 if (ptr->key == key) {
2341 ptr->refCnt++; /* hold it */
2342 hashUnlock(hashTable[index].lock);
2343 #if AFS_SV_SEMA_HASH_DEBUG
2344 printf("afsHashInsertFind: %d FOUND\n", key);
2345 #endif
2346 return &(ptr->element);
2347 } else {
2348 ptr = ptr->next;
2352 hashUnlock(hashTable[index].lock);
2354 /* if something exists in the freeList, take it from there */
2355 ptr = NULL;
2356 hashLock(afsHashLock);
2358 if (freeList) {
2359 ptr = freeList; /* reuse entry */
2360 freeList = freeList->next;
2361 } else {
2362 afsHashGarbageCollect(); /* afsHashLock locked */
2363 if (freeList) {
2364 ptr = freeList; /* reuse entry */
2365 freeList = freeList->next;
2366 } else {
2367 ptr = (Element *) AFS_KALLOC(sizeof(Element));
2371 currentSize += sizeof(Element); /* update memory used */
2372 hashUnlock(afsHashLock);
2374 if (!ptr)
2375 osi_Panic("afs: SEMA Hashtable cannot create new entry\n");
2376 /* create new entry */
2377 ptr->key = key;
2378 memset(&ptr->element, 0, sizeof(ptr->element));
2379 ptr->refCnt = 1; /* this guy */
2381 /* insert new entry in bucket */
2382 hashLock(hashTable[index].lock); /* lock this bucket */
2383 ptr->next = hashTable[index].element;
2384 hashTable[index].element = ptr;
2385 hashUnlock(hashTable[index].lock);
2387 #if AFS_SV_SEMA_HASH_DEBUG
2388 printf("afsHashInsertFind: %d MADE\n", key);
2389 #endif
2391 return &(ptr->element);
2394 ELEMENT *
2395 afsHashFind(KEY key)
2397 int index;
2398 Element *ptr;
2400 #if AFS_SV_SEMA_HASH_DEBUG
2401 printf("afsHashFind: %d\n", key);
2402 #endif
2403 if (!hashTable)
2404 osi_Panic("afs: afsHashFind: no hashTable\n");
2406 index = Hash(key); /* get bucket number */
2407 hashLock(hashTable[index].lock); /* lock this bucket */
2408 ptr = hashTable[index].element;
2410 /* it should be in the hash table */
2411 while (ptr) {
2412 if (ptr->key == key) {
2413 if (ptr->refCnt <= 0)
2414 osi_Panic("afs: SEMA HashTable entry already released\n");
2415 hashUnlock(hashTable[index].lock);
2416 #if AFS_SV_SEMA_HASH_DEBUG
2417 printf("afsHashFind: %d FOUND\n", key);
2418 #endif
2419 return &(ptr->element);
2420 } else {
2421 ptr = ptr->next;
2425 hashUnlock(hashTable[index].lock);
2426 /* it better be in the hash table */
2427 osi_Panic("afs: SEMA HashTable wants non-existent entry \n");
2428 return 0;
2431 void
2432 afsHashRelease(KEY key)
2434 int index;
2435 Element *ptr;
2437 #if AFS_SV_SEMA_HASH_DEBUG
2438 printf("afsHashRelease: %d\n", key);
2439 #endif
2440 if (!hashTable)
2441 osi_Panic("afs: afsHashRelease: no hashTable\n");
2443 index = Hash(key); /* get bucket number */
2444 hashLock(hashTable[index].lock); /* lock this bucket */
2445 ptr = hashTable[index].element;
2447 /* it should be in the hash table */
2448 while (ptr) {
2449 if (ptr->key == key) {
2450 if (ptr->refCnt <= 0)
2451 osi_Panic("afs: SEMA HashTable entry already released\n");
2452 ptr->refCnt--; /* release this guy */
2453 hashUnlock(hashTable[index].lock);
2454 #if AFS_SV_SEMA_HASH_DEBUG
2455 printf("afsHashRelease: %d FOUND\n", key);
2456 #endif
2457 return;
2458 } else {
2459 ptr = ptr->next;
2463 hashUnlock(hashTable[index].lock);
2464 /* it better be in the hash table */
2465 osi_Panic("afs: SEMA HashTable deleting non-existent entry \n");
2468 /* this should be called with afsHashLock WRITE locked */
2469 static void
2470 afsHashGarbageCollect()
2472 int index;
2473 Element *ptr;
2474 int foundFlag = 0;
2476 if (!hashTable)
2477 osi_Panic("afs: afsHashGarbageCollect: no hashTable\n");
2479 for (index = 0; index < sizeOfHashTable; index++) {
2480 hashLock(hashTable[index].lock);
2481 ptr = hashTable[index].element; /* pick up bucket */
2483 while (ptr && !ptr->refCnt) {
2484 /* insert this element into free list */
2485 Element *temp;
2486 temp = ptr->next;
2487 ptr->next = freeList;
2488 freeList = ptr;
2490 foundFlag = 1; /* found at least one */
2491 currentSize -= sizeof(Element);
2492 ptr = temp;
2494 hashTable[index].element = ptr;
2496 /* scan thru the remaining list */
2497 if (ptr) {
2498 while (ptr->next) {
2499 if (ptr->next->refCnt == 0) {
2500 /* collect this element */
2501 Element *temp;
2502 temp = ptr->next;
2503 ptr->next = ptr->next->next;
2504 temp->next = freeList;
2505 freeList = temp;
2506 foundFlag = 1;
2507 currentSize -= sizeof(Element);
2508 } else {
2509 ptr = ptr->next;
2513 hashUnlock(hashTable[index].lock);
2515 #if 0
2516 if (!foundFlag)
2517 osi_Panic("afs: SEMA HashTable full\n");
2518 #endif
2521 #endif /* AFS_SV_SEMA_HASH */
2524 afs_hp_strategy(bp)
2525 struct buf *bp;
2527 afs_int32 code;
2528 struct uio tuio;
2529 struct iovec tiovec[1];
2530 extern caddr_t hdl_kmap_bp();
2531 struct kthread *t = u.u_kthreadp;
2533 memset(&tuio, 0, sizeof(tuio));
2534 memset(&tiovec, 0, sizeof(tiovec));
2536 AFS_STATCNT(afs_hp_strategy);
2538 * hdl_kmap_bp() saves "b_bcount" and restores it in hdl_remap_bp() after
2539 * the I/O. We must save and restore the count because pageiodone()
2540 * uses b_bcount to determine how many pages to unlock.
2542 * Remap the entire range.
2544 hdl_kmap_bp(bp);
2546 AFS_GLOCK();
2547 afs_Trace4(afs_iclSetp, CM_TRACE_HPSTRAT, ICL_TYPE_POINTER, bp->b_vp,
2548 ICL_TYPE_LONG, (int)bp->b_blkno * DEV_BSIZE, ICL_TYPE_LONG,
2549 bp->b_bcount, ICL_TYPE_LONG, 0);
2551 /* Set up the uio structure */
2552 tuio.afsio_iov = tiovec;
2553 tuio.afsio_iovcnt = 1;
2554 tuio.afsio_offset = DEV_BSIZE * bp->b_blkno;
2555 tuio.afsio_seg = AFS_UIOSYS;
2556 tuio.afsio_resid = bp->b_bcount;
2557 tuio.uio_fpflags = 0;
2558 tiovec[0].iov_base = bp->b_un.b_addr;
2559 tiovec[0].iov_len = bp->b_bcount;
2561 /* Do the I/O */
2562 if ((bp->b_flags & B_READ) == B_READ) {
2563 /* read b_bcount bytes into kernel address b_un.b_addr
2564 * starting at byte DEV_BSIZE * b_blkno. Bzero anything
2565 * we can't read, and finally call iodone(bp). File is
2566 * in bp->b_vp. Credentials are from u area??
2568 code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_READ, 0, kt_cred(t));
2569 if (code == 0)
2570 if (tuio.afsio_resid > 0) {
2571 privlbzero(bvtospace(bp, bp->b_un.b_addr),
2572 bp->b_un.b_addr + bp->b_bcount - tuio.afsio_resid,
2573 (size_t) tuio.afsio_resid);
2576 } else
2577 code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_WRITE, 0, kt_cred(t));
2579 /* Remap back to the user's space */
2580 hdl_remap_bp(bp);
2582 AFS_GUNLOCK();
2584 iodone(bp);
2585 return code;
2588 afs_pathconf(vp, name, resultp, cred)
2589 struct vnode *vp;
2590 int name;
2591 int *resultp;
2592 struct ucred *cred; /* unused */
2594 switch (name) {
2595 case _PC_LINK_MAX: /* Maximum number of links to a file */
2596 *resultp = 255; /* an unsigned short on the fileserver */
2597 break; /* a unsigned char in the client.... */
2599 case _PC_NAME_MAX: /* Max length of file name */
2600 *resultp = 255;
2601 break;
2603 case _PC_PATH_MAX: /* Maximum length of Path Name */
2604 *resultp = 1024;
2605 break;
2607 case _PC_PIPE_BUF: /* Max atomic write to pipe. See fifo_vnops */
2608 case _PC_CHOWN_RESTRICTED: /* Anybody can chown? */
2609 case _PC_NO_TRUNC: /* No file name truncation on overflow? */
2610 u.u_error = EOPNOTSUPP;
2611 return (EOPNOTSUPP);
2612 break;
2614 case _PC_MAX_CANON: /* TTY buffer size for canonical input */
2615 /* need more work here for pty, ite buffer size, if differ */
2616 if (vp->v_type != VCHR) {
2617 u.u_error = EINVAL;
2618 return (EINVAL);
2620 *resultp = CANBSIZ; /*for tty */
2621 break;
2623 case _PC_MAX_INPUT:
2624 /* need more work here for pty, ite buffer size, if differ */
2625 if (vp->v_type != VCHR) { /* TTY buffer size */
2626 u.u_error = EINVAL;
2627 return (EINVAL);
2629 *resultp = TTYHOG; /*for tty */
2630 break;
2632 case _PC_VDISABLE:
2633 /* Terminal special characters can be disabled? */
2634 if (vp->v_type != VCHR) {
2635 u.u_error = EINVAL;
2636 return (EINVAL);
2638 *resultp = 1;
2639 break;
2641 case _PC_SYNC_IO:
2642 if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
2643 *resultp = -1;
2644 return EINVAL;
2646 *resultp = 1; /* Synchronized IO supported for this file */
2647 break;
2649 case _PC_FILESIZEBITS:
2650 if (vp->v_type != VDIR)
2651 return (EINVAL);
2652 *resultp = MAX_SMALL_FILE_BITS;
2653 break;
2655 default:
2656 return (EINVAL);
2659 return (0);