/*
 * Copyright 2000, International Business Machines Corporation and others.
 *
 * This software has been released under the terms of the IBM Public
 * License.  For details, see the LICENSE file in the top-level source
 * directory or online at http://www.openafs.org/dl/license10.html
 */

/* This is a placeholder for routines unique to the port of AFS to hp-ux */

#include <afsconfig.h>
#include "afs/param.h"

#include "afs/sysincludes.h"	/* Standard vendor system headers */
#include "afsincludes.h"	/* Afs-based standard headers */
#include "afs/afs_stats.h"	/* statistics stuff */

#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/pathname.h>
extern struct vfsops Afs_vfsops;
extern int afs_hp_strategy();
extern int afs_bmap(), afs_badop(), afs_noop(), afs_lockf();
extern int afs_pagein();
extern int afs_pageout();
extern int afs_ioctl();
extern int afs_prealloc();
extern int afs_mapdbd();
extern int afs_mmap();
extern int afs_cachelimit();
extern int afs_vm_checkpage();
extern int afs_vm_fscontiguous();
extern int afs_vm_stopio();
extern int afs_read_ahead();
extern int afs_unmap();
extern int afs_release();
extern int afs_swapfs_len();
extern int afs_readdir2();
extern int afs_readdir();
extern int afs_readdir3();
extern int afs_pathconf();
extern int afs_close();

#define vtoblksz(vp) ((vp)->v_vfsp->vfs_bsize)
#if defined(AFS_HPUX110_ENV)
/* We no longer need to lock on the VM Empire,
 * or at least that is what is claimed,
 * so we turn the vmemp_ routines into no-ops.
 * This needs to be looked at more closely.
 */
#define vmemp_returnx(a) return(a)
#define vmemp_unlockx()
#if !defined(AFS_HPUX110_ENV)
/*
 * Copy an mbuf to the contiguous area pointed to by cp.
 * Skip <off> bytes and copy <len> bytes.
 * Returns the number of bytes not transferred.
 * The mbuf is NOT changed.
 */
m_cpytoc(m, off, len, cp)
    if (m == NULL || off < 0 || len < 0 || cp == NULL)
	osi_Panic("m_cpytoc");

    if (m->m_len <= off) {
	ml = MIN(len, m->m_len - off);
	memcpy(cp, mtod(m, caddr_t) + off, (u_int) ml);
	memcpy(cp, mtod(m, caddr_t), (u_int) ml);
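/*
 * Hedged sketch of the copy loop m_cpytoc() implements (it assumes the
 * usual BSD mbuf fields m_next/m_len and the mtod() accessor; this is an
 * illustration, not code taken verbatim from this file):
 */
#if 0
int remaining = len;
while (m != NULL && remaining > 0) {
    if (off >= m->m_len) {
	off -= m->m_len;	/* skip this whole mbuf */
    } else {
	int chunk = MIN(remaining, m->m_len - off);
	memcpy(cp, mtod(m, caddr_t) + off, (u_int) chunk);
	cp += chunk;
	remaining -= chunk;
	off = 0;
    }
    m = m->m_next;
}
return remaining;		/* bytes not transferred */
#endif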
/*
 * Note that the standard Sun vnode interface doesn't have a vop_lockf(), so this code is
 * totally new.  This came about because HP-UX has lockf() implemented as
 * a system call while Sun has it implemented as a library routine (apparently).
 * To handle this, we have to translate the lockf() request into an
 * fcntl()-looking request, and then translate the results back if necessary.
 * We call afs_lockctl() directly.
 */
afs_lockf(vp, flag, len, cred, fp, LB, UB)
    /* for now, just pretend it works */
    struct k_flock flock;

    /*
     * Create a flock structure and translate the lockf request
     * into an appropriate looking fcntl() type request for afs_lockctl().
     */
    flock.l_start = fp->f_offset;
    /* convert negative lengths to positive */
    if (flock.l_len < 0) {
	flock.l_start += flock.l_len;
	flock.l_len = -(flock.l_len);

    /*
     * Adjust values to look like fcntl() requests.
     * All locks are write locks, only F_LOCK requests
     * are blocking.  F_TEST has to be translated into
     * a get lock and then back again.
     */
	flock.l_type = F_WRLCK;

	flock.l_type = F_UNLCK;

    u.u_error = mp_afs_lockctl(vp, &flock, cmd, fp->f_cred);
	return (u.u_error);	/* some other error code */

    /*
     * If the request is F_TEST, and GETLK changed
     * the lock type to F_UNLCK, then return 0; else
     * set errno to EACCES and return.
     */
    if (flag == F_TEST && flock.l_type != F_UNLCK) {
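/*
 * Hedged sketch of the lockf -> fcntl-style translation described above.
 * The F_LOCK/F_TLOCK/F_ULOCK/F_TEST request names and the cmd selection
 * are assumptions about the callers; only flock.l_* and afs_lockctl()
 * appear in this file.
 */
#if 0
struct k_flock flock;

flock.l_whence = 0;
flock.l_start = fp->f_offset;	/* lock region begins at the file offset */
flock.l_len = len;
if (flock.l_len < 0) {		/* a negative length locks backwards */
    flock.l_start += flock.l_len;
    flock.l_len = -(flock.l_len);
}
flock.l_type = (flag == F_ULOCK) ? F_UNLCK : F_WRLCK;
cmd = (flag == F_LOCK) ? F_SETLKW	/* only F_LOCK blocks */
    : (flag == F_TEST) ? F_GETLK	/* probe, then translate back */
    : F_SETLK;
#endif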
#if defined(AFS_HPUX1122_ENV)
#include "machine/vm/vmparam.h"
#include "../machine/vmparam.h"	/* For KERNELSPACE */
#if !defined(AFS_HPUX1123_ENV)
/* 11.23 uses 64 bits in many cases */
#define kern_daddr_t daddr_t
#include "ufs/inode.h"
#if defined(AFS_HPUX1123_ENV)
#endif /* AFS_HPUX1123_ENV */
#include "h/region.h"
#include "h/pregion.h"
#include "h/vmmeter.h"
#include "h/sysinfo.h"
#if !defined(AFS_HPUX1123_ENV)
#include "h/tuneable.h"
#include "netinet/in.h"
/* a freelist of one */
struct buf *afs_bread_freebp = 0;

/*
 * Only rfs_read calls this, and it only looks at bp->b_un.b_addr.
 * Thus we can use fake bufs (i.e. not from the real buffer pool).
 */
afs_bread(vp, lbn, bpp)
    int offset, fsbsize, error;

    memset(&uio, 0, sizeof(uio));
    memset(&iov, 0, sizeof(iov));

    AFS_STATCNT(afs_bread);
    fsbsize = vp->v_vfsp->vfs_bsize;
    offset = lbn * fsbsize;
    if (afs_bread_freebp) {
	bp = afs_bread_freebp;
	afs_bread_freebp = 0;
	bp = (struct buf *)AFS_KALLOC(sizeof(*bp));
	bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);

    iov.iov_base = bp->b_un.b_addr;
    iov.iov_len = fsbsize;
    uio.afsio_iov = &iov;
    uio.afsio_iovcnt = 1;
    uio.afsio_seg = AFS_UIOSYS;
    uio.afsio_offset = offset;
    uio.afsio_resid = fsbsize;
    error = afs_read(VTOAFS(vp), &uio, p_cred(u.u_procp), 0);
	afs_bread_freebp = bp;

	afs_bread_freebp = bp;
    *(struct buf **)&bp->b_vp = bp;	/* mark as fake */

    AFS_STATCNT(afs_brelse);
    if ((struct buf *)bp->b_vp != bp) {	/* not fake */
	ufs_brelse(bp->b_vp, bp);
    } else if (afs_bread_freebp) {
	AFS_KFREE(bp->b_un.b_addr, vp->v_vfsp->vfs_bsize);
	AFS_KFREE(bp, sizeof(*bp));
	afs_bread_freebp = bp;
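/*
 * Illustrative sketch (an assumption about intent, not taken verbatim from
 * this file): the "freelist of one" caches a single fake buf between
 * afs_bread()/afs_brelse() calls so the common rfs_read path avoids a
 * fresh kernel allocation per read.  A fake buf is recognized because its
 * b_vp field points back at the buf itself.
 */
#if 0
struct buf *bp;
if (afs_bread_freebp) {		/* reuse the cached fake buf */
    bp = afs_bread_freebp;
    afs_bread_freebp = 0;
} else {			/* otherwise allocate a fresh one */
    bp = (struct buf *)AFS_KALLOC(sizeof(*bp));
    bp->b_un.b_addr = (caddr_t) AFS_KALLOC(fsbsize);
}
*(struct buf **)&bp->b_vp = bp;	/* mark as fake: b_vp points at itself */
#endif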
afs_bmap(avc, abn, anvp, anbn)
     kern_daddr_t abn, *anbn;
     struct vcache **anvp;
    AFS_STATCNT(afs_bmap);
    *anbn = abn * (8192 / DEV_BSIZE);	/* in 512 byte units */
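/*
 * Worked example for the conversion above (numbers assumed): with an
 * 8192-byte AFS block and DEV_BSIZE == 512, logical block 3 maps to
 * device block 3 * (8192 / 512) == 48, i.e. each AFS block spans 16
 * 512-byte device units.
 */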
afs_inactive(avc, acred)
    struct vnode *vp = AFSTOV(avc);

    if (afs_shuttingdown != AFS_RUNNING)

    /*
     * In Solaris and HPUX s800 and HP-UX 10.0 they actually call us with
     * v_count 1 on the last reference!
     */
    MP_H_SPINLOCK_USAV(vn_h_sl_pool, vp, &sv_lock, &context);
    if (avc->vrefCount < 1)
	osi_Panic("afs_inactive : v_count < 1\n");

    /*
     * If more than 1, don't unmap the vnode but do decrement the ref count.
     */
    if (vp->v_count > 0) {
	MP_SPINUNLOCK_USAV(sv_lock, context);

    MP_SPINUNLOCK_USAV(sv_lock, context);
    afs_InactiveVCache(avc, acred);
mp_afs_open(struct vnode **avcp, int aflags, afs_ucred_t *acred)
    code = afs_open(avcp, aflags, acred);

mp_afs_close(struct vnode *avcp, int aflags, afs_ucred_t *acred)
    code = afs_close(avcp, aflags, acred);

mp_afs_rdwr(struct vnode *avcp, struct uio *uio, enum uio_rw arw,
	    int aio, afs_ucred_t *acred)
    save_resid = uio->uio_resid;
    code = afs_rdwr(avcp, uio, arw, aio, acred);
    if (arw == UIO_WRITE && code == ENOSPC) {
	/* HP clears code if any data written. */
	uio->uio_resid = save_resid;

mp_afs_getattr(struct vnode *avcp, struct vattr *attrs,
	       afs_ucred_t *acred, enum vsync unused1)
    code = afs_getattr(avcp, attrs, acred);

mp_afs_setattr(struct vnode *avcp, struct vattr *attrs,
	       afs_ucred_t *acred, int unused1)
    code = afs_setattr(avcp, attrs, acred);

mp_afs_access(struct vnode *avcp, int mode, afs_ucred_t *acred)
    code = afs_access(avcp, mode, acred);

mp_afs_lookup(struct vnode *adp, char *aname,
	      struct vnode **avcp, afs_ucred_t *acred,
	      struct vnode *unused1)
    code = afs_lookup(adp, aname, avcp, acred);

mp_afs_create(struct vnode *adp, char *aname, struct vattr *attrs,
	      enum vcexcl aexcl, int amode, struct vnode **avcp,
    code = afs_create(adp, aname, attrs, aexcl, amode, avcp, acred);

mp_afs_remove(struct vnode *adp, char *aname,
    code = afs_remove(adp, aname, acred);

mp_afs_link(struct vnode *avc, struct vnode *adp,
	    char *aname, afs_ucred_t *acred)
    code = afs_link(avc, adp, aname, acred);

mp_afs_rename(struct vnode *aodp, char *aname1,
	      struct vnode *andp, char *aname2,
    code = afs_rename(aodp, aname1, andp, aname2, acred);

mp_afs_mkdir(struct vnode *adp, char *aname, struct vattr *attrs,
	     struct vnode **avcp, afs_ucred_t *acred)
    code = afs_mkdir(adp, aname, attrs, avcp, acred);

mp_afs_rmdir(struct vnode *adp, char *aname, afs_ucred_t *acred)
    code = afs_rmdir(adp, aname, acred);

mp_afs_readdir(struct vnode *avc, struct uio *auio,
    code = afs_readdir(avc, auio, acred);

mp_afs_symlink(struct vnode *adp, char *aname, struct vattr *attrs,
	       char *atargetName, afs_ucred_t *acred)
    code = afs_symlink(adp, aname, attrs, atargetName, NULL, acred);

mp_afs_readlink(struct vnode *avc, struct uio *auio,
    code = afs_readlink(avc, auio, acred);

mp_afs_fsync(struct vnode *avc, afs_ucred_t *acred, int unused1)
    code = afs_fsync(avc, acred);

mp_afs_bread(struct vnode *avc, kern_daddr_t lbn, struct buf **bpp,
	     struct vattr *unused1, struct ucred *unused2)
    code = afs_bread(avc, lbn, bpp);

mp_afs_brelse(struct vnode *avc, struct buf *bp)
    code = afs_brelse(avc, bp);

mp_afs_inactive(struct vnode *avc, afs_ucred_t *acred)
    code = afs_inactive(avc, acred);

mp_afs_lockctl(struct vnode *avc, struct flock *af, int cmd,
	       afs_ucred_t *acred, struct file *unused1, off_t unused2,
    code = afs_lockctl(avc, af, cmd, acred);

mp_afs_fid(struct vnode *avc, struct fid **fidpp)
    code = afs_fid(avc, fidpp);

mp_afs_readdir2(struct vnode *avc, struct uio *auio,
    code = afs_readdir2(avc, auio, acred);
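/*
 * Hedged sketch of the wrapper pattern used above.  The lock calls are an
 * assumption based on the usual OpenAFS glue (they are not shown in the
 * fragments here): each mp_afs_* entry point takes the AFS global lock,
 * calls the corresponding afs_* routine, and releases the lock again.
 */
#if 0
mp_afs_open(struct vnode **avcp, int aflags, afs_ucred_t *acred)
{
    int code;

    AFS_GLOCK();		/* serialize with the rest of the cache manager */
    code = afs_open(avcp, aflags, acred);
    AFS_GUNLOCK();
    return code;
}
#endif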
struct vnodeops Afs_vnodeops = {
#if !defined(AFS_NONFSTRANS)
    /* On HPUX 10.2 the NFS translator calls afs_bread but does
     * not call afs_brelse, so we see a memory leak.  If the
     * VOP_BREAD() call fails, then NFS does VOP_RDWR() to get
     * the same data: this is the path we follow now. */
    afs_badop,			/* pathsend */
    afs_noop,			/* setacl */
    afs_noop,			/* getacl */
    afs_lockf,			/* lockf */

struct vnodeops *afs_ops = &Afs_vnodeops;

/* vnode file operations, and our own */
extern int vno_ioctl();
extern int vno_select();
extern int afs_closex();
extern int vno_close();
struct fileops afs_fileops = {

#define vtoblksz(vp) ((vp)->v_vfsp->vfs_bsize)
/********************************************************************
 **** afspgin_setup_io_ranges()
 **** similar to: nfspgin_setup_io_ranges()
 ********************************************************************/
afspgin_setup_io_ranges(vfspage_t * vm_info, pgcnt_t bpages, k_off_t isize,
    pgcnt_t file_offset = VM_FILE_OFFSET(vm_info);
    pgcnt_t minpage;		/* first page to bring in */
    pgcnt_t maxpage;		/* one past last page to bring in */
    pgcnt_t multio_maxpage;
    kern_daddr_t start_blk;
    expnd_flags_t up_reason, down_reason;

    VM_GET_IO_INFO(vm_info, maxpagein, max_num_io);

    /*
     * We do not go past the end of the current pregion nor past the end
     * of the current file.
     */
    maxpage = startindex + (bpages - (startindex + file_offset) % bpages);
    maxpage = vm_reset_maxpage(vm_info, maxpage);
    maxpage = MIN(maxpage, (pgcnt_t) btorp(isize) - file_offset);
    maxpage = MIN(maxpage, startindex + maxpagein);
    multio_maxpage = maxpage = vm_maxpage(vm_info, maxpage);

    VASSERT(maxpage >= startindex);

    /*
     * Expanding the fault will create calls to FINDENTRY() for new
     * pages, which will obsolete "dbd", so copy what it points to
     * and clear it to prevent using stale data.
     */
    prp = VM_PRP(vm_info);
    dbdtype = DBD_TYPE(vm_info);
    start_blk = DBD_DATA(vm_info);
    VASSERT(dbdtype != DBD_NONE);

    if (max_num_io == 1) {
	/*
	 * We need to set up one I/O: first we attempt to expand the
	 * I/O forward, then we expand the I/O backwards.
	 */
	expand_faultin_up(vm_info, dbdtype, (int)bpages, maxpage, count,
			  startindex, start_blk, &up_reason);
	maxpage = startindex + count;
	VASSERT(maxpage <= startindex + maxpagein);
	minpage = startindex - (startindex + file_offset) % bpages;
	minpage = MAX(minpage, maxpage - maxpagein);
	VASSERT(startindex >= VM_BASE_OFFSET(vm_info));
	minpage = vm_minpage(vm_info, minpage);
	VASSERT(minpage <= startindex);
	expand_faultin_down(vm_info, dbdtype, (int)bpages, minpage, count,
			    &startindex, &start_blk, &down_reason);
	VM_SET_IO_STARTINDX(vm_info, 0, startindex);
	VM_SET_IO_STARTBLK(vm_info, 0, start_blk);
	VM_SET_IO_COUNT(vm_info, 0, count);
	VM_SET_NUM_IO(vm_info, 1);

    if (max_num_io > 1) {
	/*
	 * We need to set up multiple I/O information; beginning
	 * with the startindex, we will expand upwards.  The expansion
	 * could stop for one of 2 reasons; we take the appropriate
	 * action in each of these cases:
	 *	o VM reasons: abort setting up the multiple I/O
	 *	  information and return to our caller indicating
	 *	  that "retry" is required.
	 *	o pagelimit: set up the next I/O info [we may have
	 *	  reached multio_maxpage at this point].
	 * Note that expansion involves no more than a block at a time;
	 * hence it could never stop due to a "discontiguous block".
	 */
	startindex = minpage = vm_minpage(vm_info, 0);
	for (indx = 0; (indx < max_num_io) && (startindex < multio_maxpage);
	     indx++, startindex += count) {
	    dbd = FINDDBD(prp->p_reg, startindex);
	    start_blk = dbd->dbd_data;
		startindex + (bpages - (startindex + file_offset) % bpages);
	    maxpage = min(maxpage, multio_maxpage);
	    expand_faultin_up(vm_info, dbdtype, bpages, maxpage,
			      startindex, start_blk, &up_reason);
	    VM_SET_IO_STARTINDX(vm_info, indx, startindex);
	    VM_SET_IO_STARTBLK(vm_info, indx, start_blk);
	    VM_SET_IO_COUNT(vm_info, indx, count);
	    if (up_reason & VM_REASONS)
	    VASSERT(!(up_reason & NONCONTIGUOUS_BLOCK));
	    VASSERT(up_reason & PAGELIMIT);

	if (startindex < multio_maxpage) {
	    VM_MULT_IO_FAILURE(vm_info);
	    VM_REINIT_FAULT_DBDVFD(vm_info);
	    return (0);		/* retry */
	VM_SET_NUM_IO(vm_info, indx);

    /*
     * Tell VM where the I/O intends to start.  This may be different
     * from the faulting point.
     */
    VM_SET_STARTINDX(vm_info, VM_GET_IO_STARTINDX(vm_info, 0));
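/*
 * Illustrative numbers for the clamping above (assumed, not from this
 * file): with bpages == 2 (8K blocks, 4K pages), file_offset == 1 and
 * startindex == 5, the first assignment rounds the I/O up to the next
 * block boundary, 5 + (2 - (5 + 1) % 2) == 7; the MIN() calls then clamp
 * that against the end of the file (btorp(isize) - file_offset) and
 * against the per-fault page budget (startindex + maxpagein).
 */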
/********************************************************************
 **** afspgin_blkflsh()
 **** similar to: nfspgin_blkflsh()
 ********************************************************************/
afspgin_blkflsh(vfspage_t * vm_info, struct vnode * devvp, pgcnt_t * num_4k)
    pgcnt_t count = *num_4k;
    int num_io = VM_GET_NUM_IO(vm_info);

    /*
     * On this blkflush() we don't want to purge the buffer cache and we do
     * want to wait, so the flags are '0'.
     */
    for (indx = 0; indx < num_io; indx++) {
	    blkflush(devvp, (kern_daddr_t) VM_GET_IO_STARTBLK(vm_info, indx),
		     ptob(VM_GET_IO_COUNT(vm_info, indx)), 0,

    if (vm_page_now_valid(vm_info, &page_count)) {
	vm_release_memory(vm_info);
	vm_release_structs(vm_info);
	*num_4k = page_count;
	return (VM_PAGE_PRESENT);
/********************************************************************
 **** afspgin_io()
 **** similar to: nfspgin_io()
 ********************************************************************/
afspgin_io(vfspage_t * vm_info, struct vnode *devvp, pgcnt_t bpages,
	   pgcnt_t maxpagein, pgcnt_t count)
    caddr_t vaddr = VM_ADDR(vm_info);
    caddr_t virt_addr = VM_MAPPED_ADDR(vm_info);
    pagein_info_t *io = VM_PAGEIN_INFO(vm_info);
    preg_t *prp = VM_PRP(vm_info);
    int wrt = VM_WRT(vm_info);
    space_t space = VM_SPACE(vm_info);
    int num_io = VM_GET_NUM_IO(vm_info);

#ifdef notdef			/* Not used in AFS */
    /*
     * With the VM_READ_AHEAD_ALLOWED() macro, check if read-ahead should
     * be used in this case.
     *
     * Unlike UFS, NFS does not start the faulting page I/O
     * asynchronously.  Why?  Asynchronous requests are handled by the
     * biod's.  It doesn't make sense to queue up the faulting request
     * behind other asynchronous requests.  This is not true for UFS
     * where the asynchronous request is immediately handled.
     */
    if ((VM_READ_AHEAD_ALLOWED(vm_info)) && (nfs_read_ahead_on)
	&& (NFS_DO_READ_AHEAD) && (should_do_read_ahead(prp, vaddr))) {

	pgcnt_t max_rhead_io;
	pgcnt_t total_rheads_allowed;

	/*
	 * Determine the maximum amount of read-ahead I/O.
	 */
	total_rheads_allowed = maxpagein - count;

	/*
	 * If the count is less than a block, raise it to one.
	 */
	if (total_rheads_allowed < bpages)
	    total_rheads_allowed = bpages;

	max_rhead_io = total_rheads_allowed;
	rhead_vaddr = VM_MAPPED_ADDR(vm_info) + (count * NBPG);
	nfs_read_ahead(vm_info->vp, prp, wrt, space, rhead_vaddr,

	/*
	 * Set the next fault location.  If read_ahead launches any
	 * I/O it will adjust it accordingly.
	 */
	vm_info->prp->p_nextfault = vm_info->startindex + count;

    /*
     * Now perform the faulting I/O synchronously.
     */
	syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, 0),
		   VM_MAPPED_SPACE(vm_info), VM_MAPPED_ADDR(vm_info),
		   (int)ptob(count), B_READ, devvp,
		   B_vfs_pagein | B_pagebf, VM_REGION(vm_info));

	virt_addr = VM_MAPPED_ADDR(vm_info);
	for (i = 0; i < num_io; i++) {
	    /*
	     * REVISIT -- investigate doing asyncpageio().
	     */
	    error |= (io[i].error =
		      syncpageio((swblk_t) VM_GET_IO_STARTBLK(vm_info, i),
				 VM_MAPPED_SPACE(vm_info), virt_addr,
				 (int)ptob(VM_GET_IO_COUNT(vm_info, i)),
				 B_READ, devvp, B_vfs_pagein | B_pagebf,
				 VM_REGION(vm_info)));
	    virt_addr += ptob(VM_GET_IO_COUNT(vm_info, i));

    /*
     * Set the next fault location.  If read_ahead launches any
     * I/O it will adjust it accordingly.
     */
    vm_info->prp->p_nextfault = vm_info->startindex + count;
/********************************************************************
 **** afspgin_update_dbd()
 **** similar to: nfspgin_update_dbd()
 ********************************************************************/
afspgin_update_dbd(vfspage_t * vm_info, int bsize)
    pgcnt_t count = bsize / NBPG;
    int num_io = VM_GET_NUM_IO(vm_info);

    for (i = 0; i < num_io; i++) {
	pgindx = VM_GET_IO_STARTINDX(vm_info, i);
	off = vnodindx(VM_REGION(vm_info), pgindx);
	blkno = VM_GET_IO_STARTBLK(vm_info, i);

	VASSERT(bsize % NBPG == 0);
	VASSERT(rem % NBPG == 0);

	pgindx -= (pgcnt_t) btop(rem);
	blkno -= (kern_daddr_t) btodb(rem);

	/*
	 * This region could start in mid-block.  If so, pgindx
	 * could be less than 0, so we adjust pgindx and blkno back
	 * up so that pgindx is 0.
	 */
	    blkno += btodb(ptob(prem));

	for (m = 0; m < count && pgindx < VM_REGION_SIZE(vm_info);
	     m++, pgindx++, blkno += btodb(NBPG)) {
	    /*
	     * Note: since this only changes one block, it
	     * assumes only one block was faulted in.  Currently
	     * this is always true for remote files, and we only
	     * get here for remote files, so everything is ok.
	     */
	    vm_mark_dbd(vm_info, pgindx, blkno);
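/*
 * Worked example for the mid-block adjustment above (values assumed):
 * with 4K pages, an 8K block and DEV_BSIZE == 1024, a page that sits on
 * the second page of its block has rem == 4096, so pgindx is pulled back
 * by btop(4096) == 1 page and blkno by btodb(4096) == 4 device blocks;
 * if that would push pgindx below 0, both are advanced again by "prem"
 * so the walk starts at pgindx 0 with the matching block number.
 */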
afs_pagein(vp, prp, wrt, space, vaddr, ret_startindex)
    pgcnt_t *ret_startindex;

    pgcnt_t pgindx = *ret_startindex;
    struct vnode *devvp;
    kern_daddr_t start_blk = 0;
    int shared;			/* writable memory mapped file */
    retval_t retval = 0;
    pgcnt_t ok_dbd_limit = 0;	/* last dbd that we can trust */
    pgcnt_t bpages;		/* number of pages per block */
    vfspage_t *vm_info = NULL;
    int change_to_fstore = 0;	/* need to change dbds to DBD_FSTORE */
    int flush_start_blk = 0;
    int flush_end_blk = 0;

    AFS_STATCNT(afs_pagein);
    vmemp_lockx();		/* lock down VM empire */

    /* Initialize the VM info structure */
    vm_pagein_init(&vm_info, prp, pgindx, space, vaddr, wrt, 0,

    /* Check to see if we slept and the page was faulted in. */
	vm_release_structs(vm_info);

    vp = VM_GET_PAGEIN_VNODE(vm_info);
    VASSERT(vp != NULL);
    shared = VM_SHARED_OBJECT(vm_info);
    VASSERT(DBD_TYPE(vm_info) != DBD_NONE);

    /*
     * Get the devvp and block size for this vnode type.
     */
    bsize = vp->v_vfsp->vfs_bsize;
    if (bsize <= 0 || (bsize & (DEV_BSIZE - 1)))
	osi_Panic("afs_pagein: bsize is zero or not a multiple of DEV_BSIZE");

    bpages = (pgcnt_t) btop(bsize);
    VASSERT(bpages > 0);
    VM_SET_FS_MAX_PAGES(vm_info, bpages);

    /* This trace cannot be here because the afs_global lock might not be
     * held at this point.  We hold the vm global lock throughout
     * this procedure (and not the AFS global lock).
     * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEIN, ICL_TYPE_POINTER, (afs_int32) vp,
     *            ICL_TYPE_LONG, DBD_TYPE(vm_info), ICL_TYPE_LONG, bpages,
     *            ICL_TYPE_LONG, shared);
     */

    /* Come here if we have to release the region lock before
     * locking pages.  This can happen in memreserve() and
     */

    /*
     * For remote files like ours, we want to check to see if the file has shrunk.
     * If so, we should invalidate any pages past the end.  In the name
     * of efficiency, we only do this if the page we want to fault is
     * past the end of the file.
     */
	if (VOP_GETATTR(vp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
	    VM_ZOMBIE_OBJECT(vm_info);
	    vm_release_memory(vm_info);
	    vm_release_structs(vm_info);

	if (vnodindx(VM_REGION(vm_info), pgindx) >= isize) {
	    /*
	     * The file has shrunk and someone is trying to access a
	     * page past the end of the object.  Shrink the object back
	     * to its current size, send a SIGBUS to the faulting
	     * process and return.
	     *
	     * We must release the region lock before calling mtrunc(),
	     * since mtrunc() locks all the regions that are using this
	     */
	    vm_release_memory(vm_info);
	    vm_truncate_region(vm_info, isize);
	    vm_release_structs(vm_info);
	    vmemp_returnx(-SIGBUS);

    maxpagein = vm_pick_maxpagein(vm_info);
    if (vm_wait_for_memory(vm_info, maxpagein, 1)) {
	/* Check to see if we should continue faulting. */
	if (vm_page_now_valid(vm_info, &page_count)) {
	    vm_release_memory(vm_info);
	    vm_release_structs(vm_info);
	    vmemp_returnx(page_count);

    if (count = vm_no_io_required(vm_info)) {
	/* Release any excess memory. */
	vm_release_memory(vm_info);
	vm_release_structs(vm_info);
	vmemp_returnx(count);

    /*
     * We should never have DBD_HOLE pages in a non-MMF region.
     */
	VASSERT(dbd->dbd_type != DBD_HOLE);

    VASSERT(DBD_TYPE(vm_info) != DBD_NONE);

    startindex = *ret_startindex;

    /*
     * If the page we want is in memory already, take it.
     */
    if (VM_MEMORY_RESERVED(vm_info) < maxpagein) {
	/* pick up the rest of memory now. */
	if (vm_wait_for_memory(vm_info, maxpagein, 0)) {
	    if (vm_page_now_valid(vm_info, &page_count)) {
		vm_release_memory(vm_info);
		vm_release_structs(vm_info);
		vmemp_returnx(page_count);

	  afspgin_setup_io_ranges(vm_info, bpages, isize, startindex))) {

    startindex = VM_GET_STARTINDX(vm_info);

    VASSERT(maxpagein >= count);

    /*
     * Release the memory we won't need.
     */
    if (count < maxpagein) {
	vm_release_excess_memory(vm_info,
				 (VM_MEMORY_RESERVED(vm_info) - count));

    retval = afspgin_blkflsh(vm_info, devvp, &count);

    if (retval == VM_RETRY) {

    if (retval == VM_PAGE_PRESENT)

    /*
     * The definition of krusage_cntr_t is in h/kmetric.h, which
     * is not shipped.  Since it's just statistics, we punt and do
     * not update it.  If it's a problem we'll need to get HP to export
     * an interface that we can use to increment the counter.
     */

    /* It's a real fault, not a reclaim */
	krusage_cntr_t *temp;
	temp = kt_cntrp(u.u_kthreadp);

    /*
     * Tell VM where the I/O intends to start.  This may be different
     * from the faulting point.
     */

    /*
     * vm_prepare_io will fill the region with pages and release the
     */
    vm_prepare_io(vm_info, &count);

    /*
     * Count may have been adjusted, check to make sure it's non-zero.
     */
	if (vm_retry(vm_info)) {

	/*
	 * Release resources and retry the fault.  Release any excess
	 */
	vm_release_memory(vm_info);
	vm_release_structs(vm_info);

    error = afspgin_io(vm_info, devvp, bpages, maxpagein, count);

    if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
	VM_ZOMBIE_OBJECT(vm_info);

    /*
     * For a writable memory mapped file that is remote we must
     * detect potential holes in the file and force allocation of
     * disk space on the remote system.  Unfortunately, there is
     * no easy way to do this, so this gets a little ugly.
     */
    if (shared && wrt) {
	/*
	 * See if the user wants to write to this page.  Write some
	 * minimal amount of data back to the remote file to
	 * force allocation of file space.  We only need to
	 * write a small amount, since holes are always at
	 * least one filesystem block in size.
	 */
	error = vm_alloc_hole(vm_info);

	/*
	 * If some sort of I/O error occurred we generate a
	 * SIGBUS for the process that caused the write,
	 * undo our page locks, etc and return.
	 */
	if ((VM_IS_ZOMBIE(vm_info)) || (error)) {
	    VM_ZOMBIE_OBJECT(vm_info);

	/*
	 * Change these dbds to DBD_FSTORE.  We cannot do it here,
	 * since the region must be locked, and it is not locked
	 * at the moment.  We cannot lock the region yet, as we
	 * first have to release the page locks.
	 */
	change_to_fstore = 1;

    vm_finish_io(vm_info, count);

    /*
     * Acquire the lock before we play around with changing the vfd's.
     */
    if (change_to_fstore)
	afspgin_update_dbd(vm_info, bsize);

#if defined(AFS_HPUX110_ENV)
    getppdp()->cnt.v_exfod += count;
    mpproc_info[getprocindex()].cnt.v_exfod += count;

    vmemp_unlockx();		/* free up VM empire */
    *ret_startindex = startindex;

    /*
     * In case we have any excess memory...
     */
    if (VM_MEMORY_RESERVED(vm_info))
	vm_release_memory(vm_info);
    vm_release_structs(vm_info);

    vm_finish_io_failed(vm_info, count);
    vm_undo_validation(vm_info, count);

    /*
     * In case we have any excess memory...
     */
    if (VM_MEMORY_RESERVED(vm_info))
	vm_release_memory(vm_info);
    vm_release_structs(vm_info);

    vmemp_unlockx();		/* free up VM empire */
afs_pageout(vp, prp, start, end, flags)
    struct vnode *vp;		/* not used */

    struct vnode *filevp;
    struct vnode *devvp;

    int *piocnt;		/* wakeup counter used if PAGEOUT_WAIT */
    struct ucred *old_cred;

    int inode_changed = 0;

    AFS_STATCNT(afs_pageout);

    steal = (flags & PAGEOUT_FREE);
    vhand = (flags & PAGEOUT_VHAND);
    hard = (flags & PAGEOUT_HARD);

    /* Initialize the VM info structure. */
    vm_pageout_init(&vm_info, prp, start, end, 0, 0, 0, flags);

    /*
     * If the region is marked "don't swap", then don't steal any pages
     * from it.  We can, however, write dirty pages out to disk (only if
     * PAGEOUT_FREE is not set).
     */
    if (vm_no_pageout(&vm_info)) {

    /*
     * If the caller wants to wait until the I/O is complete.
     */
    vm_setup_wait_for_io(&vm_info);

    filevp = VM_GET_PAGEOUT_VNODE(&vm_info);	/* always page out to back store */
    VASSERT(filevp != NULL);

    memset((caddr_t) & args, 0, sizeof(fsdata_t));
    args.remote_down = 0;	/* assume remote file servers are up */
    args.remote = 1;		/* we are remote */
    args.bsize = 0;		/* filled in later by afs_vm_checkpage() */

    if (filevp->v_fstype == VUFS) {
	devvp = ip->i_devvp;

	/*
	 * If we are vhand(), and this is an NFS file, we need to
	 * see if the NFS server is "down".  If so, we decide
	 * if we will try to talk to it again, or defer pageouts
	 * of dirty NFS pages until a future time.
	 */
	if (vhand && filevp->v_fstype == VNFS && vtomi(filevp)->mi_down
	    && vtomi(filevp)->mi_hard) {
	    extern afs_int32 vhand_nfs_retry;
	    /*
	     * If there is still time left on our timer, we will
	     * not talk to this server right now.
	     */
	    if (vhand_nfs_retry > 0)
		args.remote_down = 1;

    /*
     * Initialize args.  We set bsize to 0 to tell vfs_vfdcheck() that
     * it must get the file size and other attributes if it comes across
     */
    vm_info.fs_data = (caddr_t) & args;

    /* This trace cannot be here because the afs_global lock might not be
     * held at this point.  We hold the vm global lock throughout
     * this procedure (and not the AFS global lock).
     * afs_Trace4(afs_iclSetp, CM_TRACE_HPPAGEOUT, ICL_TYPE_POINTER, (afs_int32) filevp,
     *            ICL_TYPE_LONG, start, ICL_TYPE_LONG, end, ICL_TYPE_LONG, flags);
     */

	extern int pageiodone();

	/*
	 * Ask the VM system to find the next run of pages.
	 */
	vm_find_next_range(&vm_info, i, end);

	/*
	 * It's possible that the remote file shrunk in size.  Check the flags
	 * to see if the request was beyond the end of the file.  If it was,
	 * truncate the region to the file size and continue.  We could be on a
	 * run, so after truncation continue; there may be some I/O to write.
	 */
	if (VM_FS_FLAGS(&vm_info) & PAGEOUT_TRUNCATE) {
	    pgcnt_t pglen = (pgcnt_t) btorp(args.isize);

	    /*
	     * This page is past the end of the file.  Unlock this page
	     * (region_trunc will throw it away) and then call
	     * region_trunc() to invalidate all pages past the new end of
	     */
	    region_trunc(VM_REGION(&vm_info), pglen, pglen + 1);

	    /*
	     * Remove the truncation flag.
	     */
	    VM_UNSETFS_FLAGS(&vm_info, PAGEOUT_TRUNCATE);

	if (VM_NO_PAGEOUT_RUN(&vm_info))

	/*
	 * We have a run of dirty pages [args.start...args.end].
	 */
	VASSERT(filevp->v_fstype != VCDFS);
	VASSERT((filevp->v_vfsp->vfs_flag & VFS_RDONLY) == 0);
	VASSERT(VM_GET_NUM_IO(&vm_info) == 1);

	/*
	 * We will be doing an I/O on the region; let the VM system know.
	 */
	(void)vm_up_physio_count(&vm_info);

	/*
	 * Okay, get set to perform the I/O.
	 */
	    (VM_END_PAGEOUT_INDX(&vm_info) + 1) -
	    VM_START_PAGEOUT_INDX(&vm_info);

	/*
	 * Allocate and initialize an I/O buffer.
	 */
	vm_init_bp(&vm_info, bp);	/* Let the VM system initialize */

	/* Identify this buffer for KI */
	bp->b_bptype = B_vfs_pageout | B_pagebf;

	    bp->b_flags = B_CALL | B_BUSY | B_PAGEOUT;	/* steal pages */
	    bp->b_flags = B_CALL | B_BUSY;	/* keep pages */

	/*
	 * If we are vhand paging over NFS, we will wait for the I/O
	 */
	if (vhand && filevp->v_fstype == VNFS) {
	    bp->b_flags &= ~B_CALL;
	    bp->b_iodone = (int (*)())pageiodone;

	/*
	 * Make sure we do not write past the end of the file.
	 */
	nbytes = ptob(npages);
	start = vnodindx(VM_REGION(&vm_info), vm_info.start);
	if (start + nbytes > args.isize) {
	    /*
	     * The amount we are off had better not be bigger than a
	     */
	    if (start + nbytes - args.isize >= args.bsize) {
		osi_Panic("afs_pageout: remainder too large");

	    /*
	     * Reset the size of the I/O as necessary.  For remote
	     * files, we set the size to the exact number of bytes to
	     * the end of the file.  For local files, we round this up
	     * to the nearest DEV_BSIZE chunk since disk I/O must always
	     * be in multiples of DEV_BSIZE.  In this case, we do not
	     * bother to zero out the data past the "real" end of the
	     * file; this is done when the data is read (either through
	     * mmap() or by normal file system access).
	     */
		nbytes = args.isize - start;
		nbytes = roundup(args.isize - start, DEV_BSIZE);
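	/*
	 * Worked example of the size reset above (numbers assumed): if the
	 * file ends 100 bytes into the final page, a remote file writes
	 * exactly args.isize - start == 100 bytes, while a local file
	 * would round that up with roundup(100, DEV_BSIZE) == 1024 (for
	 * DEV_BSIZE == 1024), since disk I/O must be a multiple of
	 * DEV_BSIZE.
	 */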
	/*
	 * Now get ready to perform the I/O.
	 */
	if (!vm_protect_pageout(&vm_info, npages)) {
	    vm_undo_invalidation(&vm_info, vm_info.start, vm_info.end);
	    vm_finish_io_failed(&vm_info, npages);

	/*
	 * If this is an NFS write by vhand(), we will not be calling
	 * pageiodone().  asyncpageio() increments parolemem for us
	 * if bp->b_iodone is pageiodone, so we must do it manually
	 * if pageiodone() will not be called automatically.
	 */
	if (!(bp->b_flags & B_CALL) && steal) {
	    SPINLOCK_USAV(pfdat_lock, context);
	    parolemem += btorp(nbytes);
	    SPINUNLOCK_USAV(pfdat_lock, context);

	blkflush(devvp, VM_START_PAGEOUT_BLK(&vm_info), (long)nbytes,
		 (BX_NOBUFWAIT | BX_PURGE), VM_REGION(&vm_info));

	/*
	 * If vhand is the one paging things out, and this is an NFS
	 * file, we need to temporarily become a different user so
	 * that we are not trying to page over NFS as root.  We use
	 * the user credentials associated with the writable file
	 * pointer that is in the pseudo-vas for this MMF.
	 *
	 * NOTE: we are currently using "va_rss" to store the ucred
	 * value in the vas (this should be fixed in 10.0).
	 */
	old_cred = kt_cred(u.u_kthreadp);
#if defined(AFS_HPUX1123_ENV)
	    /*
	     * DEE - 11.23 does not have vas.h, and it looks like
	     * we should never be called with an NFS type file anyway,
	     * so where did this come from?  Was it copied from NFS?
	     * I assume it was, so we will add an assert for now
	     * and see if the code runs at all.
	     */
	    VASSERT(filevp->v_fstype != VNFS);

	    set_kt_cred(u.u_kthreadp, filevp->v_vas->va_cred);

	    /*
	     * If root was the one who opened the mmf for write,
	     * va_cred will be NULL.  So reset kt_cred(u.u_kthreadp) to what it
	     * was.  We will page out as root, but that is the
	     * correct thing to do in this case anyway.
	     */
	    if (kt_cred(u.u_kthreadp) == NULL)
		set_kt_cred(u.u_kthreadp, old_cred);

	/*
	 * Really do the I/O.
	 */
	    asyncpageio(bp, VM_START_PAGEOUT_BLK(&vm_info),
			VM_MAPPED_SPACE(&vm_info), VM_MAPPED_ADDR(&vm_info),
			(int)nbytes, B_WRITE, devvp);

	VASSERT(error == 0);

	/*
	 * If we are vhand paging over NFS we want to wait for the
	 * I/O to complete and take the appropriate actions if an
	 * error is encountered.
	 */
	    if (waitforpageio(bp) && nfs_mi_harddown(filevp)) {
		/*
		 * The server is down; ignore this failure, and
		 * try again later.  (rfscall() has set our retry
		 */
		fsdata.remote_down = 1;
		pageiocleanup(bp, 0);

		/*
		 * vm_vfdcheck() has cleared the valid bit on the
		 * vfds for these pages.  We must go back and set the
		 * valid bit, as the pages are really not gone.
		 *
		 * NOTE: we can do this because we still hold (and have
		 * not released) the region lock.
		 */
		vm_undo_invalidation(&vm_info, vm_info.start,

		/*
		 * The I/O succeeded, or we had an error that we do
		 * not want to defer until later.  Call pageiodone()
		 */

	/*
	 * And restore our credentials to what they were.
	 */
	set_kt_cred(u.u_kthreadp, old_cred);

	/*
	 * If we reserved memory in vfs_vfdcheck() (only for NFS), we
	 * can now unreserve it.
	 */
	if (vm_info.vm_flags & PAGEOUT_RESERVED) {
	    vm_info.vm_flags &= ~PAGEOUT_RESERVED;
	    vm_release_malloc_memory();

	if (flags & PF_DEACT) {
#if defined(AFS_HPUX110_ENV)
	    getppdp()->cnt.v_pswpout += npages;
	    mpproc_info[getprocindex()].cnt.v_pswpout += npages;
	    /* sar_bswapout += ptod(npages); */

#if defined(AFS_HPUX110_ENV)
	    getppdp()->cnt.v_pgout++;
	    getppdp()->cnt.v_pgpgout += npages;
	    mpproc_info[getprocindex()].cnt.v_pgout++;
	    mpproc_info[getprocindex()].cnt.v_pgpgout += npages;

	/*
	 * If time and patience have delivered enough
	 * pages, then quit now while we are ahead.
	 */
	if (VM_STOP_PAGING(&vm_info))

	i = VM_END_PAGEOUT_INDX(&vm_info) - VM_BASE_OFFSET(&vm_info) + 1;

    vm_finish_pageout(&vm_info);	/* update vhand's stealscan */

    /*
     * If we wanted to wait for the I/O to complete, sleep on piocnt.
     * We must decrement it by one first, and then make sure that it
     * is non-zero before going to sleep.
     */
    vm_wait_for_io(&vm_info);

    if (inode_changed && !file_is_remote) {
	imark(ip, IUPD | ICHG);
afs_mapdbd(filevp, offset, bn, flags, hole, startidx, endidx)
    struct vnode *filevp;
    kern_daddr_t *bn;		/* Block number. */
    int flags;			/* B_READ or B_WRITE */
    int *hole;			/* To be used for read-ahead. */
    pgcnt_t *startidx;		/* To be used for read-ahead. */
    pgcnt_t *endidx;		/* To be used for read-ahead. */

    kern_daddr_t lbn, local_bn;

    long bsize = vtoblksz(filevp) & ~(DEV_BSIZE - 1);

	*startidx = (pgcnt_t) (offset / NBPG);
	*endidx = (pgcnt_t) (offset / NBPG);
	*hole = 0;		/* Can't have holes. */
	osi_Panic("afs_mapdbd: zero size");

    lbn = (kern_daddr_t) (offset / bsize);
    on = offset % bsize;

    err = VOP_BMAP(filevp, lbn, NULL, &local_bn, flags);

    /*
     * We can never get a bn less than zero on remote files.
     */
    VASSERT(local_bn >= 0);

    local_bn = local_bn + btodb(on);

/*
 * 1: The blocks are contiguous.
 * 0: The blocks are not contiguous.
 */
afs_vm_fscontiguous(vp, args, cur_data)
    if (cur_data == (VM_END_PAGEOUT_BLK(args) + btodb(NBPG))) {

/*
 * 1: Stop, this page is the last in the block.
 *
 * Terminate requests at filesystem block boundaries.
 */
afs_vm_stopio(vp, args)
    fsdata_t *fsdata = (fsdata_t *) args->fs_data;

#if defined(AFS_HPUX1123_ENV)
    tmpdb = VM_END_PAGEOUT_BLK(args);

    if ((dbtob(tmpdb) + NBPG) % (fsdata->bsize) == 0)
    if ((dbtob(VM_END_PAGEOUT_BLK(args)) + NBPG) % (fsdata->bsize) == 0)
#endif /* AFS_HPUX1123_ENV */
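/*
 * Illustrative arithmetic for the boundary test above (values assumed):
 * with DEV_BSIZE == 1024 (so dbtob(n) == n * 1024), NBPG == 4096 and an
 * 8192-byte filesystem block, a run whose last device block is 7 ends at
 * byte 7*1024 + 4096 == 11264, not a multiple of 8192, so the run keeps
 * going; if the last device block were 4, 4*1024 + 4096 == 8192 and the
 * run stops exactly on the block boundary.
 */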
/*
 * afs_vm_checkpage is called by the VM while collecting a run of
 * pages on a pageout.  afs_vm_checkpage() is called for each page
 * VM wants to write to disk.
 */
afs_vm_checkpage(vp, args, pgindx, cur_data)
    fsdata_t *fsdata = (fsdata_t *) args->fs_data;

    if (fsdata->remote_down) {	/* never happens for AFS */
	/*
	 * The remote system is down.
	 */
	VASSERT(args->run == 0);

    /*
     * A dirty page.  If we have not yet determined the file size and
     * other attributes that we need to write out pages (the block
     * size and ok_dbd_limit), get that information now.
     */
    if (fsdata->bsize == 0) {
	struct vnode *filevp;
	/*
	 * Get the various attributes about the file.  Store them
	 * in args for the next time around.
	 */
	bsize = vtoblksz(filevp);
	args->maxpgs = (pgcnt_t) btop(bsize);

	if (VOP_GETATTR(filevp, &va, kt_cred(u.u_kthreadp), VIFSYNC) != 0) {
	    /*
	     * The VOP_GETATTR() failed.  If we are vhand, and this is
	     * a hard mount, we will skip dirty pages for a while and
	     * try again later.
	     */
	    if (args->vm_flags & PAGEOUT_VHAND) {
		VASSERT(args->run == 0);

	    /*
	     * This is a "soft" mount, or some other error was
	     * returned from the server.  Mark this region
	     * as a zombie, and free this dirty page.
	     */
	    VM_ZOMBIE_OBJECT(args);

	    /*
	     * The caller will see r_zomb and remove the page.
	     */

	fsdata->isize = isize;
	fsdata->bsize = bsize;

    /*
     * See if the file has shrunk (this could have happened
     * asynchronously because of NFS or DUX).  If so, invalidate
     * all of the pages past the end of the file.  This is only
     * needed for remote files, as local files are truncated
     */
    if (vnodindx(VM_REGION(args), pgindx) > fsdata->isize) {
	/*
	 * This page is past the end of the file.  Unlock this page
	 * (region_trunc will throw it away) and then call region_trunc()
	 * to invalidate all pages past the new end of the file.
	 */
	VM_SETFS_FLAGS(args, PAGEOUT_TRUNCATE);

    if ((args->vm_flags & PAGEOUT_VHAND)
	&& (!(args->vm_flags & PAGEOUT_RESERVED))
	&& (!(VM_IS_ZOMBIE(args)))) {
	VASSERT(args->run == 0);
	if (vm_reserve_malloc_memory(NFS_PAGEOUT_MEM)) {
	    /*
	     * Got enough memory to pageout.  Mark the fact that we did
	     * a sysprocmemreserve(), so that we can sysprocmemunreserve() it
	     * later (in remote_pageout()).
	     */
	    args->vm_flags |= PAGEOUT_RESERVED;

	    /*
	     * We do not have enough memory to do this pageout.  By
	     * definition, we do not yet have a run, so we just unlock
	     * this page and tell foreach_valid() to continue scanning.
	     * If we come across another dirty page, we will try to
	     * reserve memory again.  That is okay, in fact some memory
	     * may have freed up (as earlier pageouts complete under
	     */

    fs_bsize = vtoblksz(bp->b_vp);
    /*
     * Check to see if we are starting mid block.  If so, then
     * we must return the remainder of the block or less depending
     */
    bnrem = bp->b_offset % fs_bsize;
	max_size = fs_bsize - bnrem;
	max_size = fs_bsize;

    if (bp->b_bcount > max_size) {
	return (bp->b_bcount);

afs_mmap(vp, off, size_bytes, access)
#if defined(AFS_HPUX1111_ENV)
    long bsize = vtoblksz(vp);

    if (bsize % NBPG != 0) {

afs_cachelimit(vp, len, location)
    /*
     * Disk addresses are logical, not physical, so fragments are
     */
    *location = btorp(len) + 1;

afs_unmap(vp, off, size_bytes, access)
#if defined(AFS_HPUX1111_ENV)

afs_read_ahead(vp, prp, wrt, space, vaddr, rhead_cnt)
    printf("afs_read_ahead returning 0 \n");

afs_prealloc(vp, size, ignore_minfree, reserved)
    /* DEE on 11.22 the following is off_t */
    printf("afs_prealloc returning ENOSPC\n");

afs_ioctl(vp, com, data, flag, cred)
    struct afs_ioctl afsioctl, *ai;

    AFS_STATCNT(afs_ioctl);

    /* The call must be a VICEIOCTL call */
    if (((com >> 8) & 0xff) == 'V') {
	/* AFS_COPYIN returns error 14.  Copy the data in instead */
	AFS_COPYIN(data, (caddr_t) & afsioctl, sizeof(afsioctl), error);
	ai = (struct afs_ioctl *)data;
	afsioctl.in = ai->in;
	afsioctl.out = ai->out;
	afsioctl.in_size = ai->in_size;
	afsioctl.out_size = ai->out_size;
	error = HandleIoctl(VTOAFS(vp), com, &afsioctl);

#if defined(AFS_HPUX1111_ENV)
/* It looks like even if the application is 32 bit, we need to round to 8 bytes */
/* This had no effect; it must not be being used */

#define roundtoint(x) (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
#define reclen(dp) roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
			sizeof(u_int) + 2 * sizeof(u_short)))

#define roundtoint(x) (((x) + (sizeof(int) - 1)) & ~(sizeof(int) - 1))
#define reclen(dp) roundtoint(((dp)->d_namlen + 1 + (sizeof(u_long)) +\
			2 * sizeof(u_short)))
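/*
 * Worked example for reclen()/roundtoint() above (sizes assumed for a
 * 32-bit build: sizeof(u_long) == 4, sizeof(u_short) == 2, sizeof(int) == 4):
 * a 5-character name gives 5 + 1 + 4 + 2*2 == 14 raw bytes, which
 * roundtoint() pads up to the next 4-byte boundary, i.e. a 16-byte record.
 */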
afs_readdir(vp, uiop, cred)
    caddr_t ibuf, obuf, ibufend, obufend;
    struct __dirent32 *idp;
    int count, outcount;
    uint64_t tmp_offset;

    memset(&auio, 0, sizeof(auio));
    memset(&aiov, 0, sizeof(aiov));

    count = uiop->uio_resid;
    /* Allocate temporary space for format conversion */
    ibuf = kmem_alloc(2 * count);	/* overkill - fix later */
    obuf = kmem_alloc(count + sizeof(struct dirent));
    aiov.iov_base = ibuf;
    aiov.iov_len = count;
    auio.uio_iov = &aiov;
    auio.uio_iovcnt = 1;
    offset = auio.uio_offset = uiop->uio_offset;
    auio.uio_seg = UIOSEG_KERNEL;
    auio.uio_resid = count;
    auio.uio_fpflags = 0;

    u.u_error = mp_afs_readdir2(vp, &auio, cred);

    /* Convert entries from __dirent32 to dirent format */

    for (idp = (struct __dirent32 *)ibuf, odp =
	 (struct dirent *)obuf, ibufend =
	 ibuf + (count - auio.uio_resid), obufend = obuf + count;
	 (caddr_t) idp < ibufend;
	 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
	 (struct dirent *)((caddr_t) odp + odp->d_reclen)) {
	odp->d_ino = idp->__d_ino;
	odp->d_namlen = idp->__d_namlen;
	(void)strcpy(odp->d_name, idp->__d_name);
	odp->d_reclen = reclen(odp);
	if ((caddr_t) odp + odp->d_reclen > obufend)
	/* record offset *after* we're sure to use this entry */
	memcpy((char *)&tmp_offset, (char *)&idp->__d_off, sizeof tmp_offset);
	offset = tmp_offset;

    outcount = (caddr_t) odp - obuf;
    AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
    uiop->uio_offset = offset;

    kmem_free(ibuf, count);
    kmem_free(obuf, count + sizeof(struct dirent));
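/*
 * Hedged sketch of the conversion step above for a single entry (field
 * names as used in this file; buffer-overflow and offset bookkeeping
 * omitted):
 */
#if 0
static void
copy_dirent32_to_dirent(struct __dirent32 *idp, struct dirent *odp)
{
    odp->d_ino = idp->__d_ino;		/* inode number carries over */
    odp->d_namlen = idp->__d_namlen;	/* name length carries over */
    (void)strcpy(odp->d_name, idp->__d_name);
    odp->d_reclen = reclen(odp);	/* recompute the record length for
					 * the output format; it may differ
					 * from idp->__d_reclen */
}
#endif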
#define roundtolong(x) (((x) + (sizeof(long) - 1)) & ~(sizeof(long) - 1))
#define reclen_dirent64(dp) roundtolong(((dp)->__d_namlen + 1 + (2*sizeof(u_long)) +\
			2 * sizeof(u_short)))

afs_readdir3(vp, uiop, cred)
    caddr_t ibuf, obuf, ibufend, obufend;
    struct __dirent32 *idp;
    struct __dirent64 *odp;
    int count, outcount;

    memset(&auio, 0, sizeof(auio));
    memset(&aiov, 0, sizeof(aiov));

    count = uiop->uio_resid;
    /* Allocate temporary space for format conversion */
    ibuf = kmem_alloc(2 * count);	/* overkill - fix later */
    obuf = kmem_alloc(count + sizeof(struct __dirent64));
    aiov.iov_base = ibuf;
    aiov.iov_len = count;
    auio.uio_iov = &aiov;
    auio.uio_iovcnt = 1;
    offset = auio.uio_offset = uiop->uio_offset;
    auio.uio_seg = UIOSEG_KERNEL;
    auio.uio_resid = count;
    auio.uio_fpflags = 0;

    u.u_error = mp_afs_readdir2(vp, &auio, cred);

    /* Convert entries from __dirent32 to __dirent64 format */

    for (idp = (struct __dirent32 *)ibuf, odp =
	 (struct __dirent64 *)obuf, ibufend =
	 ibuf + (count - auio.uio_resid), obufend = obuf + count;
	 (caddr_t) idp < ibufend;
	 idp = (struct __dirent32 *)((caddr_t) idp + idp->__d_reclen), odp =
	 (struct __dirent64 *)((caddr_t) odp + odp->__d_reclen)) {
	memcpy((char *)&odp->__d_off, (char *)&idp->__d_off,
	       sizeof odp->__d_off);
	odp->__d_ino = idp->__d_ino;
	odp->__d_namlen = idp->__d_namlen;
	(void)strcpy(odp->__d_name, idp->__d_name);
	odp->__d_reclen = reclen_dirent64(odp);
	if ((caddr_t) odp + odp->__d_reclen > obufend)
	/* record offset *after* we're sure to use this entry */
	offset = odp->__d_off;

    outcount = (caddr_t) odp - obuf;
    AFS_UIOMOVE(obuf, outcount, UIO_READ, uiop, u.u_error);
    uiop->uio_offset = offset;

    kmem_free(ibuf, count);
    kmem_free(obuf, count + sizeof(struct __dirent64));
#define AFS_SV_SEMA_HASH 1
#define AFS_SV_SEMA_HASH_DEBUG 0

#if AFS_SV_SEMA_HASH
/* This portion of the code was originally used to implement
 * thread specific storage for the semaphore save area.  However,
 * there were some spare fields in the proc structure; those are
 * now being used for the saved semaphores.  Hence, this portion of
 * the code is no longer used.
 */

/* This portion of the code implements thread specific information.
 * The thread id is passed in as the key.  The semaphore saved area
 * is hashed on this key.
 */

/* Why is this hash table required?
 * The AFS code is written in such a way that a GLOCK() is done in
 * one function and the GUNLOCK() is done in another function further
 * down the call chain.  The GLOCK() call has to save the current
 * semaphore status before acquiring afs_global_sema.  The GUNLOCK
 * has to release afs_global_sema and reacquire the semaphore status
 * that existed before the corresponding GLOCK.  If GLOCK() and
 * GUNLOCK() were called in the same function, the GLOCK call could
 * have stored the saved semaphore status in a local variable and the
 * corresponding GUNLOCK() call could have restored the original
 * status from this local variable.  But this is not the case with
 * AFS code.  Hence, we have to implement a thread specific semaphore
 * save area.  This is implemented as a hash table.  The key is the
 */

/* In order for multithreaded processes to work, the sv_sema structures
 * must be saved on a per-thread basis, not a per-process basis.  There
 * is no per-thread storage available to hijack in the OS per-thread
 * data structures (e.g. struct user) so we revive this code.
 * I removed the upper limit on the memory consumption since we don't
 * know how many threads there will be.  Now the code first checks the
 * freeList.  If that fails it then tries garbage collecting.  If that
 * doesn't free up anything then it allocs what it needs.
 */

#define ELEMENT sv_sema_t
#define Hash(xx) ( (xx) % sizeOfHashTable )
#define hashLockInit(xx) initsema(&xx,1, FILESYS_SEMA_PRI, FILESYS_SEMA_ORDER)
#define hashLock(xx) MP_PSEMA(&xx)
#define hashUnlock(xx) MP_VSEMA(&xx)
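/*
 * Hedged sketch of how the per-thread save area is meant to be used.  The
 * afsHashInsertFind/afsHashFind/afsHashRelease names come from this file;
 * the AFS_GLOCK/AFS_GUNLOCK framing and the use of the kernel thread
 * pointer as the key are assumptions about the callers.
 */
#if 0
void
example_glock(void)
{
    sv_sema_t *savep = afsHashInsertFind((KEY) u.u_kthreadp);
    /* save the current semaphore state in *savep, then take afs_global_sema */
}

void
example_gunlock(void)
{
    sv_sema_t *savep = afsHashFind((KEY) u.u_kthreadp);
    /* release afs_global_sema and restore the state saved in *savep */
    afsHashRelease((KEY) u.u_kthreadp);	/* drop the per-thread reference */
}
#endif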
typedef struct elem {

typedef struct bucket {

static int sizeOfHashTable;
static Bucket *hashTable;

static int currentSize = 0;
static Element *freeList;	/* free list */

static sema_t afsHashLock = { 0 };	/* global lock for hash table */

static void afsHashGarbageCollect();

/*
 ** The global lock protects the global data structures,
 ** e.g. freeList and currentSize.
 ** The bucket lock protects the link list hanging off that bucket.
 ** The lock hierarchy: one can obtain the bucket lock while holding
 ** the global lock, but not vice versa.
 */
afsHash(int nbuckets)
{				/* allocate the hash table */
#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHash: enter\n");

    sizeOfHashTable = nbuckets;
    currentSize = nbuckets * sizeof(Bucket);

	osi_Panic("afs: SEMA Hashtable already created\n");

    hashTable = (Bucket *) AFS_KALLOC(sizeOfHashTable * sizeof(Bucket));
	osi_Panic("afs: cannot create SEMA Hashtable\n");

    /* initialize the hash table and associated locks */
    memset(hashTable, 0, sizeOfHashTable * sizeof(Bucket));
    for (i = 0; i < sizeOfHashTable; i++)
	hashLockInit(hashTable[i].lock);
    hashLockInit(afsHashLock);

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHash: exit\n");

afsHashInsertFind(KEY key)
#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashInsertFind: %d\n", key);
	osi_Panic("afs: afsHashInsertFind: no hashTable\n");

    index = Hash(key);		/* get bucket number */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr = hashTable[index].element;

    /* if it is already there */
	if (ptr->key == key) {
	    ptr->refCnt++;	/* hold it */
	    hashUnlock(hashTable[index].lock);
#if AFS_SV_SEMA_HASH_DEBUG
	    printf("afsHashInsertFind: %d FOUND\n", key);
	    return &(ptr->element);

    hashUnlock(hashTable[index].lock);

    /* if something exists in the freeList, take it from there */
    hashLock(afsHashLock);

	ptr = freeList;		/* reuse entry */
	freeList = freeList->next;
	afsHashGarbageCollect();	/* afsHashLock locked */
	    ptr = freeList;	/* reuse entry */
	    freeList = freeList->next;
	    ptr = (Element *) AFS_KALLOC(sizeof(Element));

    currentSize += sizeof(Element);	/* update memory used */
    hashUnlock(afsHashLock);

	osi_Panic("afs: SEMA Hashtable cannot create new entry\n");
    /* create new entry */
    memset(&ptr->element, 0, sizeof(ptr->element));
    ptr->refCnt = 1;		/* this guy */

    /* insert new entry in bucket */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr->next = hashTable[index].element;
    hashTable[index].element = ptr;
    hashUnlock(hashTable[index].lock);

#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashInsertFind: %d MADE\n", key);

    return &(ptr->element);

afsHashFind(KEY key)
#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashFind: %d\n", key);
	osi_Panic("afs: afsHashFind: no hashTable\n");

    index = Hash(key);		/* get bucket number */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr = hashTable[index].element;

    /* it should be in the hash table */
	if (ptr->key == key) {
	    if (ptr->refCnt <= 0)
		osi_Panic("afs: SEMA HashTable entry already released\n");
	    hashUnlock(hashTable[index].lock);
#if AFS_SV_SEMA_HASH_DEBUG
	    printf("afsHashFind: %d FOUND\n", key);
	    return &(ptr->element);

    hashUnlock(hashTable[index].lock);
    /* it better be in the hash table */
    osi_Panic("afs: SEMA HashTable wants non-existent entry \n");

afsHashRelease(KEY key)
#if AFS_SV_SEMA_HASH_DEBUG
    printf("afsHashRelease: %d\n", key);
	osi_Panic("afs: afsHashRelease: no hashTable\n");

    index = Hash(key);		/* get bucket number */
    hashLock(hashTable[index].lock);	/* lock this bucket */
    ptr = hashTable[index].element;

    /* it should be in the hash table */
	if (ptr->key == key) {
	    if (ptr->refCnt <= 0)
		osi_Panic("afs: SEMA HashTable entry already released\n");
	    ptr->refCnt--;	/* release this guy */
	    hashUnlock(hashTable[index].lock);
#if AFS_SV_SEMA_HASH_DEBUG
	    printf("afsHashRelease: %d FOUND\n", key);

    hashUnlock(hashTable[index].lock);
    /* it better be in the hash table */
    osi_Panic("afs: SEMA HashTable deleting non-existent entry \n");

/* this should be called with afsHashLock WRITE locked */
afsHashGarbageCollect()
	osi_Panic("afs: afsHashGarbageCollect: no hashTable\n");

    for (index = 0; index < sizeOfHashTable; index++) {
	hashLock(hashTable[index].lock);
	ptr = hashTable[index].element;	/* pick up bucket */

	while (ptr && !ptr->refCnt) {
	    /* insert this element into the free list */
	    ptr->next = freeList;
	    foundFlag = 1;	/* found at least one */
	    currentSize -= sizeof(Element);

	hashTable[index].element = ptr;

	/* scan thru the remaining list */
	    if (ptr->next->refCnt == 0) {
		/* collect this element */
		ptr->next = ptr->next->next;
		temp->next = freeList;
		currentSize -= sizeof(Element);

	hashUnlock(hashTable[index].lock);

	osi_Panic("afs: SEMA HashTable full\n");

#endif /* AFS_SV_SEMA_HASH */
    struct iovec tiovec[1];
    extern caddr_t hdl_kmap_bp();
    struct kthread *t = u.u_kthreadp;

    memset(&tuio, 0, sizeof(tuio));
    memset(&tiovec, 0, sizeof(tiovec));

    AFS_STATCNT(afs_hp_strategy);
    /*
     * hdl_kmap_bp() saves "b_bcount" and restores it in hdl_remap_bp() after
     * the I/O.  We must save and restore the count because pageiodone()
     * uses b_bcount to determine how many pages to unlock.
     *
     * Remap the entire range.
     */

    afs_Trace4(afs_iclSetp, CM_TRACE_HPSTRAT, ICL_TYPE_POINTER, bp->b_vp,
	       ICL_TYPE_LONG, (int)bp->b_blkno * DEV_BSIZE, ICL_TYPE_LONG,
	       bp->b_bcount, ICL_TYPE_LONG, 0);

    /* Set up the uio structure */
    tuio.afsio_iov = tiovec;
    tuio.afsio_iovcnt = 1;
    tuio.afsio_offset = DEV_BSIZE * bp->b_blkno;
    tuio.afsio_seg = AFS_UIOSYS;
    tuio.afsio_resid = bp->b_bcount;
    tuio.uio_fpflags = 0;
    tiovec[0].iov_base = bp->b_un.b_addr;
    tiovec[0].iov_len = bp->b_bcount;

    if ((bp->b_flags & B_READ) == B_READ) {
	/* Read b_bcount bytes into kernel address b_un.b_addr
	 * starting at byte DEV_BSIZE * b_blkno.  Bzero anything
	 * we can't read, and finally call iodone(bp).  File is
	 * in bp->b_vp.  Credentials are from u area?? */
	code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_READ, 0, kt_cred(t));
	if (tuio.afsio_resid > 0) {
	    privlbzero(bvtospace(bp, bp->b_un.b_addr),
		       bp->b_un.b_addr + bp->b_bcount - tuio.afsio_resid,
		       (size_t) tuio.afsio_resid);

	code = afs_rdwr(VTOAFS(bp->b_vp), &tuio, UIO_WRITE, 0, kt_cred(t));

    /* Remap back to the user's space */
afs_pathconf(vp, name, resultp, cred)
    struct ucred *cred;		/* unused */

    case _PC_LINK_MAX:		/* Maximum number of links to a file */
	*resultp = 255;		/* an unsigned short on the fileserver */
	break;			/* an unsigned char in the client.... */

    case _PC_NAME_MAX:		/* Max length of file name */

    case _PC_PATH_MAX:		/* Maximum length of Path Name */

    case _PC_PIPE_BUF:		/* Max atomic write to pipe.  See fifo_vnops */
    case _PC_CHOWN_RESTRICTED:	/* Anybody can chown? */
    case _PC_NO_TRUNC:		/* No file name truncation on overflow? */
	u.u_error = EOPNOTSUPP;
	return (EOPNOTSUPP);

    case _PC_MAX_CANON:		/* TTY buffer size for canonical input */
	/* need more work here for pty, ite buffer size, if they differ */
	if (vp->v_type != VCHR) {
	*resultp = CANBSIZ;	/* for tty */

	/* need more work here for pty, ite buffer size, if they differ */
	if (vp->v_type != VCHR) {	/* TTY buffer size */
	*resultp = TTYHOG;	/* for tty */

	/* Terminal special characters can be disabled? */
	if (vp->v_type != VCHR) {

	if ((vp->v_type != VREG) && (vp->v_type != VBLK)) {
	*resultp = 1;		/* Synchronized IO supported for this file */

    case _PC_FILESIZEBITS:
	if (vp->v_type != VDIR)
	*resultp = MAX_SMALL_FILE_BITS;