4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
40 typedef enum nfs_access_type
{
46 typedef struct acache_hash
{
47 struct acache
*next
; /* next and prev must be first */
52 typedef struct acache
{
53 struct acache
*next
; /* next and prev must be first */
60 struct acache_hash
*hashq
;
/* Number of chars in the fh_buf array of struct nfs_fhandle. */
63 #define NFS_FHANDLE_LEN 72
65 typedef struct nfs_fhandle
{
67 char fh_buf
[NFS_FHANDLE_LEN
];
70 typedef struct rddir_cache
{
71 lloff_t _cookie
; /* cookie used to find this cache entry */
72 lloff_t _ncookie
; /* cookie used to find the next cache entry */
73 char *entries
; /* buffer containing dirent entries */
74 int eof
; /* EOF reached after this request */
75 int entlen
; /* size of dirent entries in buf */
76 int buflen
; /* size of the buffer used to store entries */
77 int flags
; /* control flags, see below */
78 kcondvar_t cv
; /* cv for blocking */
79 int error
; /* error from RPC operation */
81 uint_t count
; /* reference count */
82 avl_node_t tree
; /* AVL tree links */
/*
 * Shorthand for the two lloff_t cookie fields of rddir_cache (_cookie and
 * _ncookie). The nfs_* names select the _p._l member of the cookie and
 * the nfs3_* names select its _f member.
 */
85 #define nfs_cookie _cookie._p._l
86 #define nfs_ncookie _ncookie._p._l
87 #define nfs3_cookie _cookie._f
88 #define nfs3_ncookie _ncookie._f
/*
 * Single-bit values, so they can be OR-ed together.
 * NOTE(review): presumably these are the values for the flags field of
 * rddir_cache, which is commented "control flags, see below" -- confirm.
 */
90 #define RDDIR 0x1 /* readdir operation in progress */
91 #define RDDIRWAIT 0x2 /* waiting on readdir in progress */
92 #define RDDIRREQ 0x4 /* a new readdir is required */
93 #define RDDIRCACHED 0x8 /* entry is in the cache */
/*
 * Non-zero when the r_dir AVL tree of rnode rp (its cache of readdir
 * responses) contains at least one node.
 */
95 #define HAVE_RDDIR_CACHE(rp) (avl_numnodes(&(rp)->r_dir) > 0)
97 typedef struct symlink_cache
{
98 char *contents
; /* contents of the symbolic link */
99 int len
; /* length of the contents */
100 int size
; /* size of the allocated buffer */
103 typedef struct commit
{
104 page_t
*c_pages
; /* list of pages to commit */
105 offset3 c_commbase
; /* base offset to do commit from */
106 count3 c_commlen
; /* len to commit */
107 kcondvar_t c_cv
; /* condvar for waiting for commit */
111 * The various values for the commit states. These are stored in
112 * the p_fsdata byte in the page struct.
113 * NFSv3,4 can use asynchronous writes - the NFS server can send a response
114 * before storing the data to the stable store (disk). The response indicates
115 * whether the data is on disk or not. The NFS client marks pages
116 * which are already on the stable store as C_NOCOMMIT. The pages which were
117 * sent but are not yet on the stable store are only partially 'safe' and are
118 * marked as C_DELAYCOMMIT, which can be later changed to C_COMMIT if the
119 * commit operation is in progress. If the NFS server is e.g. rebooted, the
120 * client needs to resend all the uncommitted data. The client walks all the
121 * vp->v_object's list and if C_DELAYCOMMIT or C_COMMIT is set, the page is
122 * marked as dirty and thus will be written to the server again.
/* Commit state values; these are what is stored in a page's p_fsdata byte. */
124 #define C_NOCOMMIT 0 /* no commit is required */
125 #define C_COMMIT 1 /* a commit is required so do it now */
126 #define C_DELAYCOMMIT 2 /* a commit is required, but can be delayed */
129 * The lock manager holds state making it possible for the client
130 * and server to be out of sync. For example, if the response from
131 * the server granting a lock request is lost, the server will think
132 * the lock is granted and the client will think the lock is lost.
133 * To deal with this, a list of processes for which the client is
134 * not sure if the server holds a lock is attached to the rnode.
135 * When such a process closes the rnode, an unlock request is sent
136 * to the server to unlock the entire file.
138 * The list is kept as a singly linked, NULL-terminated list.
139 * Because it is only added to under extreme error conditions, the
140 * list shouldn't get very big. DEBUG kernels print a console warning
141 * when the number of entries on a list goes beyond nfs_lmpl_high_water,
142 * an arbitrary number defined in nfs_add_locking_id().
145 #define RLMPL_OWNER 2
146 typedef struct lock_manager_pid_list
{
156 struct lock_manager_pid_list
*lmpl_next
;
/*
 * Shorthand for members reached through a member named "un": un._pid,
 * un._own.len and un._own.owner.
 * NOTE(review): presumably "un" belongs to struct lock_manager_pid_list,
 * whose body is not visible here -- confirm.
 */
159 #define lmpl_opid un._pid
160 #define lmpl_own_len un._own.len
161 #define lmpl_owner un._own.owner
164 * A homegrown reader/writer lock implementation. It addresses
165 * two requirements not addressed by the system primitives. They
166 * are that the `enter' operation is optionally interruptible and
167 * that the lock can be re`enter'ed by writers without deadlock.
169 typedef struct nfs_rwlock
{
179 * The format of the hash bucket used to lookup rnodes from a file handle.
181 typedef struct rhashq
{
182 struct rnode
*r_hashf
;
183 struct rnode
*r_hashb
;
188 * Remote file information structure.
190 * The rnode is the "inode" for remote files. It contains all the
191 * information necessary to handle a remote file on the client side.
193 * Note on file sizes: we keep two file sizes in the rnode: the size
194 * according to the client (r_size) and the size according to the server
195 * (r_attr.va_size). They can differ because we modify r_size during a
196 * write system call (nfs_rdwr), before the write request goes over the
197 * wire (before the file is actually modified on the server). If an OTW
198 * request occurs before the cached data is written to the server the file
199 * size returned from the server (r_attr.va_size) may not match r_size.
200 * r_size is the one we use, in general. r_attr.va_size is only used to
201 * determine whether or not our cached data is valid.
203 * Each rnode has 3 locks associated with it (not including the rnode
204 * hash table and free list locks):
206 * r_rwlock: Serializes nfs_write and nfs_setattr requests
207 * and allows nfs_read requests to proceed in parallel.
208 * Serializes reads/updates to directories.
210 * r_lkserlock: Serializes lock requests with map, write, and
211 * readahead operations.
213 * r_statelock: Protects all fields in the rnode except for
214 * those listed below. This lock is intended
215 * to be held for relatively short periods of
216 * time (not across entire putpage operations,
219 * The following members are protected by the mutex rpfreelist_lock:
223 * The following members are protected by the hash bucket rwlock:
227 * Note: r_modaddr is only accessed when the r_statelock mutex is held.
228 * Its value is also controlled via r_rwlock. It is assumed that
229 * there will be only 1 writer active at a time, so it is safe to
230 * set r_modaddr and release r_statelock as long as the r_rwlock
231 * writer lock is held.
233 * r_inmap informs nfsX_read()/write() that there is a call to nfsX_map()
234 * in progress. nfsX_read()/write() check r_inmap to decide whether
235 * to perform directio on the file or not. r_inmap is atomically
236 * incremented in nfsX_map() before the address space routines are
237 * called and atomically decremented just before nfsX_map() exits.
238 * r_inmap is not protected by any lock.
240 * r_mapcnt tells that the rnode has mapped pages. r_inmap can be 0
241 * while the rnode has mapped pages.
243 * 64-bit offsets: the code formerly assumed that atomic reads of
244 * r_size were safe and reliable; on 32-bit architectures, this is
245 * not true since an intervening bus cycle from another processor
246 * could update half of the size field. The r_statelock must now
247 * be held whenever any kind of access of r_size is made.
250 * r_rwlock > r_lkserlock > r_statelock
/*
 * Forward declarations of structure tags whose full definitions live in
 * the headers named in the per-line comments. In the lines of this header
 * that follow, servinfo, failinfo and mntinfo appear only as pointer
 * targets.
 */
252 struct exportinfo
; /* defined in nfs/export.h */
253 struct servinfo
; /* defined in nfs/nfs_clnt.h */
254 struct failinfo
; /* defined in nfs/nfs_clnt.h */
255 struct mntinfo
; /* defined in nfs/nfs_clnt.h */
259 typedef struct rnode
{
260 /* the hash fields must be first to match the rhashq_t */
261 struct rnode
*r_hashf
; /* hash queue forward pointer */
262 struct rnode
*r_hashb
; /* hash queue back pointer */
263 struct rnode
*r_freef
; /* free list forward pointer */
264 struct rnode
*r_freeb
; /* free list back pointer */
265 rhashq_t
*r_hashq
; /* pointer to the hash bucket */
266 vnode_t
*r_vnode
; /* vnode for remote file */
267 nfs_rwlock_t r_rwlock
; /* serializes write/setattr requests */
268 nfs_rwlock_t r_lkserlock
; /* serialize lock with other ops */
269 kmutex_t r_statelock
; /* protects (most of) rnode contents */
270 nfs_fhandle r_fh
; /* file handle */
271 struct servinfo
*r_server
; /* current server */
272 char *r_path
; /* path to this rnode */
273 uoff_t r_nextr
; /* next byte read offset (read-ahead) */
274 cred_t
*r_cred
; /* current credentials */
275 cred_t
*r_unlcred
; /* unlinked credentials */
276 char *r_unlname
; /* unlinked file name */
277 vnode_t
*r_unldvp
; /* parent dir of unlinked file */
278 len_t r_size
; /* client's view of file size */
279 struct vattr r_attr
; /* cached vnode attributes */
280 hrtime_t r_attrtime
; /* time attributes become invalid */
281 hrtime_t r_mtime
; /* client time file last modified */
282 long r_mapcnt
; /* count of mmapped pages */
283 uint_t r_count
; /* # of refs not reflect in v_count */
284 uint_t r_awcount
; /* # of outstanding async write */
285 uint_t r_gcount
; /* getattrs waiting to flush pages */
286 ushort_t r_flags
; /* flags, see below */
287 short r_error
; /* async write error */
288 kcondvar_t r_cv
; /* condvar for blocked threads */
289 int (*r_putapage
) /* address of putapage routine */
290 (vnode_t
*, page_t
*, uoff_t
*, size_t *, int, cred_t
*);
291 avl_tree_t r_dir
; /* cache of readdir responses */
292 rddir_cache
*r_direof
; /* pointer to the EOF entry */
293 symlink_cache r_symlink
; /* cached readlink response */
294 writeverf3 r_verf
; /* version 3 write verifier */
295 uoff_t r_modaddr
; /* address for page in writerp */
296 commit_t r_commit
; /* commit information */
297 uoff_t r_truncaddr
; /* base for truncate operation */
298 vsecattr_t
*r_secattr
; /* cached security attributes (acls) */
299 cookieverf3 r_cookieverf
; /* version 3 readdir cookie verifier */
300 lmpl_t
*r_lmpl
; /* pids that may be holding locks */
301 nfs3_pathconf_info
*r_pathconf
; /* cached pathconf information */
302 acache_t
*r_acache
; /* list of access cache entries */
303 kthread_t
*r_serial
; /* id of purging thread */
304 list_t r_indelmap
; /* list of delmap callers */
305 uint_t r_inmap
; /* to serialize read/write and mmap */
306 list_node_t r_mi_link
; /* linkage into list of rnodes for */
/*
 * Bit values for the r_flags field of the rnode (declared as a ushort_t
 * and commented "flags, see below"). The sixteen flags below occupy every
 * bit from 0x1 through 0x8000.
 * NOTE(review): if ushort_t is 16 bits wide there is no room for another
 * flag without widening r_flags -- confirm before adding one.
 */
314 #define RREADDIRPLUS 0x1 /* issue a READDIRPLUS instead of READDIR */
315 #define RDIRTY 0x2 /* dirty pages from write operation */
316 #define RSTALE 0x4 /* file handle is stale */
317 #define RMODINPROGRESS 0x8 /* page modification happening */
318 #define RTRUNCATE 0x10 /* truncating, don't commit */
319 #define RHAVEVERF 0x20 /* have a write verifier to compare against */
320 #define RCOMMIT 0x40 /* commit in progress */
321 #define RCOMMITWAIT 0x80 /* someone is waiting to do a commit */
322 #define RHASHED 0x100 /* rnode is in hash queues */
323 #define ROUTOFSPACE 0x200 /* an out of space error has happened */
324 #define RDIRECTIO 0x400 /* bypass the buffer cache */
325 #define RLOOKUP 0x800 /* a lookup has been performed */
326 #define RWRITEATTR 0x1000 /* attributes came from WRITE */
327 #define RINDNLCPURGE 0x2000 /* in the process of purging DNLC references */
328 #define RDELMAPLIST 0x4000 /* delmap callers tracking for as callback */
329 #define RINCACHEPURGE 0x8000 /* purging caches due to file size change */
332 * Convert between vnode and rnode
/* RTOV: the r_vnode pointer stored in rnode rp. */
334 #define RTOV(rp) ((rp)->r_vnode)
/* VTOR: the v_data pointer of vnode vp, cast to rnode_t *. */
335 #define VTOR(vp) ((rnode_t *)((vp)->v_data))
/*
 * File handle accessors. RTOFH yields the address of the rnode's
 * r_fh.fh_buf array cast to fhandle_t *; RTOFH3 yields the address of
 * the whole r_fh member cast to nfs_fh3 *. VTOFH and VTOFH3 do the same
 * starting from a vnode, going through VTOR.
 */
337 #define VTOFH(vp) (RTOFH(VTOR(vp)))
338 #define RTOFH(rp) ((fhandle_t *)(&(rp)->r_fh.fh_buf))
339 #define VTOFH3(vp) (RTOFH3(VTOR(vp)))
340 #define RTOFH3(rp) ((nfs_fh3 *)(&(rp)->r_fh))
/*
 * nfs_async_* entry points. In each prototype the final parameter is a
 * pointer to a function whose parameter list repeats the arguments that
 * precede it (the callback given to nfs_async_inactive additionally takes
 * a caller_context_t *). nfs_async_readahead, nfs_async_putapage and
 * nfs_async_pageio return int; nfs_async_readdir, nfs_async_commit and
 * nfs_async_inactive return void.
 * NOTE(review): parameter names are not given here; see the definitions
 * for the meaning of the int and size_t arguments.
 */
343 extern int nfs_async_readahead(vnode_t
*, uoff_t
, caddr_t
,
344 struct seg
*, cred_t
*,
345 void (*)(vnode_t
*, uoff_t
,
346 caddr_t
, struct seg
*, cred_t
*));
347 extern int nfs_async_putapage(vnode_t
*, page_t
*, uoff_t
, size_t,
348 int, cred_t
*, int (*)(vnode_t
*, page_t
*,
349 uoff_t
, size_t, int, cred_t
*));
350 extern int nfs_async_pageio(vnode_t
*, page_t
*, uoff_t
, size_t,
351 int, cred_t
*, int (*)(vnode_t
*, page_t
*,
352 uoff_t
, size_t, int, cred_t
*));
353 extern void nfs_async_readdir(vnode_t
*, rddir_cache
*,
354 cred_t
*, int (*)(vnode_t
*,
355 rddir_cache
*, cred_t
*));
356 extern void nfs_async_commit(vnode_t
*, page_t
*, offset3
, count3
,
357 cred_t
*, void (*)(vnode_t
*, page_t
*,
358 offset3
, count3
, cred_t
*));
359 extern void nfs_async_inactive(vnode_t
*, cred_t
*, void (*)(vnode_t
*,
360 cred_t
*, caller_context_t
*));
/*
 * Page-level routines defined elsewhere. writerp operates on an rnode
 * and a struct uio; it is the routine named in the comment on the
 * rnode's r_modaddr field ("address for page in writerp").
 * nfs_putpages and nfs_invalidate_pages each take a vnode, a uoff_t and
 * a credential.
 * NOTE(review): parameter names are omitted, so check the definitions
 * for what the int and size_t arguments mean.
 */
361 extern int writerp(rnode_t
*, caddr_t
, int, struct uio
*, int);
362 extern int nfs_putpages(vnode_t
*, uoff_t
, size_t, int, cred_t
*);
363 extern void nfs_invalidate_pages(vnode_t
*, uoff_t
, cred_t
*);
/*
 * rfs2call and rfs3call have the same parameter list except for the
 * status pointer: enum nfsstat * for rfs2call, nfsstat3 * for rfs3call.
 * Each takes a struct mntinfo *, an rpcproc_t, two (xdrproc_t, caddr_t)
 * pairs, a credential, an int *, the status pointer, an int and a
 * struct failinfo *.
 * NOTE(review): presumably the two xdrproc_t/caddr_t pairs are the
 * argument and result encoders/buffers -- confirm at the definitions.
 */
364 extern int rfs2call(struct mntinfo
*, rpcproc_t
, xdrproc_t
, caddr_t
,
365 xdrproc_t
, caddr_t
, cred_t
*, int *, enum nfsstat
*,
366 int, struct failinfo
*);
367 extern int rfs3call(struct mntinfo
*, rpcproc_t
, xdrproc_t
, caddr_t
,
368 xdrproc_t
, caddr_t
, cred_t
*, int *, nfsstat3
*,
369 int, struct failinfo
*);
/*
 * Takes a vnode and a vattr_t and returns nothing.
 * NOTE(review): behaviour is not visible in this header; see the
 * definition.
 */
370 extern void nfs_setswaplike(vnode_t
*, vattr_t
*);
/*
 * Three routines that each return a vnode_t *. They differ only in their
 * first two parameters -- (fhandle_t *, struct nfsfattr *) for
 * makenfsnode, (nfs_fh3 *, vattr_t *) for makenfs3node_va and
 * (nfs_fh3 *, fattr3 *) for makenfs3node -- and share the remaining
 * (struct vfs *, hrtime_t, cred_t *, char *, char *) parameters.
 * NOTE(review): the two char * parameters are unnamed here; confirm
 * their meaning at the definitions.
 */
371 extern vnode_t
*makenfsnode(fhandle_t
*, struct nfsfattr
*, struct vfs
*,
372 hrtime_t
, cred_t
*, char *, char *);
373 extern vnode_t
*makenfs3node_va(nfs_fh3
*, vattr_t
*, struct vfs
*, hrtime_t
,
374 cred_t
*, char *, char *);
375 extern vnode_t
*makenfs3node(nfs_fh3
*, fattr3
*, struct vfs
*, hrtime_t
,
376 cred_t
*, char *, char *);
/*
 * rp_addfree and rp_rmhash take a single rnode; check_rtable,
 * destroy_rtable and rflush take a struct vfs *.
 * NOTE(review): judging by the names these manage the rnode free list
 * and hash table (r_freef/r_freeb and r_hashf/r_hashb in the rnode) --
 * confirm against the definitions.
 */
377 extern void rp_addfree(rnode_t
*, cred_t
*);
378 extern void rp_rmhash(rnode_t
*);
379 extern int check_rtable(struct vfs
*);
380 extern void destroy_rtable(struct vfs
*, cred_t
*);
381 extern void rflush(struct vfs
*, cred_t
*);
/*
 * Routines operating on an rnode; nfs_access_check returns an
 * nfs_access_type_t.
 * NOTE(review): presumably these maintain the list hung off the rnode's
 * r_acache field ("list of access cache entries") -- confirm.
 */
382 extern nfs_access_type_t
nfs_access_check(rnode_t
*, uint32_t, cred_t
*);
383 extern void nfs_access_cache(rnode_t
*rp
, uint32_t, uint32_t, cred_t
*);
384 extern int nfs_access_purge_rp(rnode_t
*);
385 extern int nfs_putapage(vnode_t
*, page_t
*, uoff_t
*, size_t *,
387 extern int nfs3_putapage(vnode_t
*, page_t
*, uoff_t
*, size_t *,
/*
 * nfs_printfhandle takes a pointer to an nfs_fhandle; nfs_write_error
 * takes a vnode, an int and a credential. Neither returns a value.
 * NOTE(review): the int is presumably an error number -- confirm.
 */
389 extern void nfs_printfhandle(nfs_fhandle
*);
390 extern void nfs_write_error(vnode_t
*, int, cred_t
*);
/*
 * Routines for rddir_cache entries and their buffers. rddir_cache_alloc
 * returns a rddir_cache *; rddir_cache_hold and rddir_cache_rele take
 * one. rddir_cache_buf_alloc returns a char * and rddir_cache_buf_free
 * takes a pointer together with a size_t.
 * NOTE(review): presumably hold/rele adjust the entry's "count" field
 * (commented "reference count") -- confirm at the definitions.
 */
391 extern rddir_cache
*rddir_cache_alloc(int);
392 extern void rddir_cache_hold(rddir_cache
*);
393 extern void rddir_cache_rele(rddir_cache
*);
395 extern char *rddir_cache_buf_alloc(size_t, int);
396 extern void rddir_cache_buf_free(void *, size_t);
/*
 * Operations on nfs_rwlock_t, the type of the rnode's r_rwlock and
 * r_lkserlock fields. Every routine takes the lock as its first
 * argument; the enter, tryenter and lock_held routines also take a
 * krw_t.
 * NOTE(review): the third (int) argument of nfs_rw_enter_sig presumably
 * selects whether the wait is interruptible -- confirm.
 */
398 extern int nfs_rw_enter_sig(nfs_rwlock_t
*, krw_t
, int);
399 extern int nfs_rw_tryenter(nfs_rwlock_t
*, krw_t
);
400 extern void nfs_rw_exit(nfs_rwlock_t
*);
401 extern int nfs_rw_lock_held(nfs_rwlock_t
*, krw_t
);
402 extern void nfs_rw_init(nfs_rwlock_t
*, char *, krw_type_t
, void *);
403 extern void nfs_rw_destroy(nfs_rwlock_t
*);
/*
 * Miscellaneous prototypes. nfs3_rddir_compar and nfs_rddir_compar each
 * take two const void * arguments and return int. nfs_zone returns a
 * struct zone * and nfs_zoneid returns a zoneid_t; neither takes
 * arguments.
 * NOTE(review): the *_rddir_compar routines are presumably the
 * comparison callbacks for the r_dir AVL tree -- confirm.
 */
404 extern int nfs_directio(vnode_t
*, int, cred_t
*);
405 extern int nfs3_rddir_compar(const void *, const void *);
406 extern int nfs_rddir_compar(const void *, const void *);
407 extern struct zone
*nfs_zone(void);
408 extern zoneid_t
nfs_zoneid(void);
416 #endif /* _NFS_RNODE_H */