4 * Copyright (c) 1997-2009 Erez Zadok
5 * Copyright (c) 1990 Jan-Simon Pendry
6 * Copyright (c) 1990 Imperial College of Science, Technology & Medicine
7 * Copyright (c) 1990 The Regents of the University of California.
10 * This code is derived from software contributed to Berkeley by
11 * Jan-Simon Pendry at Imperial College, London.
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. All advertising materials mentioning features or use of this software
22 * must display the following acknowledgment:
23 * This product includes software developed by the University of
24 * California, Berkeley and its contributors.
25 * 4. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
42 * File: am-utils/amd/nfs_subr.c
48 #endif /* HAVE_CONFIG_H */
53 * Convert from UN*X to NFS error code.
54 * Some systems like Linux define their own (see
55 * conf/mount/mount_linux.h).
58 # define nfs_error(e) ((nfsstat)(e))
59 #endif /* nfs_error */
62 * File Handle structure
64 * This is interpreted by indexing the exported array
65 * by fhh_id (for old-style filehandles), or by retrieving
66 * the node name from fhh_path (for new-style filehandles).
68 * The whole structure is mapped onto a standard fhandle_t
72 u_int fhh_gen
; /* generation number */
75 int fhh_type
; /* old or new am_fh */
76 pid_t fhh_pid
; /* process id */
77 int fhh_id
; /* map id */
79 char fhh_path
[NFS_FHSIZE
-sizeof(u_int
)]; /* path to am_node */
84 /* forward declarations */
85 /* converting am-filehandles to mount-points */
86 static am_node
*fh_to_mp3(am_nfs_fh
*fhp
, int *rp
, int vop
);
87 static am_node
*fh_to_mp(am_nfs_fh
*fhp
);
88 static void count_map_entries(const am_node
*mp
, u_int
*out_blocks
, u_int
*out_bfree
, u_int
*out_bavail
);
92 do_readlink(am_node
*mp
, int *error_return
)
97 * If there is a readlink method then use it,
98 * otherwise if a link exists use that,
99 * otherwise use the mount point.
101 if (mp
->am_mnt
->mf_ops
->readlink
) {
103 mp
= (*mp
->am_mnt
->mf_ops
->readlink
) (mp
, &retry
);
105 *error_return
= retry
;
108 /* reschedule_timeout_mp(); */
114 ln
= mp
->am_mnt
->mf_mount
;
122 nfsproc_null_2_svc(voidp argp
, struct svc_req
*rqstp
)
131 nfsproc_getattr_2_svc(am_nfs_fh
*argp
, struct svc_req
*rqstp
)
133 static nfsattrstat res
;
136 time_t now
= clocktime(NULL
);
138 if (amuDebug(D_TRACE
))
139 plog(XLOG_DEBUG
, "getattr:");
141 mp
= fh_to_mp3(argp
, &retry
, VLOOK_CREATE
);
143 if (amuDebug(D_TRACE
))
144 plog(XLOG_DEBUG
, "\tretry=%d", retry
);
150 res
.ns_status
= nfs_error(retry
);
155 if (amuDebug(D_TRACE
))
156 plog(XLOG_DEBUG
, "\tstat(%s), size = %d, mtime=%ld.%ld",
158 (int) res
.ns_u
.ns_attr_u
.na_size
,
159 (long) res
.ns_u
.ns_attr_u
.na_mtime
.nt_seconds
,
160 (long) res
.ns_u
.ns_attr_u
.na_mtime
.nt_useconds
);
162 /* Delay unmount of what was looked up */
163 if (mp
->am_timeo_w
< 4 * gopt
.am_timeo_w
)
164 mp
->am_timeo_w
+= gopt
.am_timeo_w
;
165 mp
->am_ttl
= now
+ mp
->am_timeo_w
;
167 mp
->am_stats
.s_getattr
++;
173 nfsproc_setattr_2_svc(nfssattrargs
*argp
, struct svc_req
*rqstp
)
175 static nfsattrstat res
;
177 if (!fh_to_mp(&argp
->sag_fhandle
))
178 res
.ns_status
= nfs_error(ESTALE
);
180 res
.ns_status
= nfs_error(EROFS
);
187 nfsproc_root_2_svc(voidp argp
, struct svc_req
*rqstp
)
196 nfsproc_lookup_2_svc(nfsdiropargs
*argp
, struct svc_req
*rqstp
)
198 static nfsdiropres res
;
204 if (amuDebug(D_TRACE
))
205 plog(XLOG_DEBUG
, "lookup:");
207 /* finally, find the effective uid/gid from RPC request */
208 if (getcreds(rqstp
, &uid
, &gid
, nfsxprt
) < 0)
209 plog(XLOG_ERROR
, "cannot get uid/gid from RPC credentials");
210 xsnprintf(opt_uid
, sizeof(uid_str
), "%d", (int) uid
);
211 xsnprintf(opt_gid
, sizeof(gid_str
), "%d", (int) gid
);
213 mp
= fh_to_mp3(&argp
->da_fhandle
, &retry
, VLOOK_CREATE
);
219 res
.dr_status
= nfs_error(retry
);
223 if (amuDebug(D_TRACE
))
224 plog(XLOG_DEBUG
, "\tlookup(%s, %s)", mp
->am_path
, argp
->da_name
);
225 ap
= mp
->am_mnt
->mf_ops
->lookup_child(mp
, argp
->da_name
, &error
, VLOOK_CREATE
);
227 ap
= mp
->am_mnt
->mf_ops
->mount_child(ap
, &error
);
233 res
.dr_status
= nfs_error(error
);
236 * XXX: EXPERIMENTAL! Delay unmount of what was looked up. This
237 * should reduce the chance of a race condition between unmounting an
238 * entry synchronously, and re-mounting it asynchronously.
240 if (ap
->am_ttl
< mp
->am_ttl
)
241 ap
->am_ttl
= mp
->am_ttl
;
242 mp_to_fh(ap
, &res
.dr_u
.dr_drok_u
.drok_fhandle
);
243 res
.dr_u
.dr_drok_u
.drok_attributes
= ap
->am_fattr
;
244 res
.dr_status
= NFS_OK
;
246 mp
->am_stats
.s_lookup
++;
247 /* reschedule_timeout_mp(); */
255 nfs_quick_reply(am_node
*mp
, int error
)
257 SVCXPRT
*transp
= mp
->am_transp
;
259 xdrproc_t xdr_result
= (xdrproc_t
) xdr_diropres
;
262 * If there's a transp structure then we can reply to the client's
263 * nfs lookup request.
268 * Construct a valid reply to a lookup request. Same
269 * code as in nfsproc_lookup_2_svc() above.
271 mp_to_fh(mp
, &res
.dr_u
.dr_drok_u
.drok_fhandle
);
272 res
.dr_u
.dr_drok_u
.drok_attributes
= mp
->am_fattr
;
273 res
.dr_status
= NFS_OK
;
276 * Return the error that was passed to us.
278 res
.dr_status
= nfs_error(error
);
283 if (!svc_sendreply(transp
, (XDRPROC_T_TYPE
) xdr_result
, (SVC_IN_ARG_TYPE
) & res
))
284 svcerr_systemerr(transp
);
287 * Free up transp. It's only used for one reply.
289 XFREE(mp
->am_transp
);
290 dlog("Quick reply sent for %s", mp
->am_mnt
->mf_mount
);
296 nfsproc_readlink_2_svc(am_nfs_fh
*argp
, struct svc_req
*rqstp
)
298 static nfsreadlinkres res
;
302 if (amuDebug(D_TRACE
))
303 plog(XLOG_DEBUG
, "readlink:");
305 mp
= fh_to_mp3(argp
, &retry
, VLOOK_CREATE
);
312 res
.rlr_status
= nfs_error(retry
);
314 char *ln
= do_readlink(mp
, &retry
);
317 res
.rlr_status
= NFS_OK
;
318 if (amuDebug(D_TRACE
) && ln
)
319 plog(XLOG_DEBUG
, "\treadlink(%s) = %s", mp
->am_path
, ln
);
320 res
.rlr_u
.rlr_data_u
= ln
;
321 mp
->am_stats
.s_readlink
++;
329 nfsproc_read_2_svc(nfsreadargs
*argp
, struct svc_req
*rqstp
)
331 static nfsreadres res
;
333 memset((char *) &res
, 0, sizeof(res
));
334 res
.rr_status
= nfs_error(EACCES
);
341 nfsproc_writecache_2_svc(voidp argp
, struct svc_req
*rqstp
)
350 nfsproc_write_2_svc(nfswriteargs
*argp
, struct svc_req
*rqstp
)
352 static nfsattrstat res
;
354 if (!fh_to_mp(&argp
->wra_fhandle
))
355 res
.ns_status
= nfs_error(ESTALE
);
357 res
.ns_status
= nfs_error(EROFS
);
364 nfsproc_create_2_svc(nfscreateargs
*argp
, struct svc_req
*rqstp
)
366 static nfsdiropres res
;
368 if (!fh_to_mp(&argp
->ca_where
.da_fhandle
))
369 res
.dr_status
= nfs_error(ESTALE
);
371 res
.dr_status
= nfs_error(EROFS
);
378 unlink_or_rmdir(nfsdiropargs
*argp
, struct svc_req
*rqstp
, int unlinkp
)
383 am_node
*mp
= fh_to_mp3(&argp
->da_fhandle
, &retry
, VLOOK_DELETE
);
389 res
= nfs_error(retry
);
393 if (mp
->am_fattr
.na_type
!= NFDIR
) {
394 res
= nfs_error(ENOTDIR
);
398 if (amuDebug(D_TRACE
))
399 plog(XLOG_DEBUG
, "\tremove(%s, %s)", mp
->am_path
, argp
->da_name
);
401 mp
= mp
->am_mnt
->mf_ops
->lookup_child(mp
, argp
->da_name
, &retry
, VLOOK_DELETE
);
409 * Usual NFS workaround...
411 else if (retry
== ENOENT
)
413 res
= nfs_error(retry
);
415 forcibly_timeout_mp(mp
);
425 nfsproc_remove_2_svc(nfsdiropargs
*argp
, struct svc_req
*rqstp
)
427 return unlink_or_rmdir(argp
, rqstp
, TRUE
);
432 nfsproc_rename_2_svc(nfsrenameargs
*argp
, struct svc_req
*rqstp
)
436 if (!fh_to_mp(&argp
->rna_from
.da_fhandle
) || !fh_to_mp(&argp
->rna_to
.da_fhandle
))
437 res
= nfs_error(ESTALE
);
439 * If the kernel is doing clever things with referenced files
440 * then let it pretend...
442 else if (NSTREQ(argp
->rna_to
.da_name
, ".nfs", 4))
445 * otherwise a failure
448 res
= nfs_error(EROFS
);
455 nfsproc_link_2_svc(nfslinkargs
*argp
, struct svc_req
*rqstp
)
459 if (!fh_to_mp(&argp
->la_fhandle
) || !fh_to_mp(&argp
->la_to
.da_fhandle
))
460 res
= nfs_error(ESTALE
);
462 res
= nfs_error(EROFS
);
469 nfsproc_symlink_2_svc(nfssymlinkargs
*argp
, struct svc_req
*rqstp
)
473 if (!fh_to_mp(&argp
->sla_from
.da_fhandle
))
474 res
= nfs_error(ESTALE
);
476 res
= nfs_error(EROFS
);
483 nfsproc_mkdir_2_svc(nfscreateargs
*argp
, struct svc_req
*rqstp
)
485 static nfsdiropres res
;
487 if (!fh_to_mp(&argp
->ca_where
.da_fhandle
))
488 res
.dr_status
= nfs_error(ESTALE
);
490 res
.dr_status
= nfs_error(EROFS
);
497 nfsproc_rmdir_2_svc(nfsdiropargs
*argp
, struct svc_req
*rqstp
)
499 return unlink_or_rmdir(argp
, rqstp
, FALSE
);
504 nfsproc_readdir_2_svc(nfsreaddirargs
*argp
, struct svc_req
*rqstp
)
506 static nfsreaddirres res
;
507 static nfsentry e_res
[MAX_READDIR_ENTRIES
];
511 if (amuDebug(D_TRACE
))
512 plog(XLOG_DEBUG
, "readdir:");
514 mp
= fh_to_mp3(&argp
->rda_fhandle
, &retry
, VLOOK_CREATE
);
520 res
.rdr_status
= nfs_error(retry
);
522 if (amuDebug(D_TRACE
))
523 plog(XLOG_DEBUG
, "\treaddir(%s)", mp
->am_path
);
524 res
.rdr_status
= nfs_error((*mp
->am_mnt
->mf_ops
->readdir
)
525 (mp
, argp
->rda_cookie
,
526 &res
.rdr_u
.rdr_reply_u
, e_res
, argp
->rda_count
));
527 mp
->am_stats
.s_readdir
++;
535 nfsproc_statfs_2_svc(am_nfs_fh
*argp
, struct svc_req
*rqstp
)
537 static nfsstatfsres res
;
542 if (amuDebug(D_TRACE
))
543 plog(XLOG_DEBUG
, "statfs:");
545 mp
= fh_to_mp3(argp
, &retry
, VLOOK_CREATE
);
551 res
.sfr_status
= nfs_error(retry
);
554 if (amuDebug(D_TRACE
))
555 plog(XLOG_DEBUG
, "\tstat_fs(%s)", mp
->am_path
);
558 * just return faked up file system information
560 fp
= &res
.sfr_u
.sfr_reply_u
;
562 fp
->sfrok_tsize
= 1024;
563 fp
->sfrok_bsize
= 1024;
565 /* check if map is browsable and show_statfs_entries=yes */
566 if ((gopt
.flags
& CFM_SHOW_STATFS_ENTRIES
) &&
567 mp
->am_mnt
&& mp
->am_mnt
->mf_mopts
) {
568 mnt
.mnt_opts
= mp
->am_mnt
->mf_mopts
;
569 if (amu_hasmntopt(&mnt
, "browsable")) {
570 count_map_entries(mp
,
576 fp
->sfrok_blocks
= 0; /* set to 1 if you don't want empty automounts */
578 fp
->sfrok_bavail
= 0;
581 res
.sfr_status
= NFS_OK
;
582 mp
->am_stats
.s_statfs
++;
590 * count how many total entries there are in a map, and how many
591 * of them are in use.
594 count_map_entries(const am_node
*mp
, u_int
*out_blocks
, u_int
*out_bfree
, u_int
*out_bavail
)
596 u_int blocks
, bfree
, bavail
, i
;
601 blocks
= bfree
= bavail
= 0;
607 mmp
= (mnt_map
*) mf
->mf_private
;
611 /* iterate over keys */
612 for (i
= 0; i
< NKVHASH
; i
++) {
613 for (k
= mmp
->kvhash
[i
]; k
; k
= k
->next
) {
618 * XXX: Need to count how many are actively in use and recompute
619 * bfree and bavail based on it.
625 *out_blocks
= blocks
;
627 *out_bavail
= bavail
;
632 * Convert from file handle to automount node.
635 fh_to_mp3(am_nfs_fh
*fhp
, int *rp
, int vop
)
637 struct am_fh
*fp
= (struct am_fh
*) fhp
;
640 if (fp
->u
.s
.fhh_type
!= 0) {
641 /* New filehandle type */
642 int len
= sizeof(*fhp
) - sizeof(fp
->fhh_gen
);
643 char *path
= xmalloc(len
+1);
645 * Because fhp is treated as a filehandle we use memcpy
646 * instead of xstrlcpy.
648 memcpy(path
, (char *) fp
->u
.fhh_path
, len
);
650 /* dlog("fh_to_mp3: new filehandle: %s", path); */
652 ap
= path_to_exported_ap(path
);
655 /* dlog("fh_to_mp3: old filehandle: %d", fp->u.s.fhh_id); */
657 * Check process id matches
658 * If it doesn't then it is probably
659 * from an old kernel-cached filehandle
660 * which is now out of date.
662 if (fp
->u
.s
.fhh_pid
!= get_server_pid()) {
663 dlog("fh_to_mp3: wrong pid %ld != my pid %ld",
664 (long) fp
->u
.s
.fhh_pid
, get_server_pid());
669 * Get hold of the supposed mount node
671 ap
= get_exported_ap(fp
->u
.s
.fhh_id
);
675 * Check the generation number in the node
676 * matches the one from the kernel. If not
677 * then the old node has been timed out and
678 * a new one allocated.
680 if (ap
!= NULL
&& ap
->am_gen
!= fp
->fhh_gen
)
684 * If it doesn't exist then drop the request
691 * If the node is hung then locate a new node
692 * for it. This implements the replicated filesystem
695 if (ap
->am_mnt
&& FSRV_ISDOWN(ap
->am_mnt
->mf_server
) && ap
->am_parent
) {
697 am_node
*orig_ap
= ap
;
699 dlog("fh_to_mp3: %s (%s) is hung: lookup alternative file server",
700 orig_ap
->am_path
, orig_ap
->am_mnt
->mf_info
);
703 * Update modify time of parent node.
704 * With any luck the kernel will re-stat
705 * the child node and get new information.
707 clocktime(&orig_ap
->am_fattr
.na_mtime
);
710 * Call the parent's lookup routine for an object
711 * with the same name. This may return -1 in error
712 * if a mount is in progress. In any case, if no
713 * mount node is returned the error code is propagated
716 if (vop
== VLOOK_CREATE
) {
717 ap
= orig_ap
->am_parent
->am_mnt
->mf_ops
->lookup_child(orig_ap
->am_parent
, orig_ap
->am_name
, &error
, vop
);
719 ap
= orig_ap
->am_parent
->am_mnt
->mf_ops
->mount_child(ap
, &error
);
725 if (error
< 0 && amd_state
== Finishing
)
732 * Update last access to original node. This
733 * avoids timing it out and so sending ESTALE
734 * back to the kernel.
735 * XXX - Not sure we need this anymore (jsp, 90/10/6).
743 * Disallow references to objects being unmounted, unless
744 * they are automount points.
746 if (ap
->am_mnt
&& (ap
->am_mnt
->mf_flags
& MFF_UNMOUNTING
) &&
747 !(ap
->am_flags
& AMF_ROOT
)) {
748 if (amd_state
== Finishing
)
757 if (!ap
|| !ap
->am_mnt
) {
759 * If we are shutting down then it is likely
760 * that this node has disappeared because of
761 * a fast timeout. To avoid things thrashing
762 * just pretend it doesn't exist at all. If
763 * ESTALE is returned, some NFS clients just
764 * keep retrying (stupid or what - if it's
765 * stale now, what's it going to be in 5 minutes?)
767 if (amd_state
== Finishing
)
780 fh_to_mp(am_nfs_fh
*fhp
)
784 return fh_to_mp3(fhp
, &dummy
, VLOOK_CREATE
);
789 * Convert from automount node to file handle.
792 mp_to_fh(am_node
*mp
, am_nfs_fh
*fhp
)
795 struct am_fh
*fp
= (struct am_fh
*) fhp
;
797 memset((char *) fhp
, 0, sizeof(am_nfs_fh
));
799 /* Store the generation number */
800 fp
->fhh_gen
= mp
->am_gen
;
802 pathlen
= strlen(mp
->am_path
);
803 if (pathlen
<= sizeof(*fhp
) - sizeof(fp
->fhh_gen
)) {
804 /* dlog("mp_to_fh: new filehandle: %s", mp->am_path); */
807 * Because fhp is treated as a filehandle we use memcpy instead of
810 memcpy(fp
->u
.fhh_path
, mp
->am_path
, pathlen
); /* making a filehandle */
813 * Take the process id
815 fp
->u
.s
.fhh_pid
= get_server_pid();
820 fp
->u
.s
.fhh_id
= mp
->am_mapno
;
823 * ... and the generation number (previously stored)
824 * to make a "unique" triple that will never
825 * be reallocated except across reboots (which doesn't matter)
826 * or if we are unlucky enough to be given the same
827 * pid as a previous amd (very unlikely).
829 /* dlog("mp_to_fh: old filehandle: %d", fp->u.s.fhh_id); */