dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / nfs / nfs4_subr.c
blob59526761521ffd9a94601a28f97d1dbcd3d32b22
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
31 * All Rights Reserved
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/systm.h>
37 #include <sys/cmn_err.h>
38 #include <sys/vtrace.h>
39 #include <sys/session.h>
40 #include <sys/thread.h>
41 #include <sys/dnlc.h>
42 #include <sys/cred.h>
43 #include <sys/priv.h>
44 #include <sys/list.h>
45 #include <sys/sdt.h>
46 #include <sys/policy.h>
48 #include <rpc/types.h>
49 #include <rpc/xdr.h>
51 #include <nfs/nfs.h>
53 #include <nfs/nfs_clnt.h>
55 #include <nfs/nfs4.h>
56 #include <nfs/rnode4.h>
57 #include <nfs/nfs4_clnt.h>
60 * client side statistics
62 static const struct clstat4 clstat4_tmpl = {
63 { "calls", KSTAT_DATA_UINT64 },
64 { "badcalls", KSTAT_DATA_UINT64 },
65 { "referrals", KSTAT_DATA_UINT64 },
66 { "referlinks", KSTAT_DATA_UINT64 },
67 { "clgets", KSTAT_DATA_UINT64 },
68 { "cltoomany", KSTAT_DATA_UINT64 },
69 #ifdef DEBUG
70 { "clalloc", KSTAT_DATA_UINT64 },
71 { "noresponse", KSTAT_DATA_UINT64 },
72 { "failover", KSTAT_DATA_UINT64 },
73 { "remap", KSTAT_DATA_UINT64 },
74 #endif
#ifdef DEBUG
/*
 * DEBUG-only global counters (name and kstat data type of each).
 */
struct clstat4_debug clstat4_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif
93 * We keep a global list of per-zone client data, so we can clean up all zones
94 * if we get low on memory.
96 static list_t nfs4_clnt_list;
97 static kmutex_t nfs4_clnt_list_lock;
98 zone_key_t nfs4clnt_zone_key;
100 static struct kmem_cache *chtab4_cache;
102 #ifdef DEBUG
103 static int nfs4_rfscall_debug;
104 static int nfs4_try_failover_any;
105 int nfs4_utf8_debug = 0;
106 #endif
109 * NFSv4 readdir cache implementation
111 typedef struct rddir4_cache_impl {
112 rddir4_cache rc; /* readdir cache element */
113 kmutex_t lock; /* lock protects count */
114 uint_t count; /* reference count */
115 avl_node_t tree; /* AVL tree link */
116 } rddir4_cache_impl;
118 static int rddir4_cache_compar(const void *, const void *);
119 static void rddir4_cache_free(rddir4_cache_impl *);
120 static rddir4_cache *rddir4_cache_alloc(int);
121 static void rddir4_cache_hold(rddir4_cache *);
122 static int try_failover(enum clnt_stat);
124 static int nfs4_readdir_cache_hits = 0;
125 static int nfs4_readdir_cache_waits = 0;
126 static int nfs4_readdir_cache_misses = 0;
/*
 * Shared nfs4 functions
 */
133 * Copy an nfs_fh4. The destination storage (to->nfs_fh4_val) must already
134 * be allocated.
137 void
138 nfs_fh4_copy(nfs_fh4 *from, nfs_fh4 *to)
140 to->nfs_fh4_len = from->nfs_fh4_len;
141 bcopy(from->nfs_fh4_val, to->nfs_fh4_val, to->nfs_fh4_len);
145 * nfs4cmpfh - compare 2 filehandles.
146 * Returns 0 if the two nfsv4 filehandles are the same, -1 if the first is
147 * "less" than the second, +1 if the first is "greater" than the second.
151 nfs4cmpfh(const nfs_fh4 *fh4p1, const nfs_fh4 *fh4p2)
153 const char *c1, *c2;
155 if (fh4p1->nfs_fh4_len < fh4p2->nfs_fh4_len)
156 return (-1);
157 if (fh4p1->nfs_fh4_len > fh4p2->nfs_fh4_len)
158 return (1);
159 for (c1 = fh4p1->nfs_fh4_val, c2 = fh4p2->nfs_fh4_val;
160 c1 < fh4p1->nfs_fh4_val + fh4p1->nfs_fh4_len;
161 c1++, c2++) {
162 if (*c1 < *c2)
163 return (-1);
164 if (*c1 > *c2)
165 return (1);
168 return (0);
172 * Compare two v4 filehandles. Return zero if they're the same, non-zero
173 * if they're not. Like nfs4cmpfh(), but different filehandle
174 * representation, and doesn't provide information about greater than or
175 * less than.
179 nfs4cmpfhandle(nfs4_fhandle_t *fh1, nfs4_fhandle_t *fh2)
181 if (fh1->fh_len == fh2->fh_len)
182 return (bcmp(fh1->fh_buf, fh2->fh_buf, fh1->fh_len));
184 return (1);
188 stateid4_cmp(stateid4 *s1, stateid4 *s2)
190 if (bcmp(s1, s2, sizeof (stateid4)) == 0)
191 return (1);
192 else
193 return (0);
196 nfsstat4
197 puterrno4(int error)
199 switch (error) {
200 case 0:
201 return (NFS4_OK);
202 case EPERM:
203 return (NFS4ERR_PERM);
204 case ENOENT:
205 return (NFS4ERR_NOENT);
206 case EINTR:
207 return (NFS4ERR_IO);
208 case EIO:
209 return (NFS4ERR_IO);
210 case ENXIO:
211 return (NFS4ERR_NXIO);
212 case ENOMEM:
213 return (NFS4ERR_RESOURCE);
214 case EACCES:
215 return (NFS4ERR_ACCESS);
216 case EBUSY:
217 return (NFS4ERR_IO);
218 case EEXIST:
219 return (NFS4ERR_EXIST);
220 case EXDEV:
221 return (NFS4ERR_XDEV);
222 case ENODEV:
223 return (NFS4ERR_IO);
224 case ENOTDIR:
225 return (NFS4ERR_NOTDIR);
226 case EISDIR:
227 return (NFS4ERR_ISDIR);
228 case EINVAL:
229 return (NFS4ERR_INVAL);
230 case EMFILE:
231 return (NFS4ERR_RESOURCE);
232 case EFBIG:
233 return (NFS4ERR_FBIG);
234 case ENOSPC:
235 return (NFS4ERR_NOSPC);
236 case EROFS:
237 return (NFS4ERR_ROFS);
238 case EMLINK:
239 return (NFS4ERR_MLINK);
240 case EDEADLK:
241 return (NFS4ERR_DEADLOCK);
242 case ENOLCK:
243 return (NFS4ERR_DENIED);
244 case EREMOTE:
245 return (NFS4ERR_SERVERFAULT);
246 case ENOTSUP:
247 return (NFS4ERR_NOTSUPP);
248 case EDQUOT:
249 return (NFS4ERR_DQUOT);
250 case ENAMETOOLONG:
251 return (NFS4ERR_NAMETOOLONG);
252 case EOVERFLOW:
253 return (NFS4ERR_INVAL);
254 case ENOSYS:
255 return (NFS4ERR_NOTSUPP);
256 case ENOTEMPTY:
257 return (NFS4ERR_NOTEMPTY);
258 case EOPNOTSUPP:
259 return (NFS4ERR_NOTSUPP);
260 case ESTALE:
261 return (NFS4ERR_STALE);
262 case EAGAIN:
263 if (curthread->t_flag & T_WOULDBLOCK) {
264 curthread->t_flag &= ~T_WOULDBLOCK;
265 return (NFS4ERR_DELAY);
267 return (NFS4ERR_LOCKED);
268 default:
269 return ((enum nfsstat4)error);
274 geterrno4(enum nfsstat4 status)
276 switch (status) {
277 case NFS4_OK:
278 return (0);
279 case NFS4ERR_PERM:
280 return (EPERM);
281 case NFS4ERR_NOENT:
282 return (ENOENT);
283 case NFS4ERR_IO:
284 return (EIO);
285 case NFS4ERR_NXIO:
286 return (ENXIO);
287 case NFS4ERR_ACCESS:
288 return (EACCES);
289 case NFS4ERR_EXIST:
290 return (EEXIST);
291 case NFS4ERR_XDEV:
292 return (EXDEV);
293 case NFS4ERR_NOTDIR:
294 return (ENOTDIR);
295 case NFS4ERR_ISDIR:
296 return (EISDIR);
297 case NFS4ERR_INVAL:
298 return (EINVAL);
299 case NFS4ERR_FBIG:
300 return (EFBIG);
301 case NFS4ERR_NOSPC:
302 return (ENOSPC);
303 case NFS4ERR_ROFS:
304 return (EROFS);
305 case NFS4ERR_MLINK:
306 return (EMLINK);
307 case NFS4ERR_NAMETOOLONG:
308 return (ENAMETOOLONG);
309 case NFS4ERR_NOTEMPTY:
310 return (ENOTEMPTY);
311 case NFS4ERR_DQUOT:
312 return (EDQUOT);
313 case NFS4ERR_STALE:
314 return (ESTALE);
315 case NFS4ERR_BADHANDLE:
316 return (ESTALE);
317 case NFS4ERR_BAD_COOKIE:
318 return (EINVAL);
319 case NFS4ERR_NOTSUPP:
320 return (EOPNOTSUPP);
321 case NFS4ERR_TOOSMALL:
322 return (EINVAL);
323 case NFS4ERR_SERVERFAULT:
324 return (EIO);
325 case NFS4ERR_BADTYPE:
326 return (EINVAL);
327 case NFS4ERR_DELAY:
328 return (ENXIO);
329 case NFS4ERR_SAME:
330 return (EPROTO);
331 case NFS4ERR_DENIED:
332 return (ENOLCK);
333 case NFS4ERR_EXPIRED:
334 return (EPROTO);
335 case NFS4ERR_LOCKED:
336 return (EACCES);
337 case NFS4ERR_GRACE:
338 return (EAGAIN);
339 case NFS4ERR_FHEXPIRED: /* if got here, failed to get a new fh */
340 return (ESTALE);
341 case NFS4ERR_SHARE_DENIED:
342 return (EACCES);
343 case NFS4ERR_WRONGSEC:
344 return (EPERM);
345 case NFS4ERR_CLID_INUSE:
346 return (EAGAIN);
347 case NFS4ERR_RESOURCE:
348 return (EAGAIN);
349 case NFS4ERR_MOVED:
350 return (EPROTO);
351 case NFS4ERR_NOFILEHANDLE:
352 return (EIO);
353 case NFS4ERR_MINOR_VERS_MISMATCH:
354 return (ENOTSUP);
355 case NFS4ERR_STALE_CLIENTID:
356 return (EIO);
357 case NFS4ERR_STALE_STATEID:
358 return (EIO);
359 case NFS4ERR_OLD_STATEID:
360 return (EIO);
361 case NFS4ERR_BAD_STATEID:
362 return (EIO);
363 case NFS4ERR_BAD_SEQID:
364 return (EIO);
365 case NFS4ERR_NOT_SAME:
366 return (EPROTO);
367 case NFS4ERR_LOCK_RANGE:
368 return (EPROTO);
369 case NFS4ERR_SYMLINK:
370 return (EPROTO);
371 case NFS4ERR_RESTOREFH:
372 return (EPROTO);
373 case NFS4ERR_LEASE_MOVED:
374 return (EPROTO);
375 case NFS4ERR_ATTRNOTSUPP:
376 return (ENOTSUP);
377 case NFS4ERR_NO_GRACE:
378 return (EPROTO);
379 case NFS4ERR_RECLAIM_BAD:
380 return (EPROTO);
381 case NFS4ERR_RECLAIM_CONFLICT:
382 return (EPROTO);
383 case NFS4ERR_BADXDR:
384 return (EINVAL);
385 case NFS4ERR_LOCKS_HELD:
386 return (EIO);
387 case NFS4ERR_OPENMODE:
388 return (EACCES);
389 case NFS4ERR_BADOWNER:
391 * Client and server are in different DNS domains
392 * and the NFSMAPID_DOMAIN in /etc/default/nfs
393 * doesn't match. No good answer here. Return
394 * EACCESS, which translates to "permission denied".
396 return (EACCES);
397 case NFS4ERR_BADCHAR:
398 return (EINVAL);
399 case NFS4ERR_BADNAME:
400 return (EINVAL);
401 case NFS4ERR_BAD_RANGE:
402 return (EIO);
403 case NFS4ERR_LOCK_NOTSUPP:
404 return (ENOTSUP);
405 case NFS4ERR_OP_ILLEGAL:
406 return (EINVAL);
407 case NFS4ERR_DEADLOCK:
408 return (EDEADLK);
409 case NFS4ERR_FILE_OPEN:
410 return (EACCES);
411 case NFS4ERR_ADMIN_REVOKED:
412 return (EPROTO);
413 case NFS4ERR_CB_PATH_DOWN:
414 return (EPROTO);
415 default:
416 #ifdef DEBUG
417 zcmn_err(getzoneid(), CE_WARN, "geterrno4: got status %d",
418 status);
419 #endif
420 return ((int)status);
424 void
425 nfs4_log_badowner(mntinfo4_t *mi, nfs_opnum4 op)
427 nfs4_server_t *server;
430 * Return if already printed/queued a msg
431 * for this mount point.
433 if (mi->mi_flags & MI4_BADOWNER_DEBUG)
434 return;
436 * Happens once per client <-> server pair.
438 if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
439 mi->mi_flags & MI4_INT))
440 return;
442 server = find_nfs4_server(mi);
443 if (server == NULL) {
444 nfs_rw_exit(&mi->mi_recovlock);
445 return;
448 if (!(server->s_flags & N4S_BADOWNER_DEBUG)) {
449 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
450 "!NFSMAPID_DOMAIN does not match"
451 " the server: %s domain.\n"
452 "Please check configuration",
453 mi->mi_curr_serv->sv_hostname);
454 server->s_flags |= N4S_BADOWNER_DEBUG;
456 mutex_exit(&server->s_lock);
457 nfs4_server_rele(server);
458 nfs_rw_exit(&mi->mi_recovlock);
461 * Happens once per mntinfo4_t.
462 * This error is deemed as one of the recovery facts "RF_BADOWNER",
463 * queue this in the mesg queue for this mount_info. This message
464 * is not printed, meaning its absent from id_to_dump_solo_fact()
465 * but its there for inspection if the queue is ever dumped/inspected.
467 mutex_enter(&mi->mi_lock);
468 if (!(mi->mi_flags & MI4_BADOWNER_DEBUG)) {
469 nfs4_queue_fact(RF_BADOWNER, mi, NFS4ERR_BADOWNER, 0, op,
470 FALSE, NULL, 0, NULL);
471 mi->mi_flags |= MI4_BADOWNER_DEBUG;
473 mutex_exit(&mi->mi_lock);
477 nfs4_time_ntov(nfstime4 *ntime, timestruc_t *vatime)
479 int64_t sec;
480 int32_t nsec;
483 * Here check that the nfsv4 time is valid for the system.
484 * nfsv4 time value is a signed 64-bit, and the system time
485 * may be either int64_t or int32_t (depends on the kernel),
486 * so if the kernel is 32-bit, the nfsv4 time value may not fit.
488 #ifndef _LP64
489 if (! NFS4_TIME_OK(ntime->seconds)) {
490 return (EOVERFLOW);
492 #endif
494 /* Invalid to specify 1 billion (or more) nsecs */
495 if (ntime->nseconds >= 1000000000)
496 return (EINVAL);
498 if (ntime->seconds < 0) {
499 sec = ntime->seconds + 1;
500 nsec = -1000000000 + ntime->nseconds;
501 } else {
502 sec = ntime->seconds;
503 nsec = ntime->nseconds;
506 vatime->tv_sec = sec;
507 vatime->tv_nsec = nsec;
509 return (0);
513 nfs4_time_vton(timestruc_t *vatime, nfstime4 *ntime)
515 int64_t sec;
516 uint32_t nsec;
519 * nfsv4 time value is a signed 64-bit, and the system time
520 * may be either int64_t or int32_t (depends on the kernel),
521 * so all system time values will fit.
523 if (vatime->tv_nsec >= 0) {
524 sec = vatime->tv_sec;
525 nsec = vatime->tv_nsec;
526 } else {
527 sec = vatime->tv_sec - 1;
528 nsec = 1000000000 + vatime->tv_nsec;
530 ntime->seconds = sec;
531 ntime->nseconds = nsec;
533 return (0);
537 * Converts a utf8 string to a valid null terminated filename string.
539 * XXX - Not actually translating the UTF-8 string as per RFC 2279.
540 * For now, just validate that the UTF-8 string off the wire
541 * does not have characters that will freak out UFS, and leave
542 * it at that.
544 char *
545 utf8_to_fn(utf8string *u8s, uint_t *lenp, char *s)
547 ASSERT(lenp != NULL);
549 if (u8s == NULL || u8s->utf8string_len <= 0 ||
550 u8s->utf8string_val == NULL)
551 return (NULL);
554 * Check for obvious illegal filename chars
556 if (utf8_strchr(u8s, '/') != NULL) {
557 #ifdef DEBUG
558 if (nfs4_utf8_debug) {
559 char *path;
560 int len = u8s->utf8string_len;
562 path = kmem_alloc(len + 1, KM_SLEEP);
563 bcopy(u8s->utf8string_val, path, len);
564 path[len] = '\0';
566 zcmn_err(getzoneid(), CE_WARN,
567 "Invalid UTF-8 filename: %s", path);
569 kmem_free(path, len + 1);
571 #endif
572 return (NULL);
575 return (utf8_to_str(u8s, lenp, s));
579 * Converts a utf8 string to a C string.
580 * kmem_allocs a new string if not supplied
582 char *
583 utf8_to_str(utf8string *str, uint_t *lenp, char *s)
585 char *sp;
586 char *u8p;
587 int len;
588 int i;
590 ASSERT(lenp != NULL);
592 if (str == NULL)
593 return (NULL);
595 u8p = str->utf8string_val;
596 len = str->utf8string_len;
597 if (len <= 0 || u8p == NULL) {
598 if (s)
599 *s = '\0';
600 return (NULL);
603 sp = s;
604 if (sp == NULL)
605 sp = kmem_alloc(len + 1, KM_SLEEP);
608 * At least check for embedded nulls
610 for (i = 0; i < len; i++) {
611 sp[i] = u8p[i];
612 if (u8p[i] == '\0') {
613 #ifdef DEBUG
614 zcmn_err(getzoneid(), CE_WARN,
615 "Embedded NULL in UTF-8 string");
616 #endif
617 if (s == NULL)
618 kmem_free(sp, len + 1);
619 return (NULL);
622 sp[len] = '\0';
623 *lenp = len + 1;
625 return (sp);
629 * str_to_utf8 - converts a null-terminated C string to a utf8 string
631 utf8string *
632 str_to_utf8(char *nm, utf8string *str)
634 int len;
636 if (str == NULL)
637 return (NULL);
639 if (nm == NULL || *nm == '\0') {
640 str->utf8string_len = 0;
641 str->utf8string_val = NULL;
644 len = strlen(nm);
646 str->utf8string_val = kmem_alloc(len, KM_SLEEP);
647 str->utf8string_len = len;
648 bcopy(nm, str->utf8string_val, len);
650 return (str);
653 utf8string *
654 utf8_copy(utf8string *src, utf8string *dest)
656 if (src == NULL)
657 return (NULL);
658 if (dest == NULL)
659 return (NULL);
661 if (src->utf8string_len > 0) {
662 dest->utf8string_val = kmem_alloc(src->utf8string_len,
663 KM_SLEEP);
664 bcopy(src->utf8string_val, dest->utf8string_val,
665 src->utf8string_len);
666 dest->utf8string_len = src->utf8string_len;
667 } else {
668 dest->utf8string_val = NULL;
669 dest->utf8string_len = 0;
672 return (dest);
676 utf8_compare(const utf8string *a, const utf8string *b)
678 int mlen, cmp;
679 int alen, blen;
680 char *aval, *bval;
682 if ((a == NULL) && (b == NULL))
683 return (0);
684 else if (a == NULL)
685 return (-1);
686 else if (b == NULL)
687 return (1);
689 alen = a->utf8string_len;
690 blen = b->utf8string_len;
691 aval = a->utf8string_val;
692 bval = b->utf8string_val;
694 if (((alen == 0) || (aval == NULL)) &&
695 ((blen == 0) || (bval == NULL)))
696 return (0);
697 else if ((alen == 0) || (aval == NULL))
698 return (-1);
699 else if ((blen == 0) || (bval == NULL))
700 return (1);
702 mlen = MIN(alen, blen);
703 cmp = strncmp(aval, bval, mlen);
705 if ((cmp == 0) && (alen == blen))
706 return (0);
707 else if ((cmp == 0) && (alen < blen))
708 return (-1);
709 else if (cmp == 0)
710 return (1);
711 else if (cmp < 0)
712 return (-1);
713 return (1);
717 * utf8_dir_verify - checks that the utf8 string is valid
719 nfsstat4
720 utf8_dir_verify(utf8string *str)
722 char *nm;
723 int len;
725 if (str == NULL)
726 return (NFS4ERR_INVAL);
728 nm = str->utf8string_val;
729 len = str->utf8string_len;
730 if (nm == NULL || len == 0) {
731 return (NFS4ERR_INVAL);
734 if (len == 1 && nm[0] == '.')
735 return (NFS4ERR_BADNAME);
736 if (len == 2 && nm[0] == '.' && nm[1] == '.')
737 return (NFS4ERR_BADNAME);
739 if (utf8_strchr(str, '/') != NULL)
740 return (NFS4ERR_BADNAME);
742 if (utf8_strchr(str, '\0') != NULL)
743 return (NFS4ERR_BADNAME);
745 return (NFS4_OK);
749 * from rpcsec module (common/rpcsec)
751 extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
752 extern void sec_clnt_freeh(AUTH *);
753 extern void sec_clnt_freeinfo(struct sec_data *);
756 * authget() gets an auth handle based on the security
757 * information from the servinfo in mountinfo.
758 * The auth handle is stored in ch_client->cl_auth.
760 * First security flavor of choice is to use sv_secdata
761 * which is initiated by the client. If that fails, get
762 * secinfo from the server and then select one from the
763 * server secinfo list .
765 * For RPCSEC_GSS flavor, upon success, a secure context is
766 * established between client and server.
769 authget(servinfo4_t *svp, CLIENT *ch_client, cred_t *cr)
771 int error, i;
774 * SV4_TRYSECINFO indicates to try the secinfo list from
775 * sv_secinfo until a successful one is reached. Point
776 * sv_currsec to the selected security mechanism for
777 * later sessions.
779 (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0);
780 if ((svp->sv_flags & SV4_TRYSECINFO) && svp->sv_secinfo) {
781 for (i = svp->sv_secinfo->index; i < svp->sv_secinfo->count;
782 i++) {
783 if (!(error = sec_clnt_geth(ch_client,
784 &svp->sv_secinfo->sdata[i],
785 cr, &ch_client->cl_auth))) {
787 svp->sv_currsec = &svp->sv_secinfo->sdata[i];
788 svp->sv_secinfo->index = i;
789 /* done */
790 svp->sv_flags &= ~SV4_TRYSECINFO;
791 break;
795 * Allow the caller retry with the security flavor
796 * pointed by svp->sv_secinfo->index when
797 * ETIMEDOUT/ECONNRESET occurs.
799 if (error == ETIMEDOUT || error == ECONNRESET) {
800 svp->sv_secinfo->index = i;
801 break;
804 } else {
805 /* sv_currsec points to one of the entries in sv_secinfo */
806 if (svp->sv_currsec) {
807 error = sec_clnt_geth(ch_client, svp->sv_currsec, cr,
808 &ch_client->cl_auth);
809 } else {
810 /* If it's null, use sv_secdata. */
811 error = sec_clnt_geth(ch_client, svp->sv_secdata, cr,
812 &ch_client->cl_auth);
815 nfs_rw_exit(&svp->sv_lock);
817 return (error);
821 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
824 clget4(clinfo_t *ci, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
825 struct chtab **chp, struct nfs4_clnt *nfscl)
827 struct chhead *ch, *newch;
828 struct chhead **plistp;
829 struct chtab *cp;
830 int error;
831 k_sigset_t smask;
833 if (newcl == NULL || chp == NULL || ci == NULL)
834 return (EINVAL);
836 *newcl = NULL;
837 *chp = NULL;
840 * Find an unused handle or create one
842 newch = NULL;
843 nfscl->nfscl_stat.clgets.value.ui64++;
844 top:
846 * Find the correct entry in the cache to check for free
847 * client handles. The search is based on the RPC program
848 * number, program version number, dev_t for the transport
849 * device, and the protocol family.
851 mutex_enter(&nfscl->nfscl_chtable4_lock);
852 plistp = &nfscl->nfscl_chtable4;
853 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
854 if (ch->ch_prog == ci->cl_prog &&
855 ch->ch_vers == ci->cl_vers &&
856 ch->ch_dev == svp->sv_knconf->knc_rdev &&
857 (strcmp(ch->ch_protofmly,
858 svp->sv_knconf->knc_protofmly) == 0))
859 break;
860 plistp = &ch->ch_next;
864 * If we didn't find a cache entry for this quadruple, then
865 * create one. If we don't have one already preallocated,
866 * then drop the cache lock, create one, and then start over.
867 * If we did have a preallocated entry, then just add it to
868 * the front of the list.
870 if (ch == NULL) {
871 if (newch == NULL) {
872 mutex_exit(&nfscl->nfscl_chtable4_lock);
873 newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
874 newch->ch_timesused = 0;
875 newch->ch_prog = ci->cl_prog;
876 newch->ch_vers = ci->cl_vers;
877 newch->ch_dev = svp->sv_knconf->knc_rdev;
878 newch->ch_protofmly = kmem_alloc(
879 strlen(svp->sv_knconf->knc_protofmly) + 1,
880 KM_SLEEP);
881 (void) strcpy(newch->ch_protofmly,
882 svp->sv_knconf->knc_protofmly);
883 newch->ch_list = NULL;
884 goto top;
886 ch = newch;
887 newch = NULL;
888 ch->ch_next = nfscl->nfscl_chtable4;
889 nfscl->nfscl_chtable4 = ch;
891 * We found a cache entry, but if it isn't on the front of the
892 * list, then move it to the front of the list to try to take
893 * advantage of locality of operations.
895 } else if (ch != nfscl->nfscl_chtable4) {
896 *plistp = ch->ch_next;
897 ch->ch_next = nfscl->nfscl_chtable4;
898 nfscl->nfscl_chtable4 = ch;
902 * If there was a free client handle cached, then remove it
903 * from the list, init it, and use it.
905 if (ch->ch_list != NULL) {
906 cp = ch->ch_list;
907 ch->ch_list = cp->ch_list;
908 mutex_exit(&nfscl->nfscl_chtable4_lock);
909 if (newch != NULL) {
910 kmem_free(newch->ch_protofmly,
911 strlen(newch->ch_protofmly) + 1);
912 kmem_free(newch, sizeof (*newch));
914 (void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
915 &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
918 * Get an auth handle.
920 error = authget(svp, cp->ch_client, cr);
921 if (error || cp->ch_client->cl_auth == NULL) {
922 CLNT_DESTROY(cp->ch_client);
923 kmem_cache_free(chtab4_cache, cp);
924 return ((error != 0) ? error : EINTR);
926 ch->ch_timesused++;
927 *newcl = cp->ch_client;
928 *chp = cp;
929 return (0);
933 * There weren't any free client handles which fit, so allocate
934 * a new one and use that.
936 #ifdef DEBUG
937 atomic_inc_64(&nfscl->nfscl_stat.clalloc.value.ui64);
938 #endif
939 mutex_exit(&nfscl->nfscl_chtable4_lock);
941 nfscl->nfscl_stat.cltoomany.value.ui64++;
942 if (newch != NULL) {
943 kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
944 kmem_free(newch, sizeof (*newch));
947 cp = kmem_cache_alloc(chtab4_cache, KM_SLEEP);
948 cp->ch_head = ch;
950 sigintr(&smask, (int)ci->cl_flags & MI4_INT);
951 error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
952 ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
953 sigunintr(&smask);
955 if (error != 0) {
956 kmem_cache_free(chtab4_cache, cp);
957 #ifdef DEBUG
958 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
959 #endif
961 * Warning is unnecessary if error is EINTR.
963 if (error != EINTR) {
964 nfs_cmn_err(error, CE_WARN,
965 "clget: couldn't create handle: %m\n");
967 return (error);
969 (void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
970 auth_destroy(cp->ch_client->cl_auth);
973 * Get an auth handle.
975 error = authget(svp, cp->ch_client, cr);
976 if (error || cp->ch_client->cl_auth == NULL) {
977 CLNT_DESTROY(cp->ch_client);
978 kmem_cache_free(chtab4_cache, cp);
979 #ifdef DEBUG
980 atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
981 #endif
982 return ((error != 0) ? error : EINTR);
984 ch->ch_timesused++;
985 *newcl = cp->ch_client;
986 ASSERT(cp->ch_client->cl_nosignal == FALSE);
987 *chp = cp;
988 return (0);
991 static int
992 nfs_clget4(mntinfo4_t *mi, servinfo4_t *svp, cred_t *cr, CLIENT **newcl,
993 struct chtab **chp, struct nfs4_clnt *nfscl)
995 clinfo_t ci;
996 bool_t is_recov;
997 int firstcall, error = 0;
1000 * Set read buffer size to rsize
1001 * and add room for RPC headers.
1003 ci.cl_readsize = mi->mi_tsize;
1004 if (ci.cl_readsize != 0)
1005 ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
1008 * If soft mount and server is down just try once.
1009 * meaning: do not retransmit.
1011 if (!(mi->mi_flags & MI4_HARD) && (mi->mi_flags & MI4_DOWN))
1012 ci.cl_retrans = 0;
1013 else
1014 ci.cl_retrans = mi->mi_retrans;
1016 ci.cl_prog = mi->mi_prog;
1017 ci.cl_vers = mi->mi_vers;
1018 ci.cl_flags = mi->mi_flags;
1021 * clget4 calls authget() to get an auth handle. For RPCSEC_GSS
1022 * security flavor, the client tries to establish a security context
1023 * by contacting the server. If the connection is timed out or reset,
1024 * e.g. server reboot, we will try again.
1026 is_recov = (curthread == mi->mi_recovthread);
1027 firstcall = 1;
1029 do {
1030 error = clget4(&ci, svp, cr, newcl, chp, nfscl);
1032 if (error == 0)
1033 break;
1036 * For forced unmount and zone shutdown, bail out but
1037 * let the recovery thread do one more transmission.
1039 if ((FS_OR_ZONE_GONE4(mi->mi_vfsp)) &&
1040 (!is_recov || !firstcall)) {
1041 error = EIO;
1042 break;
1045 /* do not retry for soft mount */
1046 if (!(mi->mi_flags & MI4_HARD))
1047 break;
1049 /* let the caller deal with the failover case */
1050 if (FAILOVER_MOUNT4(mi))
1051 break;
1053 firstcall = 0;
1055 } while (error == ETIMEDOUT || error == ECONNRESET);
1057 return (error);
1060 void
1061 clfree4(CLIENT *cl, struct chtab *cp, struct nfs4_clnt *nfscl)
1063 if (cl->cl_auth != NULL) {
1064 sec_clnt_freeh(cl->cl_auth);
1065 cl->cl_auth = NULL;
1069 * Timestamp this cache entry so that we know when it was last
1070 * used.
1072 cp->ch_freed = gethrestime_sec();
1075 * Add the free client handle to the front of the list.
1076 * This way, the list will be sorted in youngest to oldest
1077 * order.
1079 mutex_enter(&nfscl->nfscl_chtable4_lock);
1080 cp->ch_list = cp->ch_head->ch_list;
1081 cp->ch_head->ch_list = cp;
1082 mutex_exit(&nfscl->nfscl_chtable4_lock);
1085 #define CL_HOLDTIME 60 /* time to hold client handles */
1087 static void
1088 clreclaim4_zone(struct nfs4_clnt *nfscl, uint_t cl_holdtime)
1090 struct chhead *ch;
1091 struct chtab *cp; /* list of objects that can be reclaimed */
1092 struct chtab *cpe;
1093 struct chtab *cpl;
1094 struct chtab **cpp;
1095 #ifdef DEBUG
1096 int n = 0;
1097 clstat4_debug.clreclaim.value.ui64++;
1098 #endif
1101 * Need to reclaim some memory, so step through the cache
1102 * looking through the lists for entries which can be freed.
1104 cp = NULL;
1106 mutex_enter(&nfscl->nfscl_chtable4_lock);
1109 * Here we step through each non-NULL quadruple and start to
1110 * construct the reclaim list pointed to by cp. Note that
1111 * cp will contain all eligible chtab entries. When this traversal
1112 * completes, chtab entries from the last quadruple will be at the
1113 * front of cp and entries from previously inspected quadruples have
1114 * been appended to the rear of cp.
1116 for (ch = nfscl->nfscl_chtable4; ch != NULL; ch = ch->ch_next) {
1117 if (ch->ch_list == NULL)
1118 continue;
1120 * Search each list for entries older then
1121 * cl_holdtime seconds. The lists are maintained
1122 * in youngest to oldest order so that when the
1123 * first entry is found which is old enough, then
1124 * all of the rest of the entries on the list will
1125 * be old enough as well.
1127 cpl = ch->ch_list;
1128 cpp = &ch->ch_list;
1129 while (cpl != NULL &&
1130 cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
1131 cpp = &cpl->ch_list;
1132 cpl = cpl->ch_list;
1134 if (cpl != NULL) {
1135 *cpp = NULL;
1136 if (cp != NULL) {
1137 cpe = cpl;
1138 while (cpe->ch_list != NULL)
1139 cpe = cpe->ch_list;
1140 cpe->ch_list = cp;
1142 cp = cpl;
1146 mutex_exit(&nfscl->nfscl_chtable4_lock);
1149 * If cp is empty, then there is nothing to reclaim here.
1151 if (cp == NULL)
1152 return;
1155 * Step through the list of entries to free, destroying each client
1156 * handle and kmem_free'ing the memory for each entry.
1158 while (cp != NULL) {
1159 #ifdef DEBUG
1160 n++;
1161 #endif
1162 CLNT_DESTROY(cp->ch_client);
1163 cpl = cp->ch_list;
1164 kmem_cache_free(chtab4_cache, cp);
1165 cp = cpl;
1168 #ifdef DEBUG
1170 * Update clalloc so that nfsstat shows the current number
1171 * of allocated client handles.
1173 atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
1174 #endif
1177 /* ARGSUSED */
1178 static void
1179 clreclaim4(void *all)
1181 struct nfs4_clnt *nfscl;
1184 * The system is low on memory; go through and try to reclaim some from
1185 * every zone on the system.
1187 mutex_enter(&nfs4_clnt_list_lock);
1188 nfscl = list_head(&nfs4_clnt_list);
1189 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl))
1190 clreclaim4_zone(nfscl, CL_HOLDTIME);
1191 mutex_exit(&nfs4_clnt_list_lock);
/*
 * Minimum time-out values indexed by call type
 * These units are in "eighths" of a second to avoid multiplies
 */
static unsigned int minimum_timeo[] = {
	6, 7, 10
};
#define	SHORTWAIT	(NFS_COTS_TIMEO / 10)

/*
 * Back off for retransmission timeout, MAXTIMO is in hz of a sec
 *
 * backoff(tim) doubles tim, clamped to MAXTIMO; a tim that is already at
 * or above MAXTIMO is returned unchanged.  Both macros evaluate their
 * argument more than once, so pass only side-effect-free expressions.
 */
#define	MAXTIMO	(20*hz)
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))
1211 static int
1212 nfs4_rfscall(mntinfo4_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
1213 xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *doqueue,
1214 enum clnt_stat *rpc_statusp, int flags, struct nfs4_clnt *nfscl)
1216 CLIENT *client;
1217 struct chtab *ch;
1218 cred_t *cr = icr;
1219 struct rpc_err rpcerr, rpcerr_tmp;
1220 enum clnt_stat status;
1221 int error;
1222 struct timeval wait;
1223 int timeo; /* in units of hz */
1224 bool_t tryagain, is_recov;
1225 bool_t cred_cloned = FALSE;
1226 k_sigset_t smask;
1227 servinfo4_t *svp;
1228 #ifdef DEBUG
1229 char *bufp;
1230 #endif
1231 int firstcall;
1233 rpcerr.re_status = RPC_SUCCESS;
1236 * If we know that we are rebooting then let's
1237 * not bother with doing any over the wireness.
1239 mutex_enter(&mi->mi_lock);
1240 if (mi->mi_flags & MI4_SHUTDOWN) {
1241 mutex_exit(&mi->mi_lock);
1242 return (EIO);
1244 mutex_exit(&mi->mi_lock);
1247 * clget() calls clnt_tli_kinit() which clears the xid, so we
1248 * are guaranteed to reprocess the retry as a new request.
1250 svp = mi->mi_curr_serv;
1251 rpcerr.re_errno = nfs_clget4(mi, svp, cr, &client, &ch, nfscl);
1252 if (rpcerr.re_errno != 0)
1253 return (rpcerr.re_errno);
1255 timeo = (mi->mi_timeo * hz) / 10;
1258 * If hard mounted fs, retry call forever unless hard error
1259 * occurs.
1261 * For forced unmount, let the recovery thread through but return
1262 * an error for all others. This is so that user processes can
1263 * exit quickly. The recovery thread bails out after one
1264 * transmission so that it can tell if it needs to continue.
1266 * For zone shutdown, behave as above to encourage quick
1267 * process exit, but also fail quickly when servers have
1268 * timed out before and reduce the timeouts.
1270 is_recov = (curthread == mi->mi_recovthread);
1271 firstcall = 1;
1272 do {
1273 tryagain = FALSE;
1275 NFS4_DEBUG(nfs4_rfscall_debug, (CE_NOTE,
1276 "nfs4_rfscall: vfs_flag=0x%x, %s",
1277 mi->mi_vfsp->vfs_flag,
1278 is_recov ? "recov thread" : "not recov thread"));
1281 * It's possible while we're retrying the admin
1282 * decided to reboot.
1284 mutex_enter(&mi->mi_lock);
1285 if (mi->mi_flags & MI4_SHUTDOWN) {
1286 mutex_exit(&mi->mi_lock);
1287 clfree4(client, ch, nfscl);
1288 if (cred_cloned)
1289 crfree(cr);
1290 return (EIO);
1292 mutex_exit(&mi->mi_lock);
1294 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) &&
1295 (!is_recov || !firstcall)) {
1296 clfree4(client, ch, nfscl);
1297 if (cred_cloned)
1298 crfree(cr);
1299 return (EIO);
1302 if (zone_status_get(curproc->p_zone) >= ZONE_IS_SHUTTING_DOWN) {
1303 mutex_enter(&mi->mi_lock);
1304 if ((mi->mi_flags & MI4_TIMEDOUT) ||
1305 !is_recov || !firstcall) {
1306 mutex_exit(&mi->mi_lock);
1307 clfree4(client, ch, nfscl);
1308 if (cred_cloned)
1309 crfree(cr);
1310 return (EIO);
1312 mutex_exit(&mi->mi_lock);
1313 timeo = (MIN(mi->mi_timeo, SHORTWAIT) * hz) / 10;
1316 firstcall = 0;
1317 TICK_TO_TIMEVAL(timeo, &wait);
1320 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
1321 * and SIGTERM. (Preserving the existing masks).
1322 * Mask out SIGINT if mount option nointr is specified.
1324 sigintr(&smask, (int)mi->mi_flags & MI4_INT);
1325 if (!(mi->mi_flags & MI4_INT))
1326 client->cl_nosignal = TRUE;
1329 * If there is a current signal, then don't bother
1330 * even trying to send out the request because we
1331 * won't be able to block waiting for the response.
1332 * Simply assume RPC_INTR and get on with it.
1334 if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
1335 status = RPC_INTR;
1336 else {
1337 status = CLNT_CALL(client, which, xdrargs, argsp,
1338 xdrres, resp, wait);
1341 if (!(mi->mi_flags & MI4_INT))
1342 client->cl_nosignal = FALSE;
1344 * restore original signal mask
1346 sigunintr(&smask);
1348 switch (status) {
1349 case RPC_SUCCESS:
1350 break;
1352 case RPC_INTR:
1354 * There is no way to recover from this error,
1355 * even if mount option nointr is specified.
1356 * SIGKILL, for example, cannot be blocked.
1358 rpcerr.re_status = RPC_INTR;
1359 rpcerr.re_errno = EINTR;
1360 break;
1362 case RPC_UDERROR:
1364 * If the NFS server is local (vold) and
1365 * it goes away then we get RPC_UDERROR.
1366 * This is a retryable error, so we would
1367 * loop, so check to see if the specific
1368 * error was ECONNRESET, indicating that
1369 * target did not exist at all. If so,
1370 * return with RPC_PROGUNAVAIL and
1371 * ECONNRESET to indicate why.
1373 CLNT_GETERR(client, &rpcerr);
1374 if (rpcerr.re_errno == ECONNRESET) {
1375 rpcerr.re_status = RPC_PROGUNAVAIL;
1376 rpcerr.re_errno = ECONNRESET;
1377 break;
1379 /*FALLTHROUGH*/
1381 default: /* probably RPC_TIMEDOUT */
1383 if (IS_UNRECOVERABLE_RPC(status))
1384 break;
1387 * increment server not responding count
1389 mutex_enter(&mi->mi_lock);
1390 mi->mi_noresponse++;
1391 mutex_exit(&mi->mi_lock);
1392 #ifdef DEBUG
1393 nfscl->nfscl_stat.noresponse.value.ui64++;
1394 #endif
1396 * On zone shutdown, mark server dead and move on.
1398 if (zone_status_get(curproc->p_zone) >=
1399 ZONE_IS_SHUTTING_DOWN) {
1400 mutex_enter(&mi->mi_lock);
1401 mi->mi_flags |= MI4_TIMEDOUT;
1402 mutex_exit(&mi->mi_lock);
1403 clfree4(client, ch, nfscl);
1404 if (cred_cloned)
1405 crfree(cr);
1406 return (EIO);
1410 * NFS client failover support:
1411 * return and let the caller take care of
1412 * failover. We only return for failover mounts
1413 * because otherwise we want the "not responding"
1414 * message, the timer updates, etc.
1416 if (mi->mi_vers == 4 && FAILOVER_MOUNT4(mi) &&
1417 (error = try_failover(status)) != 0) {
1418 clfree4(client, ch, nfscl);
1419 if (cred_cloned)
1420 crfree(cr);
1421 *rpc_statusp = status;
1422 return (error);
1425 if (flags & RFSCALL_SOFT)
1426 break;
1428 tryagain = TRUE;
1431 * The call is in progress (over COTS).
1432 * Try the CLNT_CALL again, but don't
1433 * print a noisy error message.
1435 if (status == RPC_INPROGRESS)
1436 break;
1438 timeo = backoff(timeo);
1439 CLNT_GETERR(client, &rpcerr_tmp);
1441 mutex_enter(&mi->mi_lock);
1442 if (!(mi->mi_flags & MI4_PRINTED)) {
1443 mi->mi_flags |= MI4_PRINTED;
1444 mutex_exit(&mi->mi_lock);
1445 if ((status == RPC_CANTSEND) &&
1446 (rpcerr_tmp.re_errno == ENOBUFS))
1447 nfs4_queue_fact(RF_SENDQ_FULL, mi, 0,
1448 0, 0, FALSE, NULL, 0, NULL);
1449 else
1450 nfs4_queue_fact(RF_SRV_NOT_RESPOND, mi,
1451 0, 0, 0, FALSE, NULL, 0, NULL);
1452 } else
1453 mutex_exit(&mi->mi_lock);
1455 if (*doqueue && nfs_has_ctty()) {
1456 *doqueue = 0;
1457 if (!(mi->mi_flags & MI4_NOPRINT)) {
1458 if ((status == RPC_CANTSEND) &&
1459 (rpcerr_tmp.re_errno == ENOBUFS))
1460 nfs4_queue_fact(RF_SENDQ_FULL,
1461 mi, 0, 0, 0, FALSE, NULL,
1462 0, NULL);
1463 else
1464 nfs4_queue_fact(
1465 RF_SRV_NOT_RESPOND, mi, 0,
1466 0, 0, FALSE, NULL, 0, NULL);
1470 } while (tryagain);
1472 DTRACE_PROBE2(nfs4__rfscall_debug, enum clnt_stat, status,
1473 int, rpcerr.re_errno);
1475 if (status != RPC_SUCCESS) {
1476 zoneid_t zoneid = mi->mi_zone->zone_id;
1479 * Let soft mounts use the timed out message.
1481 if (status == RPC_INPROGRESS)
1482 status = RPC_TIMEDOUT;
1483 nfscl->nfscl_stat.badcalls.value.ui64++;
1484 if (status != RPC_INTR) {
1485 mutex_enter(&mi->mi_lock);
1486 mi->mi_flags |= MI4_DOWN;
1487 mutex_exit(&mi->mi_lock);
1488 CLNT_GETERR(client, &rpcerr);
1489 #ifdef DEBUG
1490 bufp = clnt_sperror(client, svp->sv_hostname);
1491 zprintf(zoneid, "NFS%d %s failed for %s\n",
1492 mi->mi_vers, mi->mi_rfsnames[which], bufp);
1493 if (nfs_has_ctty()) {
1494 if (!(mi->mi_flags & MI4_NOPRINT)) {
1495 uprintf("NFS%d %s failed for %s\n",
1496 mi->mi_vers, mi->mi_rfsnames[which],
1497 bufp);
1500 kmem_free(bufp, MAXPATHLEN);
1501 #else
1502 zprintf(zoneid,
1503 "NFS %s failed for server %s: error %d (%s)\n",
1504 mi->mi_rfsnames[which], svp->sv_hostname,
1505 status, clnt_sperrno(status));
1506 if (nfs_has_ctty()) {
1507 if (!(mi->mi_flags & MI4_NOPRINT)) {
1508 uprintf(
1509 "NFS %s failed for server %s: error %d (%s)\n",
1510 mi->mi_rfsnames[which],
1511 svp->sv_hostname, status,
1512 clnt_sperrno(status));
1515 #endif
1517 * when CLNT_CALL() fails with RPC_AUTHERROR,
1518 * re_errno is set appropriately depending on
1519 * the authentication error
1521 if (status == RPC_VERSMISMATCH ||
1522 status == RPC_PROGVERSMISMATCH)
1523 rpcerr.re_errno = EIO;
1525 } else {
1527 * Test the value of mi_down and mi_printed without
1528 * holding the mi_lock mutex. If they are both zero,
1529 * then it is okay to skip the down and printed
1530 * processing. This saves on a mutex_enter and
1531 * mutex_exit pair for a normal, successful RPC.
1532 * This was just complete overhead.
1534 if (mi->mi_flags & (MI4_DOWN | MI4_PRINTED)) {
1535 mutex_enter(&mi->mi_lock);
1536 mi->mi_flags &= ~MI4_DOWN;
1537 if (mi->mi_flags & MI4_PRINTED) {
1538 mi->mi_flags &= ~MI4_PRINTED;
1539 mutex_exit(&mi->mi_lock);
1540 if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1541 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0,
1542 0, FALSE, NULL, 0, NULL);
1543 } else
1544 mutex_exit(&mi->mi_lock);
1547 if (*doqueue == 0) {
1548 if (!(mi->mi_flags & MI4_NOPRINT) &&
1549 !(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
1550 nfs4_queue_fact(RF_SRV_OK, mi, 0, 0, 0,
1551 FALSE, NULL, 0, NULL);
1553 *doqueue = 1;
1557 clfree4(client, ch, nfscl);
1558 if (cred_cloned)
1559 crfree(cr);
1561 ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
1563 TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "nfs4_rfscall_end:errno %d",
1564 rpcerr.re_errno);
1566 *rpc_statusp = status;
1567 return (rpcerr.re_errno);
1571 * rfs4call - general wrapper for RPC calls initiated by the client
1573 void
1574 rfs4call(mntinfo4_t *mi, COMPOUND4args_clnt *argsp, COMPOUND4res_clnt *resp,
1575 cred_t *cr, int *doqueue, int flags, nfs4_error_t *ep)
1577 int i, error;
1578 enum clnt_stat rpc_status = NFS4_OK;
1579 int num_resops;
1580 struct nfs4_clnt *nfscl;
1582 ASSERT(nfs_zone() == mi->mi_zone);
1583 nfscl = zone_getspecific(nfs4clnt_zone_key, nfs_zone());
1584 ASSERT(nfscl != NULL);
1586 nfscl->nfscl_stat.calls.value.ui64++;
1587 mi->mi_reqs[NFSPROC4_COMPOUND].value.ui64++;
1589 /* Set up the results struct for XDR usage */
1590 resp->argsp = argsp;
1591 resp->array = NULL;
1592 resp->status = 0;
1593 resp->decode_len = 0;
1595 error = nfs4_rfscall(mi, NFSPROC4_COMPOUND,
1596 xdr_COMPOUND4args_clnt, (caddr_t)argsp,
1597 xdr_COMPOUND4res_clnt, (caddr_t)resp, cr,
1598 doqueue, &rpc_status, flags, nfscl);
1600 /* Return now if it was an RPC error */
1601 if (error) {
1602 ep->error = error;
1603 ep->stat = resp->status;
1604 ep->rpc_status = rpc_status;
1605 return;
1608 /* else we'll count the processed operations */
1609 num_resops = resp->decode_len;
1610 for (i = 0; i < num_resops; i++) {
1612 * Count the individual operations
1613 * processed by the server.
1615 if (resp->array[i].resop >= NFSPROC4_NULL &&
1616 resp->array[i].resop <= OP_WRITE)
1617 mi->mi_reqs[resp->array[i].resop].value.ui64++;
1620 ep->error = 0;
1621 ep->stat = resp->status;
1622 ep->rpc_status = rpc_status;
1626 * nfs4rename_update - updates stored state after a rename. Currently this
1627 * is the path of the object and anything under it, and the filehandle of
1628 * the renamed object.
1630 void
1631 nfs4rename_update(vnode_t *renvp, vnode_t *ndvp, nfs_fh4 *nfh4p, char *nnm)
1633 sfh4_update(VTOR4(renvp)->r_fh, nfh4p);
1634 fn_move(VTOSV(renvp)->sv_name, VTOSV(ndvp)->sv_name, nnm);
1638 * Routine to look up the filehandle for the given path and rootvp.
1640 * Return values:
1641 * - success: returns zero and *statp is set to NFS4_OK, and *fhp is
1642 * updated.
1643 * - error: return value (errno value) and/or *statp is set appropriately.
1645 #define RML_ORDINARY 1
1646 #define RML_NAMED_ATTR 2
1647 #define RML_ATTRDIR 3
1649 static void
1650 remap_lookup(nfs4_fname_t *fname, vnode_t *rootvp,
1651 int filetype, cred_t *cr,
1652 nfs_fh4 *fhp, nfs4_ga_res_t *garp, /* fh, attrs for object */
1653 nfs_fh4 *pfhp, nfs4_ga_res_t *pgarp, /* fh, attrs for parent */
1654 nfs4_error_t *ep)
1656 COMPOUND4args_clnt args;
1657 COMPOUND4res_clnt res;
1658 nfs_argop4 *argop;
1659 nfs_resop4 *resop;
1660 int num_argops;
1661 lookup4_param_t lookuparg;
1662 nfs_fh4 *tmpfhp;
1663 int doqueue = 1;
1664 char *path;
1665 mntinfo4_t *mi;
1667 ASSERT(fname != NULL);
1668 ASSERT(rootvp->v_type == VDIR);
1670 mi = VTOMI4(rootvp);
1671 path = fn_path(fname);
1672 switch (filetype) {
1673 case RML_NAMED_ATTR:
1674 lookuparg.l4_getattrs = LKP4_LAST_NAMED_ATTR;
1675 args.ctag = TAG_REMAP_LOOKUP_NA;
1676 break;
1677 case RML_ATTRDIR:
1678 lookuparg.l4_getattrs = LKP4_LAST_ATTRDIR;
1679 args.ctag = TAG_REMAP_LOOKUP_AD;
1680 break;
1681 case RML_ORDINARY:
1682 lookuparg.l4_getattrs = LKP4_ALL_ATTRIBUTES;
1683 args.ctag = TAG_REMAP_LOOKUP;
1684 break;
1685 default:
1686 ep->error = EINVAL;
1687 return;
1689 lookuparg.argsp = &args;
1690 lookuparg.resp = &res;
1691 lookuparg.header_len = 1; /* Putfh */
1692 lookuparg.trailer_len = 0;
1693 lookuparg.ga_bits = NFS4_VATTR_MASK;
1694 lookuparg.mi = VTOMI4(rootvp);
1696 (void) nfs4lookup_setup(path, &lookuparg, 1);
1698 /* 0: putfh directory */
1699 argop = args.array;
1700 argop[0].argop = OP_CPUTFH;
1701 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(rootvp)->r_fh;
1703 num_argops = args.array_len;
1705 rfs4call(mi, &args, &res, cr, &doqueue, RFSCALL_SOFT, ep);
1707 if (ep->error || res.status != NFS4_OK)
1708 goto exit;
1710 /* get the object filehandle */
1711 resop = &res.array[res.array_len - 2];
1712 if (resop->resop != OP_GETFH) {
1713 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1714 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1715 ep->stat = NFS4ERR_SERVERFAULT;
1716 goto exit;
1718 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1719 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1720 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1721 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1722 TAG_NONE, 0, 0);
1723 ep->stat = NFS4ERR_SERVERFAULT;
1724 goto exit;
1726 fhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1727 nfs_fh4_copy(tmpfhp, fhp);
1729 /* get the object attributes */
1730 resop = &res.array[res.array_len - 1];
1731 if (garp && resop->resop == OP_GETATTR)
1732 *garp = resop->nfs_resop4_u.opgetattr.ga_res;
1734 /* See if there are enough fields in the response for parent info */
1735 if ((int)res.array_len - 5 <= 0)
1736 goto exit;
1738 /* get the parent filehandle */
1739 resop = &res.array[res.array_len - 5];
1740 if (resop->resop != OP_GETFH) {
1741 nfs4_queue_event(RE_FAIL_REMAP_OP, mi, NULL,
1742 0, NULL, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0);
1743 ep->stat = NFS4ERR_SERVERFAULT;
1744 goto exit;
1746 tmpfhp = &resop->nfs_resop4_u.opgetfh.object;
1747 if (tmpfhp->nfs_fh4_len > NFS4_FHSIZE) {
1748 nfs4_queue_event(RE_FAIL_REMAP_LEN, mi, NULL,
1749 tmpfhp->nfs_fh4_len, NULL, NULL, 0, NULL, 0, TAG_NONE,
1750 TAG_NONE, 0, 0);
1751 ep->stat = NFS4ERR_SERVERFAULT;
1752 goto exit;
1754 pfhp->nfs_fh4_val = kmem_alloc(tmpfhp->nfs_fh4_len, KM_SLEEP);
1755 nfs_fh4_copy(tmpfhp, pfhp);
1757 /* get the parent attributes */
1758 resop = &res.array[res.array_len - 4];
1759 if (pgarp && resop->resop == OP_GETATTR)
1760 *pgarp = resop->nfs_resop4_u.opgetattr.ga_res;
1762 exit:
1764 * It is too hard to remember where all the OP_LOOKUPs are
1766 nfs4args_lookup_free(argop, num_argops);
1767 kmem_free(argop, lookuparg.arglen * sizeof (nfs_argop4));
1769 if (!ep->error)
1770 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1771 kmem_free(path, strlen(path)+1);
1775 * NFS client failover / volatile filehandle support
1777 * Recover the filehandle for the given rnode.
1779 * Errors are returned via the nfs4_error_t parameter.
1782 void
1783 nfs4_remap_file(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
1785 int is_stub;
1786 rnode4_t *rp = VTOR4(vp);
1787 vnode_t *rootvp = NULL;
1788 vnode_t *dvp = NULL;
1789 cred_t *cr, *cred_otw;
1790 nfs4_ga_res_t gar, pgar;
1791 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
1792 int filetype = RML_ORDINARY;
1793 nfs4_recov_state_t recov = {NULL, 0, 0};
1794 int badfhcount = 0;
1795 nfs4_open_stream_t *osp = NULL;
1796 bool_t first_time = TRUE; /* first time getting OTW cred */
1797 bool_t last_time = FALSE; /* last time getting OTW cred */
1799 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1800 "nfs4_remap_file: remapping %s", rnode4info(rp)));
1801 ASSERT(nfs4_consistent_type(vp));
1803 if (vp->v_flag & VROOT) {
1804 nfs4_remap_root(mi, ep, flags);
1805 return;
1809 * Given the root fh, use the path stored in
1810 * the rnode to find the fh for the new server.
1812 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1813 if (ep->error != 0)
1814 return;
1816 cr = curthread->t_cred;
1817 ASSERT(cr != NULL);
1818 get_remap_cred:
1820 * Releases the osp, if it is provided.
1821 * Puts a hold on the cred_otw and the new osp (if found).
1823 cred_otw = nfs4_get_otw_cred_by_osp(rp, cr, &osp,
1824 &first_time, &last_time);
1825 ASSERT(cred_otw != NULL);
1827 if (rp->r_flags & R4ISXATTR) {
1828 filetype = RML_NAMED_ATTR;
1829 (void) vtodv(vp, &dvp, cred_otw, FALSE);
1832 if (vp->v_flag & V_XATTRDIR) {
1833 filetype = RML_ATTRDIR;
1836 if (filetype == RML_ORDINARY && rootvp->v_type == VREG) {
1837 /* file mount, doesn't need a remap */
1838 goto done;
1841 again:
1842 remap_lookup(rp->r_svnode.sv_name, rootvp, filetype, cred_otw,
1843 &newfh, &gar, &newpfh, &pgar, ep);
1845 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1846 "nfs4_remap_file: remap_lookup returned %d/%d",
1847 ep->error, ep->stat));
1849 if (last_time == FALSE && ep->error == EACCES) {
1850 crfree(cred_otw);
1851 if (dvp != NULL)
1852 VN_RELE(dvp);
1853 goto get_remap_cred;
1855 if (ep->error != 0)
1856 goto done;
1858 switch (ep->stat) {
1859 case NFS4_OK:
1860 badfhcount = 0;
1861 if (recov.rs_flags & NFS4_RS_DELAY_MSG) {
1862 mutex_enter(&rp->r_statelock);
1863 rp->r_delay_interval = 0;
1864 mutex_exit(&rp->r_statelock);
1865 uprintf("NFS File Available..\n");
1867 break;
1868 case NFS4ERR_FHEXPIRED:
1869 case NFS4ERR_BADHANDLE:
1870 case NFS4ERR_STALE:
1872 * If we ran into filehandle problems, we should try to
1873 * remap the root vnode first and hope life gets better.
1874 * But we need to avoid loops.
1876 if (badfhcount++ > 0)
1877 goto done;
1878 if (newfh.nfs_fh4_len != 0) {
1879 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1880 newfh.nfs_fh4_len = 0;
1882 if (newpfh.nfs_fh4_len != 0) {
1883 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1884 newpfh.nfs_fh4_len = 0;
1886 /* relative path - remap rootvp then retry */
1887 VN_RELE(rootvp);
1888 rootvp = NULL;
1889 nfs4_remap_root(mi, ep, flags);
1890 if (ep->error != 0 || ep->stat != NFS4_OK)
1891 goto done;
1892 ep->error = VFS_ROOT(mi->mi_vfsp, &rootvp);
1893 if (ep->error != 0)
1894 goto done;
1895 goto again;
1896 case NFS4ERR_DELAY:
1897 badfhcount = 0;
1898 nfs4_set_delay_wait(vp);
1899 ep->error = nfs4_wait_for_delay(vp, &recov);
1900 if (ep->error != 0)
1901 goto done;
1902 goto again;
1903 case NFS4ERR_ACCESS:
1904 /* get new cred, try again */
1905 if (last_time == TRUE)
1906 goto done;
1907 if (dvp != NULL)
1908 VN_RELE(dvp);
1909 crfree(cred_otw);
1910 goto get_remap_cred;
1911 default:
1912 goto done;
1916 * Check on the new and old rnodes before updating;
1917 * if the vnode type or size changes, issue a warning
1918 * and mark the file dead.
1920 mutex_enter(&rp->r_statelock);
1921 if (flags & NFS4_REMAP_CKATTRS) {
1922 if (vp->v_type != gar.n4g_va.va_type ||
1923 (vp->v_type != VDIR &&
1924 rp->r_size != gar.n4g_va.va_size)) {
1925 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1926 "nfs4_remap_file: size %d vs. %d, type %d vs. %d",
1927 (int)rp->r_size, (int)gar.n4g_va.va_size,
1928 vp->v_type, gar.n4g_va.va_type));
1929 mutex_exit(&rp->r_statelock);
1930 nfs4_queue_event(RE_FILE_DIFF, mi,
1931 rp->r_server->sv_hostname, 0, vp, NULL, 0, NULL, 0,
1932 TAG_NONE, TAG_NONE, 0, 0);
1933 nfs4_fail_recov(vp, NULL, 0, NFS4_OK);
1934 goto done;
1937 ASSERT(gar.n4g_va.va_type != VNON);
1938 rp->r_server = mi->mi_curr_serv;
1941 * Turn this object into a "stub" object if we
1942 * crossed an underlying server fs boundary.
1944 * This stub will be for a mirror-mount.
1945 * A referral would look like a boundary crossing
1946 * as well, but would not be the same type of object,
1947 * so we would expect to mark the object dead.
1949 * See comment in r4_do_attrcache() for more details.
1951 is_stub = 0;
1952 if (gar.n4g_fsid_valid) {
1953 (void) nfs_rw_enter_sig(&rp->r_server->sv_lock, RW_READER, 0);
1954 rp->r_srv_fsid = gar.n4g_fsid;
1955 if (!FATTR4_FSID_EQ(&gar.n4g_fsid, &rp->r_server->sv_fsid))
1956 is_stub = 1;
1957 nfs_rw_exit(&rp->r_server->sv_lock);
1958 #ifdef DEBUG
1959 } else {
1960 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
1961 "remap_file: fsid attr not provided by server. rp=%p",
1962 (void *)rp));
1963 #endif
1965 if (is_stub)
1966 r4_stub_mirrormount(rp);
1967 else
1968 r4_stub_none(rp);
1969 mutex_exit(&rp->r_statelock);
1970 nfs4_attrcache_noinval(vp, &gar, gethrtime()); /* force update */
1971 sfh4_update(rp->r_fh, &newfh);
1972 ASSERT(nfs4_consistent_type(vp));
1975 * If we got parent info, use it to update the parent
1977 if (newpfh.nfs_fh4_len != 0) {
1978 if (rp->r_svnode.sv_dfh != NULL)
1979 sfh4_update(rp->r_svnode.sv_dfh, &newpfh);
1980 if (dvp != NULL) {
1981 /* force update of attrs */
1982 nfs4_attrcache_noinval(dvp, &pgar, gethrtime());
1985 done:
1986 if (newfh.nfs_fh4_len != 0)
1987 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
1988 if (newpfh.nfs_fh4_len != 0)
1989 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
1990 if (cred_otw != NULL)
1991 crfree(cred_otw);
1992 if (rootvp != NULL)
1993 VN_RELE(rootvp);
1994 if (dvp != NULL)
1995 VN_RELE(dvp);
1996 if (osp != NULL)
1997 open_stream_rele(osp, rp);
2001 * Client-side failover support: remap the filehandle for vp if it appears
2002 * necessary. errors are returned via the nfs4_error_t parameter; though,
2003 * if there is a problem, we will just try again later.
2006 void
2007 nfs4_check_remap(mntinfo4_t *mi, vnode_t *vp, int flags, nfs4_error_t *ep)
2009 if (vp == NULL)
2010 return;
2012 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY))
2013 return;
2015 if (VTOR4(vp)->r_server == mi->mi_curr_serv)
2016 return;
2018 nfs4_remap_file(mi, vp, flags, ep);
2022 * nfs4_make_dotdot() - find or create a parent vnode of a non-root node.
2024 * Our caller has a filehandle for ".." relative to a particular
2025 * directory object. We want to find or create a parent vnode
2026 * with that filehandle and return it. We can of course create
2027 * a vnode from this filehandle, but we need to also make sure
2028 * that if ".." is a regular file (i.e. dvp is a V_XATTRDIR)
2029 * that we have a parent FH for future reopens as well. If
2030 * we have a remap failure, we won't be able to reopen this
2031 * file, but we won't treat that as fatal because a reopen
2032 * is at least unlikely. Someday nfs4_reopen() should look
2033 * for a missing parent FH and try a remap to recover from it.
2035 * need_start_op argument indicates whether this function should
2036 * do a start_op before calling remap_lookup(). This should
2037 * be FALSE, if you are the recovery thread or in an op; otherwise,
2038 * set it to TRUE.
2041 nfs4_make_dotdot(nfs4_sharedfh_t *fhp, hrtime_t t, vnode_t *dvp,
2042 cred_t *cr, vnode_t **vpp, int need_start_op)
2044 mntinfo4_t *mi = VTOMI4(dvp);
2045 nfs4_fname_t *np = NULL, *pnp = NULL;
2046 vnode_t *vp = NULL, *rootvp = NULL;
2047 rnode4_t *rp;
2048 nfs_fh4 newfh = {0, NULL}, newpfh = {0, NULL};
2049 nfs4_ga_res_t gar, pgar;
2050 vattr_t va, pva;
2051 nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
2052 nfs4_sharedfh_t *sfh = NULL, *psfh = NULL;
2053 nfs4_recov_state_t recov_state;
2055 #ifdef DEBUG
2057 * ensure need_start_op is correct
2060 int no_need_start_op = (tsd_get(nfs4_tsd_key) ||
2061 (curthread == mi->mi_recovthread));
2062 /* C needs a ^^ operator! */
2063 ASSERT(((need_start_op) && (!no_need_start_op)) ||
2064 ((! need_start_op) && (no_need_start_op)));
2066 #endif
2067 ASSERT(VTOMI4(dvp)->mi_zone == nfs_zone());
2069 NFS4_DEBUG(nfs4_client_shadow_debug, (CE_NOTE,
2070 "nfs4_make_dotdot: called with fhp %p, dvp %s", (void *)fhp,
2071 rnode4info(VTOR4(dvp))));
2074 * rootvp might be needed eventually. Holding it now will
2075 * ensure that r4find_unlocked() will find it, if ".." is the root.
2077 e.error = VFS_ROOT(mi->mi_vfsp, &rootvp);
2078 if (e.error != 0)
2079 goto out;
2080 rp = r4find_unlocked(fhp, mi->mi_vfsp);
2081 if (rp != NULL) {
2082 *vpp = RTOV4(rp);
2083 VN_RELE(rootvp);
2084 return (0);
2088 * Since we don't have the rnode, we have to go over the wire.
2089 * remap_lookup() can get all of the filehandles and attributes
2090 * we need in one operation.
2092 np = fn_parent(VTOSV(dvp)->sv_name);
2093 /* if a parent was not found return an error */
2094 if (np == NULL) {
2095 e.error = ENOENT;
2096 goto out;
2099 recov_state.rs_flags = 0;
2100 recov_state.rs_num_retry_despite_err = 0;
2101 recov_retry:
2102 if (need_start_op) {
2103 e.error = nfs4_start_fop(mi, rootvp, NULL, OH_LOOKUP,
2104 &recov_state, NULL);
2105 if (e.error != 0) {
2106 goto out;
2110 pgar.n4g_va.va_type = VNON;
2111 gar.n4g_va.va_type = VNON;
2113 remap_lookup(np, rootvp, RML_ORDINARY, cr,
2114 &newfh, &gar, &newpfh, &pgar, &e);
2115 if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) {
2116 if (need_start_op) {
2117 bool_t abort;
2119 abort = nfs4_start_recovery(&e, mi,
2120 rootvp, NULL, NULL, NULL, OP_LOOKUP, NULL, NULL,
2121 NULL);
2122 if (abort) {
2123 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2124 &recov_state, FALSE);
2125 if (e.error == 0)
2126 e.error = EIO;
2127 goto out;
2129 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2130 &recov_state, TRUE);
2131 goto recov_retry;
2133 if (e.error == 0)
2134 e.error = EIO;
2135 goto out;
2138 va = gar.n4g_va;
2139 pva = pgar.n4g_va;
2141 if ((e.error != 0) ||
2142 (va.va_type != VDIR)) {
2143 if (need_start_op)
2144 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2145 &recov_state, FALSE);
2146 if (e.error == 0)
2147 e.error = EIO;
2148 goto out;
2151 if (e.stat != NFS4_OK) {
2152 if (need_start_op)
2153 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2154 &recov_state, FALSE);
2155 e.error = EIO;
2156 goto out;
2160 * It is possible for remap_lookup() to return with no error,
2161 * but without providing the parent filehandle and attrs.
2163 if (pva.va_type != VDIR) {
2165 * Call remap_lookup() again, this time with the
2166 * newpfh and pgar args in the first position.
2168 pnp = fn_parent(np);
2169 if (pnp != NULL) {
2170 remap_lookup(pnp, rootvp, RML_ORDINARY, cr,
2171 &newpfh, &pgar, NULL, NULL, &e);
2173 * This remap_lookup call modifies pgar. The following
2174 * line prevents trouble when checking the va_type of
2175 * pva later in this code.
2177 pva = pgar.n4g_va;
2179 if (nfs4_needs_recovery(&e, FALSE,
2180 mi->mi_vfsp)) {
2181 if (need_start_op) {
2182 bool_t abort;
2184 abort = nfs4_start_recovery(&e, mi,
2185 rootvp, NULL, NULL, NULL,
2186 OP_LOOKUP, NULL, NULL, NULL);
2187 if (abort) {
2188 nfs4_end_fop(mi, rootvp, NULL,
2189 OH_LOOKUP, &recov_state,
2190 FALSE);
2191 if (e.error == 0)
2192 e.error = EIO;
2193 goto out;
2195 nfs4_end_fop(mi, rootvp, NULL,
2196 OH_LOOKUP, &recov_state, TRUE);
2197 goto recov_retry;
2199 if (e.error == 0)
2200 e.error = EIO;
2201 goto out;
2204 if (e.stat != NFS4_OK) {
2205 if (need_start_op)
2206 nfs4_end_fop(mi, rootvp, NULL,
2207 OH_LOOKUP, &recov_state, FALSE);
2208 e.error = EIO;
2209 goto out;
2212 if ((pnp == NULL) ||
2213 (e.error != 0) ||
2214 (pva.va_type == VNON)) {
2215 if (need_start_op)
2216 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP,
2217 &recov_state, FALSE);
2218 if (e.error == 0)
2219 e.error = EIO;
2220 goto out;
2223 ASSERT(newpfh.nfs_fh4_len != 0);
2224 if (need_start_op)
2225 nfs4_end_fop(mi, rootvp, NULL, OH_LOOKUP, &recov_state, FALSE);
2226 psfh = sfh4_get(&newpfh, mi);
2228 sfh = sfh4_get(&newfh, mi);
2229 vp = makenfs4node_by_fh(sfh, psfh, &np, &gar, mi, cr, t);
2231 out:
2232 if (np != NULL)
2233 fn_rele(&np);
2234 if (pnp != NULL)
2235 fn_rele(&pnp);
2236 if (newfh.nfs_fh4_len != 0)
2237 kmem_free(newfh.nfs_fh4_val, newfh.nfs_fh4_len);
2238 if (newpfh.nfs_fh4_len != 0)
2239 kmem_free(newpfh.nfs_fh4_val, newpfh.nfs_fh4_len);
2240 if (sfh != NULL)
2241 sfh4_rele(&sfh);
2242 if (psfh != NULL)
2243 sfh4_rele(&psfh);
2244 if (rootvp != NULL)
2245 VN_RELE(rootvp);
2246 *vpp = vp;
2247 return (e.error);
/*
 * Global counter that exists only in DEBUG builds and starts at zero.
 * NOTE(review): the name suggests it tracks memory consumed by rnode
 * path strings -- confirm against the code that updates it.
 */
2250 #ifdef DEBUG
2251 size_t r_path_memuse = 0;
2252 #endif
2255 * NFS client failover support
2257 * sv4_free() frees the malloc'd portion of a "servinfo_t".
2259 void
2260 sv4_free(servinfo4_t *svp)
2262 servinfo4_t *next;
2263 struct knetconfig *knconf;
2265 while (svp != NULL) {
2266 next = svp->sv_next;
2267 if (svp->sv_dhsec)
2268 sec_clnt_freeinfo(svp->sv_dhsec);
2269 if (svp->sv_secdata)
2270 sec_clnt_freeinfo(svp->sv_secdata);
2271 if (svp->sv_save_secinfo &&
2272 svp->sv_save_secinfo != svp->sv_secinfo)
2273 secinfo_free(svp->sv_save_secinfo);
2274 if (svp->sv_secinfo)
2275 secinfo_free(svp->sv_secinfo);
2276 if (svp->sv_hostname && svp->sv_hostnamelen > 0)
2277 kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
2278 knconf = svp->sv_knconf;
2279 if (knconf != NULL) {
2280 if (knconf->knc_protofmly != NULL)
2281 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2282 if (knconf->knc_proto != NULL)
2283 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2284 kmem_free(knconf, sizeof (*knconf));
2286 knconf = svp->sv_origknconf;
2287 if (knconf != NULL) {
2288 if (knconf->knc_protofmly != NULL)
2289 kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
2290 if (knconf->knc_proto != NULL)
2291 kmem_free(knconf->knc_proto, KNC_STRSIZE);
2292 kmem_free(knconf, sizeof (*knconf));
2294 if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
2295 kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
2296 if (svp->sv_path != NULL) {
2297 kmem_free(svp->sv_path, svp->sv_pathlen);
2299 nfs_rw_destroy(&svp->sv_lock);
2300 kmem_free(svp, sizeof (*svp));
2301 svp = next;
2305 void
2306 nfs4_printfhandle(nfs4_fhandle_t *fhp)
2308 int *ip;
2309 char *buf;
2310 size_t bufsize;
2311 char *cp;
2314 * 13 == "(file handle:"
2315 * maximum of NFS_FHANDLE / sizeof (*ip) elements in fh_buf times
2316 * 1 == ' '
2317 * 8 == maximum strlen of "%x"
2318 * 3 == ")\n\0"
2320 bufsize = 13 + ((NFS_FHANDLE_LEN / sizeof (*ip)) * (1 + 8)) + 3;
2321 buf = kmem_alloc(bufsize, KM_NOSLEEP);
2322 if (buf == NULL)
2323 return;
2325 cp = buf;
2326 (void) strcpy(cp, "(file handle:");
2327 while (*cp != '\0')
2328 cp++;
2329 for (ip = (int *)fhp->fh_buf;
2330 ip < (int *)&fhp->fh_buf[fhp->fh_len];
2331 ip++) {
2332 (void) sprintf(cp, " %x", *ip);
2333 while (*cp != '\0')
2334 cp++;
2336 (void) strcpy(cp, ")\n");
2338 zcmn_err(getzoneid(), CE_CONT, "%s", buf);
2340 kmem_free(buf, bufsize);
2344 * The NFSv4 readdir cache subsystem.
2346 * We provide a set of interfaces to allow the rest of the system to utilize
2347 * a caching mechanism while encapsulating the details of the actual
2348 * implementation. This should allow for better maintainability and
2349 * extensibility by consolidating the implementation details in one location.
2353 * Comparator used by AVL routines.
2355 static int
2356 rddir4_cache_compar(const void *x, const void *y)
2358 rddir4_cache_impl *ai = (rddir4_cache_impl *)x;
2359 rddir4_cache_impl *bi = (rddir4_cache_impl *)y;
2360 rddir4_cache *a = &ai->rc;
2361 rddir4_cache *b = &bi->rc;
2363 if (a->nfs4_cookie == b->nfs4_cookie) {
2364 if (a->buflen == b->buflen)
2365 return (0);
2366 if (a->buflen < b->buflen)
2367 return (-1);
2368 return (1);
2371 if (a->nfs4_cookie < b->nfs4_cookie)
2372 return (-1);
2374 return (1);
2378 * Allocate an opaque handle for the readdir cache.
2380 void
2381 rddir4_cache_create(rnode4_t *rp)
2383 ASSERT(rp->r_dir == NULL);
2385 rp->r_dir = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
2387 avl_create(rp->r_dir, rddir4_cache_compar, sizeof (rddir4_cache_impl),
2388 offsetof(rddir4_cache_impl, tree));
2392 * Purge the cache of all cached readdir responses.
2394 void
2395 rddir4_cache_purge(rnode4_t *rp)
2397 rddir4_cache_impl *rdip;
2398 rddir4_cache_impl *nrdip;
2400 ASSERT(MUTEX_HELD(&rp->r_statelock));
2402 if (rp->r_dir == NULL)
2403 return;
2405 rdip = avl_first(rp->r_dir);
2407 while (rdip != NULL) {
2408 nrdip = AVL_NEXT(rp->r_dir, rdip);
2409 avl_remove(rp->r_dir, rdip);
2410 rdip->rc.flags &= ~RDDIRCACHED;
2411 rddir4_cache_rele(rp, &rdip->rc);
2412 rdip = nrdip;
2414 ASSERT(avl_numnodes(rp->r_dir) == 0);
2418 * Destroy the readdir cache.
2420 void
2421 rddir4_cache_destroy(rnode4_t *rp)
2423 ASSERT(MUTEX_HELD(&rp->r_statelock));
2424 if (rp->r_dir == NULL)
2425 return;
2427 rddir4_cache_purge(rp);
2428 avl_destroy(rp->r_dir);
2429 kmem_free(rp->r_dir, sizeof (avl_tree_t));
2430 rp->r_dir = NULL;
2434 * Locate a readdir response from the readdir cache.
2436 * Return values:
2438 * NULL - If there is an unrecoverable situation like the operation may have
2439 * been interrupted.
2441 * rddir4_cache * - A pointer to a rddir4_cache is returned to the caller.
2442 * The flags are set appropriately, such that the caller knows
2443 * what state the entry is in.
2445 rddir4_cache *
2446 rddir4_cache_lookup(rnode4_t *rp, offset_t cookie, int count)
2448 rddir4_cache_impl *rdip = NULL;
2449 rddir4_cache_impl srdip;
2450 rddir4_cache *srdc;
2451 rddir4_cache *rdc = NULL;
2452 rddir4_cache *nrdc = NULL;
2453 avl_index_t where;
2455 top:
2456 ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_READER));
2457 ASSERT(MUTEX_HELD(&rp->r_statelock));
2459 * Check to see if the readdir cache has been disabled. If so, then
2460 * simply allocate an rddir4_cache entry and return it, since caching
2461 * operations do not apply.
2463 if (rp->r_dir == NULL) {
2464 if (nrdc == NULL) {
2466 * Drop the lock because we are doing a sleeping
2467 * allocation.
2469 mutex_exit(&rp->r_statelock);
2470 rdc = rddir4_cache_alloc(KM_SLEEP);
2471 rdc->nfs4_cookie = cookie;
2472 rdc->buflen = count;
2473 mutex_enter(&rp->r_statelock);
2474 return (rdc);
2476 return (nrdc);
2479 srdc = &srdip.rc;
2480 srdc->nfs4_cookie = cookie;
2481 srdc->buflen = count;
2483 rdip = avl_find(rp->r_dir, &srdip, &where);
2486 * If we didn't find an entry then create one and insert it
2487 * into the cache.
2489 if (rdip == NULL) {
2491 * Check for the case where we have made a second pass through
2492 * the cache due to a lockless allocation. If we find that no
2493 * thread has already inserted this entry, do the insert now
2494 * and return.
2496 if (nrdc != NULL) {
2497 avl_insert(rp->r_dir, nrdc->data, where);
2498 nrdc->flags |= RDDIRCACHED;
2499 rddir4_cache_hold(nrdc);
2500 return (nrdc);
2503 #ifdef DEBUG
2504 nfs4_readdir_cache_misses++;
2505 #endif
2507 * First, try to allocate an entry without sleeping. If that
2508 * fails then drop the lock and do a sleeping allocation.
2510 nrdc = rddir4_cache_alloc(KM_NOSLEEP);
2511 if (nrdc != NULL) {
2512 nrdc->nfs4_cookie = cookie;
2513 nrdc->buflen = count;
2514 avl_insert(rp->r_dir, nrdc->data, where);
2515 nrdc->flags |= RDDIRCACHED;
2516 rddir4_cache_hold(nrdc);
2517 return (nrdc);
2521 * Drop the lock and do a sleeping allocation. We incur
2522 * additional overhead by having to search the cache again,
2523 * but this case should be rare.
2525 mutex_exit(&rp->r_statelock);
2526 nrdc = rddir4_cache_alloc(KM_SLEEP);
2527 nrdc->nfs4_cookie = cookie;
2528 nrdc->buflen = count;
2529 mutex_enter(&rp->r_statelock);
2531 * We need to take another pass through the cache
2532 * since we dropped our lock to perform the alloc.
2533 * Another thread may have come by and inserted the
2534 * entry we are interested in.
2536 goto top;
2540 * Check to see if we need to free our entry. This can happen if
2541 * another thread came along beat us to the insert. We can
2542 * safely call rddir4_cache_free directly because no other thread
2543 * would have a reference to this entry.
2545 if (nrdc != NULL)
2546 rddir4_cache_free((rddir4_cache_impl *)nrdc->data);
2548 #ifdef DEBUG
2549 nfs4_readdir_cache_hits++;
2550 #endif
2552 * Found something. Make sure it's ready to return.
2554 rdc = &rdip->rc;
2555 rddir4_cache_hold(rdc);
2557 * If the cache entry is in the process of being filled in, wait
2558 * until this completes. The RDDIRWAIT bit is set to indicate that
2559 * someone is waiting and when the thread currently filling the entry
2560 * is done, it should do a cv_broadcast to wakeup all of the threads
2561 * waiting for it to finish. If the thread wakes up to find that
2562 * someone new is now trying to complete the the entry, go back
2563 * to sleep.
2565 while (rdc->flags & RDDIR) {
2567 * The entry is not complete.
2569 nfs_rw_exit(&rp->r_rwlock);
2570 rdc->flags |= RDDIRWAIT;
2571 #ifdef DEBUG
2572 nfs4_readdir_cache_waits++;
2573 #endif
2574 while (rdc->flags & RDDIRWAIT) {
2575 if (!cv_wait_sig(&rdc->cv, &rp->r_statelock)) {
2577 * We got interrupted, probably the user
2578 * typed ^C or an alarm fired. We free the
2579 * new entry if we allocated one.
2581 rddir4_cache_rele(rp, rdc);
2582 mutex_exit(&rp->r_statelock);
2583 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2584 RW_READER, FALSE);
2585 mutex_enter(&rp->r_statelock);
2586 return (NULL);
2589 mutex_exit(&rp->r_statelock);
2590 (void) nfs_rw_enter_sig(&rp->r_rwlock,
2591 RW_READER, FALSE);
2592 mutex_enter(&rp->r_statelock);
2596 * The entry we were waiting on may have been purged from
2597 * the cache and should no longer be used, release it and
2598 * start over.
2600 if (!(rdc->flags & RDDIRCACHED)) {
2601 rddir4_cache_rele(rp, rdc);
2602 goto top;
2606 * The entry is completed. Return it.
2608 return (rdc);
2612 * Allocate a cache element and return it. Can return NULL if memory is
2613 * low.
2615 static rddir4_cache *
2616 rddir4_cache_alloc(int flags)
2618 rddir4_cache_impl *rdip = NULL;
2619 rddir4_cache *rc = NULL;
2621 rdip = kmem_alloc(sizeof (rddir4_cache_impl), flags);
2623 if (rdip != NULL) {
2624 rc = &rdip->rc;
2625 rc->data = (void *)rdip;
2626 rc->nfs4_cookie = 0;
2627 rc->nfs4_ncookie = 0;
2628 rc->entries = NULL;
2629 rc->eof = 0;
2630 rc->entlen = 0;
2631 rc->buflen = 0;
2632 rc->actlen = 0;
2634 * A readdir is required so set the flag.
2636 rc->flags = RDDIRREQ;
2637 cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
2638 rc->error = 0;
2639 mutex_init(&rdip->lock, NULL, MUTEX_DEFAULT, NULL);
2640 rdip->count = 1;
2641 #ifdef DEBUG
2642 atomic_inc_64(&clstat4_debug.dirent.value.ui64);
2643 #endif
2645 return (rc);
2649 * Increment the reference count to this cache element.
2651 static void
2652 rddir4_cache_hold(rddir4_cache *rc)
2654 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rc->data;
2656 mutex_enter(&rdip->lock);
2657 rdip->count++;
2658 mutex_exit(&rdip->lock);
2662 * Release a reference to this cache element. If the count is zero then
2663 * free the element.
2665 void
2666 rddir4_cache_rele(rnode4_t *rp, rddir4_cache *rdc)
2668 rddir4_cache_impl *rdip = (rddir4_cache_impl *)rdc->data;
2670 ASSERT(MUTEX_HELD(&rp->r_statelock));
2673 * Check to see if we have any waiters. If so, we can wake them
2674 * so that they can proceed.
2676 if (rdc->flags & RDDIRWAIT) {
2677 rdc->flags &= ~RDDIRWAIT;
2678 cv_broadcast(&rdc->cv);
2681 mutex_enter(&rdip->lock);
2682 ASSERT(rdip->count > 0);
2683 if (--rdip->count == 0) {
2684 mutex_exit(&rdip->lock);
2685 rddir4_cache_free(rdip);
2686 } else
2687 mutex_exit(&rdip->lock);
2691 * Free a cache element.
2693 static void
2694 rddir4_cache_free(rddir4_cache_impl *rdip)
2696 rddir4_cache *rc = &rdip->rc;
2698 #ifdef DEBUG
2699 atomic_dec_64(&clstat4_debug.dirent.value.ui64);
2700 #endif
2701 if (rc->entries != NULL)
2702 kmem_free(rc->entries, rc->buflen);
2703 cv_destroy(&rc->cv);
2704 mutex_destroy(&rdip->lock);
2705 kmem_free(rdip, sizeof (*rdip));
2709 * Snapshot callback for nfs:0:nfs4_client as registered with the kstat
2710 * framework.
2712 static int
2713 cl4_snapshot(kstat_t *ksp, void *buf, int rw)
2715 ksp->ks_snaptime = gethrtime();
2716 if (rw == KSTAT_WRITE) {
2717 bcopy(buf, ksp->ks_private, sizeof (clstat4_tmpl));
2718 #ifdef DEBUG
2720 * Currently only the global zone can write to kstats, but we
2721 * add the check just for paranoia.
2723 if (INGLOBALZONE(curproc))
2724 bcopy((char *)buf + sizeof (clstat4_tmpl),
2725 &clstat4_debug, sizeof (clstat4_debug));
2726 #endif
2727 } else {
2728 bcopy(ksp->ks_private, buf, sizeof (clstat4_tmpl));
2729 #ifdef DEBUG
2731 * If we're displaying the "global" debug kstat values, we
2732 * display them as-is to all zones since in fact they apply to
2733 * the system as a whole.
2735 bcopy(&clstat4_debug, (char *)buf + sizeof (clstat4_tmpl),
2736 sizeof (clstat4_debug));
2737 #endif
2739 return (0);
2745 * Zone support
2747 static void *
2748 clinit4_zone(zoneid_t zoneid)
2750 kstat_t *nfs4_client_kstat;
2751 struct nfs4_clnt *nfscl;
2752 uint_t ndata;
2754 nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
2755 mutex_init(&nfscl->nfscl_chtable4_lock, NULL, MUTEX_DEFAULT, NULL);
2756 nfscl->nfscl_chtable4 = NULL;
2757 nfscl->nfscl_zoneid = zoneid;
2759 bcopy(&clstat4_tmpl, &nfscl->nfscl_stat, sizeof (clstat4_tmpl));
2760 ndata = sizeof (clstat4_tmpl) / sizeof (kstat_named_t);
2761 #ifdef DEBUG
2762 ndata += sizeof (clstat4_debug) / sizeof (kstat_named_t);
2763 #endif
2764 if ((nfs4_client_kstat = kstat_create_zone("nfs", 0, "nfs4_client",
2765 "misc", KSTAT_TYPE_NAMED, ndata,
2766 KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
2767 nfs4_client_kstat->ks_private = &nfscl->nfscl_stat;
2768 nfs4_client_kstat->ks_snapshot = cl4_snapshot;
2769 kstat_install(nfs4_client_kstat);
2771 mutex_enter(&nfs4_clnt_list_lock);
2772 list_insert_head(&nfs4_clnt_list, nfscl);
2773 mutex_exit(&nfs4_clnt_list_lock);
2775 return (nfscl);
2778 /*ARGSUSED*/
2779 static void
2780 clfini4_zone(zoneid_t zoneid, void *arg)
2782 struct nfs4_clnt *nfscl = arg;
2783 chhead_t *chp, *next;
2785 if (nfscl == NULL)
2786 return;
2787 mutex_enter(&nfs4_clnt_list_lock);
2788 list_remove(&nfs4_clnt_list, nfscl);
2789 mutex_exit(&nfs4_clnt_list_lock);
2790 clreclaim4_zone(nfscl, 0);
2791 for (chp = nfscl->nfscl_chtable4; chp != NULL; chp = next) {
2792 ASSERT(chp->ch_list == NULL);
2793 kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
2794 next = chp->ch_next;
2795 kmem_free(chp, sizeof (*chp));
2797 kstat_delete_byname_zone("nfs", 0, "nfs4_client", zoneid);
2798 mutex_destroy(&nfscl->nfscl_chtable4_lock);
2799 kmem_free(nfscl, sizeof (*nfscl));
2803 * Called by endpnt_destructor to make sure the client handles are
2804 * cleaned up before the RPC endpoints. This becomes a no-op if
2805 * clfini_zone (above) is called first. This function is needed
2806 * (rather than relying on clfini_zone to clean up) because the ZSD
2807 * callbacks have no ordering mechanism, so we have no way to ensure
2808 * that clfini_zone is called before endpnt_destructor.
2810 void
2811 clcleanup4_zone(zoneid_t zoneid)
2813 struct nfs4_clnt *nfscl;
2815 mutex_enter(&nfs4_clnt_list_lock);
2816 nfscl = list_head(&nfs4_clnt_list);
2817 for (; nfscl != NULL; nfscl = list_next(&nfs4_clnt_list, nfscl)) {
2818 if (nfscl->nfscl_zoneid == zoneid) {
2819 clreclaim4_zone(nfscl, 0);
2820 break;
2823 mutex_exit(&nfs4_clnt_list_lock);
2827 nfs4_subr_init(void)
2830 * Allocate and initialize the client handle cache
2832 chtab4_cache = kmem_cache_create("client_handle4_cache",
2833 sizeof (struct chtab), 0, NULL, NULL, clreclaim4, NULL,
2834 NULL, 0);
2837 * Initialize the list of per-zone client handles (and associated data).
2838 * This needs to be done before we call zone_key_create().
2840 list_create(&nfs4_clnt_list, sizeof (struct nfs4_clnt),
2841 offsetof(struct nfs4_clnt, nfscl_node));
2844 * Initialize the zone_key for per-zone client handle lists.
2846 zone_key_create(&nfs4clnt_zone_key, clinit4_zone, NULL, clfini4_zone);
2848 if (nfs4err_delay_time == 0)
2849 nfs4err_delay_time = NFS4ERR_DELAY_TIME;
2851 return (0);
2855 nfs4_subr_fini(void)
2858 * Deallocate the client handle cache
2860 kmem_cache_destroy(chtab4_cache);
2863 * Destroy the zone_key
2865 (void) zone_key_delete(nfs4clnt_zone_key);
2867 return (0);
2870 * Set or Clear direct I/O flag
2871 * fop_rwlock() is held for write access to prevent a race condition
2872 * which would occur if a process is in the middle of a write when
2873 * directio flag gets set. It is possible that all pages may not get flushed.
2875 * This is a copy of nfs_directio, changes here may need to be made
2876 * there and vice versa.
2880 nfs4_directio(vnode_t *vp, int cmd, cred_t *cr)
2882 int error = 0;
2883 rnode4_t *rp;
2885 rp = VTOR4(vp);
2887 if (cmd == DIRECTIO_ON) {
2889 if (rp->r_flags & R4DIRECTIO)
2890 return (0);
2893 * Flush the page cache.
2896 (void) fop_rwlock(vp, V_WRITELOCK_TRUE, NULL);
2898 if (rp->r_flags & R4DIRECTIO) {
2899 fop_rwunlock(vp, V_WRITELOCK_TRUE, NULL);
2900 return (0);
2903 if (nfs4_has_pages(vp) &&
2904 ((rp->r_flags & R4DIRTY) || rp->r_awcount > 0)) {
2905 error = fop_putpage(vp, (offset_t)0, (uint_t)0,
2906 B_INVAL, cr, NULL);
2907 if (error) {
2908 if (error == ENOSPC || error == EDQUOT) {
2909 mutex_enter(&rp->r_statelock);
2910 if (!rp->r_error)
2911 rp->r_error = error;
2912 mutex_exit(&rp->r_statelock);
2914 fop_rwunlock(vp, V_WRITELOCK_TRUE, NULL);
2915 return (error);
2919 mutex_enter(&rp->r_statelock);
2920 rp->r_flags |= R4DIRECTIO;
2921 mutex_exit(&rp->r_statelock);
2922 fop_rwunlock(vp, V_WRITELOCK_TRUE, NULL);
2923 return (0);
2926 if (cmd == DIRECTIO_OFF) {
2927 mutex_enter(&rp->r_statelock);
2928 rp->r_flags &= ~R4DIRECTIO; /* disable direct mode */
2929 mutex_exit(&rp->r_statelock);
2930 return (0);
2933 return (EINVAL);
2937 * Return TRUE if the file has any pages. Always go back to
2938 * the master vnode to check it since none of the shadows
2939 * can have pages.
2942 bool_t
2943 nfs4_has_pages(vnode_t *vp)
2945 rnode4_t *rp;
2947 rp = VTOR4(vp);
2948 if (IS_SHADOW(vp, rp))
2949 vp = RTOV4(rp); /* RTOV4 always gives the master */
2951 return (vn_has_cached_data(vp));
/*
 * This table is used to determine whether the client should attempt
 * failover based on the clnt_stat value returned by CLNT_CALL.  The
 * clnt_stat is used as an index into the table.  If the error value
 * that corresponds to the clnt_stat value in the table is non-zero,
 * then that is the error to be returned AND that signals that failover
 * should be attempted.
 *
 * Special note: If the RPC_ values change, then direct indexing of the
 * table is no longer valid, but having the RPC_ values in the table
 * allows the functions to detect the change and issue a warning.
 * In this case, the code will always attempt failover as a defensive
 * measure.
 */
static struct try_failover_tab {
	enum clnt_stat	cstat;
	int		error;
} try_failover_table [] = {
	{ RPC_SUCCESS,			0 },
	{ RPC_CANTENCODEARGS,		0 },
	{ RPC_CANTDECODERES,		0 },
	{ RPC_CANTSEND,			ECOMM },
	{ RPC_CANTRECV,			ECOMM },
	{ RPC_TIMEDOUT,			ETIMEDOUT },
	{ RPC_VERSMISMATCH,		0 },
	{ RPC_AUTHERROR,		0 },
	{ RPC_PROGUNAVAIL,		0 },
	{ RPC_PROGVERSMISMATCH,		0 },
	{ RPC_PROCUNAVAIL,		0 },
	{ RPC_CANTDECODEARGS,		0 },
	{ RPC_SYSTEMERROR,		ENOSR },
	{ RPC_UNKNOWNHOST,		EHOSTUNREACH },
	{ RPC_RPCBFAILURE,		ENETUNREACH },
	{ RPC_PROGNOTREGISTERED,	ECONNREFUSED },
	{ RPC_FAILED,			ETIMEDOUT },
	{ RPC_UNKNOWNPROTO,		EHOSTUNREACH },
	{ RPC_INTR,			0 },
	{ RPC_UNKNOWNADDR,		EHOSTUNREACH },
	{ RPC_TLIERROR,			0 },
	{ RPC_NOBROADCAST,		EHOSTUNREACH },
	{ RPC_N2AXLATEFAILURE,		ECONNREFUSED },
	{ RPC_UDERROR,			0 },
	{ RPC_INPROGRESS,		0 },
	{ RPC_STALERACHANDLE,		EINVAL },
	{ RPC_CANTCONNECT,		ECONNREFUSED },
	{ RPC_XPRTFAILED,		ECONNABORTED },
	{ RPC_CANTCREATESTREAM,		ECONNREFUSED },
	{ RPC_CANTSTORE,		ENOBUFS }
};
3007 * nfs4_try_failover - determine whether the client should
3008 * attempt failover based on the values stored in the nfs4_error_t.
3011 nfs4_try_failover(nfs4_error_t *ep)
3013 if (ep->error == ETIMEDOUT || ep->stat == NFS4ERR_RESOURCE)
3014 return (TRUE);
3016 if (ep->error && ep->rpc_status != RPC_SUCCESS)
3017 return (try_failover(ep->rpc_status) != 0 ? TRUE : FALSE);
3019 return (FALSE);
3023 * try_failover - internal version of nfs4_try_failover, called
3024 * only by rfscall and aclcall. Determine if failover is warranted
3025 * based on the clnt_stat and return the error number if it is.
3027 static int
3028 try_failover(enum clnt_stat rpc_status)
3030 int err = 0;
3032 if (rpc_status == RPC_SUCCESS)
3033 return (0);
3035 #ifdef DEBUG
3036 if (rpc_status != 0 && nfs4_try_failover_any) {
3037 err = ETIMEDOUT;
3038 goto done;
3040 #endif
3042 * The rpc status is used as an index into the table.
3043 * If the rpc status is outside of the range of the
3044 * table or if the rpc error numbers have been changed
3045 * since the table was constructed, then print a warning
3046 * (DEBUG only) and try failover anyway. Otherwise, just
3047 * grab the resulting error number out of the table.
3049 if (rpc_status < RPC_SUCCESS || rpc_status >=
3050 sizeof (try_failover_table)/sizeof (try_failover_table[0]) ||
3051 try_failover_table[rpc_status].cstat != rpc_status) {
3053 err = ETIMEDOUT;
3054 #ifdef DEBUG
3055 cmn_err(CE_NOTE, "try_failover: unexpected rpc error %d",
3056 rpc_status);
3057 #endif
3058 } else
3059 err = try_failover_table[rpc_status].error;
3061 done:
3062 if (rpc_status)
3063 NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE,
3064 "nfs4_try_failover: %strying failover on error %d",
3065 err ? "" : "NOT ", rpc_status));
3067 return (err);
3070 void
3071 nfs4_error_zinit(nfs4_error_t *ep)
3073 ep->error = 0;
3074 ep->stat = NFS4_OK;
3075 ep->rpc_status = RPC_SUCCESS;
3078 void
3079 nfs4_error_init(nfs4_error_t *ep, int error)
3081 ep->error = error;
3082 ep->stat = NFS4_OK;
3083 ep->rpc_status = RPC_SUCCESS;
3087 #ifdef DEBUG
/*
 * Return a 16-bit hash for filehandle, stateid, clientid, owner.
 * Use the same algorithm as for NFS v3.
 *
 * The buffer is folded 32 bits at a time (each word XORed with its own
 * upper half); the one to three bytes that may follow the last whole
 * word are then XORed in individually.  A NULL buffer or a non-positive
 * length hashes to 0.
 *
 * p   - start of the data to hash
 * len - number of bytes at p
 *
 * Returns the hash in the low 16 bits of the result.
 */
int
hash16(void *p, int len)
{
	int i, rem;
	uint32_t *wp;
	uint32_t key = 0;

	if (p == NULL || len <= 0)
		return (0);

	/* protect against lengths that are not a multiple of a word */
	rem = len & 3;
	len &= ~3;

	for (i = 0, wp = (uint32_t *)p; i < len; i += 4, wp++)
		key ^= (*wp >> 16) ^ *wp;

	/*
	 * Hash the left-over bytes.  wp now points just past the last
	 * whole word, which is where the left-over bytes start; indexing
	 * from p here would hash the first bytes of the buffer a second
	 * time and ignore the tail.
	 */
	for (i = 0; i < rem; i++)
		key ^= *((uint8_t *)wp + i);

	return (key & 0xffff);
}
3117 * rnode4info - return filehandle and path information for an rnode.
3118 * XXX MT issues: uses a single static buffer, no locking of path.
3120 char *
3121 rnode4info(rnode4_t *rp)
3123 static char buf[80];
3124 nfs4_fhandle_t fhandle;
3125 char *path;
3126 char *type;
3128 if (rp == NULL)
3129 return ("null");
3130 if (rp->r_flags & R4ISXATTR)
3131 type = "attr";
3132 else if (RTOV4(rp)->v_flag & V_XATTRDIR)
3133 type = "attrdir";
3134 else if (RTOV4(rp)->v_flag & VROOT)
3135 type = "root";
3136 else if (RTOV4(rp)->v_type == VDIR)
3137 type = "dir";
3138 else if (RTOV4(rp)->v_type == VREG)
3139 type = "file";
3140 else
3141 type = "other";
3142 sfh4_copyval(rp->r_fh, &fhandle);
3143 path = fn_path(rp->r_svnode.sv_name);
3144 (void) snprintf(buf, 80, "$%p[%s], type=%s, flags=%04X, FH=%04X\n",
3145 (void *)rp, path, type, rp->r_flags,
3146 hash16((void *)&fhandle.fh_buf, fhandle.fh_len));
3147 kmem_free(path, strlen(path)+1);
3148 return (buf);
3150 #endif