dmake: do not set MAKEFLAGS=k
[unleashed/tickless.git] / kernel / fs / nfs / nfs4_client_state.c
blob8b082582d618dcc61205f9472aab865113416fa4
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 #include <nfs/nfs4_clnt.h>
31 #include <nfs/rnode4.h>
32 #include <sys/systm.h>
33 #include <sys/cmn_err.h>
34 #include <sys/atomic.h>
36 static void nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
37 static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
38 nfs4_oo_hash_bucket_t *, mntinfo4_t *);
39 static open_delegation_type4 get_dtype(rnode4_t *);
#ifdef DEBUG
int nfs4_client_foo_debug = 0;
int nfs4_client_open_dg = 0;

/*
 * Fault-injection switch: when this is non-zero, the lockowner and
 * openowner seqid sync primitives will intermittently return errors.
 */
static int seqid_sync_faults = 0;
#endif
51 stateid4 clnt_special0 = {
53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
56 stateid4 clnt_special1 = {
57 0xffffffff,
59 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
60 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
61 (char)0xff, (char)0xff, (char)0xff, (char)0xff
65 /* finds hash bucket and locks it */
66 static nfs4_oo_hash_bucket_t *
67 lock_bucket(cred_t *cr, mntinfo4_t *mi)
69 nfs4_oo_hash_bucket_t *bucketp;
70 uint32_t hash_key;
72 hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
73 % NFS4_NUM_OO_BUCKETS;
74 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
75 "hash_key %d for cred %p", hash_key, (void*)cr));
77 ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
78 ASSERT(mi != NULL);
79 ASSERT(mutex_owned(&mi->mi_lock));
81 bucketp = &(mi->mi_oo_list[hash_key]);
82 mutex_enter(&bucketp->b_lock);
83 return (bucketp);
86 /* unlocks hash bucket pointed by bucket_ptr */
87 static void
88 unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
90 mutex_exit(&bucketp->b_lock);
94 * Removes the lock owner from the rnode's lock_owners list and frees the
95 * corresponding reference.
97 void
98 nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
100 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
101 "nfs4_rnode_remove_lock_owner"));
103 mutex_enter(&rp->r_statev4_lock);
105 if (lop->lo_next_rnode == NULL) {
106 /* already removed from list */
107 mutex_exit(&rp->r_statev4_lock);
108 return;
111 ASSERT(lop->lo_prev_rnode != NULL);
113 lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
114 lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
116 lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
118 mutex_exit(&rp->r_statev4_lock);
121 * This would be an appropriate place for
122 * RELEASE_LOCKOWNER. For now, this is overkill
123 * because in the common case, close is going to
124 * release any lockowners anyway.
126 lock_owner_rele(lop);
130 * Remove all lock owners from the rnode's lock_owners list. Frees up
131 * their references from the list.
134 void
135 nfs4_flush_lock_owners(rnode4_t *rp)
137 nfs4_lock_owner_t *lop;
139 mutex_enter(&rp->r_statev4_lock);
140 while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
141 lop = rp->r_lo_head.lo_next_rnode;
142 lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
143 lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
144 lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
145 lock_owner_rele(lop);
147 mutex_exit(&rp->r_statev4_lock);
150 void
151 nfs4_clear_open_streams(rnode4_t *rp)
153 nfs4_open_stream_t *osp;
155 mutex_enter(&rp->r_os_lock);
156 while ((osp = list_head(&rp->r_open_streams)) != NULL) {
157 open_owner_rele(osp->os_open_owner);
158 list_remove(&rp->r_open_streams, osp);
159 mutex_destroy(&osp->os_sync_lock);
160 osp->os_open_owner = NULL;
161 kmem_free(osp, sizeof (*osp));
163 mutex_exit(&rp->r_os_lock);
166 void
167 open_owner_hold(nfs4_open_owner_t *oop)
169 mutex_enter(&oop->oo_lock);
170 oop->oo_ref_count++;
171 mutex_exit(&oop->oo_lock);
175 * Frees the open owner if the ref count hits zero.
177 void
178 open_owner_rele(nfs4_open_owner_t *oop)
180 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
181 "open_owner_rele"));
183 mutex_enter(&oop->oo_lock);
184 oop->oo_ref_count--;
185 if (oop->oo_ref_count == 0) {
186 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
187 "open_owner_rele: freeing open owner"));
188 oop->oo_valid = 0;
189 mutex_exit(&oop->oo_lock);
191 * Ok, we don't destroy the open owner, nor do we put it on
192 * the mntinfo4's free list just yet. We are lazy about it
193 * and let callers to find_open_owner() do that to keep locking
194 * simple.
196 } else {
197 mutex_exit(&oop->oo_lock);
201 void
202 open_stream_hold(nfs4_open_stream_t *osp)
204 mutex_enter(&osp->os_sync_lock);
205 osp->os_ref_count++;
206 mutex_exit(&osp->os_sync_lock);
210 * Frees the open stream and removes it from the rnode4's open streams list if
211 * the ref count drops to zero.
213 void
214 open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
216 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
217 "open_stream_rele"));
219 ASSERT(!mutex_owned(&rp->r_os_lock));
221 mutex_enter(&osp->os_sync_lock);
222 ASSERT(osp->os_ref_count > 0);
223 osp->os_ref_count--;
224 if (osp->os_ref_count == 0) {
225 nfs4_open_owner_t *tmp_oop;
227 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
228 "open_stream_rele: freeing open stream"));
229 osp->os_valid = 0;
230 tmp_oop = osp->os_open_owner;
231 mutex_exit(&osp->os_sync_lock);
233 /* now see if we need to destroy the open owner */
234 open_owner_rele(tmp_oop);
236 mutex_enter(&rp->r_os_lock);
237 list_remove(&rp->r_open_streams, osp);
238 mutex_exit(&rp->r_os_lock);
240 /* free up osp */
241 mutex_destroy(&osp->os_sync_lock);
242 osp->os_open_owner = NULL;
243 kmem_free(osp, sizeof (*osp));
244 } else {
245 mutex_exit(&osp->os_sync_lock);
249 void
250 lock_owner_hold(nfs4_lock_owner_t *lop)
252 mutex_enter(&lop->lo_lock);
253 lop->lo_ref_count++;
254 mutex_exit(&lop->lo_lock);
258 * Frees the lock owner if the ref count hits zero and
259 * the structure no longer has no locks.
261 void
262 lock_owner_rele(nfs4_lock_owner_t *lop)
264 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
265 "lock_owner_rele"));
267 mutex_enter(&lop->lo_lock);
268 lop->lo_ref_count--;
269 if (lop->lo_ref_count == 0) {
270 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
271 "lock_owner_rele: freeing lock owner: "
272 "%x", lop->lo_pid));
273 lop->lo_valid = 0;
275 * If there are no references, the lock_owner should
276 * already be off the rnode's list.
278 ASSERT(lop->lo_next_rnode == NULL);
279 ASSERT(lop->lo_prev_rnode == NULL);
280 ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
281 ASSERT(lop->lo_seqid_holder == NULL);
282 mutex_exit(&lop->lo_lock);
284 /* free up lop */
285 cv_destroy(&lop->lo_cv_seqid_sync);
286 mutex_destroy(&lop->lo_lock);
287 kmem_free(lop, sizeof (*lop));
288 } else {
289 mutex_exit(&lop->lo_lock);
294 * This increments the open owner ref count if found.
295 * The argument 'just_created' determines whether we are looking for open
296 * owners with the 'oo_just_created' flag set or not.
298 nfs4_open_owner_t *
299 find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
301 nfs4_open_owner_t *oop = NULL, *next_oop;
302 nfs4_oo_hash_bucket_t *bucketp;
304 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
305 "find_open_owner: cred %p, just_created %d",
306 (void*)cr, just_created));
308 ASSERT(mi != NULL);
309 ASSERT(mutex_owned(&mi->mi_lock));
311 bucketp = lock_bucket(cr, mi);
313 /* got hash bucket, search through open owners */
314 for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
315 mutex_enter(&oop->oo_lock);
316 if (!crcmp(oop->oo_cred, cr) &&
317 (oop->oo_just_created == just_created ||
318 just_created == NFS4_JUST_CREATED)) {
319 /* match */
320 if (oop->oo_valid == 0) {
321 /* reactivate the open owner */
322 oop->oo_valid = 1;
323 ASSERT(oop->oo_ref_count == 0);
325 oop->oo_ref_count++;
326 mutex_exit(&oop->oo_lock);
327 unlock_bucket(bucketp);
328 return (oop);
330 next_oop = list_next(&bucketp->b_oo_hash_list, oop);
331 if (oop->oo_valid == 0) {
332 list_remove(&bucketp->b_oo_hash_list, oop);
335 * Now we go ahead and put this open owner
336 * on the freed list. This is our lazy method.
338 nfs4_free_open_owner(oop, mi);
341 mutex_exit(&oop->oo_lock);
342 oop = next_oop;
345 /* search through recently freed open owners */
346 oop = find_freed_open_owner(cr, bucketp, mi);
348 unlock_bucket(bucketp);
350 return (oop);
353 nfs4_open_owner_t *
354 find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
356 nfs4_open_owner_t *oop;
358 mutex_enter(&mi->mi_lock);
359 oop = find_open_owner_nolock(cr, just_created, mi);
360 mutex_exit(&mi->mi_lock);
362 return (oop);
366 * This increments osp's ref count if found.
367 * Returns with 'os_sync_lock' held.
369 nfs4_open_stream_t *
370 find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
372 nfs4_open_stream_t *osp;
374 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
375 "find_open_stream"));
377 mutex_enter(&rp->r_os_lock);
378 /* Now, no one can add or delete to rp's open streams list */
379 for (osp = list_head(&rp->r_open_streams); osp != NULL;
380 osp = list_next(&rp->r_open_streams, osp)) {
381 mutex_enter(&osp->os_sync_lock);
382 if (osp->os_open_owner == oop && osp->os_valid != 0) {
383 /* match */
384 NFS4_DEBUG(nfs4_client_state_debug,
385 (CE_NOTE, "find_open_stream "
386 "got a match"));
388 osp->os_ref_count++;
389 mutex_exit(&rp->r_os_lock);
390 return (osp);
392 mutex_exit(&osp->os_sync_lock);
395 mutex_exit(&rp->r_os_lock);
396 return (NULL);
400 * Find the lock owner for the given file and process ID. If "which" is
401 * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
402 * from the server.
404 * This increments the lock owner's ref count if found. Returns NULL if
405 * there was no match.
407 nfs4_lock_owner_t *
408 find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
410 nfs4_lock_owner_t *lop, *next_lop;
412 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
413 "find_lock_owner: pid %x, which %d", pid, which));
415 ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);
417 /* search by pid */
418 mutex_enter(&rp->r_statev4_lock);
420 lop = rp->r_lo_head.lo_next_rnode;
421 while (lop != &rp->r_lo_head) {
422 mutex_enter(&lop->lo_lock);
423 if (lop->lo_pid == pid && lop->lo_valid != 0 &&
424 !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
425 if (which == LOWN_ANY ||
426 lop->lo_just_created != NFS4_JUST_CREATED) {
427 /* Found a matching lock owner */
428 NFS4_DEBUG(nfs4_client_state_debug,
429 (CE_NOTE, "find_lock_owner: "
430 "got a match"));
432 lop->lo_ref_count++;
433 mutex_exit(&lop->lo_lock);
434 mutex_exit(&rp->r_statev4_lock);
435 return (lop);
438 next_lop = lop->lo_next_rnode;
439 mutex_exit(&lop->lo_lock);
440 lop = next_lop;
443 mutex_exit(&rp->r_statev4_lock);
444 return (NULL);
448 * This returns the delegation stateid as 'sid'. Returns 1 if a successful
449 * delegation stateid was found, otherwise returns 0.
452 static int
453 nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
455 ASSERT(!mutex_owned(&rp->r_statev4_lock));
457 mutex_enter(&rp->r_statev4_lock);
458 if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
459 (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
460 !rp->r_deleg_return_pending) {
462 *sid = rp->r_deleg_stateid;
463 mutex_exit(&rp->r_statev4_lock);
464 return (1);
466 mutex_exit(&rp->r_statev4_lock);
467 return (0);
471 * This returns the lock stateid as 'sid'. Returns 1 if a successful lock
472 * stateid was found, otherwise returns 0.
474 static int
475 nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
477 nfs4_lock_owner_t *lop;
479 lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
481 if (lop) {
483 * Found a matching lock owner, so use a lock
484 * stateid rather than an open stateid.
486 mutex_enter(&lop->lo_lock);
487 *sid = lop->lock_stateid;
488 mutex_exit(&lop->lo_lock);
489 lock_owner_rele(lop);
490 return (1);
493 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
494 "nfs4_get_lock_stateid: no lop"));
495 return (0);
499 * This returns the open stateid as 'sid'. Returns 1 if a successful open
500 * stateid was found, otherwise returns 0.
502 * Once the stateid is returned to the caller, it is no longer protected;
503 * so the caller must be prepared to handle OLD/BAD_STATEID where
504 * appropiate.
506 static int
507 nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
509 nfs4_open_owner_t *oop;
510 nfs4_open_stream_t *osp;
512 ASSERT(mi != NULL);
514 oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
515 if (!oop) {
516 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
517 "nfs4_get_open_stateid: no oop"));
518 return (0);
521 osp = find_open_stream(oop, rp);
522 open_owner_rele(oop);
523 if (!osp) {
524 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
525 "nfs4_get_open_stateid: no osp"));
526 return (0);
529 if (osp->os_failed_reopen) {
530 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
531 "nfs4_get_open_stateid: osp %p failed reopen",
532 (void *)osp));
533 mutex_exit(&osp->os_sync_lock);
534 open_stream_rele(osp, rp);
535 return (0);
537 *sid = osp->open_stateid;
538 mutex_exit(&osp->os_sync_lock);
539 open_stream_rele(osp, rp);
540 return (1);
544 * Returns the delegation stateid if this 'op' is OP_WRITE and the
545 * delegation we hold is a write delegation, OR this 'op' is not
546 * OP_WRITE and we have a delegation held (read or write), otherwise
547 * returns the lock stateid if there is a lock owner, otherwise
548 * returns the open stateid if there is a open stream, otherwise
549 * returns special stateid <seqid = 0, other = 0>.
551 * Used for WRITE operations.
553 stateid4
554 nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
555 nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
557 stateid4 sid;
559 if (nfs4_get_deleg_stateid(rp, op, &sid)) {
560 if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
561 sid_tp->cur_sid_type = DEL_SID;
562 return (sid);
565 if (nfs4_get_lock_stateid(rp, pid, &sid)) {
566 if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
567 sid_tp->cur_sid_type = LOCK_SID;
568 return (sid);
571 if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
572 if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
573 sid_tp->cur_sid_type = OPEN_SID;
574 return (sid);
577 bzero(&sid, sizeof (stateid4));
578 sid_tp->cur_sid_type = SPEC_SID;
579 return (sid);
583 * Returns the delegation stateid if this 'op' is OP_WRITE and the
584 * delegation we hold is a write delegation, OR this 'op' is not
585 * OP_WRITE and we have a delegation held (read or write), otherwise
586 * returns the lock stateid if there is a lock owner, otherwise
587 * returns the open stateid if there is a open stream, otherwise
588 * returns special stateid <seqid = 0, other = 0>.
590 * This also updates which stateid we are using in 'sid_tp', skips
591 * previously attempted stateids, and skips checking higher priority
592 * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
593 * for async reads.
595 * Used for READ and SETATTR operations.
597 stateid4
598 nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
599 nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
601 stateid4 sid;
604 * For asynchronous READs, do not attempt to retry from the start of
605 * the stateid priority list, just continue from where you last left
606 * off.
608 if (async_read) {
609 switch (sid_tp->cur_sid_type) {
610 case NO_SID:
611 break;
612 case DEL_SID:
613 goto lock_stateid;
614 case LOCK_SID:
615 goto open_stateid;
616 case OPEN_SID:
617 goto special_stateid;
618 case SPEC_SID:
619 default:
620 cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
621 "stateid type %d", sid_tp->cur_sid_type);
625 if (nfs4_get_deleg_stateid(rp, op, &sid)) {
626 if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
627 sid_tp->cur_sid_type = DEL_SID;
628 return (sid);
631 lock_stateid:
632 if (nfs4_get_lock_stateid(rp, pid, &sid)) {
633 if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
634 sid_tp->cur_sid_type = LOCK_SID;
635 return (sid);
638 open_stateid:
639 if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
640 if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
641 sid_tp->cur_sid_type = OPEN_SID;
642 return (sid);
645 special_stateid:
646 bzero(&sid, sizeof (stateid4));
647 sid_tp->cur_sid_type = SPEC_SID;
648 return (sid);
651 void
652 nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
654 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
655 "nfs4_set_lock_stateid"));
657 ASSERT(lop);
658 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
660 mutex_enter(&lop->lo_lock);
661 lop->lock_stateid = stateid;
662 mutex_exit(&lop->lo_lock);
666 * Sequence number used when a new open owner is needed.
667 * This is used so as to not confuse the server. Since a open owner
668 * is based off of cred, a cred could be re-used quickly, and the server
669 * may not release all state for a cred.
671 static uint64_t open_owner_seq_num = 0;
673 uint64_t
674 nfs4_get_new_oo_name(void)
676 return (atomic_inc_64_nv(&open_owner_seq_num));
680 * Create a new open owner and add it to the open owner hash table.
682 nfs4_open_owner_t *
683 create_open_owner(cred_t *cr, mntinfo4_t *mi)
685 nfs4_open_owner_t *oop;
686 nfs4_oo_hash_bucket_t *bucketp;
688 oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
690 * Make sure the cred doesn't go away when we put this open owner
691 * on the free list, as well as make crcmp() a valid check.
693 crhold(cr);
694 oop->oo_cred = cr;
695 mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
696 oop->oo_ref_count = 1;
697 oop->oo_valid = 1;
698 oop->oo_just_created = NFS4_JUST_CREATED;
699 oop->oo_seqid = 0;
700 oop->oo_seqid_inuse = 0;
701 oop->oo_last_good_seqid = 0;
702 oop->oo_last_good_op = TAG_NONE;
703 oop->oo_cred_otw = NULL;
704 cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
707 * A Solaris open_owner is <oo_seq_num>
709 oop->oo_name = nfs4_get_new_oo_name();
711 /* now add the struct into the cred hash table */
712 ASSERT(mutex_owned(&mi->mi_lock));
713 bucketp = lock_bucket(cr, mi);
714 list_insert_head(&bucketp->b_oo_hash_list, oop);
715 unlock_bucket(bucketp);
717 return (oop);
721 * Create a new open stream and it to the rnode's list.
722 * Increments the ref count on oop.
723 * Returns with 'os_sync_lock' held.
725 nfs4_open_stream_t *
726 create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
728 nfs4_open_stream_t *osp;
730 #ifdef DEBUG
731 mutex_enter(&oop->oo_lock);
732 ASSERT(oop->oo_seqid_inuse);
733 mutex_exit(&oop->oo_lock);
734 #endif
736 osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
737 osp->os_open_ref_count = 1;
738 osp->os_mapcnt = 0;
739 osp->os_ref_count = 2;
740 osp->os_valid = 1;
741 osp->os_open_owner = oop;
742 osp->os_orig_oo_name = oop->oo_name;
743 bzero(&osp->open_stateid, sizeof (stateid4));
744 osp->os_share_acc_read = 0;
745 osp->os_share_acc_write = 0;
746 osp->os_mmap_read = 0;
747 osp->os_mmap_write = 0;
748 osp->os_share_deny_none = 0;
749 osp->os_share_deny_read = 0;
750 osp->os_share_deny_write = 0;
751 osp->os_delegation = 0;
752 osp->os_dc_openacc = 0;
753 osp->os_final_close = 0;
754 osp->os_pending_close = 0;
755 osp->os_failed_reopen = 0;
756 osp->os_force_close = 0;
757 mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);
759 /* open owner gets a reference */
760 open_owner_hold(oop);
762 /* now add the open stream to rp */
763 mutex_enter(&rp->r_os_lock);
764 mutex_enter(&osp->os_sync_lock);
765 list_insert_head(&rp->r_open_streams, osp);
766 mutex_exit(&rp->r_os_lock);
768 return (osp);
772 * Returns an open stream with 'os_sync_lock' held.
773 * If the open stream is found (rather than created), its
774 * 'os_open_ref_count' is bumped.
776 * There is no race with two threads entering this function
777 * and creating two open streams for the same <oop, rp> pair.
778 * This is because the open seqid sync must be acquired, thus
779 * only allowing one thread in at a time.
781 nfs4_open_stream_t *
782 find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
783 int *created_osp)
785 nfs4_open_stream_t *osp;
787 #ifdef DEBUG
788 mutex_enter(&oop->oo_lock);
789 ASSERT(oop->oo_seqid_inuse);
790 mutex_exit(&oop->oo_lock);
791 #endif
793 osp = find_open_stream(oop, rp);
794 if (!osp) {
795 osp = create_open_stream(oop, rp);
796 if (osp)
797 *created_osp = 1;
798 } else {
799 *created_osp = 0;
800 osp->os_open_ref_count++;
803 return (osp);
806 static uint64_t lock_owner_seq_num = 0;
809 * Create a new lock owner and add it to the rnode's list.
810 * Assumes the rnode's r_statev4_lock is held.
811 * The created lock owner has a reference count of 2: one for the list and
812 * one for the caller to use. Returns the lock owner locked down.
814 nfs4_lock_owner_t *
815 create_lock_owner(rnode4_t *rp, pid_t pid)
817 nfs4_lock_owner_t *lop;
819 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
820 "create_lock_owner: pid %x", pid));
822 ASSERT(mutex_owned(&rp->r_statev4_lock));
824 lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
825 lop->lo_ref_count = 2;
826 lop->lo_valid = 1;
827 bzero(&lop->lock_stateid, sizeof (stateid4));
828 lop->lo_pid = pid;
829 lop->lock_seqid = 0;
830 lop->lo_pending_rqsts = 0;
831 lop->lo_just_created = NFS4_JUST_CREATED;
832 lop->lo_flags = 0;
833 lop->lo_seqid_holder = NULL;
836 * A Solaris lock_owner is <seq_num><pid>
838 lop->lock_owner_name.ln_seq_num =
839 atomic_inc_64_nv(&lock_owner_seq_num);
840 lop->lock_owner_name.ln_pid = pid;
842 cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
843 mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);
845 mutex_enter(&lop->lo_lock);
847 /* now add the lock owner to rp */
848 lop->lo_prev_rnode = &rp->r_lo_head;
849 lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
850 rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
851 rp->r_lo_head.lo_next_rnode = lop;
853 return (lop);
858 * This sets the lock seqid of a lock owner.
860 void
861 nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
863 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
864 "nfs4_set_lock_seqid"));
866 ASSERT(lop != NULL);
867 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
869 lop->lock_seqid = seqid;
872 static void
873 nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
875 nfs4_lo_name_t *cast_namep;
877 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
878 "nfs4_set_new_lock_owner_args"));
880 owner->owner_len = sizeof (*cast_namep);
881 owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
883 * A Solaris lock_owner is <seq_num><pid>
885 cast_namep = (nfs4_lo_name_t *)owner->owner_val;
886 cast_namep->ln_seq_num = atomic_inc_64_nv(&lock_owner_seq_num);
887 cast_namep->ln_pid = pid;
891 * Fill in the lock owner args.
893 void
894 nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
896 nfs4_lock_owner_t *lop;
898 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
899 "nfs4_setlockowner_args"));
901 /* This increments lop's ref count */
902 lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);
904 if (!lop)
905 goto make_up_args;
907 mutex_enter(&lop->lo_lock);
908 owner->owner_len = sizeof (lop->lock_owner_name);
909 owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
910 bcopy(&lop->lock_owner_name, owner->owner_val,
911 owner->owner_len);
912 mutex_exit(&lop->lo_lock);
913 lock_owner_rele(lop);
914 return;
916 make_up_args:
917 nfs4_set_new_lock_owner_args(owner, pid);
921 * This ends our use of the open owner's open seqid by setting
922 * the appropiate flags and issuing a cv_signal to wake up another
923 * thread waiting to use the open seqid.
926 void
927 nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
929 mutex_enter(&oop->oo_lock);
930 ASSERT(oop->oo_seqid_inuse);
931 oop->oo_seqid_inuse = 0;
932 cv_broadcast(&oop->oo_cv_seqid_sync);
933 mutex_exit(&oop->oo_lock);
937 * This starts our use of the open owner's open seqid by setting
938 * the oo_seqid_inuse to true. We will wait (forever) with a
939 * cv_wait() until we are woken up.
941 * Return values:
942 * 0 no problems
943 * EAGAIN caller should retry (like a recovery retry)
946 nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
948 int error = 0;
949 #ifdef DEBUG
950 static int ops = 0; /* fault injection */
951 #endif
953 #ifdef DEBUG
954 if (seqid_sync_faults && curthread != mi->mi_recovthread &&
955 ++ops % 5 == 0)
956 return (EAGAIN);
957 #endif
959 mutex_enter(&mi->mi_lock);
960 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
961 curthread != mi->mi_recovthread)
962 error = EAGAIN;
963 mutex_exit(&mi->mi_lock);
964 if (error != 0)
965 goto done;
967 mutex_enter(&oop->oo_lock);
969 while (oop->oo_seqid_inuse) {
970 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
971 "nfs4_start_open_seqid_sync waiting on cv"));
973 cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
976 oop->oo_seqid_inuse = 1;
978 mutex_exit(&oop->oo_lock);
980 mutex_enter(&mi->mi_lock);
981 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
982 curthread != mi->mi_recovthread)
983 error = EAGAIN;
984 mutex_exit(&mi->mi_lock);
986 if (error == EAGAIN)
987 nfs4_end_open_seqid_sync(oop);
989 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
990 "nfs4_start_open_seqid_sync: error=%d", error));
992 done:
993 return (error);
996 #ifdef DEBUG
997 int bypass_otw[2];
998 #endif
1001 * Checks to see if the OPEN OTW is necessary that is, if it's already
1002 * been opened with the same access and deny bits we are now asking for.
1003 * Note, this assumes that *vp is a rnode.
1006 nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
1007 int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
1009 rnode4_t *rp;
1010 nfs4_open_stream_t *osp;
1011 open_delegation_type4 dt;
1013 rp = VTOR4(vp);
1016 * Grab the delegation type. This function is protected against
1017 * the delegation being returned by virtue of start_op (called
1018 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode,
1019 * delegreturn requires this lock in write mode to proceed.
1021 ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
1022 dt = get_dtype(rp);
1024 /* returns with 'os_sync_lock' held */
1025 osp = find_open_stream(oop, rp);
1027 if (osp) {
1028 uint32_t do_otw = 0;
1030 if (osp->os_failed_reopen) {
1031 NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
1032 "nfs4_is_otw_open_necessary: os_failed_reopen "
1033 "set on osp %p, cr %p, rp %s", (void *)osp,
1034 (void *)osp->os_open_owner->oo_cred,
1035 rnode4info(rp)));
1036 do_otw = 1;
1040 * check access/deny bits
1042 if (!do_otw && (flag & FREAD))
1043 if (osp->os_share_acc_read == 0 &&
1044 dt == OPEN_DELEGATE_NONE)
1045 do_otw = 1;
1047 if (!do_otw && (flag & FWRITE))
1048 if (osp->os_share_acc_write == 0 &&
1049 dt != OPEN_DELEGATE_WRITE)
1050 do_otw = 1;
1052 if (!do_otw) {
1053 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
1054 "nfs4_is_otw_open_necessary: can skip this "
1055 "open OTW"));
1056 if (!just_been_created) {
1057 osp->os_open_ref_count++;
1058 if (flag & FREAD)
1059 osp->os_share_acc_read++;
1060 if (flag & FWRITE)
1061 osp->os_share_acc_write++;
1062 osp->os_share_deny_none++;
1066 * Need to reset this bitfield for the possible case
1067 * where we were going to OTW CLOSE the file, got a
1068 * non-recoverable error, and before we could retry
1069 * the CLOSE, OPENed the file again.
1071 ASSERT(osp->os_open_owner->oo_seqid_inuse);
1072 osp->os_final_close = 0;
1073 osp->os_force_close = 0;
1075 mutex_exit(&osp->os_sync_lock);
1076 open_stream_rele(osp, rp);
1078 #ifdef DEBUG
1079 bypass_otw[0]++;
1080 #endif
1082 *errorp = 0;
1083 return (0);
1085 mutex_exit(&osp->os_sync_lock);
1086 open_stream_rele(osp, rp);
1088 } else if (dt != OPEN_DELEGATE_NONE) {
1090 * Even if there isn't an open_stream yet, we may still be
1091 * able to bypass the otw open if the client owns a delegation.
1093 * If you are asking for for WRITE, but I only have
1094 * a read delegation, then you still have to go otw.
1097 if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
1098 return (1);
1101 * TODO - evaluate the nfsace4
1105 * Check the access flags to make sure the caller
1106 * had permission.
1108 if (flag & FREAD && !(acc & VREAD))
1109 return (1);
1111 if (flag & FWRITE && !(acc & VWRITE))
1112 return (1);
1115 * create_open_stream will add a reference to oop,
1116 * this will prevent the open_owner_rele done in
1117 * nfs4open_otw from destroying the open_owner.
1120 /* returns with 'os_sync_lock' held */
1121 osp = create_open_stream(oop, rp);
1122 if (osp == NULL)
1123 return (1);
1125 osp->open_stateid = rp->r_deleg_stateid;
1126 osp->os_delegation = 1;
1128 if (flag & FREAD)
1129 osp->os_share_acc_read++;
1130 if (flag & FWRITE)
1131 osp->os_share_acc_write++;
1133 osp->os_share_deny_none++;
1134 mutex_exit(&osp->os_sync_lock);
1136 open_stream_rele(osp, rp);
1138 mutex_enter(&oop->oo_lock);
1139 oop->oo_just_created = NFS4_PERM_CREATED;
1140 mutex_exit(&oop->oo_lock);
1142 ASSERT(rsp != NULL);
1143 if (rsp->rs_sp != NULL) {
1144 mutex_enter(&rsp->rs_sp->s_lock);
1145 nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
1146 VTOMI4(vp));
1147 mutex_exit(&rsp->rs_sp->s_lock);
1149 #ifdef DEBUG
1150 bypass_otw[1]++;
1151 #endif
1153 *errorp = 0;
1154 return (0);
1157 return (1);
1160 static open_delegation_type4
1161 get_dtype(rnode4_t *rp)
1163 open_delegation_type4 dt;
1165 mutex_enter(&rp->r_statev4_lock);
1166 ASSERT(!rp->r_deleg_return_inprog);
1167 if (rp->r_deleg_return_pending)
1168 dt = OPEN_DELEGATE_NONE;
1169 else
1170 dt = rp->r_deleg_type;
1171 mutex_exit(&rp->r_statev4_lock);
1173 return (dt);
1177 * Fill in *locker with the lock state arguments for a LOCK call. If
1178 * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
1179 * Caller must already hold the necessary seqid sync lock(s).
1182 void
1183 nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
1184 nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
1186 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1187 if (lop->lo_just_created == NFS4_JUST_CREATED) {
1188 /* this is a new lock request */
1189 open_to_lock_owner4 *nown;
1191 ASSERT(oop != NULL);
1192 ASSERT(osp != NULL);
1194 locker->new_lock_owner = TRUE;
1195 nown = &locker->locker4_u.open_owner;
1196 nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
1197 mutex_enter(&osp->os_sync_lock);
1198 nown->open_stateid = osp->open_stateid;
1199 mutex_exit(&osp->os_sync_lock);
1200 nown->lock_seqid = lop->lock_seqid; /* initial, so no +1 */
1202 nown->lock_owner.clientid = clientid;
1203 nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
1204 nown->lock_owner.owner_val =
1205 kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
1206 bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
1207 nown->lock_owner.owner_len);
1208 } else {
1209 exist_lock_owner4 *eown;
1210 /* have an existing lock owner */
1212 locker->new_lock_owner = FALSE;
1213 eown = &locker->locker4_u.lock_owner;
1214 mutex_enter(&lop->lo_lock);
1215 eown->lock_stateid = lop->lock_stateid;
1216 mutex_exit(&lop->lo_lock);
1217 eown->lock_seqid = lop->lock_seqid + 1;
1222 * This starts our use of the lock owner's lock seqid by setting
1223 * the lo_flags to NFS4_LOCK_SEQID_INUSE. We will wait (forever)
1224 * with a cv_wait() until we are woken up.
1226 * Return values:
1227 * 0 no problems
1228 * EAGAIN caller should retry (like a recovery retry)
1231 nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
1233 int error = 0;
1234 #ifdef DEBUG
1235 static int ops = 0; /* fault injection */
1236 #endif
1238 #ifdef DEBUG
1239 if (seqid_sync_faults && curthread != mi->mi_recovthread &&
1240 ++ops % 7 == 0)
1241 return (EAGAIN);
1242 #endif
1244 mutex_enter(&mi->mi_lock);
1245 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1246 curthread != mi->mi_recovthread)
1247 error = EAGAIN;
1248 mutex_exit(&mi->mi_lock);
1249 if (error != 0)
1250 goto done;
1252 mutex_enter(&lop->lo_lock);
1254 ASSERT(lop->lo_seqid_holder != curthread);
1255 while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
1256 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1257 "nfs4_start_lock_seqid_sync: waiting on cv"));
1259 cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
1261 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
1262 "NFS4_LOCK_SEQID_INUSE"));
1264 lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
1265 lop->lo_seqid_holder = curthread;
1266 mutex_exit(&lop->lo_lock);
1268 mutex_enter(&mi->mi_lock);
1269 if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
1270 curthread != mi->mi_recovthread)
1271 error = EAGAIN;
1272 mutex_exit(&mi->mi_lock);
1274 if (error == EAGAIN)
1275 nfs4_end_lock_seqid_sync(lop);
1277 NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
1278 "nfs4_start_lock_seqid_sync: error=%d", error));
1280 done:
1281 return (error);
1285 * This ends our use of the lock owner's lock seqid by setting
1286 * the appropiate flags and issuing a cv_signal to wake up another
1287 * thread waiting to use the lock seqid.
1289 void
1290 nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
1292 mutex_enter(&lop->lo_lock);
1293 ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
1294 ASSERT(lop->lo_seqid_holder == curthread);
1295 lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
1296 lop->lo_seqid_holder = NULL;
1297 cv_broadcast(&lop->lo_cv_seqid_sync);
1298 mutex_exit(&lop->lo_lock);
1302 * Returns a reference to a lock owner via lopp, which has its lock seqid
1303 * synchronization started.
1304 * If the lock owner is in the 'just_created' state, then we return its open
1305 * owner and open stream and start the open seqid synchronization.
1307 * Return value:
1308 * NFS4_OK no problems
1309 * NFS4ERR_DELAY there is lost state to recover; caller should retry
1310 * NFS4ERR_IO no open stream
1312 nfsstat4
1313 nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
1314 nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
1315 nfs4_lock_owner_t **lopp)
1317 nfs4_lock_owner_t *lop, *next_lop;
1318 mntinfo4_t *mi;
1319 int error = 0;
1320 nfsstat4 stat;
1322 mi = VTOMI4(RTOV4(rp));
1324 mutex_enter(&rp->r_statev4_lock);
1326 lop = rp->r_lo_head.lo_next_rnode;
1327 while (lop != &rp->r_lo_head) {
1328 mutex_enter(&lop->lo_lock);
1329 if (lop->lo_pid == pid && lop->lo_valid != 0) {
1330 /* Found a matching lock owner */
1331 NFS4_DEBUG(nfs4_client_state_debug,
1332 (CE_NOTE, "nfs4_find_or_create_lock_owner: "
1333 "got a match"));
1334 lop->lo_ref_count++;
1335 break;
1337 next_lop = lop->lo_next_rnode;
1338 mutex_exit(&lop->lo_lock);
1339 lop = next_lop;
1342 if (lop == &rp->r_lo_head) {
1343 /* create temporary lock owner */
1344 lop = create_lock_owner(rp, pid);
1346 mutex_exit(&rp->r_statev4_lock);
1348 /* Have a locked down lock owner struct now */
1349 if (lop->lo_just_created != NFS4_JUST_CREATED) {
1350 /* This is an existing lock owner */
1351 *oopp = NULL;
1352 *ospp = NULL;
1353 } else {
1354 /* Lock owner doesn't exist yet */
1356 /* First grab open owner seqid synchronization */
1357 mutex_exit(&lop->lo_lock);
1358 *oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1359 if (*oopp == NULL)
1360 goto kill_new_lop;
1361 error = nfs4_start_open_seqid_sync(*oopp, mi);
1362 if (error == EAGAIN) {
1363 stat = NFS4ERR_DELAY;
1364 goto failed;
1366 *ospp = find_open_stream(*oopp, rp);
1367 if (*ospp == NULL) {
1368 nfs4_end_open_seqid_sync(*oopp);
1369 goto kill_new_lop;
1371 if ((*ospp)->os_failed_reopen) {
1372 mutex_exit(&(*ospp)->os_sync_lock);
1373 NFS4_DEBUG((nfs4_open_stream_debug ||
1374 nfs4_client_lock_debug), (CE_NOTE,
1375 "nfs4_find_or_create_lock_owner: os_failed_reopen;"
1376 "osp %p, cr %p, rp %s", (void *)(*ospp),
1377 (void *)cr, rnode4info(rp)));
1378 nfs4_end_open_seqid_sync(*oopp);
1379 stat = NFS4ERR_IO;
1380 goto failed;
1382 mutex_exit(&(*ospp)->os_sync_lock);
1385 * Now see if the lock owner has become permanent while we
1386 * had released our lock.
1388 mutex_enter(&lop->lo_lock);
1389 if (lop->lo_just_created != NFS4_JUST_CREATED) {
1390 nfs4_end_open_seqid_sync(*oopp);
1391 open_stream_rele(*ospp, rp);
1392 open_owner_rele(*oopp);
1393 *oopp = NULL;
1394 *ospp = NULL;
1397 mutex_exit(&lop->lo_lock);
1399 error = nfs4_start_lock_seqid_sync(lop, mi);
1400 if (error == EAGAIN) {
1401 if (*oopp != NULL)
1402 nfs4_end_open_seqid_sync(*oopp);
1403 stat = NFS4ERR_DELAY;
1404 goto failed;
1406 ASSERT(error == 0);
1408 *lopp = lop;
1409 return (NFS4_OK);
1411 kill_new_lop:
1413 * A previous CLOSE was attempted but got EINTR, but the application
1414 * continued to use the unspecified state file descriptor. But now the
1415 * open stream is gone (which could also destroy the open owner), hence
1416 * we can no longer continue. The calling function should return EIO
1417 * to the application.
1419 NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
1420 (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
1421 "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
1422 (void *)(*ospp)));
1424 nfs4_rnode_remove_lock_owner(rp, lop);
1425 stat = NFS4ERR_IO;
1427 failed:
1428 lock_owner_rele(lop);
1429 if (*oopp) {
1430 open_owner_rele(*oopp);
1431 *oopp = NULL;
1433 if (*ospp) {
1434 open_stream_rele(*ospp, rp);
1435 *ospp = NULL;
1437 return (stat);
1441 * This function grabs a recently freed open owner off of the freed open
1442 * owner list if there is a match on the cred 'cr'. It returns NULL if no
1443 * such match is found. It will set the 'oo_ref_count' and 'oo_valid' back
1444 * to both 1 (sane values) in the case a match is found.
1446 static nfs4_open_owner_t *
1447 find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
1448 mntinfo4_t *mi)
1450 nfs4_open_owner_t *foop;
1452 NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
1453 "find_freed_open_owner: cred %p", (void*)cr));
1455 ASSERT(mutex_owned(&mi->mi_lock));
1456 ASSERT(mutex_owned(&bucketp->b_lock));
1458 /* got hash bucket, search through freed open owners */
1459 for (foop = list_head(&mi->mi_foo_list); foop != NULL;
1460 foop = list_next(&mi->mi_foo_list, foop)) {
1461 if (!crcmp(foop->oo_cred, cr)) {
1462 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1463 "find_freed_open_owner: got a match open owner "
1464 "%p", (void *)foop));
1465 foop->oo_ref_count = 1;
1466 foop->oo_valid = 1;
1467 list_remove(&mi->mi_foo_list, foop);
1468 mi->mi_foo_num--;
1470 /* now add the struct into the cred hash table */
1471 list_insert_head(&bucketp->b_oo_hash_list, foop);
1472 return (foop);
1476 return (NULL);
1480 * Insert the newly freed 'oop' into the mi's freed oop list,
1481 * always at the head of the list. If we've already reached
1482 * our maximum allowed number of freed open owners (mi_foo_max),
1483 * then remove the LRU open owner on the list (namely the tail).
1485 static void
1486 nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
1488 nfs4_open_owner_t *lru_foop;
1490 if (mi->mi_foo_num < mi->mi_foo_max) {
1491 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1492 "nfs4_free_open_owner: num free %d, max free %d, "
1493 "insert open owner %p for mntinfo4 %p",
1494 mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
1495 (void *)mi));
1496 list_insert_head(&mi->mi_foo_list, oop);
1497 mi->mi_foo_num++;
1498 return;
1501 /* need to replace a freed open owner */
1503 lru_foop = list_tail(&mi->mi_foo_list);
1505 NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
1506 "nfs4_free_open_owner: destroy %p, insert %p",
1507 (void *)lru_foop, (void *)oop));
1509 list_remove(&mi->mi_foo_list, lru_foop);
1510 nfs4_destroy_open_owner(lru_foop);
1512 /* head always has latest freed oop */
1513 list_insert_head(&mi->mi_foo_list, oop);
1516 void
1517 nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
1519 ASSERT(oop != NULL);
1521 crfree(oop->oo_cred);
1522 if (oop->oo_cred_otw)
1523 crfree(oop->oo_cred_otw);
1524 mutex_destroy(&oop->oo_lock);
1525 cv_destroy(&oop->oo_cv_seqid_sync);
1526 kmem_free(oop, sizeof (*oop));
1529 seqid4
1530 nfs4_get_open_seqid(nfs4_open_owner_t *oop)
1532 ASSERT(oop->oo_seqid_inuse);
1533 return (oop->oo_seqid);
1537 * This set's the open seqid for a <open owner/ mntinfo4> pair.
1539 void
1540 nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
1541 nfs4_tag_type_t tag_type)
1543 ASSERT(oop->oo_seqid_inuse);
1544 oop->oo_seqid = seqid;
1545 oop->oo_last_good_seqid = seqid;
1546 oop->oo_last_good_op = tag_type;
1550 * This bumps the current open seqid for the open owner 'oop'.
1552 void
1553 nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
1554 nfs4_tag_type_t tag_type)
1556 ASSERT(oop->oo_seqid_inuse);
1557 oop->oo_seqid++;
1558 oop->oo_last_good_seqid = oop->oo_seqid;
1559 oop->oo_last_good_op = tag_type;
1563 * If no open owner was provided, this function takes the cred to find an
1564 * open owner within the given mntinfo4_t. Either way we return the
1565 * open owner's OTW credential if it exists; otherwise returns the
1566 * supplied 'cr'.
1568 * A hold is put on the returned credential, and it is up to the caller
1569 * to free the cred.
1571 cred_t *
1572 nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
1574 cred_t *ret_cr;
1575 nfs4_open_owner_t *oop = provided_oop;
1577 if (oop == NULL)
1578 oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
1579 if (oop != NULL) {
1580 mutex_enter(&oop->oo_lock);
1581 if (oop->oo_cred_otw)
1582 ret_cr = oop->oo_cred_otw;
1583 else
1584 ret_cr = cr;
1585 crhold(ret_cr);
1586 mutex_exit(&oop->oo_lock);
1587 if (provided_oop == NULL)
1588 open_owner_rele(oop);
1589 } else {
1590 ret_cr = cr;
1591 crhold(ret_cr);
1593 return (ret_cr);
1597 * Retrieves the next open stream in the rnode's list if an open stream
1598 * is provided; otherwise gets the first open stream in the list.
1599 * The open owner for that open stream is then retrieved, and if its
1600 * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
1601 * is returned. *osp is set to the 'found' open stream.
1603 * Note: we don't set *osp to the open stream retrieved via the
1604 * optimized check since that won't necessarily be at the beginning
1605 * of the rnode list, and if that osp doesn't work we'd like to
1606 * check _all_ open streams (starting from the beginning of the
1607 * rnode list).
1609 cred_t *
1610 nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
1611 nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
1613 nfs4_open_stream_t *next_osp = NULL;
1614 cred_t *ret_cr;
1616 ASSERT(cr != NULL);
1618 * As an optimization, try to find the open owner
1619 * for the cred provided since that's most likely
1620 * to work.
1622 if (*first_time) {
1623 nfs4_open_owner_t *oop;
1625 oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
1626 if (oop) {
1627 next_osp = find_open_stream(oop, rp);
1628 if (next_osp)
1629 mutex_exit(&next_osp->os_sync_lock);
1630 open_owner_rele(oop);
1633 if (next_osp == NULL) {
1634 int delay_rele = 0;
1635 *first_time = FALSE;
1637 /* return the next open stream for this rnode */
1638 mutex_enter(&rp->r_os_lock);
1639 /* Now, no one can add or delete to rp's open streams list */
1641 if (*osp) {
1642 next_osp = list_next(&rp->r_open_streams, *osp);
1644 * Delay the rele of *osp until after we drop
1645 * r_os_lock to not deadlock with oo_lock
1646 * via an open_stream_rele()->open_owner_rele().
1648 delay_rele = 1;
1649 } else {
1650 next_osp = list_head(&rp->r_open_streams);
1652 if (next_osp) {
1653 nfs4_open_stream_t *tmp_osp;
1655 /* find the next valid open stream */
1656 mutex_enter(&next_osp->os_sync_lock);
1657 while (next_osp && !next_osp->os_valid) {
1658 tmp_osp =
1659 list_next(&rp->r_open_streams, next_osp);
1660 mutex_exit(&next_osp->os_sync_lock);
1661 next_osp = tmp_osp;
1662 if (next_osp)
1663 mutex_enter(&next_osp->os_sync_lock);
1665 if (next_osp) {
1666 next_osp->os_ref_count++;
1667 mutex_exit(&next_osp->os_sync_lock);
1670 mutex_exit(&rp->r_os_lock);
1671 if (delay_rele)
1672 open_stream_rele(*osp, rp);
1675 if (next_osp) {
1676 nfs4_open_owner_t *oop;
1678 oop = next_osp->os_open_owner;
1679 mutex_enter(&oop->oo_lock);
1680 if (oop->oo_cred_otw)
1681 ret_cr = oop->oo_cred_otw;
1682 else
1683 ret_cr = cr;
1684 crhold(ret_cr);
1685 mutex_exit(&oop->oo_lock);
1686 if (*first_time) {
1687 open_stream_rele(next_osp, rp);
1688 *osp = NULL;
1689 } else
1690 *osp = next_osp;
1691 } else {
1692 /* just return the cred provided to us */
1693 *last_time = TRUE;
1694 *osp = NULL;
1695 ret_cr = cr;
1696 crhold(ret_cr);
1699 *first_time = FALSE;
1700 return (ret_cr);
1703 void
1704 nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
1706 bzero(&sid_tp->d_sid, sizeof (stateid4));
1707 bzero(&sid_tp->l_sid, sizeof (stateid4));
1708 bzero(&sid_tp->o_sid, sizeof (stateid4));
1709 sid_tp->cur_sid_type = NO_SID;
1712 void
1713 nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp)
1715 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
1716 "nfs4_save_stateid: saved %s stateid",
1717 sid_tp->cur_sid_type == DEL_SID ? "delegation" :
1718 sid_tp->cur_sid_type == LOCK_SID ? "lock" :
1719 sid_tp->cur_sid_type == OPEN_SID ? "open" : "special"));
1721 switch (sid_tp->cur_sid_type) {
1722 case DEL_SID:
1723 sid_tp->d_sid = *s1;
1724 break;
1725 case LOCK_SID:
1726 sid_tp->l_sid = *s1;
1727 break;
1728 case OPEN_SID:
1729 sid_tp->o_sid = *s1;
1730 break;
1731 case SPEC_SID:
1732 default:
1733 cmn_err(CE_PANIC, "nfs4_save_stateid: illegal "
1734 "stateid type %d", sid_tp->cur_sid_type);
1739 * We got NFS4ERR_BAD_SEQID. Setup some arguments to pass to recovery.
1740 * Caller is responsible for freeing.
1742 nfs4_bseqid_entry_t *
1743 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop,
1744 vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid)
1746 nfs4_bseqid_entry_t *bsep;
1748 bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP);
1749 bsep->bs_oop = oop;
1750 bsep->bs_lop = lop;
1751 bsep->bs_vp = vp;
1752 bsep->bs_pid = pid;
1753 bsep->bs_tag = tag;
1754 bsep->bs_seqid = seqid;
1756 return (bsep);
1759 void
1760 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp,
1761 nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr,
1762 vnode_t *vp, int access_close, int deny_close)
1764 lost_rqstp->lr_putfirst = FALSE;
1766 ASSERT(vp != NULL);
1767 if (error == ETIMEDOUT || error == EINTR ||
1768 NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) {
1769 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
1770 "nfs4open_dg_save_lost_rqst: error %d", error));
1772 lost_rqstp->lr_op = OP_OPEN_DOWNGRADE;
1774 * The vp is held and rele'd via the recovery code.
1775 * See nfs4_save_lost_rqst.
1777 lost_rqstp->lr_vp = vp;
1778 lost_rqstp->lr_dvp = NULL;
1779 lost_rqstp->lr_oop = oop;
1780 lost_rqstp->lr_osp = osp;
1781 lost_rqstp->lr_lop = NULL;
1782 lost_rqstp->lr_cr = cr;
1783 lost_rqstp->lr_flk = NULL;
1784 lost_rqstp->lr_dg_acc = access_close;
1785 lost_rqstp->lr_dg_deny = deny_close;
1786 lost_rqstp->lr_putfirst = FALSE;
1787 } else {
1788 lost_rqstp->lr_op = 0;
1793 * Change the access and deny bits of an OPEN.
1794 * If recovery is needed, *recov_credpp is set to the cred used OTW,
1795 * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW.
1797 void
1798 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop,
1799 nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp,
1800 nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp)
1802 mntinfo4_t *mi;
1803 int downgrade_acc, downgrade_deny;
1804 int new_acc, new_deny;
1805 COMPOUND4args_clnt args;
1806 COMPOUND4res_clnt res;
1807 OPEN_DOWNGRADE4res *odg_res;
1808 nfs_argop4 argop[3];
1809 nfs_resop4 *resop;
1810 rnode4_t *rp;
1811 bool_t needrecov = FALSE;
1812 int doqueue = 1;
1813 seqid4 seqid = 0;
1814 cred_t *cred_otw;
1815 hrtime_t t;
1817 ASSERT(mutex_owned(&osp->os_sync_lock));
1818 #if DEBUG
1819 mutex_enter(&oop->oo_lock);
1820 ASSERT(oop->oo_seqid_inuse);
1821 mutex_exit(&oop->oo_lock);
1822 #endif
1825 if (access_close == 0 && deny_close == 0) {
1826 nfs4_error_zinit(ep);
1827 return;
1830 cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop);
1832 cred_retry:
1833 nfs4_error_zinit(ep);
1834 downgrade_acc = 0;
1835 downgrade_deny = 0;
1836 mi = VTOMI4(vp);
1837 rp = VTOR4(vp);
1840 * Check to see if the open stream got closed before we go OTW,
1841 * now that we have acquired the 'os_sync_lock'.
1843 if (!osp->os_valid) {
1844 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1845 " open stream has already been closed, return success"));
1846 /* error has already been set */
1847 goto no_args_out;
1850 /* If the file failed recovery, just quit. */
1851 mutex_enter(&rp->r_statelock);
1852 if (rp->r_flags & R4RECOVERR) {
1853 mutex_exit(&rp->r_statelock);
1854 ep->error = EIO;
1855 goto no_args_out;
1857 mutex_exit(&rp->r_statelock);
1859 seqid = nfs4_get_open_seqid(oop) + 1;
1861 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1862 "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
1863 access_close, osp->os_share_acc_read, osp->os_share_acc_write));
1865 /* If we're closing the last READ, need to downgrade */
1866 if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
1867 downgrade_acc |= OPEN4_SHARE_ACCESS_READ;
1869 /* if we're closing the last WRITE, need to downgrade */
1870 if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
1871 downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;
1873 downgrade_deny = OPEN4_SHARE_DENY_NONE;
1875 new_acc = 0;
1876 new_deny = 0;
1878 /* set our new access and deny share bits */
1879 if ((osp->os_share_acc_read > 0) &&
1880 !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
1881 new_acc |= OPEN4_SHARE_ACCESS_READ;
1882 if ((osp->os_share_acc_write > 0) &&
1883 !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
1884 new_acc |= OPEN4_SHARE_ACCESS_WRITE;
1886 new_deny = OPEN4_SHARE_DENY_NONE;
1888 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1889 "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
1890 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
1891 "new acc 0x%x deny 0x%x", new_acc, new_deny));
1894 * Check to see if we aren't actually doing any downgrade or
1895 * if this is the last 'close' but the file is still mmapped.
1896 * Skip this if this a lost request resend so we don't decrement
1897 * the osp's share counts more than once.
1899 if (!lrp &&
1900 ((downgrade_acc == 0 && downgrade_deny == 0) ||
1901 (new_acc == 0 && new_deny == 0))) {
1903 * No downgrade to do, but still need to
1904 * update osp's os_share_* counts.
1906 NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE,
1907 "nfs4_open_downgrade: just lower the osp's count by %s",
1908 (access_close & FREAD) && (access_close & FWRITE) ?
1909 "read and write" : (access_close & FREAD) ? "read" :
1910 (access_close & FWRITE) ? "write" : "bogus"));
1911 if (access_close & FREAD)
1912 osp->os_share_acc_read--;
1913 if (access_close & FWRITE)
1914 osp->os_share_acc_write--;
1915 osp->os_share_deny_none--;
1916 nfs4_error_zinit(ep);
1918 goto no_args_out;
1921 if (osp->os_orig_oo_name != oop->oo_name) {
1922 ep->error = EIO;
1923 goto no_args_out;
1926 /* setup the COMPOUND args */
1927 if (lrp)
1928 args.ctag = TAG_OPEN_DG_LOST;
1929 else
1930 args.ctag = TAG_OPEN_DG;
1932 args.array_len = 3;
1933 args.array = argop;
1935 /* putfh */
1936 argop[0].argop = OP_CPUTFH;
1937 argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
1939 argop[1].argop = OP_GETATTR;
1940 argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
1941 argop[1].nfs_argop4_u.opgetattr.mi = mi;
1943 ASSERT(mutex_owned(&osp->os_sync_lock));
1944 ASSERT(osp->os_delegation == FALSE);
1946 /* open downgrade */
1947 argop[2].argop = OP_OPEN_DOWNGRADE;
1948 argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid;
1949 argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc;
1950 argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny;
1951 argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid;
1953 t = gethrtime();
1955 rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep);
1957 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
1958 nfs4_set_open_seqid(seqid, oop, args.ctag);
1960 if ((ep->error == EACCES ||
1961 (ep->error == 0 && res.status == NFS4ERR_ACCESS)) &&
1962 cred_otw != cr) {
1963 crfree(cred_otw);
1964 cred_otw = cr;
1965 crhold(cred_otw);
1966 if (!ep->error)
1967 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
1968 goto cred_retry;
1971 needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp);
1973 if (needrecov && recov_credpp) {
1974 *recov_credpp = cred_otw;
1975 crhold(*recov_credpp);
1976 if (recov_seqidp)
1977 *recov_seqidp = seqid;
1980 if (!ep->error && !res.status) {
1981 /* get the open downgrade results */
1982 resop = &res.array[2];
1983 odg_res = &resop->nfs_resop4_u.opopen_downgrade;
1985 osp->open_stateid = odg_res->open_stateid;
1987 /* set the open streams new access/deny bits */
1988 if (access_close & FREAD)
1989 osp->os_share_acc_read--;
1990 if (access_close & FWRITE)
1991 osp->os_share_acc_write--;
1992 osp->os_share_deny_none--;
1993 osp->os_dc_openacc = new_acc;
1995 nfs4_attr_cache(vp,
1996 &res.array[1].nfs_resop4_u.opgetattr.ga_res,
1997 t, cred_otw, TRUE, NULL);
2000 if (!ep->error)
2001 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2003 no_args_out:
2004 crfree(cred_otw);
2008 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out
2009 * because the filesystem was forcibly unmounted) then we don't know if we
2010 * potentially left state dangling on the server, therefore the recovery
2011 * framework makes this call to resend the OPEN request and then undo it.
2013 void
2014 nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
2015 nfs4_error_t *ep)
2017 COMPOUND4args_clnt args;
2018 COMPOUND4res_clnt res;
2019 nfs_argop4 argop[4];
2020 GETFH4res *gf_res = NULL;
2021 OPEN4cargs *open_args;
2022 OPEN4res *op_res;
2023 char *destcfp;
2024 int destclen;
2025 nfs4_ga_res_t *garp;
2026 vnode_t *dvp = NULL, *vp = NULL;
2027 rnode4_t *rp = NULL, *drp = NULL;
2028 cred_t *cr = NULL;
2029 seqid4 seqid;
2030 nfs4_open_owner_t *oop = NULL;
2031 nfs4_open_stream_t *osp = NULL;
2032 component4 *srcfp;
2033 open_claim_type4 claim;
2034 mntinfo4_t *mi;
2035 int doqueue = 1;
2036 bool_t retry_open = FALSE;
2037 int created_osp = 0;
2038 hrtime_t t;
2039 char *failed_msg = "";
2040 int fh_different;
2041 int reopen = 0;
2043 nfs4_error_zinit(ep);
2045 cr = resend_rqstp->lr_cr;
2046 dvp = resend_rqstp->lr_dvp;
2048 vp = *vpp;
2049 if (vp) {
2050 ASSERT(nfs4_consistent_type(vp));
2051 rp = VTOR4(vp);
2054 if (rp) {
2055 /* If the file failed recovery, just quit. */
2056 mutex_enter(&rp->r_statelock);
2057 if (rp->r_flags & R4RECOVERR) {
2058 mutex_exit(&rp->r_statelock);
2059 ep->error = EIO;
2060 return;
2062 mutex_exit(&rp->r_statelock);
2065 if (dvp) {
2066 drp = VTOR4(dvp);
2067 /* If the parent directory failed recovery, just quit. */
2068 mutex_enter(&drp->r_statelock);
2069 if (drp->r_flags & R4RECOVERR) {
2070 mutex_exit(&drp->r_statelock);
2071 ep->error = EIO;
2072 return;
2074 mutex_exit(&drp->r_statelock);
2075 } else
2076 reopen = 1; /* NULL dvp means this is a reopen */
2078 claim = resend_rqstp->lr_oclaim;
2079 ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);
2081 args.ctag = TAG_OPEN_LOST;
2082 args.array_len = 4;
2083 args.array = argop;
2085 argop[0].argop = OP_CPUTFH;
2086 if (reopen) {
2087 ASSERT(vp != NULL);
2089 mi = VTOMI4(vp);
2091 * if this is a file mount then
2092 * use the mntinfo parentfh
2094 argop[0].nfs_argop4_u.opcputfh.sfh =
2095 (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
2096 VTOSV(vp)->sv_dfh;
2097 args.ctag = TAG_REOPEN_LOST;
2098 } else {
2099 argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
2100 mi = VTOMI4(dvp);
2103 argop[1].argop = OP_COPEN;
2104 open_args = &argop[1].nfs_argop4_u.opcopen;
2105 open_args->claim = claim;
2108 * If we sent over a OPEN with CREATE then the only
2109 * thing we care about is to not leave dangling state
2110 * on the server, not whether the file we potentially
2111 * created remains on the server. So even though the
2112 * lost open request specified a CREATE, we only wish
2113 * to do a non-CREATE OPEN.
2115 open_args->opentype = OPEN4_NOCREATE;
2117 srcfp = &resend_rqstp->lr_ofile;
2118 destclen = srcfp->utf8string_len;
2119 destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
2120 bcopy(srcfp->utf8string_val, destcfp, destclen);
2121 destcfp[destclen] = '\0';
2122 if (claim == CLAIM_DELEGATE_CUR) {
2123 open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
2124 resend_rqstp->lr_ostateid;
2125 open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
2126 } else {
2127 open_args->open_claim4_u.cfile = destcfp;
2130 open_args->share_access = resend_rqstp->lr_oacc;
2131 open_args->share_deny = resend_rqstp->lr_odeny;
2132 oop = resend_rqstp->lr_oop;
2133 ASSERT(oop != NULL);
2135 open_args->owner.clientid = mi2clientid(mi);
2136 /* this length never changes */
2137 open_args->owner.owner_len = sizeof (oop->oo_name);
2138 open_args->owner.owner_val =
2139 kmem_alloc(open_args->owner.owner_len, KM_SLEEP);
2141 ep->error = nfs4_start_open_seqid_sync(oop, mi);
2142 ASSERT(ep->error == 0); /* recov thread always succeeds */
2144 * We can get away with not saving the seqid upon detection
2145 * of a lost request, and now just use the open owner's current
2146 * seqid since we only allow one op OTW per seqid and lost
2147 * requests are saved FIFO.
2149 seqid = nfs4_get_open_seqid(oop) + 1;
2150 open_args->seqid = seqid;
2152 bcopy(&oop->oo_name, open_args->owner.owner_val,
2153 open_args->owner.owner_len);
2155 /* getfh */
2156 argop[2].argop = OP_GETFH;
2158 /* Construct the getattr part of the compound */
2159 argop[3].argop = OP_GETATTR;
2160 argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
2161 argop[3].nfs_argop4_u.opgetattr.mi = mi;
2163 res.array = NULL;
2165 t = gethrtime();
2167 rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);
2169 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
2170 nfs4_set_open_seqid(seqid, oop, args.ctag);
2172 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2173 "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));
2175 if (ep->error || res.status)
2176 goto err_out;
2178 op_res = &res.array[1].nfs_resop4_u.opopen;
2179 gf_res = &res.array[2].nfs_resop4_u.opgetfh;
2180 garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;
2182 if (!vp) {
2183 int rnode_err = 0;
2184 nfs4_sharedfh_t *sfh;
2187 * If we can't decode all the attributes they are not usable,
2188 * just make the vnode.
2191 sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
2192 *vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
2193 fn_get(VTOSV(dvp)->sv_name,
2194 open_args->open_claim4_u.cfile, sfh));
2195 sfh4_rele(&sfh);
2196 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2197 "nfs4_resend_open_otw: made vp %p for file %s",
2198 (void *)(*vpp), open_args->open_claim4_u.cfile));
2200 if (ep->error)
2201 PURGE_ATTRCACHE4(*vpp);
2204 * For the newly created *vpp case, make sure the rnode
2205 * isn't bad before using it.
2207 mutex_enter(&(VTOR4(*vpp))->r_statelock);
2208 if (VTOR4(*vpp)->r_flags & R4RECOVERR)
2209 rnode_err = EIO;
2210 mutex_exit(&(VTOR4(*vpp))->r_statelock);
2212 if (rnode_err) {
2213 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2214 "nfs4_resend_open_otw: rp %p is bad",
2215 (void *)VTOR4(*vpp)));
2216 ep->error = rnode_err;
2217 goto err_out;
2220 vp = *vpp;
2221 rp = VTOR4(vp);
2224 if (reopen) {
2226 * Check if the path we reopened really is the same
2227 * file. We could end up in a situation where the file
2228 * was removed and a new file created with the same name.
2230 (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
2231 fh_different =
2232 (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
2233 if (fh_different) {
2234 if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
2235 mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
2236 /* Oops, we don't have the same file */
2237 if (mi->mi_fh_expire_type == FH4_PERSISTENT)
2238 failed_msg =
2239 "Couldn't reopen: Persistant "
2240 "file handle changed";
2241 else
2242 failed_msg =
2243 "Couldn't reopen: Volatile "
2244 "(no expire on open) file handle "
2245 "changed";
2247 nfs4_end_open_seqid_sync(oop);
2248 kmem_free(destcfp, destclen + 1);
2249 nfs4args_copen_free(open_args);
2250 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2251 nfs_rw_exit(&mi->mi_fh_lock);
2252 nfs4_fail_recov(vp, failed_msg, ep->error,
2253 ep->stat);
2254 return;
2255 } else {
2257 * We have volatile file handles that don't
2258 * compare. If the fids are the same then we
2259 * assume that the file handle expired but the
2260 * rnode still refers to the same file object.
2262 * First check that we have fids or not.
2263 * If we don't we have a dumb server so we will
2264 * just assume every thing is ok for now.
2266 if (!ep->error &&
2267 garp->n4g_va.va_mask & AT_NODEID &&
2268 rp->r_attr.va_mask & AT_NODEID &&
2269 rp->r_attr.va_nodeid !=
2270 garp->n4g_va.va_nodeid) {
2272 * We have fids, but they don't
2273 * compare. So kill the file.
2275 failed_msg =
2276 "Couldn't reopen: file handle "
2277 "changed due to mismatched fids";
2278 nfs4_end_open_seqid_sync(oop);
2279 kmem_free(destcfp, destclen + 1);
2280 nfs4args_copen_free(open_args);
2281 xdr_free(xdr_COMPOUND4res_clnt,
2282 (caddr_t)&res);
2283 nfs_rw_exit(&mi->mi_fh_lock);
2284 nfs4_fail_recov(vp, failed_msg,
2285 ep->error, ep->stat);
2286 return;
2287 } else {
2289 * We have volatile file handles that
2290 * refers to the same file (at least
2291 * they have the same fid) or we don't
2292 * have fids so we can't tell. :(. We'll
2293 * be a kind and accepting client so
2294 * we'll update the rnode's file
2295 * handle with the otw handle.
2297 * We need to drop mi->mi_fh_lock since
2298 * sfh4_update acquires it. Since there
2299 * is only one recovery thread there is
2300 * no race.
2302 nfs_rw_exit(&mi->mi_fh_lock);
2303 sfh4_update(rp->r_fh, &gf_res->object);
2306 } else {
2307 nfs_rw_exit(&mi->mi_fh_lock);
2311 ASSERT(nfs4_consistent_type(vp));
2313 if (op_res->rflags & OPEN4_RESULT_CONFIRM)
2314 nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
2315 &retry_open, oop, TRUE, ep, NULL);
2316 if (ep->error || ep->stat) {
2317 nfs4_end_open_seqid_sync(oop);
2318 kmem_free(destcfp, destclen + 1);
2319 nfs4args_copen_free(open_args);
2320 if (!ep->error)
2321 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2322 return;
2325 if (reopen) {
2327 * Doing a reopen here so the osp should already exist.
2328 * If not, something changed or went very wrong.
2330 * returns with 'os_sync_lock' held
2332 osp = find_open_stream(oop, rp);
2333 if (!osp) {
2334 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2335 "nfs4_resend_open_otw: couldn't find osp"));
2336 ep->error = EINVAL;
2337 goto err_out;
2339 osp->os_open_ref_count++;
2340 } else {
2341 mutex_enter(&oop->oo_lock);
2342 oop->oo_just_created = NFS4_PERM_CREATED;
2343 mutex_exit(&oop->oo_lock);
2345 /* returns with 'os_sync_lock' held */
2346 osp = find_or_create_open_stream(oop, rp, &created_osp);
2347 if (!osp) {
2348 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
2349 "nfs4_resend_open_otw: couldn't create osp"));
2350 ep->error = EINVAL;
2351 goto err_out;
2355 osp->open_stateid = op_res->stateid;
2356 osp->os_delegation = FALSE;
2358 * Need to reset this bitfield for the possible case where we were
2359 * going to OTW CLOSE the file, got a non-recoverable error, and before
2360 * we could retry the CLOSE, OPENed the file again.
2362 ASSERT(osp->os_open_owner->oo_seqid_inuse);
2363 osp->os_final_close = 0;
2364 osp->os_force_close = 0;
2366 if (!reopen) {
2367 if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
2368 osp->os_share_acc_read++;
2369 if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
2370 osp->os_share_acc_write++;
2371 osp->os_share_deny_none++;
2374 mutex_exit(&osp->os_sync_lock);
2375 if (created_osp)
2376 nfs4_inc_state_ref_count(mi);
2377 open_stream_rele(osp, rp);
2379 nfs4_end_open_seqid_sync(oop);
2381 /* accept delegation, if any */
2382 nfs4_delegation_accept(rp, claim, op_res, garp, cr);
2384 kmem_free(destcfp, destclen + 1);
2385 nfs4args_copen_free(open_args);
2387 if (claim == CLAIM_DELEGATE_CUR)
2388 nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
2389 else
2390 PURGE_ATTRCACHE4(vp);
2392 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
2394 ASSERT(nfs4_consistent_type(vp));
2396 return;
2398 err_out:
2399 nfs4_end_open_seqid_sync(oop);
2400 kmem_free(destcfp, destclen + 1);
2401 nfs4args_copen_free(open_args);
2402 if (!ep->error)
2403 xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);