/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
 */

#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/flock.h>
#include <sys/vnode.h>
#include <sys/share.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/t_lock.h>
#include <sys/errno.h>
#include <sys/nbmlock.h>

static void print_shares(struct vnode *);
static void print_share(struct shrlock *);

static int isreadonly(struct vnode *);
static void do_cleanshares(struct vnode *, pid_t, int32_t);

/*
 * Add the share reservation shr to vp.
 */
int
add_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;

	/*
	 * An access of zero is not legal, however some older clients
	 * generate it anyways.  Allow the request only if it is
	 * coming from a remote system.  Be generous in what you
	 * accept and strict in what you send.
	 */
	if ((shr->s_access == 0) && (GETSYSID(shr->s_sysid) == 0)) {
		return (EINVAL);
	}

	/*
	 * Sanity check to make sure we have valid options.
	 * There is known overlap but it doesn't hurt to be careful.
	 */
	if (shr->s_access & ~(F_RDACC|F_WRACC|F_RWACC|F_RMACC|F_MDACC)) {
		return (EINVAL);
	}
	if (shr->s_deny & ~(F_NODNY|F_RDDNY|F_WRDNY|F_RWDNY|F_COMPAT|
	    F_MANDDNY|F_RMDNY)) {
		return (EINVAL);
	}

	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		/*
		 * If the share owner matches previous request
		 * do special handling.
		 */
		if ((shrl->shr->s_sysid == shr->s_sysid) &&
		    (shrl->shr->s_pid == shr->s_pid) &&
		    (shrl->shr->s_own_len == shr->s_own_len) &&
		    bcmp(shrl->shr->s_owner, shr->s_owner,
		    shr->s_own_len) == 0) {
			/*
			 * If the existing request is F_COMPAT and
			 * is the first share then allow any F_COMPAT
			 * from the same process.  Trick: If the existing
			 * F_COMPAT is write access then it must have
			 * the same owner as the first.
			 */
			if ((shrl->shr->s_deny & F_COMPAT) &&
			    (shr->s_deny & F_COMPAT) &&
			    ((shrl->next == NULL) ||
			    (shrl->shr->s_access & F_WRACC)))
				break;
		}

		/*
		 * If a first share has been done in compatibility mode
		 * handle the special cases.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shrl->next == NULL)) {
			if (!(shr->s_deny & F_COMPAT)) {
				/*
				 * If not compat and want write access or
				 * want to deny read or
				 * write exists, fails
				 */
				if ((shr->s_access & F_WRACC) ||
				    (shr->s_deny & F_RDDNY) ||
				    (shrl->shr->s_access & F_WRACC)) {
					mutex_exit(&vp->v_lock);
					return (EAGAIN);
				}
				/*
				 * If read only file allow, this may allow
				 * a deny write but that is meaningless on
				 * a read only file.
				 */
				if (isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}
			/*
			 * This is a compat request and read access
			 * and the first was also read access
			 * we always allow it, otherwise we reject because
			 * we have handled the only valid write case above.
			 */
			if ((shr->s_access == F_RDACC) &&
			    (shrl->shr->s_access == F_RDACC))
				break;
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}

		/*
		 * If we are trying to share in compatibility mode
		 * and the current share is compat (and not the first)
		 * we don't know enough.
		 */
		if ((shrl->shr->s_deny & F_COMPAT) && (shr->s_deny & F_COMPAT))
			continue;

		/*
		 * If this is a compat we check for what can't succeed.
		 */
		if (shr->s_deny & F_COMPAT) {
			/*
			 * If we want write access or
			 * if anyone is denying read or
			 * if anyone has write access we fail
			 */
			if ((shr->s_access & F_WRACC) ||
			    (shrl->shr->s_deny & F_RDDNY) ||
			    (shrl->shr->s_access & F_WRACC)) {
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}

			/*
			 * If the first was opened with only read access
			 * and is a read only file we allow.
			 */
			if (shrl->next == NULL) {
				if ((shrl->shr->s_access == F_RDACC) &&
				    isreadonly(vp))
					break;
				mutex_exit(&vp->v_lock);
				return (EAGAIN);
			}

			/*
			 * We still can't determine our fate so continue
			 */
			continue;
		}

		/*
		 * Simple bitwise test, if we are trying to access what
		 * someone else is denying or we are trying to deny
		 * what someone else is accessing we fail.
		 */
		if ((shr->s_access & shrl->shr->s_deny) ||
		    (shr->s_deny & shrl->shr->s_access)) {
			mutex_exit(&vp->v_lock);
			return (EAGAIN);
		}
	}

	shrl = kmem_alloc(sizeof (struct shrlocklist), KM_SLEEP);
	shrl->shr = kmem_alloc(sizeof (struct shrlock), KM_SLEEP);
	shrl->shr->s_access = shr->s_access;
	shrl->shr->s_deny = shr->s_deny;

	/*
	 * Make sure no other deny modes are also set with F_COMPAT
	 */
	if (shrl->shr->s_deny & F_COMPAT)
		shrl->shr->s_deny = F_COMPAT;
	shrl->shr->s_sysid = shr->s_sysid;		/* XXX ref cnt? */
	shrl->shr->s_pid = shr->s_pid;
	shrl->shr->s_own_len = shr->s_own_len;
	shrl->shr->s_owner = kmem_alloc(shr->s_own_len, KM_SLEEP);
	bcopy(shr->s_owner, shrl->shr->s_owner, shr->s_own_len);
	shrl->next = vp->v_shrlocks;
	vp->v_shrlocks = shrl;

	mutex_exit(&vp->v_lock);

	return (0);
}
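
/*
 * Illustrative sketch, not part of the original file: how a local caller
 * might request a read share that denies writers.  The function name and
 * the pid-as-owner encoding are assumptions made for this example; real
 * callers (e.g. the fcntl() F_SHARE path) build s_owner from their own
 * identity.  add_share() copies the owner bytes, so a stack buffer is
 * fine here.
 */
static int
example_read_deny_write_share(struct vnode *vp, pid_t pid)
{
	struct shrlock shr;

	shr.s_access = F_RDACC;		/* we only want to read */
	shr.s_deny = F_WRDNY;		/* but deny write to others */
	shr.s_sysid = 0;		/* local request */
	shr.s_pid = pid;
	shr.s_own_len = sizeof (pid);
	shr.s_owner = (caddr_t)&pid;

	/* A nonzero return means an existing reservation conflicts. */
	return (add_share(vp, &shr));
}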

/*
 * Matching rules for the (nlmid, sysid, pid) triple of a delete request:
 *
 * nlmid	sysid	pid
 * =====	=====	===
 * !=0		!=0	=0	in cluster; NLM lock
 * !=0		=0	=0	in cluster; special case for NLM lock
 * !=0		=0	!=0	in cluster; PXFS local lock
 * !=0		!=0	!=0	cannot happen
 * =0		!=0	=0	not in cluster; NLM lock
 * =0		=0	!=0	not in cluster; local lock
 * =0		=0	=0	cannot happen
 * =0		!=0	!=0	cannot happen
 */
static int
is_match_for_del(struct shrlock *shr, struct shrlock *element)
{
	int nlmid1, nlmid2;
	int result = 0;

	nlmid1 = GETNLMID(shr->s_sysid);
	nlmid2 = GETNLMID(element->s_sysid);

	if (nlmid1 != 0) {		/* in a cluster */
		if (GETSYSID(shr->s_sysid) != 0 && shr->s_pid == 0) {
			/*
			 * Lock obtained through nlm server.  Just need to
			 * compare whole sysids.  pid will always = 0.
			 */
			result = shr->s_sysid == element->s_sysid;
		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid == 0) {
			/*
			 * This is a special case.  The NLM server wishes to
			 * delete all share locks obtained through nlmid1.
			 */
			result = (nlmid1 == nlmid2);
		} else if (GETSYSID(shr->s_sysid) == 0 && shr->s_pid != 0) {
			/*
			 * Lock obtained locally through PXFS.  Match nlmids
			 * and pids.
			 */
			result = (nlmid1 == nlmid2 &&
			    shr->s_pid == element->s_pid);
		}
	} else {			/* not in a cluster */
		result = ((shr->s_sysid == 0 &&
		    shr->s_pid == element->s_pid) ||
		    (shr->s_sysid != 0 &&
		    shr->s_sysid == element->s_sysid));
	}

	return (result);
}
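
/*
 * Illustrative sketch, not part of the original file: the
 * "not in cluster; NLM lock" row of the table above.  A pattern shrlock
 * with s_own_len == 0 makes del_share() fall back to is_match_for_del(),
 * and a nonzero s_sysid with s_pid == 0 then matches every reservation
 * registered under that sysid.  The function name is an assumption; this
 * is essentially what cleanshares_by_sysid() below arranges via
 * do_cleanshares().
 */
static void
example_drop_shares_for_sysid(struct vnode *vp, int32_t sysid)
{
	struct shrlock pattern;

	pattern.s_access = 0;
	pattern.s_deny = 0;
	pattern.s_sysid = sysid;	/* remote (NLM) system id */
	pattern.s_pid = 0;
	pattern.s_own_len = 0;		/* match via is_match_for_del() */
	pattern.s_owner = NULL;

	(void) del_share(vp, &pattern);
}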

/*
 * Delete the given share reservation.  Returns 0 if okay, EINVAL if the
 * share could not be found.  If the share reservation is an NBMAND share
 * reservation, signal anyone waiting for the share to go away (e.g.,
 * blocking lock requests).
 */
int
del_share(struct vnode *vp, struct shrlock *shr)
{
	struct shrlocklist *shrl;
	struct shrlocklist **shrlp;
	int found = 0;
	int is_nbmand = 0;

	mutex_enter(&vp->v_lock);
	/*
	 * Delete the shares with the matching sysid and owner
	 * But if own_len == 0 and sysid == 0 delete all with matching pid
	 * But if own_len == 0 delete all with matching sysid.
	 */
	shrlp = &vp->v_shrlocks;
	while (*shrlp) {
		if ((shr->s_own_len == (*shrlp)->shr->s_own_len &&
		    (bcmp(shr->s_owner, (*shrlp)->shr->s_owner,
		    shr->s_own_len) == 0)) ||

		    (shr->s_own_len == 0 &&
		    is_match_for_del(shr, (*shrlp)->shr))) {
			shrl = *shrlp;
			*shrlp = shrl->next;

			if (shrl->shr->s_deny & F_MANDDNY)
				is_nbmand = 1;

			/* XXX deref sysid */
			kmem_free(shrl->shr->s_owner, shrl->shr->s_own_len);
			kmem_free(shrl->shr, sizeof (struct shrlock));
			kmem_free(shrl, sizeof (struct shrlocklist));
			found = 1;
			continue;
		}
		shrlp = &(*shrlp)->next;
	}

	if (is_nbmand)
		cv_broadcast(&vp->v_cv);

	mutex_exit(&vp->v_lock);
	return (found ? 0 : EINVAL);
}
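
/*
 * Illustrative sketch, not part of the original file: releasing one
 * specific reservation.  When s_own_len is nonzero, del_share() matches
 * purely on the opaque owner bytes, so the caller passes the same owner
 * it used with add_share().  The function name and pid-as-owner encoding
 * are assumptions made for this example.
 */
static int
example_release_share(struct vnode *vp, pid_t pid)
{
	struct shrlock shr;

	shr.s_access = 0;		/* not consulted when deleting */
	shr.s_deny = 0;
	shr.s_sysid = 0;
	shr.s_pid = pid;
	shr.s_own_len = sizeof (pid);
	shr.s_owner = (caddr_t)&pid;

	/* Returns EINVAL if no matching reservation was found. */
	return (del_share(vp, &shr));
}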

/*
 * Clean up all local share reservations that the given process has with
 * the given file.
 */
void
cleanshares(struct vnode *vp, pid_t pid)
{
	do_cleanshares(vp, pid, 0);
}

/*
 * Cleanup all remote share reservations that
 * were made by the given sysid on given vnode.
 */
void
cleanshares_by_sysid(struct vnode *vp, int32_t sysid)
{
	if (sysid == 0)
		return;

	do_cleanshares(vp, 0, sysid);
}

/*
 * Cleanup share reservations on given vnode made
 * by either the given pid or sysid.
 * If sysid is 0, remove all shares made by given pid,
 * otherwise all shares made by the given sysid will
 * be removed.
 */
static void
do_cleanshares(struct vnode *vp, pid_t pid, int32_t sysid)
{
	struct shrlock shr;

	if (vp->v_shrlocks == NULL)
		return;

	shr.s_access = 0;
	shr.s_deny = 0;
	shr.s_pid = pid;
	shr.s_sysid = sysid;
	shr.s_own_len = 0;
	shr.s_owner = NULL;

	(void) del_share(vp, &shr);
}
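
/*
 * Illustrative sketch, not part of the original file: the typical caller
 * of cleanshares() is a file system close routine, which drops whatever
 * reservations a process still holds when it closes the file for the
 * last time.  The function name, the count argument, and the policy of
 * only cleaning on the last close are simplified assumptions for this
 * example.
 */
static void
example_close_cleanup(struct vnode *vp, int count, pid_t pid)
{
	if (count > 1)
		return;		/* not the last close for this handle */

	cleanshares(vp, pid);
}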

static int
is_match_for_has_remote(int32_t sysid1, int32_t sysid2)
{
	int result = 0;

	if (GETNLMID(sysid1) != 0) {	/* in a cluster */
		if (GETSYSID(sysid1) != 0) {
			/*
			 * Lock obtained through nlm server.  Just need to
			 * compare whole sysids.
			 */
			result = (sysid1 == sysid2);
		} else if (GETSYSID(sysid1) == 0) {
			/*
			 * This is a special case.  The NLM server identified
			 * by nlmid1 wishes to find out if it has obtained
			 * any share locks on the vnode.
			 */
			result = (GETNLMID(sysid1) == GETNLMID(sysid2));
		}
	} else {			/* not in a cluster */
		result = ((sysid1 != 0 && sysid1 == sysid2) ||
		    (sysid1 == 0 && sysid2 != 0));
	}

	return (result);
}

/*
 * Determine whether there are any shares for the given vnode
 * with a remote sysid.  Returns zero if not, non-zero if there are.
 * If sysid is non-zero then determine if this sysid has a share.
 *
 * Note that the return value from this function is potentially invalid
 * once it has been returned.  The caller is responsible for providing its
 * own synchronization mechanism to ensure that the return value is useful.
 */
int
shr_has_remote_shares(vnode_t *vp, int32_t sysid)
{
	struct shrlocklist *shrl;
	int result = 0;

	mutex_enter(&vp->v_lock);
	shrl = vp->v_shrlocks;
	while (shrl) {
		if (is_match_for_has_remote(sysid, shrl->shr->s_sysid)) {
			result = 1;
			break;
		}
		shrl = shrl->next;
	}
	mutex_exit(&vp->v_lock);

	return (result);
}
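
/*
 * Illustrative sketch, not part of the original file: as the comment
 * above warns, the answer from shr_has_remote_shares() can go stale as
 * soon as v_lock is dropped, so a caller acts on it only under its own
 * serialization.  The function name and the caller-supplied rwlock are
 * assumptions standing in for whatever mechanism a real caller uses.
 */
static int
example_check_remote_shares(vnode_t *vp, int32_t sysid, krwlock_t *svc_lock)
{
	int has_shares;

	rw_enter(svc_lock, RW_READER);	/* caller's own serialization */
	has_shares = shr_has_remote_shares(vp, sysid);
	/* ... act on has_shares while svc_lock is still held ... */
	rw_exit(svc_lock);

	return (has_shares);
}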

static int
isreadonly(struct vnode *vp)
{
	return (vp->v_type != VCHR && vp->v_type != VBLK &&
	    vp->v_type != VFIFO && vn_is_readonly(vp));
}

static void
print_shares(struct vnode *vp)
{
	struct shrlocklist *shrl;

	if (vp->v_shrlocks == NULL) {
		printf("<NULL>\n");
		return;
	}

	shrl = vp->v_shrlocks;
	while (shrl) {
		print_share(shrl->shr);
		shrl = shrl->next;
	}
}

static void
print_share(struct shrlock *shr)
{
	int i;

	printf(" access(%d): ", shr->s_access);
	if (shr->s_access & F_RDACC)
		printf("R");
	if (shr->s_access & F_WRACC)
		printf("W");
	if ((shr->s_access & (F_RDACC|F_WRACC)) == 0)
		printf("N");
	printf("\n");

	printf(" deny(%d): ", shr->s_deny);
	if (shr->s_deny & F_COMPAT)
		printf("C");
	if (shr->s_deny & F_RDDNY)
		printf("R");
	if (shr->s_deny & F_WRDNY)
		printf("W");
	if (shr->s_deny == F_NODNY)
		printf("N");
	printf("\n");

	printf(" sysid: %d\n", shr->s_sysid);
	printf(" pid: %d\n", shr->s_pid);
	printf(" owner: [%d]", shr->s_own_len);
	for (i = 0; i < shr->s_own_len; i++)
		printf("%02x", (unsigned)shr->s_owner[i]);
	printf("\n");
}

/*
 * Return non-zero if the given I/O request conflicts with a registered
 * share reservation.  Note: These are Windows-compatible semantics, but
 * Windows would do these checks only when opening a file.  Details in:
 * [MS-FSA] 2.1.5.1.2.2 Algorithm to check sharing access...
 *
 * A process is identified by the tuple (sysid, pid).  When the caller
 * context is passed to nbl_share_conflict, the sysid and pid in the
 * caller context are used.  Otherwise the sysid is zero, and the pid is
 * taken from the current process.
 *
 * Conflict Algorithm:
 *   1. An op request of NBL_READ will fail if a different
 *      process has a mandatory share reservation with deny read.
 *
 *   2. An op request of NBL_WRITE will fail if a different
 *      process has a mandatory share reservation with deny write.
 *
 *   3. An op request of NBL_READWRITE will fail if a different
 *      process has a mandatory share reservation with deny read
 *      or deny write.
 *
 *   4. An op request of NBL_REMOVE will fail if there is
 *      a mandatory share reservation with deny remove.
 *
 *   5. An op request of NBL_RENAME ... (same as NBL_REMOVE)
 *
 * Otherwise there is no conflict and the op request succeeds.
 *
 * This behavior is required for interoperability between
 * the nfs server, cifs server, and local access.
 * This behavior can result in non-posix semantics.
 *
 * When mandatory share reservations are enabled, a process
 * should call nbl_share_conflict to determine if the
 * desired operation would conflict with an existing share
 * reservation.
 *
 * The call to nbl_share_conflict may be skipped if the
 * process has an existing share reservation and the operation
 * is being performed in the context of that existing share
 * reservation.
 */
int
nbl_share_conflict(vnode_t *vp, nbl_op_t op, caller_context_t *ct)
{
	struct shrlocklist *shrl;
	int conflict = 0;
	pid_t pid;
	int sysid;

	ASSERT(nbl_in_crit(vp));

	if (ct == NULL) {
		pid = curproc->p_pid;
		sysid = 0;
	} else {
		pid = ct->cc_pid;
		sysid = ct->cc_sysid;
	}

	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		if (!(shrl->shr->s_deny & F_MANDDNY))
			continue;

		/*
		 * Share deny reservations apply to _subsequent_ opens
		 * and therefore only to I/O on _other_ handles.
		 */
		if (shrl->shr->s_sysid == sysid &&
		    shrl->shr->s_pid == pid)
			continue;

		/*
		 * NBL_READ, NBL_WRITE, and NBL_READWRITE need to
		 * check if the share reservation being examined
		 * belongs to the current process.
		 * NBL_REMOVE and NBL_RENAME do not.
		 * This behavior is required by the conflict
		 * algorithm described above.
		 */
		switch (op) {
		case NBL_READ:
			if (shrl->shr->s_deny & F_RDDNY)
				conflict = 1;
			break;
		case NBL_WRITE:
			if (shrl->shr->s_deny & F_WRDNY)
				conflict = 1;
			break;
		case NBL_READWRITE:
			if (shrl->shr->s_deny & F_RWDNY)
				conflict = 1;
			break;
		case NBL_REMOVE:
		case NBL_RENAME:
			if (shrl->shr->s_deny & F_RMDNY)
				conflict = 1;
			break;
#ifdef DEBUG
		default:
			cmn_err(CE_PANIC,
			    "nbl_share_conflict: bogus op (%d)",
			    op);
			break;
#endif
		}

		if (conflict)
			break;
	}

	mutex_exit(&vp->v_lock);
	return (conflict);
}
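
/*
 * Illustrative sketch, not part of the original file: the usual calling
 * pattern for the conflict check above.  The caller enters the vnode's
 * non-blocking-mandatory critical region, asks whether its intended
 * operation collides with a mandatory reservation, and performs the I/O
 * (not shown) before leaving the region, since the answer can change
 * once the region is exited.  The function name and the write-oriented
 * arguments are assumptions made for this example.
 */
static int
example_write_would_conflict(vnode_t *vp, caller_context_t *ct)
{
	int conflict = 0;

	if (nbl_need_check(vp)) {	/* mandatory locking/sharing enabled? */
		nbl_start_crit(vp, RW_READER);
		conflict = nbl_share_conflict(vp, NBL_WRITE, ct);
		/* a real caller would do its write here (if no conflict) */
		nbl_end_crit(vp);
	}

	return (conflict);
}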

/*
 * Determine if the given process has a NBMAND share reservation on the
 * given vnode. Returns 1 if the process has such a share reservation,
 * returns 0 otherwise.
 */
int
proc_has_nbmand_share_on_vp(vnode_t *vp, pid_t pid)
{
	struct shrlocklist *shrl;

	/*
	 * Any NBMAND share reservation on the vp for this process?
	 */
	mutex_enter(&vp->v_lock);
	for (shrl = vp->v_shrlocks; shrl != NULL; shrl = shrl->next) {
		if (shrl->shr->s_sysid == 0 &&
		    (shrl->shr->s_deny & F_MANDDNY) &&
		    (shrl->shr->s_pid == pid)) {
			mutex_exit(&vp->v_lock);
			return (1);
		}
	}
	mutex_exit(&vp->v_lock);

	return (0);
}
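
/*
 * Illustrative sketch, not part of the original file: a caller that wants
 * to treat a process specially when that process itself holds an NBMAND
 * reservation on the vnode (for example, failing a request immediately
 * rather than letting it block).  The function name and the policy are
 * assumptions made for this example.
 */
static int
example_ok_to_block(vnode_t *vp, pid_t pid)
{
	/*
	 * Don't let a process block on a file it has an NBMAND share
	 * reservation on; the caller should fail the request instead.
	 */
	if (proc_has_nbmand_share_on_vp(vp, pid))
		return (0);

	return (1);
}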