4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <sys/zfs_context.h>
27 #include <sys/sunddi.h>
31 #include <sys/refcount.h>
32 #include <sys/nvpair.h>
34 #include <sys/kidmap.h>
36 #include <sys/zfs_vfsops.h>
37 #include <sys/zfs_znode.h>
39 #include <sys/zfs_fuid.h>
/*
 * FUID Domain table(s).
 *
 * The FUID table is stored as a packed nvlist of an array
 * of nvlists which contain an index, domain string and offset.
 *
 * During file system initialization the nvlist(s) are read and
 * two AVL trees are created.  One tree is keyed by the index number
 * and the other by the domain string.  Nodes are never removed from
 * trees, but new entries may be added.  If a new entry is added then the
 * on-disk packed nvlist will also be updated.
 */
/*
 * nvpair names used in the packed FUID table: FUID_NVP_ARRAY names the
 * array of per-domain nvlists; FUID_IDX, FUID_DOMAIN and FUID_OFFSET
 * name the members stored in each element.
 */
#define	FUID_IDX	"fuid_idx"
#define	FUID_DOMAIN	"fuid_domain"
#define	FUID_OFFSET	"fuid_offset"
#define	FUID_NVP_ARRAY	"fuid_nvlist"
59 typedef struct fuid_domain
{
/* Empty domain string handed back when no real domain applies. */
static char *nulldomain = "";
69 * Compare two indexes.
72 idx_compare(const void *arg1
, const void *arg2
)
74 const fuid_domain_t
*node1
= arg1
;
75 const fuid_domain_t
*node2
= arg2
;
77 if (node1
->f_idx
< node2
->f_idx
)
79 else if (node1
->f_idx
> node2
->f_idx
)
85 * Compare two domain strings.
88 domain_compare(const void *arg1
, const void *arg2
)
90 const fuid_domain_t
*node1
= arg1
;
91 const fuid_domain_t
*node2
= arg2
;
94 val
= strcmp(node1
->f_ksid
->kd_name
, node2
->f_ksid
->kd_name
);
97 return (val
> 0 ? 1 : -1);
101 * load initial fuid domain and idx trees. This function is used by
102 * both the kernel and zdb.
105 zfs_fuid_table_load(objset_t
*os
, uint64_t fuid_obj
, avl_tree_t
*idx_tree
,
106 avl_tree_t
*domain_tree
)
111 avl_create(idx_tree
, idx_compare
,
112 sizeof (fuid_domain_t
), offsetof(fuid_domain_t
, f_idxnode
));
113 avl_create(domain_tree
, domain_compare
,
114 sizeof (fuid_domain_t
), offsetof(fuid_domain_t
, f_domnode
));
116 VERIFY(0 == dmu_bonus_hold(os
, fuid_obj
, FTAG
, &db
));
117 fuid_size
= *(uint64_t *)db
->db_data
;
118 dmu_buf_rele(db
, FTAG
);
122 nvlist_t
*nvp
= NULL
;
127 packed
= kmem_alloc(fuid_size
, KM_SLEEP
);
128 VERIFY(dmu_read(os
, fuid_obj
, 0, fuid_size
, packed
) == 0);
129 VERIFY(nvlist_unpack(packed
, fuid_size
,
131 VERIFY(nvlist_lookup_nvlist_array(nvp
, FUID_NVP_ARRAY
,
132 &fuidnvp
, &count
) == 0);
134 for (i
= 0; i
!= count
; i
++) {
135 fuid_domain_t
*domnode
;
139 VERIFY(nvlist_lookup_string(fuidnvp
[i
], FUID_DOMAIN
,
141 VERIFY(nvlist_lookup_uint64(fuidnvp
[i
], FUID_IDX
,
144 domnode
= kmem_alloc(sizeof (fuid_domain_t
), KM_SLEEP
);
146 domnode
->f_idx
= idx
;
147 domnode
->f_ksid
= ksid_lookupdomain(domain
);
148 avl_add(idx_tree
, domnode
);
149 avl_add(domain_tree
, domnode
);
152 kmem_free(packed
, fuid_size
);
158 zfs_fuid_table_destroy(avl_tree_t
*idx_tree
, avl_tree_t
*domain_tree
)
160 fuid_domain_t
*domnode
;
164 while (domnode
= avl_destroy_nodes(domain_tree
, &cookie
))
165 ksiddomain_rele(domnode
->f_ksid
);
167 avl_destroy(domain_tree
);
169 while (domnode
= avl_destroy_nodes(idx_tree
, &cookie
))
170 kmem_free(domnode
, sizeof (fuid_domain_t
));
171 avl_destroy(idx_tree
);
175 zfs_fuid_idx_domain(avl_tree_t
*idx_tree
, uint32_t idx
)
177 fuid_domain_t searchnode
, *findnode
;
180 searchnode
.f_idx
= idx
;
182 findnode
= avl_find(idx_tree
, &searchnode
, &loc
);
184 return (findnode
? findnode
->f_ksid
->kd_name
: nulldomain
);
189 * Load the fuid table(s) into memory.
192 zfs_fuid_init(zfsvfs_t
*zfsvfs
, dmu_tx_t
*tx
)
196 rw_enter(&zfsvfs
->z_fuid_lock
, RW_WRITER
);
198 if (zfsvfs
->z_fuid_loaded
) {
199 rw_exit(&zfsvfs
->z_fuid_lock
);
203 if (zfsvfs
->z_fuid_obj
== 0) {
205 /* first make sure we need to allocate object */
207 error
= zap_lookup(zfsvfs
->z_os
, MASTER_NODE_OBJ
,
208 ZFS_FUID_TABLES
, 8, 1, &zfsvfs
->z_fuid_obj
);
209 if (error
== ENOENT
&& tx
!= NULL
) {
210 zfsvfs
->z_fuid_obj
= dmu_object_alloc(zfsvfs
->z_os
,
211 DMU_OT_FUID
, 1 << 14, DMU_OT_FUID_SIZE
,
212 sizeof (uint64_t), tx
);
213 VERIFY(zap_add(zfsvfs
->z_os
, MASTER_NODE_OBJ
,
214 ZFS_FUID_TABLES
, sizeof (uint64_t), 1,
215 &zfsvfs
->z_fuid_obj
, tx
) == 0);
219 if (zfsvfs
->z_fuid_obj
!= 0) {
220 zfsvfs
->z_fuid_size
= zfs_fuid_table_load(zfsvfs
->z_os
,
221 zfsvfs
->z_fuid_obj
, &zfsvfs
->z_fuid_idx
,
222 &zfsvfs
->z_fuid_domain
);
223 zfsvfs
->z_fuid_loaded
= B_TRUE
;
226 rw_exit(&zfsvfs
->z_fuid_lock
);
230 * Query domain table for a given domain.
232 * If domain isn't found it is added to AVL trees and
233 * the results are pushed out to disk.
236 zfs_fuid_find_by_domain(zfsvfs_t
*zfsvfs
, const char *domain
, char **retdomain
,
239 fuid_domain_t searchnode
, *findnode
;
241 krw_t rw
= RW_READER
;
244 * If the dummy "nobody" domain then return an index of 0
245 * to cause the created FUID to be a standard POSIX id
246 * for the user nobody.
248 if (domain
[0] == '\0') {
249 *retdomain
= nulldomain
;
253 searchnode
.f_ksid
= ksid_lookupdomain(domain
);
255 *retdomain
= searchnode
.f_ksid
->kd_name
;
257 if (!zfsvfs
->z_fuid_loaded
)
258 zfs_fuid_init(zfsvfs
, tx
);
261 rw_enter(&zfsvfs
->z_fuid_lock
, rw
);
262 findnode
= avl_find(&zfsvfs
->z_fuid_domain
, &searchnode
, &loc
);
265 rw_exit(&zfsvfs
->z_fuid_lock
);
266 ksiddomain_rele(searchnode
.f_ksid
);
267 return (findnode
->f_idx
);
269 fuid_domain_t
*domnode
;
278 if (rw
== RW_READER
&& !rw_tryupgrade(&zfsvfs
->z_fuid_lock
)) {
279 rw_exit(&zfsvfs
->z_fuid_lock
);
284 domnode
= kmem_alloc(sizeof (fuid_domain_t
), KM_SLEEP
);
285 domnode
->f_ksid
= searchnode
.f_ksid
;
287 retidx
= domnode
->f_idx
= avl_numnodes(&zfsvfs
->z_fuid_idx
) + 1;
289 avl_add(&zfsvfs
->z_fuid_domain
, domnode
);
290 avl_add(&zfsvfs
->z_fuid_idx
, domnode
);
292 * Now resync the on-disk nvlist.
294 VERIFY(nvlist_alloc(&nvp
, NV_UNIQUE_NAME
, KM_SLEEP
) == 0);
296 domnode
= avl_first(&zfsvfs
->z_fuid_domain
);
297 fuids
= kmem_alloc(retidx
* sizeof (void *), KM_SLEEP
);
299 VERIFY(nvlist_alloc(&fuids
[i
],
300 NV_UNIQUE_NAME
, KM_SLEEP
) == 0);
301 VERIFY(nvlist_add_uint64(fuids
[i
], FUID_IDX
,
302 domnode
->f_idx
) == 0);
303 VERIFY(nvlist_add_uint64(fuids
[i
],
304 FUID_OFFSET
, 0) == 0);
305 VERIFY(nvlist_add_string(fuids
[i
++], FUID_DOMAIN
,
306 domnode
->f_ksid
->kd_name
) == 0);
307 domnode
= AVL_NEXT(&zfsvfs
->z_fuid_domain
, domnode
);
309 VERIFY(nvlist_add_nvlist_array(nvp
, FUID_NVP_ARRAY
,
310 fuids
, retidx
) == 0);
311 for (i
= 0; i
!= retidx
; i
++)
312 nvlist_free(fuids
[i
]);
313 kmem_free(fuids
, retidx
* sizeof (void *));
314 VERIFY(nvlist_size(nvp
, &nvsize
, NV_ENCODE_XDR
) == 0);
315 packed
= kmem_alloc(nvsize
, KM_SLEEP
);
316 VERIFY(nvlist_pack(nvp
, &packed
, &nvsize
,
317 NV_ENCODE_XDR
, KM_SLEEP
) == 0);
319 zfsvfs
->z_fuid_size
= nvsize
;
320 dmu_write(zfsvfs
->z_os
, zfsvfs
->z_fuid_obj
, 0,
321 zfsvfs
->z_fuid_size
, packed
, tx
);
322 kmem_free(packed
, zfsvfs
->z_fuid_size
);
323 VERIFY(0 == dmu_bonus_hold(zfsvfs
->z_os
, zfsvfs
->z_fuid_obj
,
325 dmu_buf_will_dirty(db
, tx
);
326 *(uint64_t *)db
->db_data
= zfsvfs
->z_fuid_size
;
327 dmu_buf_rele(db
, FTAG
);
329 rw_exit(&zfsvfs
->z_fuid_lock
);
335 * Query domain table by index, returning domain string
337 * Returns a pointer from an avl node of the domain string.
341 zfs_fuid_find_by_idx(zfsvfs_t
*zfsvfs
, uint32_t idx
)
345 if (idx
== 0 || !zfsvfs
->z_use_fuids
)
348 if (!zfsvfs
->z_fuid_loaded
)
349 zfs_fuid_init(zfsvfs
, NULL
);
351 rw_enter(&zfsvfs
->z_fuid_lock
, RW_READER
);
353 if (zfsvfs
->z_fuid_obj
)
354 domain
= zfs_fuid_idx_domain(&zfsvfs
->z_fuid_idx
, idx
);
357 rw_exit(&zfsvfs
->z_fuid_lock
);
364 zfs_fuid_map_ids(znode_t
*zp
, cred_t
*cr
, uid_t
*uidp
, uid_t
*gidp
)
366 *uidp
= zfs_fuid_map_id(zp
->z_zfsvfs
, zp
->z_phys
->zp_uid
,
368 *gidp
= zfs_fuid_map_id(zp
->z_zfsvfs
, zp
->z_phys
->zp_gid
,
373 zfs_fuid_map_id(zfsvfs_t
*zfsvfs
, uint64_t fuid
,
374 cred_t
*cr
, zfs_fuid_type_t type
)
376 uint32_t index
= FUID_INDEX(fuid
);
383 domain
= zfs_fuid_find_by_idx(zfsvfs
, index
);
384 ASSERT(domain
!= NULL
);
386 if (type
== ZFS_OWNER
|| type
== ZFS_ACE_USER
) {
387 (void) kidmap_getuidbysid(crgetzone(cr
), domain
,
388 FUID_RID(fuid
), &id
);
390 (void) kidmap_getgidbysid(crgetzone(cr
), domain
,
391 FUID_RID(fuid
), &id
);
397 * Add a FUID node to the list of fuid's being created for this
400 * If ACL has multiple domains, then keep only one copy of each unique
404 zfs_fuid_node_add(zfs_fuid_info_t
**fuidpp
, const char *domain
, uint32_t rid
,
405 uint64_t idx
, uint64_t id
, zfs_fuid_type_t type
)
408 zfs_fuid_domain_t
*fuid_domain
;
409 zfs_fuid_info_t
*fuidp
;
411 boolean_t found
= B_FALSE
;
414 *fuidpp
= zfs_fuid_info_alloc();
418 * First find fuid domain index in linked list
420 * If one isn't found then create an entry.
423 for (fuididx
= 1, fuid_domain
= list_head(&fuidp
->z_domains
);
424 fuid_domain
; fuid_domain
= list_next(&fuidp
->z_domains
,
425 fuid_domain
), fuididx
++) {
426 if (idx
== fuid_domain
->z_domidx
) {
433 fuid_domain
= kmem_alloc(sizeof (zfs_fuid_domain_t
), KM_SLEEP
);
434 fuid_domain
->z_domain
= domain
;
435 fuid_domain
->z_domidx
= idx
;
436 list_insert_tail(&fuidp
->z_domains
, fuid_domain
);
437 fuidp
->z_domain_str_sz
+= strlen(domain
) + 1;
438 fuidp
->z_domain_cnt
++;
441 if (type
== ZFS_ACE_USER
|| type
== ZFS_ACE_GROUP
) {
443 * Now allocate fuid entry and add it on the end of the list
446 fuid
= kmem_alloc(sizeof (zfs_fuid_t
), KM_SLEEP
);
448 fuid
->z_domidx
= idx
;
449 fuid
->z_logfuid
= FUID_ENCODE(fuididx
, rid
);
451 list_insert_tail(&fuidp
->z_fuids
, fuid
);
454 if (type
== ZFS_OWNER
)
455 fuidp
->z_fuid_owner
= FUID_ENCODE(fuididx
, rid
);
457 fuidp
->z_fuid_group
= FUID_ENCODE(fuididx
, rid
);
/*
 * NOTE(review): zfs_fuid_create_cred() is left verbatim.  The text below
 * is an incomplete recovery: each line carries the upstream line number,
 * and the lines between the numbered ones (return type, local
 * declarations, branch bodies, braces and possibly preprocessor
 * conditionals) are absent, so the control flow cannot be restored with
 * confidence.
 *
 * Visible behavior: verifies that type is ZFS_OWNER or ZFS_GROUP, fetches
 * the matching ksid from the cred, and returns the plain id when
 * z_use_fuids is off or the id is not ephemeral.  Otherwise it takes the
 * rid and domain from the ksid, resolves the domain index with
 * zfs_fuid_find_by_domain(), records the result with zfs_fuid_node_add()
 * and returns FUID_ENCODE(idx, rid).
 */
462 * Create a file system FUID, based on information in the users cred
465 zfs_fuid_create_cred(zfsvfs_t
*zfsvfs
, zfs_fuid_type_t type
,
466 dmu_tx_t
*tx
, cred_t
*cr
, zfs_fuid_info_t
**fuidp
)
475 VERIFY(type
== ZFS_OWNER
|| type
== ZFS_GROUP
);
477 if (type
== ZFS_OWNER
)
483 ksid
= crgetsid(cr
, (type
== ZFS_OWNER
) ? KSID_OWNER
: KSID_GROUP
);
485 id
= ksid_getid(ksid
);
487 if (type
== ZFS_OWNER
)
494 if (!zfsvfs
->z_use_fuids
|| (!IS_EPHEMERAL(id
)))
495 return ((uint64_t)id
);
498 rid
= ksid_getrid(ksid
);
499 domain
= ksid_getdomain(ksid
);
501 idx
= zfs_fuid_find_by_domain(zfsvfs
, domain
, &kdomain
, tx
);
503 zfs_fuid_node_add(fuidp
, kdomain
, rid
, idx
, id
, type
);
505 return (FUID_ENCODE(idx
, rid
));
/*
 * NOTE(review): zfs_fuid_create() is left verbatim.  The text below is an
 * incomplete recovery: each line carries the upstream line number, and
 * the lines between the numbered ones (return type, declarations, the
 * conditions and case labels selecting each branch, braces, and the
 * platform-specific handling after a failed idmap lookup) are absent.
 *
 * Visible behavior: is_replay is derived from z_assign >= TXG_INITIAL.
 * There is an early test on !z_use_fuids, a non-ephemeral id, or an id
 * that already carries a FUID index.  The replay path reads rid and idx
 * from the zfs_fuid_info_t in zfsvfs->z_fuid_replay (head of z_fuids,
 * z_fuid_owner or z_fuid_group) and the domain from
 * z_domain_table[idx - 1].  The other path asks kidmap for the SID by uid
 * (ZFS_OWNER / ZFS_ACE_USER) or by gid.  The domain index then comes from
 * zfs_fuid_find_by_domain(); a node is recorded with zfs_fuid_node_add(),
 * or a consumed replay entry is removed and freed, and
 * FUID_ENCODE(idx, rid) is returned.
 */
512 * Create a file system FUID for an ACL ace
513 * or a chown/chgrp of the file.
514 * This is similar to zfs_fuid_create_cred, except that
515 * we can't find the domain + rid information in the
516 * cred. Instead we have to query Winchester for the
519 * During replay operations the domain+rid information is
520 * found in the zfs_fuid_info_t that the replay code has
521 * attached to the zfsvfs of the file system.
524 zfs_fuid_create(zfsvfs_t
*zfsvfs
, uint64_t id
, cred_t
*cr
,
525 zfs_fuid_type_t type
, dmu_tx_t
*tx
, zfs_fuid_info_t
**fuidpp
)
529 uint32_t fuid_idx
= FUID_INDEX(id
);
533 boolean_t is_replay
= (zfsvfs
->z_assign
>= TXG_INITIAL
);
534 zfs_fuid_t
*zfuid
= NULL
;
535 zfs_fuid_info_t
*fuidp
;
538 * If POSIX ID, or entry is already a FUID then
541 * We may also be handed an already FUID'ized id via
545 if (!zfsvfs
->z_use_fuids
|| !IS_EPHEMERAL(id
) || fuid_idx
!= 0)
549 fuidp
= zfsvfs
->z_fuid_replay
;
552 * If we are passed an ephemeral id, but no
553 * fuid_info was logged then return NOBODY.
554 * This is most likely a result of idmap service
555 * not being available.
557 /* XXX NetBSD we need to define UID_NOBODY in
558 kernel sources otherwise */
560 return (crgetuid(cr
));
565 zfuid
= list_head(&fuidp
->z_fuids
);
566 rid
= FUID_RID(zfuid
->z_logfuid
);
567 idx
= FUID_INDEX(zfuid
->z_logfuid
);
570 rid
= FUID_RID(fuidp
->z_fuid_owner
);
571 idx
= FUID_INDEX(fuidp
->z_fuid_owner
);
574 rid
= FUID_RID(fuidp
->z_fuid_group
);
575 idx
= FUID_INDEX(fuidp
->z_fuid_group
);
578 domain
= fuidp
->z_domain_table
[idx
-1];
581 if (type
== ZFS_OWNER
|| type
== ZFS_ACE_USER
)
582 status
= kidmap_getsidbyuid(crgetzone(cr
), id
,
585 status
= kidmap_getsidbygid(crgetzone(cr
), id
,
590 * When returning nobody we will need to
591 * make a dummy fuid table entry for logging
602 idx
= zfs_fuid_find_by_domain(zfsvfs
, domain
, &kdomain
, tx
);
605 zfs_fuid_node_add(fuidpp
, kdomain
, rid
, idx
, id
, type
);
606 else if (zfuid
!= NULL
) {
607 list_remove(&fuidp
->z_fuids
, zfuid
);
608 kmem_free(zfuid
, sizeof (zfs_fuid_t
));
610 return (FUID_ENCODE(idx
, rid
));
614 zfs_fuid_destroy(zfsvfs_t
*zfsvfs
)
616 rw_enter(&zfsvfs
->z_fuid_lock
, RW_WRITER
);
617 if (!zfsvfs
->z_fuid_loaded
) {
618 rw_exit(&zfsvfs
->z_fuid_lock
);
621 zfs_fuid_table_destroy(&zfsvfs
->z_fuid_idx
, &zfsvfs
->z_fuid_domain
);
622 rw_exit(&zfsvfs
->z_fuid_lock
);
626 * Allocate zfs_fuid_info for tracking FUIDs created during
627 * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR()
630 zfs_fuid_info_alloc(void)
632 zfs_fuid_info_t
*fuidp
;
634 fuidp
= kmem_zalloc(sizeof (zfs_fuid_info_t
), KM_SLEEP
);
635 list_create(&fuidp
->z_domains
, sizeof (zfs_fuid_domain_t
),
636 offsetof(zfs_fuid_domain_t
, z_next
));
637 list_create(&fuidp
->z_fuids
, sizeof (zfs_fuid_t
),
638 offsetof(zfs_fuid_t
, z_next
));
643 * Release all memory associated with zfs_fuid_info_t
646 zfs_fuid_info_free(zfs_fuid_info_t
*fuidp
)
649 zfs_fuid_domain_t
*zdomain
;
651 while ((zfuid
= list_head(&fuidp
->z_fuids
)) != NULL
) {
652 list_remove(&fuidp
->z_fuids
, zfuid
);
653 kmem_free(zfuid
, sizeof (zfs_fuid_t
));
656 if (fuidp
->z_domain_table
!= NULL
)
657 kmem_free(fuidp
->z_domain_table
,
658 (sizeof (char **)) * fuidp
->z_domain_cnt
);
660 while ((zdomain
= list_head(&fuidp
->z_domains
)) != NULL
) {
661 list_remove(&fuidp
->z_domains
, zdomain
);
662 kmem_free(zdomain
, sizeof (zfs_fuid_domain_t
));
665 kmem_free(fuidp
, sizeof (zfs_fuid_info_t
));
/*
 * NOTE(review): zfs_groupmember() is left verbatim.  The text below is an
 * incomplete recovery: each line carries the upstream line number, and
 * the lines between the numbered ones (return type, declarations, the
 * tests selecting each branch, the return statements inside the loop,
 * braces and the end of the function) are absent.
 *
 * Visible behavior: fetches the cred's group ksid and its ksid list,
 * splits id into a FUID index and rid, and walks ksl_sids for ksl_nsid
 * entries.  One comparison matches id against ks_id (excluding
 * IDMAP_WK_CREATOR_GROUP_GID); the other looks up the domain for the
 * index and compares it with IDMAP_WK_CREATOR_SID_AUTHORITY and with each
 * entry's domain name and rid.  The final visible statements map id
 * through zfs_fuid_map_id(..., ZFS_GROUP) and return groupmember(gid, cr).
 */
669 * Check to see if id is a groupmember. If cred
670 * has ksid info then sidlist is checked first
671 * and if still not found then POSIX groups are checked
673 * Will use a straight FUID compare when possible.
676 zfs_groupmember(zfsvfs_t
*zfsvfs
, uint64_t id
, cred_t
*cr
)
678 ksid_t
*ksid
= crgetsid(cr
, KSID_GROUP
);
685 ksidlist_t
*ksidlist
= crgetsidlist(cr
);
686 uint32_t idx
= FUID_INDEX(id
);
687 uint32_t rid
= FUID_RID(id
);
690 ksid_groups
= ksidlist
->ksl_sids
;
692 for (i
= 0; i
!= ksidlist
->ksl_nsid
; i
++) {
694 if (id
!= IDMAP_WK_CREATOR_GROUP_GID
&&
695 id
== ksid_groups
[i
].ks_id
) {
701 domain
= zfs_fuid_find_by_idx(zfsvfs
, idx
);
702 ASSERT(domain
!= NULL
);
705 IDMAP_WK_CREATOR_SID_AUTHORITY
) == 0)
709 ksid_groups
[i
].ks_domain
->kd_name
) == 0) &&
710 rid
== ksid_groups
[i
].ks_rid
)
717 * Not found in ksidlist, check posix groups
719 gid
= zfs_fuid_map_id(zfsvfs
, id
, cr
, ZFS_GROUP
);
720 return (groupmember(gid
, cr
));