4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
24 * Extended attributes (xattr) on Solaris are implemented as files
25 * which exist in a hidden xattr directory. These extended attributes
26 * can be accessed using the attropen() system call which opens
27 * the extended attribute. It can then be manipulated just like
28 * a standard file descriptor. This has a couple advantages such
29 * as practically no size limit on the file, and the extended
30 * attribute's permissions may differ from those of the parent file.
31 * This interface is really quite clever, but it's also completely
32 * different than what is supported on Linux. It also comes with a
33 * steep performance penalty when accessing small xattrs because they
34 * are not stored with the parent file.
36 * Under Linux extended attributes are manipulated by the system
37 * calls getxattr(2), setxattr(2), and listxattr(2). They consider
38 * extended attributes to be name/value pairs where the name is a
39 * NUL-terminated string. The name must also include one of the
40 * following namespace prefixes:
42 * user - No restrictions and is available to user applications.
43 * trusted - Restricted to kernel and root (CAP_SYS_ADMIN) use.
44 * system - Used for access control lists (system.nfs4_acl, etc).
45 * security - Used by SELinux to store a file's security context.
47 * The value under Linux is limited to 65536 bytes of binary data.
48 * In practice, individual xattrs tend to be much smaller than this
49 * and are typically less than 100 bytes. A good example of this
50 * are the security.selinux xattrs which are less than 100 bytes and
51 * exist for every file when xattr labeling is enabled.
53 * The Linux xattr implementation has been written to take advantage of
54 * this typical usage. When the dataset property 'xattr=sa' is set,
55 * then xattrs will be preferentially stored as System Attributes (SA).
56 * This allows tiny xattrs (~100 bytes) to be stored with the dnode and
57 * up to 64k of xattrs to be stored in the spill block. If additional
58 * xattr space is required, which is unlikely under Linux, they will
59 * be stored using the traditional directory approach.
61 * This optimization results in roughly a 3x performance improvement
62 * when accessing xattrs because it avoids the need to perform a seek
63 * for every xattr value. When multiple xattrs are stored per-file
64 * the performance improvements are even greater because all of the
65 * xattrs stored in the spill block will be cached.
67 * However, by default SA based xattrs are disabled in the Linux port
68 * to maximize compatibility with other implementations. If you do
69 * enable SA based xattrs then they will not be visible on platforms
70 * which do not support this feature.
72 * NOTE: One additional consequence of the xattr directory implementation
73 * is that when an extended attribute is manipulated an inode is created.
74 * This inode will exist in the Linux inode cache but there will be no
75 * associated entry in the dentry cache which references it. This is
76 * safe but it may result in some confusion. Enabling SA based xattrs
77 * largely avoids the issue except in the overflow case.
80 #include <sys/zfs_znode.h>
81 #include <sys/zfs_vfsops.h>
82 #include <sys/zfs_vnops.h>
86 #include <linux/vfs_compat.h>
88 enum xattr_permission
{
94 typedef struct xattr_filldir
{
98 struct dentry
*dentry
;
101 static enum xattr_permission
zpl_xattr_permission(xattr_filldir_t
*,
104 static int zfs_xattr_compat
= 0;
107 * Determine if a given xattr name should be visible and if so copy it
108 * into the provided buffer (xf->buf).
111 zpl_xattr_filldir(xattr_filldir_t
*xf
, const char *name
, int name_len
)
113 enum xattr_permission perm
;
115 /* Check permissions using the per-namespace list xattr handler. */
116 perm
= zpl_xattr_permission(xf
, name
, name_len
);
117 if (perm
== XAPERM_DENY
)
120 /* Prefix the name with "user." if it does not have a namespace. */
121 if (perm
== XAPERM_COMPAT
) {
123 if (xf
->offset
+ XATTR_USER_PREFIX_LEN
+ 1 > xf
->size
)
126 memcpy(xf
->buf
+ xf
->offset
, XATTR_USER_PREFIX
,
127 XATTR_USER_PREFIX_LEN
);
128 xf
->buf
[xf
->offset
+ XATTR_USER_PREFIX_LEN
] = '\0';
131 xf
->offset
+= XATTR_USER_PREFIX_LEN
;
134 /* When xf->buf is NULL only calculate the required size. */
136 if (xf
->offset
+ name_len
+ 1 > xf
->size
)
139 memcpy(xf
->buf
+ xf
->offset
, name
, name_len
);
140 xf
->buf
[xf
->offset
+ name_len
] = '\0';
143 xf
->offset
+= (name_len
+ 1);
149 * Read as many directory entry names as will fit into the provided buffer,
150 * or when no buffer is provided calculate the required buffer size.
153 zpl_xattr_readdir(struct inode
*dxip
, xattr_filldir_t
*xf
)
156 zap_attribute_t
*zap
= zap_attribute_alloc();
159 zap_cursor_init(&zc
, ITOZSB(dxip
)->z_os
, ITOZ(dxip
)->z_id
);
161 while ((error
= -zap_cursor_retrieve(&zc
, zap
)) == 0) {
163 if (zap
->za_integer_length
!= 8 || zap
->za_num_integers
!= 1) {
168 error
= zpl_xattr_filldir(xf
, zap
->za_name
,
169 strlen(zap
->za_name
));
173 zap_cursor_advance(&zc
);
176 zap_cursor_fini(&zc
);
177 zap_attribute_free(zap
);
179 if (error
== -ENOENT
)
186 zpl_xattr_list_dir(xattr_filldir_t
*xf
, cred_t
*cr
)
188 struct inode
*ip
= xf
->dentry
->d_inode
;
189 struct inode
*dxip
= NULL
;
193 /* Lookup the xattr directory */
194 error
= -zfs_lookup(ITOZ(ip
), NULL
, &dxzp
, LOOKUP_XATTR
,
197 if (error
== -ENOENT
)
204 error
= zpl_xattr_readdir(dxip
, xf
);
211 zpl_xattr_list_sa(xattr_filldir_t
*xf
)
213 znode_t
*zp
= ITOZ(xf
->dentry
->d_inode
);
214 nvpair_t
*nvp
= NULL
;
217 mutex_enter(&zp
->z_lock
);
218 if (zp
->z_xattr_cached
== NULL
)
219 error
= -zfs_sa_get_xattr(zp
);
220 mutex_exit(&zp
->z_lock
);
225 ASSERT(zp
->z_xattr_cached
);
227 while ((nvp
= nvlist_next_nvpair(zp
->z_xattr_cached
, nvp
)) != NULL
) {
228 ASSERT3U(nvpair_type(nvp
), ==, DATA_TYPE_BYTE_ARRAY
);
230 error
= zpl_xattr_filldir(xf
, nvpair_name(nvp
),
231 strlen(nvpair_name(nvp
)));
240 zpl_xattr_list(struct dentry
*dentry
, char *buffer
, size_t buffer_size
)
242 znode_t
*zp
= ITOZ(dentry
->d_inode
);
243 zfsvfs_t
*zfsvfs
= ZTOZSB(zp
);
244 xattr_filldir_t xf
= { buffer_size
, 0, buffer
, dentry
};
246 fstrans_cookie_t cookie
;
250 cookie
= spl_fstrans_mark();
251 if ((error
= zpl_enter_verify_zp(zfsvfs
, zp
, FTAG
)) != 0)
253 rw_enter(&zp
->z_xattr_lock
, RW_READER
);
255 if (zfsvfs
->z_use_sa
&& zp
->z_is_sa
) {
256 error
= zpl_xattr_list_sa(&xf
);
261 error
= zpl_xattr_list_dir(&xf
, cr
);
268 rw_exit(&zp
->z_xattr_lock
);
269 zpl_exit(zfsvfs
, FTAG
);
271 spl_fstrans_unmark(cookie
);
278 zpl_xattr_get_dir(struct inode
*ip
, const char *name
, void *value
,
279 size_t size
, cred_t
*cr
)
281 fstrans_cookie_t cookie
;
282 struct inode
*xip
= NULL
;
283 znode_t
*dxzp
= NULL
;
287 /* Lookup the xattr directory */
288 error
= -zfs_lookup(ITOZ(ip
), NULL
, &dxzp
, LOOKUP_XATTR
,
293 /* Lookup a specific xattr name in the directory */
294 error
= -zfs_lookup(dxzp
, (char *)name
, &xzp
, 0, cr
, NULL
, NULL
);
300 error
= i_size_read(xip
);
304 if (size
< i_size_read(xip
)) {
310 iov
.iov_base
= (void *)value
;
314 zfs_uio_iovec_init(&uio
, &iov
, 1, 0, UIO_SYSSPACE
, size
, 0);
316 cookie
= spl_fstrans_mark();
317 error
= -zfs_read(ITOZ(xip
), &uio
, 0, cr
);
318 spl_fstrans_unmark(cookie
);
321 error
= size
- zfs_uio_resid(&uio
);
333 zpl_xattr_get_sa(struct inode
*ip
, const char *name
, void *value
, size_t size
)
335 znode_t
*zp
= ITOZ(ip
);
340 ASSERT(RW_LOCK_HELD(&zp
->z_xattr_lock
));
342 mutex_enter(&zp
->z_lock
);
343 if (zp
->z_xattr_cached
== NULL
)
344 error
= -zfs_sa_get_xattr(zp
);
345 mutex_exit(&zp
->z_lock
);
350 ASSERT(zp
->z_xattr_cached
);
351 error
= -nvlist_lookup_byte_array(zp
->z_xattr_cached
, name
,
352 &nv_value
, &nv_size
);
356 if (size
== 0 || value
== NULL
)
362 memcpy(value
, nv_value
, nv_size
);
368 __zpl_xattr_get(struct inode
*ip
, const char *name
, void *value
, size_t size
,
371 znode_t
*zp
= ITOZ(ip
);
372 zfsvfs_t
*zfsvfs
= ZTOZSB(zp
);
375 ASSERT(RW_LOCK_HELD(&zp
->z_xattr_lock
));
377 if (zfsvfs
->z_use_sa
&& zp
->z_is_sa
) {
378 error
= zpl_xattr_get_sa(ip
, name
, value
, size
);
379 if (error
!= -ENOENT
)
383 error
= zpl_xattr_get_dir(ip
, name
, value
, size
, cr
);
385 if (error
== -ENOENT
)
391 #define XATTR_NOENT 0x0
392 #define XATTR_IN_SA 0x1
393 #define XATTR_IN_DIR 0x2
394 /* check where the xattr resides */
396 __zpl_xattr_where(struct inode
*ip
, const char *name
, int *where
, cred_t
*cr
)
398 znode_t
*zp
= ITOZ(ip
);
399 zfsvfs_t
*zfsvfs
= ZTOZSB(zp
);
403 ASSERT(RW_LOCK_HELD(&zp
->z_xattr_lock
));
405 *where
= XATTR_NOENT
;
406 if (zfsvfs
->z_use_sa
&& zp
->z_is_sa
) {
407 error
= zpl_xattr_get_sa(ip
, name
, NULL
, 0);
409 *where
|= XATTR_IN_SA
;
410 else if (error
!= -ENOENT
)
414 error
= zpl_xattr_get_dir(ip
, name
, NULL
, 0, cr
);
416 *where
|= XATTR_IN_DIR
;
417 else if (error
!= -ENOENT
)
420 if (*where
== (XATTR_IN_SA
|XATTR_IN_DIR
))
421 cmn_err(CE_WARN
, "ZFS: inode %p has xattr \"%s\""
422 " in both SA and dir", ip
, name
);
423 if (*where
== XATTR_NOENT
)
431 zpl_xattr_get(struct inode
*ip
, const char *name
, void *value
, size_t size
)
433 znode_t
*zp
= ITOZ(ip
);
434 zfsvfs_t
*zfsvfs
= ZTOZSB(zp
);
436 fstrans_cookie_t cookie
;
440 cookie
= spl_fstrans_mark();
441 if ((error
= zpl_enter_verify_zp(zfsvfs
, zp
, FTAG
)) != 0)
443 rw_enter(&zp
->z_xattr_lock
, RW_READER
);
444 error
= __zpl_xattr_get(ip
, name
, value
, size
, cr
);
445 rw_exit(&zp
->z_xattr_lock
);
446 zpl_exit(zfsvfs
, FTAG
);
448 spl_fstrans_unmark(cookie
);
455 zpl_xattr_set_dir(struct inode
*ip
, const char *name
, const void *value
,
456 size_t size
, int flags
, cred_t
*cr
)
458 znode_t
*dxzp
= NULL
;
461 int lookup_flags
, error
;
462 const int xattr_mode
= S_IFREG
| 0644;
466 * Lookup the xattr directory. When we're adding an entry pass
467 * CREATE_XATTR_DIR to ensure the xattr directory is created.
468 * When removing an entry this flag is not passed to avoid
469 * unnecessarily creating a new xattr directory.
471 lookup_flags
= LOOKUP_XATTR
;
473 lookup_flags
|= CREATE_XATTR_DIR
;
475 error
= -zfs_lookup(ITOZ(ip
), NULL
, &dxzp
, lookup_flags
,
480 /* Lookup a specific xattr name in the directory */
481 error
= -zfs_lookup(dxzp
, (char *)name
, &xzp
, 0, cr
, NULL
, NULL
);
482 if (error
&& (error
!= -ENOENT
))
487 /* Remove a specific name xattr when value is set to NULL. */
490 error
= -zfs_remove(dxzp
, (char *)name
, cr
, 0);
495 /* Lookup failed, create a new xattr. */
497 vap
= kmem_zalloc(sizeof (vattr_t
), KM_SLEEP
);
498 vap
->va_mode
= xattr_mode
;
499 vap
->va_mask
= ATTR_MODE
;
500 vap
->va_uid
= crgetuid(cr
);
501 vap
->va_gid
= crgetgid(cr
);
503 error
= -zfs_create(dxzp
, (char *)name
, vap
, 0, 0644, &xzp
,
504 cr
, ATTR_NOACLCHECK
, NULL
, zfs_init_idmap
);
511 error
= -zfs_freesp(xzp
, 0, 0, xattr_mode
, TRUE
);
515 error
= -zfs_write_simple(xzp
, value
, size
, pos
, NULL
);
518 zpl_inode_set_ctime_to_ts(ip
, current_time(ip
));
519 zfs_mark_inode_dirty(ip
);
523 kmem_free(vap
, sizeof (vattr_t
));
531 if (error
== -ENOENT
)
534 ASSERT3S(error
, <=, 0);
540 zpl_xattr_set_sa(struct inode
*ip
, const char *name
, const void *value
,
541 size_t size
, int flags
, cred_t
*cr
)
543 znode_t
*zp
= ITOZ(ip
);
548 mutex_enter(&zp
->z_lock
);
549 if (zp
->z_xattr_cached
== NULL
)
550 error
= -zfs_sa_get_xattr(zp
);
551 mutex_exit(&zp
->z_lock
);
556 ASSERT(zp
->z_xattr_cached
);
557 nvl
= zp
->z_xattr_cached
;
560 error
= -nvlist_remove(nvl
, name
, DATA_TYPE_BYTE_ARRAY
);
561 if (error
== -ENOENT
)
562 error
= zpl_xattr_set_dir(ip
, name
, NULL
, 0, flags
, cr
);
564 /* Limited to 32k to keep nvpair memory allocations small */
565 if (size
> DXATTR_MAX_ENTRY_SIZE
)
568 /* Prevent the DXATTR SA from consuming the entire SA region */
569 error
= -nvlist_size(nvl
, &sa_size
, NV_ENCODE_XDR
);
573 if (sa_size
> DXATTR_MAX_SA_SIZE
)
576 error
= -nvlist_add_byte_array(nvl
, name
,
577 (uchar_t
*)value
, size
);
581 * Update the SA for additions, modifications, and removals. On
582 * error, drop the inconsistent cached version of the nvlist; it
583 * will be reconstructed from the ARC when next accessed.
586 error
= -zfs_sa_set_xattr(zp
, name
, value
, size
);
590 zp
->z_xattr_cached
= NULL
;
593 ASSERT3S(error
, <=, 0);
599 zpl_xattr_set(struct inode
*ip
, const char *name
, const void *value
,
600 size_t size
, int flags
)
602 znode_t
*zp
= ITOZ(ip
);
603 zfsvfs_t
*zfsvfs
= ZTOZSB(zp
);
605 fstrans_cookie_t cookie
;
610 cookie
= spl_fstrans_mark();
611 if ((error
= zpl_enter_verify_zp(zfsvfs
, zp
, FTAG
)) != 0)
613 rw_enter(&zp
->z_xattr_lock
, RW_WRITER
);
616 * Before setting the xattr check to see if it already exists.
617 * This is done to ensure the following optional flags are honored.
619 * XATTR_CREATE: fail if xattr already exists
620 * XATTR_REPLACE: fail if xattr does not exist
622 * We also want to know if it resides in sa or dir, so we can make
623 * sure we don't end up with duplicate in both places.
625 error
= __zpl_xattr_where(ip
, name
, &where
, cr
);
627 if (error
!= -ENODATA
)
629 if (flags
& XATTR_REPLACE
)
632 /* The xattr to be removed already doesn't exist */
638 if (flags
& XATTR_CREATE
)
642 /* Preferentially store the xattr as a SA for better performance */
643 if (zfsvfs
->z_use_sa
&& zp
->z_is_sa
&&
644 (zfsvfs
->z_xattr_sa
|| (value
== NULL
&& where
& XATTR_IN_SA
))) {
645 error
= zpl_xattr_set_sa(ip
, name
, value
, size
, flags
, cr
);
648 * Successfully put into SA, we need to clear the one
651 if (where
& XATTR_IN_DIR
)
652 zpl_xattr_set_dir(ip
, name
, NULL
, 0, 0, cr
);
657 error
= zpl_xattr_set_dir(ip
, name
, value
, size
, flags
, cr
);
659 * Successfully put into dir, we need to clear the one in SA.
661 if (error
== 0 && (where
& XATTR_IN_SA
))
662 zpl_xattr_set_sa(ip
, name
, NULL
, 0, 0, cr
);
664 rw_exit(&zp
->z_xattr_lock
);
665 zpl_exit(zfsvfs
, FTAG
);
667 spl_fstrans_unmark(cookie
);
669 ASSERT3S(error
, <=, 0);
675 * Extended user attributes
677 * "Extended user attributes may be assigned to files and directories for
678 * storing arbitrary additional information such as the mime type,
679 * character set or encoding of a file. The access permissions for user
680 * attributes are defined by the file permission bits: read permission
681 * is required to retrieve the attribute value, and writer permission is
682 * required to change it.
684 * The file permission bits of regular files and directories are
685 * interpreted differently from the file permission bits of special
686 * files and symbolic links. For regular files and directories the file
687 * permission bits define access to the file's contents, while for
688 * device special files they define access to the device described by
689 * the special file. The file permissions of symbolic links are not
690 * used in access checks. These differences would allow users to
691 * consume filesystem resources in a way not controllable by disk quotas
692 * for group or world writable special files and directories.
694 * For this reason, extended user attributes are allowed only for
695 * regular files and directories, and access to extended user attributes
696 * is restricted to the owner and to users with appropriate capabilities
697 * for directories with the sticky bit set (see the chmod(1) manual page
698 * for an explanation of the sticky bit)." - xattr(7)
700 * ZFS allows extended user attributes to be disabled administratively
701 * by setting the 'xattr=off' property on the dataset.
704 __zpl_xattr_user_list(struct inode
*ip
, char *list
, size_t list_size
,
705 const char *name
, size_t name_len
)
707 return (ITOZSB(ip
)->z_flags
& ZSB_XATTR
);
709 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_user_list
);
712 __zpl_xattr_user_get(struct inode
*ip
, const char *name
,
713 void *value
, size_t size
)
716 /* xattr_resolve_name will do this for us if this is defined */
717 if (ZFS_XA_NS_PREFIX_FORBIDDEN(name
))
719 if (!(ITOZSB(ip
)->z_flags
& ZSB_XATTR
))
720 return (-EOPNOTSUPP
);
723 * Try to look up the name with the namespace prefix first for
724 * compatibility with xattrs from this platform. If that fails,
725 * try again without the namespace prefix for compatibility with
728 char *xattr_name
= kmem_asprintf("%s%s", XATTR_USER_PREFIX
, name
);
729 error
= zpl_xattr_get(ip
, xattr_name
, value
, size
);
730 kmem_strfree(xattr_name
);
731 if (error
== -ENODATA
)
732 error
= zpl_xattr_get(ip
, name
, value
, size
);
736 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get
);
739 __zpl_xattr_user_set(zidmap_t
*user_ns
,
740 struct inode
*ip
, const char *name
,
741 const void *value
, size_t size
, int flags
)
745 /* xattr_resolve_name will do this for us if this is defined */
746 if (ZFS_XA_NS_PREFIX_FORBIDDEN(name
))
748 if (!(ITOZSB(ip
)->z_flags
& ZSB_XATTR
))
749 return (-EOPNOTSUPP
);
752 * Remove alternate compat version of the xattr so we only set the
753 * version specified by the zfs_xattr_compat tunable.
755 * The following flags must be handled correctly:
757 * XATTR_CREATE: fail if xattr already exists
758 * XATTR_REPLACE: fail if xattr does not exist
760 char *prefixed_name
= kmem_asprintf("%s%s", XATTR_USER_PREFIX
, name
);
761 const char *clear_name
, *set_name
;
762 if (zfs_xattr_compat
) {
763 clear_name
= prefixed_name
;
767 set_name
= prefixed_name
;
770 * Clear the old value with the alternative name format, if it exists.
772 error
= zpl_xattr_set(ip
, clear_name
, NULL
, 0, flags
);
774 * XATTR_CREATE was specified and we failed to clear the xattr
775 * because it already exists. Stop here.
777 if (error
== -EEXIST
)
780 * If XATTR_REPLACE was specified and we succeeded in clearing
781 * an xattr, we don't need to replace anything when setting
782 * the new value. If we failed with -ENODATA that's fine,
783 * there was nothing to be cleared and we can ignore the error.
786 flags
&= ~XATTR_REPLACE
;
788 * Set the new value with the configured name format.
790 error
= zpl_xattr_set(ip
, set_name
, value
, size
, flags
);
792 kmem_strfree(prefixed_name
);
795 ZPL_XATTR_SET_WRAPPER(zpl_xattr_user_set
);
797 static xattr_handler_t zpl_xattr_user_handler
=
799 .prefix
= XATTR_USER_PREFIX
,
800 .list
= zpl_xattr_user_list
,
801 .get
= zpl_xattr_user_get
,
802 .set
= zpl_xattr_user_set
,
806 * Trusted extended attributes
808 * "Trusted extended attributes are visible and accessible only to
809 * processes that have the CAP_SYS_ADMIN capability. Attributes in this
810 * class are used to implement mechanisms in user space (i.e., outside
811 * the kernel) which keep information in extended attributes to which
812 * ordinary processes should not have access." - xattr(7)
815 __zpl_xattr_trusted_list(struct inode
*ip
, char *list
, size_t list_size
,
816 const char *name
, size_t name_len
)
818 return (capable(CAP_SYS_ADMIN
));
820 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_trusted_list
);
823 __zpl_xattr_trusted_get(struct inode
*ip
, const char *name
,
824 void *value
, size_t size
)
829 if (!capable(CAP_SYS_ADMIN
))
831 /* xattr_resolve_name will do this for us if this is defined */
832 xattr_name
= kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX
, name
);
833 error
= zpl_xattr_get(ip
, xattr_name
, value
, size
);
834 kmem_strfree(xattr_name
);
838 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get
);
841 __zpl_xattr_trusted_set(zidmap_t
*user_ns
,
842 struct inode
*ip
, const char *name
,
843 const void *value
, size_t size
, int flags
)
849 if (!capable(CAP_SYS_ADMIN
))
851 /* xattr_resolve_name will do this for us if this is defined */
852 xattr_name
= kmem_asprintf("%s%s", XATTR_TRUSTED_PREFIX
, name
);
853 error
= zpl_xattr_set(ip
, xattr_name
, value
, size
, flags
);
854 kmem_strfree(xattr_name
);
858 ZPL_XATTR_SET_WRAPPER(zpl_xattr_trusted_set
);
860 static xattr_handler_t zpl_xattr_trusted_handler
= {
861 .prefix
= XATTR_TRUSTED_PREFIX
,
862 .list
= zpl_xattr_trusted_list
,
863 .get
= zpl_xattr_trusted_get
,
864 .set
= zpl_xattr_trusted_set
,
868 * Extended security attributes
870 * "The security attribute namespace is used by kernel security modules,
871 * such as Security Enhanced Linux, and also to implement file
872 * capabilities (see capabilities(7)). Read and write access
873 * permissions to security attributes depend on the policy implemented
874 * for each security attribute by the security module. When no security
875 * module is loaded, all processes have read access to extended security
876 * attributes, and write access is limited to processes that have the
877 * CAP_SYS_ADMIN capability." - xattr(7)
880 __zpl_xattr_security_list(struct inode
*ip
, char *list
, size_t list_size
,
881 const char *name
, size_t name_len
)
885 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_security_list
);
888 __zpl_xattr_security_get(struct inode
*ip
, const char *name
,
889 void *value
, size_t size
)
893 /* xattr_resolve_name will do this for us if this is defined */
894 xattr_name
= kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX
, name
);
895 error
= zpl_xattr_get(ip
, xattr_name
, value
, size
);
896 kmem_strfree(xattr_name
);
900 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get
);
903 __zpl_xattr_security_set(zidmap_t
*user_ns
,
904 struct inode
*ip
, const char *name
,
905 const void *value
, size_t size
, int flags
)
910 /* xattr_resolve_name will do this for us if this is defined */
911 xattr_name
= kmem_asprintf("%s%s", XATTR_SECURITY_PREFIX
, name
);
912 error
= zpl_xattr_set(ip
, xattr_name
, value
, size
, flags
);
913 kmem_strfree(xattr_name
);
917 ZPL_XATTR_SET_WRAPPER(zpl_xattr_security_set
);
920 zpl_xattr_security_init_impl(struct inode
*ip
, const struct xattr
*xattrs
,
923 const struct xattr
*xattr
;
926 for (xattr
= xattrs
; xattr
->name
!= NULL
; xattr
++) {
927 error
= __zpl_xattr_security_set(NULL
, ip
,
928 xattr
->name
, xattr
->value
, xattr
->value_len
, 0);
938 zpl_xattr_security_init(struct inode
*ip
, struct inode
*dip
,
939 const struct qstr
*qstr
)
941 return security_inode_init_security(ip
, dip
, qstr
,
942 &zpl_xattr_security_init_impl
, NULL
);
946 * Security xattr namespace handlers.
948 static xattr_handler_t zpl_xattr_security_handler
= {
949 .prefix
= XATTR_SECURITY_PREFIX
,
950 .list
= zpl_xattr_security_list
,
951 .get
= zpl_xattr_security_get
,
952 .set
= zpl_xattr_security_set
,
956 * Extended system attributes
958 * "Extended system attributes are used by the kernel to store system
959 * objects such as Access Control Lists. Read and write access permissions
960 * to system attributes depend on the policy implemented for each system
961 * attribute implemented by filesystems in the kernel." - xattr(7)
963 #ifdef CONFIG_FS_POSIX_ACL
965 zpl_set_acl_impl(struct inode
*ip
, struct posix_acl
*acl
, int type
)
967 char *name
, *value
= NULL
;
971 if (S_ISLNK(ip
->i_mode
))
972 return (-EOPNOTSUPP
);
975 case ACL_TYPE_ACCESS
:
976 name
= XATTR_NAME_POSIX_ACL_ACCESS
;
978 umode_t mode
= ip
->i_mode
;
979 error
= posix_acl_equiv_mode(acl
, &mode
);
984 * The mode bits will have been set by
985 * ->zfs_setattr()->zfs_acl_chmod_setattr()
986 * using the ZFS ACL conversion. If they
987 * differ from the Posix ACL conversion dirty
988 * the inode to write the Posix mode bits.
990 if (ip
->i_mode
!= mode
) {
991 ip
->i_mode
= ITOZ(ip
)->z_mode
= mode
;
992 zpl_inode_set_ctime_to_ts(ip
,
994 zfs_mark_inode_dirty(ip
);
1003 case ACL_TYPE_DEFAULT
:
1004 name
= XATTR_NAME_POSIX_ACL_DEFAULT
;
1005 if (!S_ISDIR(ip
->i_mode
))
1006 return (acl
? -EACCES
: 0);
1014 size
= posix_acl_xattr_size(acl
->a_count
);
1015 value
= kmem_alloc(size
, KM_SLEEP
);
1017 error
= zpl_acl_to_xattr(acl
, value
, size
);
1019 kmem_free(value
, size
);
1024 error
= zpl_xattr_set(ip
, name
, value
, size
, 0);
1026 kmem_free(value
, size
);
1030 set_cached_acl(ip
, type
, acl
);
1032 forget_cached_acl(ip
, type
);
1039 #ifdef HAVE_SET_ACL_USERNS
1040 zpl_set_acl(struct user_namespace
*userns
, struct inode
*ip
,
1041 struct posix_acl
*acl
, int type
)
1042 #elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
1043 zpl_set_acl(struct mnt_idmap
*userns
, struct dentry
*dentry
,
1044 struct posix_acl
*acl
, int type
)
1045 #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
1046 zpl_set_acl(struct user_namespace
*userns
, struct dentry
*dentry
,
1047 struct posix_acl
*acl
, int type
)
1049 zpl_set_acl(struct inode
*ip
, struct posix_acl
*acl
, int type
)
1050 #endif /* HAVE_SET_ACL_USERNS */
1052 #ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG2
1053 return (zpl_set_acl_impl(d_inode(dentry
), acl
, type
));
1054 #elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
1055 return (zpl_set_acl_impl(d_inode(dentry
), acl
, type
));
1057 return (zpl_set_acl_impl(ip
, acl
, type
));
1058 #endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */
1061 static struct posix_acl
*
1062 zpl_get_acl_impl(struct inode
*ip
, int type
)
1064 struct posix_acl
*acl
;
1069 case ACL_TYPE_ACCESS
:
1070 name
= XATTR_NAME_POSIX_ACL_ACCESS
;
1072 case ACL_TYPE_DEFAULT
:
1073 name
= XATTR_NAME_POSIX_ACL_DEFAULT
;
1076 return (ERR_PTR(-EINVAL
));
1079 int size
= zpl_xattr_get(ip
, name
, NULL
, 0);
1081 value
= kmem_alloc(size
, KM_SLEEP
);
1082 size
= zpl_xattr_get(ip
, name
, value
, size
);
1086 acl
= zpl_acl_from_xattr(value
, size
);
1087 } else if (size
== -ENODATA
|| size
== -ENOSYS
) {
1090 acl
= ERR_PTR(-EIO
);
1094 kmem_free(value
, size
);
1099 #if defined(HAVE_GET_ACL_RCU) || defined(HAVE_GET_INODE_ACL)
1101 zpl_get_acl(struct inode
*ip
, int type
, bool rcu
)
1104 return (ERR_PTR(-ECHILD
));
1106 return (zpl_get_acl_impl(ip
, type
));
1108 #elif defined(HAVE_GET_ACL)
1110 zpl_get_acl(struct inode
*ip
, int type
)
1112 return (zpl_get_acl_impl(ip
, type
));
1115 #error "Unsupported iops->get_acl() implementation"
1116 #endif /* HAVE_GET_ACL_RCU */
1119 zpl_init_acl(struct inode
*ip
, struct inode
*dir
)
1121 struct posix_acl
*acl
= NULL
;
1124 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1127 if (!S_ISLNK(ip
->i_mode
)) {
1128 acl
= zpl_get_acl_impl(dir
, ACL_TYPE_DEFAULT
);
1130 return (PTR_ERR(acl
));
1132 ITOZ(ip
)->z_mode
= (ip
->i_mode
&= ~current_umask());
1133 zpl_inode_set_ctime_to_ts(ip
, current_time(ip
));
1134 zfs_mark_inode_dirty(ip
);
1142 if (S_ISDIR(ip
->i_mode
)) {
1143 error
= zpl_set_acl_impl(ip
, acl
, ACL_TYPE_DEFAULT
);
1149 error
= __posix_acl_create(&acl
, GFP_KERNEL
, &mode
);
1151 ip
->i_mode
= ITOZ(ip
)->z_mode
= mode
;
1152 zfs_mark_inode_dirty(ip
);
1154 error
= zpl_set_acl_impl(ip
, acl
,
1160 zpl_posix_acl_release(acl
);
1166 zpl_chmod_acl(struct inode
*ip
)
1168 struct posix_acl
*acl
;
1171 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1174 if (S_ISLNK(ip
->i_mode
))
1175 return (-EOPNOTSUPP
);
1177 acl
= zpl_get_acl_impl(ip
, ACL_TYPE_ACCESS
);
1178 if (IS_ERR(acl
) || !acl
)
1179 return (PTR_ERR(acl
));
1181 error
= __posix_acl_chmod(&acl
, GFP_KERNEL
, ip
->i_mode
);
1183 error
= zpl_set_acl_impl(ip
, acl
, ACL_TYPE_ACCESS
);
1185 zpl_posix_acl_release(acl
);
1191 __zpl_xattr_acl_list_access(struct inode
*ip
, char *list
, size_t list_size
,
1192 const char *name
, size_t name_len
)
1194 char *xattr_name
= XATTR_NAME_POSIX_ACL_ACCESS
;
1195 size_t xattr_size
= sizeof (XATTR_NAME_POSIX_ACL_ACCESS
);
1197 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1200 if (list
&& xattr_size
<= list_size
)
1201 memcpy(list
, xattr_name
, xattr_size
);
1203 return (xattr_size
);
1205 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_access
);
1208 __zpl_xattr_acl_list_default(struct inode
*ip
, char *list
, size_t list_size
,
1209 const char *name
, size_t name_len
)
1211 char *xattr_name
= XATTR_NAME_POSIX_ACL_DEFAULT
;
1212 size_t xattr_size
= sizeof (XATTR_NAME_POSIX_ACL_DEFAULT
);
1214 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1217 if (list
&& xattr_size
<= list_size
)
1218 memcpy(list
, xattr_name
, xattr_size
);
1220 return (xattr_size
);
1222 ZPL_XATTR_LIST_WRAPPER(zpl_xattr_acl_list_default
);
1225 __zpl_xattr_acl_get_access(struct inode
*ip
, const char *name
,
1226 void *buffer
, size_t size
)
1228 struct posix_acl
*acl
;
1229 int type
= ACL_TYPE_ACCESS
;
1231 /* xattr_resolve_name will do this for us if this is defined */
1232 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1233 return (-EOPNOTSUPP
);
1235 acl
= zpl_get_acl_impl(ip
, type
);
1237 return (PTR_ERR(acl
));
1241 error
= zpl_acl_to_xattr(acl
, buffer
, size
);
1242 zpl_posix_acl_release(acl
);
1246 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_access
);
1249 __zpl_xattr_acl_get_default(struct inode
*ip
, const char *name
,
1250 void *buffer
, size_t size
)
1252 struct posix_acl
*acl
;
1253 int type
= ACL_TYPE_DEFAULT
;
1255 /* xattr_resolve_name will do this for us if this is defined */
1256 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1257 return (-EOPNOTSUPP
);
1259 acl
= zpl_get_acl_impl(ip
, type
);
1261 return (PTR_ERR(acl
));
1265 error
= zpl_acl_to_xattr(acl
, buffer
, size
);
1266 zpl_posix_acl_release(acl
);
1270 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default
);
1273 __zpl_xattr_acl_set_access(zidmap_t
*mnt_ns
,
1274 struct inode
*ip
, const char *name
,
1275 const void *value
, size_t size
, int flags
)
1277 struct posix_acl
*acl
;
1278 int type
= ACL_TYPE_ACCESS
;
1280 /* xattr_resolve_name will do this for us if this is defined */
1281 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1282 return (-EOPNOTSUPP
);
1284 #if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
1285 if (!zpl_inode_owner_or_capable(mnt_ns
, ip
))
1289 if (!zpl_inode_owner_or_capable(zfs_init_idmap
, ip
))
1294 acl
= zpl_acl_from_xattr(value
, size
);
1296 return (PTR_ERR(acl
));
1298 error
= posix_acl_valid(ip
->i_sb
->s_user_ns
, acl
);
1300 zpl_posix_acl_release(acl
);
1307 error
= zpl_set_acl_impl(ip
, acl
, type
);
1308 zpl_posix_acl_release(acl
);
1312 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access
);
1315 __zpl_xattr_acl_set_default(zidmap_t
*mnt_ns
,
1316 struct inode
*ip
, const char *name
,
1317 const void *value
, size_t size
, int flags
)
1319 struct posix_acl
*acl
;
1320 int type
= ACL_TYPE_DEFAULT
;
1322 /* xattr_resolve_name will do this for us if this is defined */
1323 if (ITOZSB(ip
)->z_acl_type
!= ZFS_ACLTYPE_POSIX
)
1324 return (-EOPNOTSUPP
);
1326 #if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
1327 if (!zpl_inode_owner_or_capable(mnt_ns
, ip
))
1331 if (!zpl_inode_owner_or_capable(zfs_init_idmap
, ip
))
1336 acl
= zpl_acl_from_xattr(value
, size
);
1338 return (PTR_ERR(acl
));
1340 error
= posix_acl_valid(ip
->i_sb
->s_user_ns
, acl
);
1342 zpl_posix_acl_release(acl
);
1350 error
= zpl_set_acl_impl(ip
, acl
, type
);
1351 zpl_posix_acl_release(acl
);
1355 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_default
);
1358 * ACL access xattr namespace handlers.
1360 * Use .name instead of .prefix when available. xattr_resolve_name will match
1361 * whole name and reject anything that has .name only as prefix.
1363 static xattr_handler_t zpl_xattr_acl_access_handler
= {
1364 .name
= XATTR_NAME_POSIX_ACL_ACCESS
,
1365 .list
= zpl_xattr_acl_list_access
,
1366 .get
= zpl_xattr_acl_get_access
,
1367 .set
= zpl_xattr_acl_set_access
,
1368 .flags
= ACL_TYPE_ACCESS
,
1372 * ACL default xattr namespace handlers.
1374 * Use .name instead of .prefix. xattr_resolve_name will match whole name and
1375 * reject anything that has .name only as prefix.
/*
 * Handler descriptor for the POSIX default ACL xattr.  It is keyed by the
 * full attribute name (XATTR_NAME_POSIX_ACL_DEFAULT) via .name, wires up the
 * default-ACL list/get/set callbacks, and carries ACL_TYPE_DEFAULT in .flags.
 * It is referenced from zpl_xattr_handlers[] and returned by
 * zpl_xattr_handler().
 */
1377 static xattr_handler_t zpl_xattr_acl_default_handler
= {
1378 .name
= XATTR_NAME_POSIX_ACL_DEFAULT
,
1379 .list
= zpl_xattr_acl_list_default
,
1380 .get
= zpl_xattr_acl_get_default
,
1381 .set
= zpl_xattr_acl_set_default
,
1382 .flags
= ACL_TYPE_DEFAULT
,
1385 #endif /* CONFIG_FS_POSIX_ACL */
/*
 * Table of pointers to the xattr handlers defined in this file: security,
 * trusted and user always, plus the two POSIX ACL handlers when
 * CONFIG_FS_POSIX_ACL is defined.  The table is not declared static, so it
 * is visible to other translation units.
 *
 * NOTE(review): the end of the initializer (including any terminating
 * entry) is not visible in this view -- confirm against the full file.
 */
1387 xattr_handler_t
*zpl_xattr_handlers
[] = {
1388 &zpl_xattr_security_handler
,
1389 &zpl_xattr_trusted_handler
,
1390 &zpl_xattr_user_handler
,
1391 #ifdef CONFIG_FS_POSIX_ACL
1392 &zpl_xattr_acl_access_handler
,
1393 &zpl_xattr_acl_default_handler
,
1394 #endif /* CONFIG_FS_POSIX_ACL */
/*
 * Map an xattr name to the handler responsible for it.
 *
 * The user, trusted and security namespaces are matched by prefix
 * (strncmp() limited to the corresponding *_PREFIX_LEN).  The two POSIX ACL
 * names are compared over sizeof (NAME) bytes; assuming those macros are
 * string literals, that length includes the terminating NUL, so only the
 * exact name matches.
 *
 * NOTE(review): the return for a name that matches nothing is not visible
 * in this view; the caller in this file tests the result against NULL.
 */
1398 static const struct xattr_handler
*
1399 zpl_xattr_handler(const char *name
)
/* Prefix match: user namespace. */
1401 if (strncmp(name
, XATTR_USER_PREFIX
,
1402 XATTR_USER_PREFIX_LEN
) == 0)
1403 return (&zpl_xattr_user_handler
);
/* Prefix match: trusted namespace. */
1405 if (strncmp(name
, XATTR_TRUSTED_PREFIX
,
1406 XATTR_TRUSTED_PREFIX_LEN
) == 0)
1407 return (&zpl_xattr_trusted_handler
);
/* Prefix match: security namespace. */
1409 if (strncmp(name
, XATTR_SECURITY_PREFIX
,
1410 XATTR_SECURITY_PREFIX_LEN
) == 0)
1411 return (&zpl_xattr_security_handler
);
1413 #ifdef CONFIG_FS_POSIX_ACL
/* Whole-name comparison: sizeof () rather than a prefix length. */
1414 if (strncmp(name
, XATTR_NAME_POSIX_ACL_ACCESS
,
1415 sizeof (XATTR_NAME_POSIX_ACL_ACCESS
)) == 0)
1416 return (&zpl_xattr_acl_access_handler
);
1418 if (strncmp(name
, XATTR_NAME_POSIX_ACL_DEFAULT
,
1419 sizeof (XATTR_NAME_POSIX_ACL_DEFAULT
)) == 0)
1420 return (&zpl_xattr_acl_default_handler
);
1421 #endif /* CONFIG_FS_POSIX_ACL */
/*
 * Classify xattr 'name' for the listing described by 'xf'.
 *
 * The result starts as XAPERM_ALLOW.  If no handler matches the name:
 * names in the FreeBSD namespace yield XAPERM_DENY, and anything else is
 * marked XAPERM_COMPAT and treated as a user-namespace xattr.  If the
 * chosen handler has a .list callback and it returns false for the dentry,
 * the result is XAPERM_DENY.
 *
 * 'name_len' is not referenced in the lines visible here.
 *
 * NOTE(review): the final return is not visible in this view; presumably
 * it returns 'perm' -- confirm against the full file.
 */
1426 static enum xattr_permission
1427 zpl_xattr_permission(xattr_filldir_t
*xf
, const char *name
, int name_len
)
1429 const struct xattr_handler
*handler
;
1430 struct dentry
*d __maybe_unused
= xf
->dentry
;
1431 enum xattr_permission perm
= XAPERM_ALLOW
;
1433 handler
= zpl_xattr_handler(name
);
1434 if (handler
== NULL
) {
1435 /* Do not expose FreeBSD system namespace xattrs. */
1436 if (ZFS_XA_NS_PREFIX_MATCH(FREEBSD
, name
))
1437 return (XAPERM_DENY
);
1439 * Anything that doesn't match a known namespace gets put in the
1440 * user namespace for compatibility with other platforms.
1442 perm
= XAPERM_COMPAT
;
1443 handler
= &zpl_xattr_user_handler
;
/* Let the handler's .list callback, when present, veto the name. */
1446 if (handler
->list
) {
1447 if (!handler
->list(d
))
1448 return (XAPERM_DENY
);
1454 #ifdef CONFIG_FS_POSIX_ACL
/*
 * Node of the deferred ACL release list: 'next' links to the following
 * node and 'acl' is the ACL held by this node.
 *
 * NOTE(review): code in this file also reads and writes a 'time' member
 * (a->time) that is not visible in this view of the struct.
 */
1456 struct acl_rel_struct
{
1457 struct acl_rel_struct
*next
;
1458 struct posix_acl
*acl
;
/*
 * Timing constants for the deferred ACL release, in lbolt ticks (values are
 * multiples of HZ; 60*HZ is 60 seconds if HZ is ticks per second, as in the
 * Linux kernel):
 *   ACL_REL_GRACE  - minimum age a node must reach before
 *                    zpl_posix_acl_free() takes it off the list.
 *   ACL_REL_WINDOW - extra slack added on top of the grace period.
 *   ACL_REL_SCHED  - delay used when scheduling the free task
 *                    (grace + window).
 */
1462 #define ACL_REL_GRACE (60*HZ)
1463 #define ACL_REL_WINDOW (1*HZ)
1464 #define ACL_REL_SCHED (ACL_REL_GRACE+ACL_REL_WINDOW)
1467 * Lockless multi-producer single-consumer fifo list.
1468 * Nodes are added to tail and removed from head. Tail pointer is our
1469 * synchronization point. It always points to the next pointer of the last
1470 * node, or head if list is empty.
/*
 * List state.  acl_rel_head is the first pending node (NULL when empty).
 * acl_rel_tail initially points at acl_rel_head itself, which is the
 * "list is empty" condition tested by zpl_posix_acl_release_impl().
 */
1472 static struct acl_rel_struct
*acl_rel_head
= NULL
;
1473 static struct acl_rel_struct
**acl_rel_tail
= &acl_rel_head
;
/*
 * Task function that drains the deferred ACL release list from the head.
 * It is scheduled through taskq_dispatch_delay() with a NULL argument, so
 * 'arg' carries no information in the dispatches visible in this file.
 *
 * For the node at the head:
 *   - if it is at least ACL_REL_GRACE ticks old it is unlinked; when it is
 *     also the last node, the tail pointer is swung back to &acl_rel_head
 *     with cmpxchg() so that a concurrent adder is detected;
 *   - otherwise the task re-dispatches itself for the time at which that
 *     node will have aged ACL_REL_SCHED ticks.
 * Unlinked nodes are released with kmem_free().
 *
 * NOTE(review): several statements are not visible in this view (the
 * assignment of 'a', the updates of 'freelist' and 'refire', the
 * declaration of 'new_time', and the body of the spin loop) -- confirm
 * against the full file before relying on details.
 */
1476 zpl_posix_acl_free(void *arg
)
1478 struct acl_rel_struct
*freelist
= NULL
;
1479 struct acl_rel_struct
*a
;
1481 boolean_t refire
= B_FALSE
;
/* The task is only expected to run while the list is non-empty. */
1483 ASSERT3P(acl_rel_head
, !=, NULL
);
1484 while (acl_rel_head
) {
/* Has this node outlived its grace period? */
1486 if (ddi_get_lbolt() - a
->time
>= ACL_REL_GRACE
) {
1488 * If a is the last node we need to reset tail, but we
1489 * need to use cmpxchg to make sure it is still the
1492 if (acl_rel_tail
== &a
->next
) {
1493 acl_rel_head
= NULL
;
/*
 * Succeeds only if no adder has replaced the tail since the check
 * above; the list is then empty again.
 */
1494 if (cmpxchg(&acl_rel_tail
, &a
->next
,
1495 &acl_rel_head
) == &a
->next
) {
1496 ASSERT3P(a
->next
, ==, NULL
);
1503 * a is not last node, make sure next pointer is set
1504 * by the adder and advance the head.
/* Wait until the adder that took the tail has published a->next. */
1506 while (READ_ONCE(a
->next
) == NULL
)
1508 acl_rel_head
= a
->next
;
1513 * a is still in grace period. We are responsible to
1514 * reschedule the free task, since adder will only do
1515 * so if list is empty.
1517 new_time
= a
->time
+ ACL_REL_SCHED
;
/* Re-arm this task for the computed time. */
1524 taskq_dispatch_delay(system_delay_taskq
, zpl_posix_acl_free
,
1525 NULL
, TQ_SLEEP
, new_time
);
/* Release the list node itself. */
1531 kmem_free(a
, sizeof (struct acl_rel_struct
));
/*
 * Queue 'acl' for deferred release.
 *
 * A list node is allocated (KM_SLEEP), time-stamped with the current lbolt
 * value, and made the new tail with an atomic xchg().  If the previous tail
 * was &acl_rel_head the list was empty, so this caller schedules
 * zpl_posix_acl_free() to run ACL_REL_SCHED ticks from now.
 *
 * NOTE(review): the initialization of a->next and a->acl, and the store
 * that links the node through 'prev', are not visible in this view --
 * confirm against the full file.
 */
1536 zpl_posix_acl_release_impl(struct posix_acl
*acl
)
1538 struct acl_rel_struct
*a
, **prev
;
1540 a
= kmem_alloc(sizeof (struct acl_rel_struct
), KM_SLEEP
);
/* Time stamp that zpl_posix_acl_free() compares against ACL_REL_GRACE. */
1543 a
->time
= ddi_get_lbolt();
1544 /* atomically point the tail at us and fetch the previous tail */
1545 prev
= xchg(&acl_rel_tail
, &a
->next
);
/* The slot the old tail pointed at must still be unlinked. */
1546 ASSERT3P(*prev
, ==, NULL
);
1548 /* if it was empty before, schedule the free task */
1549 if (prev
== &acl_rel_head
)
1550 taskq_dispatch_delay(system_delay_taskq
, zpl_posix_acl_free
,
1551 NULL
, TQ_SLEEP
, ddi_get_lbolt() + ACL_REL_SCHED
);
/*
 * Declare the xattr_compat tunable through ZFS_MODULE_PARAM: scope 'zfs',
 * name prefix 'zfs_', type INT, access ZMOD_RW.  The string is the
 * parameter's description.
 */
1555 ZFS_MODULE_PARAM(zfs
, zfs_
, xattr_compat
, INT
, ZMOD_RW
,
1556 "Use legacy ZFS xattr naming for writing new user namespace xattrs");