1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * Copyright (c) 2022-2024 Oracle.
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_shared.h"
12 #include "xfs_trans_resv.h"
13 #include "xfs_mount.h"
14 #include "xfs_bmap_btree.h"
15 #include "xfs_inode.h"
16 #include "xfs_error.h"
17 #include "xfs_trace.h"
18 #include "xfs_trans.h"
19 #include "xfs_da_format.h"
20 #include "xfs_da_btree.h"
22 #include "xfs_ioctl.h"
23 #include "xfs_parent.h"
24 #include "xfs_handle.h"
25 #include "xfs_health.h"
26 #include "xfs_icache.h"
27 #include "xfs_export.h"
28 #include "xfs_xattr.h"
31 #include <linux/namei.h>
/*
 * Length of the file ID part of a handle, as stored in ha_fid.fid_len:
 * the size of struct xfs_fid minus the size of the fid_len member itself.
 *
 * NOTE(review): the return type line and the braces of this function are
 * missing from this extract.
 */
34 xfs_filehandle_fid_len(void)
/* Never dereferenced; only used to name the fid_len member for sizeof. */
36 struct xfs_handle
*handle
= NULL
;
38 return sizeof(struct xfs_fid
) - sizeof(handle
->ha_fid
.fid_len
);
46 struct xfs_handle
*handle
)
48 memcpy(&handle
->ha_fsid
, mp
->m_fixedfsid
, sizeof(struct xfs_fsid
));
50 handle
->ha_fid
.fid_len
= xfs_filehandle_fid_len();
51 handle
->ha_fid
.fid_pad
= 0;
52 handle
->ha_fid
.fid_gen
= gen
;
53 handle
->ha_fid
.fid_ino
= ino
;
55 return sizeof(struct xfs_handle
);
61 struct xfs_handle
*handle
)
63 memcpy(&handle
->ha_fsid
, mp
->m_fixedfsid
, sizeof(struct xfs_fsid
));
64 memset(&handle
->ha_fid
, 0, sizeof(handle
->ha_fid
));
66 return sizeof(struct xfs_fsid
);
70 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
71 * a file or fs handle.
73 * XFS_IOC_PATH_TO_FSHANDLE
74 * returns fs handle for a mount point or path within that mount point
75 * XFS_IOC_FD_TO_HANDLE
76 * returns full handle for a FD opened in user space
77 * XFS_IOC_PATH_TO_HANDLE
78 * returns full handle for a path
83 xfs_fsop_handlereq_t
*hreq
)
92 if (cmd
== XFS_IOC_FD_TO_HANDLE
) {
93 CLASS(fd
, f
)(hreq
->fd
);
97 path
= fd_file(f
)->f_path
;
100 error
= user_path_at(AT_FDCWD
, hreq
->path
, 0, &path
);
104 inode
= d_inode(path
.dentry
);
108 * We can only generate handles for inodes residing on a XFS filesystem,
109 * and only for regular files, directories or symbolic links.
112 if (inode
->i_sb
->s_magic
!= XFS_SB_MAGIC
)
116 if (!S_ISREG(inode
->i_mode
) &&
117 !S_ISDIR(inode
->i_mode
) &&
118 !S_ISLNK(inode
->i_mode
))
122 memcpy(&handle
.ha_fsid
, ip
->i_mount
->m_fixedfsid
, sizeof(xfs_fsid_t
));
124 if (cmd
== XFS_IOC_PATH_TO_FSHANDLE
)
125 hsize
= xfs_fshandle_init(ip
->i_mount
, &handle
);
127 hsize
= xfs_filehandle_init(ip
->i_mount
, ip
->i_ino
,
128 inode
->i_generation
, &handle
);
131 if (copy_to_user(hreq
->ohandle
, &handle
, hsize
) ||
132 copy_to_user(hreq
->ohandlen
, &hsize
, sizeof(__s32
)))
143 * No need to do permission checks on the various pathname components
144 * as the handle operations are privileged.
/*
 * Acceptance callback that xfs_khandle_to_dentry() hands to
 * exportfs_decode_fh().
 *
 * NOTE(review): only part of the parameter list is visible in this
 * extract and the body is missing, so the return value cannot be
 * documented from here.
 */
147 xfs_handle_acceptable(
149 struct dentry
*dentry
)
154 /* Convert handle already copied to kernel space into a dentry. */
/*
 * The inode number and generation stored in the handle are repacked into a
 * struct xfs_fid64 and decoded by exportfs_decode_fh() against the mount of
 * the file argument.
 *
 * Returns ERR_PTR(-ENOTDIR) when the file argument is not a directory and
 * ERR_PTR(-EINVAL) when the fid_len recorded in the handle differs from
 * xfs_filehandle_fid_len(); otherwise whatever exportfs_decode_fh() returns.
 *
 * NOTE(review): the first parameter line (the file used below) and the
 * closing of the fid initializer are missing from this extract.
 */
155 static struct dentry
*
156 xfs_khandle_to_dentry(
158 struct xfs_handle
*handle
)
160 struct xfs_fid64 fid
= {
161 .ino
= handle
->ha_fid
.fid_ino
,
162 .gen
= handle
->ha_fid
.fid_gen
,
166 * Only allow handle opens under a directory.
168 if (!S_ISDIR(file_inode(file
)->i_mode
))
169 return ERR_PTR(-ENOTDIR
);
/* Reject handles whose recorded file ID length is not the expected one. */
171 if (handle
->ha_fid
.fid_len
!= xfs_filehandle_fid_len())
172 return ERR_PTR(-EINVAL
);
/*
 * The literal 3 is the file handle length argument; presumably it is
 * counted in 32-bit words (64-bit ino plus 32-bit gen) - TODO confirm
 * against exportfs_decode_fh().  No context pointer is passed to the
 * acceptance callback (last argument is NULL).
 */
174 return exportfs_decode_fh(file
->f_path
.mnt
, (struct fid
*)&fid
, 3,
175 FILEID_INO32_GEN
| XFS_FILEID_TYPE_64FLAG
,
176 xfs_handle_acceptable
, NULL
);
179 /* Convert handle already copied to kernel space into an xfs_inode. */
/*
 * The lookup is done with xfs_nfs_get_inode() on the superblock of the
 * mount that the file argument belongs to, using the inode number and
 * generation stored in the handle.
 *
 * Returns ERR_PTR(-ENOTDIR) when the file argument is not a directory and
 * ERR_PTR(-EINVAL) when the fid_len in the handle is not
 * xfs_filehandle_fid_len().
 *
 * NOTE(review): the first parameter line, the declaration of the inode
 * local, the condition guarding the ERR_CAST return and the success return
 * are all missing from this extract.
 */
180 static struct xfs_inode
*
181 xfs_khandle_to_inode(
183 struct xfs_handle
*handle
)
185 struct xfs_inode
*ip
= XFS_I(file_inode(file
));
186 struct xfs_mount
*mp
= ip
->i_mount
;
/* Same directory-only restriction as xfs_khandle_to_dentry(). */
189 if (!S_ISDIR(VFS_I(ip
)->i_mode
))
190 return ERR_PTR(-ENOTDIR
);
192 if (handle
->ha_fid
.fid_len
!= xfs_filehandle_fid_len())
193 return ERR_PTR(-EINVAL
);
195 inode
= xfs_nfs_get_inode(mp
->m_super
, handle
->ha_fid
.fid_ino
,
196 handle
->ha_fid
.fid_gen
);
/* Error pointers from the lookup are re-typed and passed to the caller. */
198 return ERR_CAST(inode
);
204 * Convert userspace handle data into a dentry.
/*
 * Only a handle of exactly sizeof(xfs_handle_t) bytes is accepted
 * (ERR_PTR(-EINVAL) otherwise).  The handle is copied in from userspace
 * (ERR_PTR(-EFAULT) on failure) and then decoded by
 * xfs_khandle_to_dentry() relative to parfilp.
 *
 * NOTE(review): the return type line, the hlen parameter line and the
 * declaration of the local handle are missing from this extract.
 */
207 xfs_handle_to_dentry(
208 struct file
*parfilp
,
209 void __user
*uhandle
,
/* Checked before the copy, so hlen is a fixed size when used below. */
214 if (hlen
!= sizeof(xfs_handle_t
))
215 return ERR_PTR(-EINVAL
);
216 if (copy_from_user(&handle
, uhandle
, hlen
))
217 return ERR_PTR(-EFAULT
);
219 return xfs_khandle_to_dentry(parfilp
, &handle
);
222 STATIC
struct dentry
*
223 xfs_handlereq_to_dentry(
224 struct file
*parfilp
,
225 xfs_fsop_handlereq_t
*hreq
)
227 return xfs_handle_to_dentry(parfilp
, hreq
->ihandle
, hreq
->ihandlen
);
232 struct file
*parfilp
,
233 xfs_fsop_handlereq_t
*hreq
)
235 const struct cred
*cred
= current_cred();
241 struct dentry
*dentry
;
245 if (!capable(CAP_SYS_ADMIN
))
248 dentry
= xfs_handlereq_to_dentry(parfilp
, hreq
);
250 return PTR_ERR(dentry
);
251 inode
= d_inode(dentry
);
253 /* Restrict xfs_open_by_handle to directories & regular files. */
254 if (!(S_ISREG(inode
->i_mode
) || S_ISDIR(inode
->i_mode
))) {
259 #if BITS_PER_LONG != 32
260 hreq
->oflags
|= O_LARGEFILE
;
263 permflag
= hreq
->oflags
;
264 fmode
= OPEN_FMODE(permflag
);
265 if ((!(permflag
& O_APPEND
) || (permflag
& O_TRUNC
)) &&
266 (fmode
& FMODE_WRITE
) && IS_APPEND(inode
)) {
271 if ((fmode
& FMODE_WRITE
) && IS_IMMUTABLE(inode
)) {
276 /* Can't write directories. */
277 if (S_ISDIR(inode
->i_mode
) && (fmode
& FMODE_WRITE
)) {
282 fd
= get_unused_fd_flags(0);
288 path
.mnt
= parfilp
->f_path
.mnt
;
289 path
.dentry
= dentry
;
290 filp
= dentry_open(&path
, hreq
->oflags
, cred
);
294 return PTR_ERR(filp
);
297 if (S_ISREG(inode
->i_mode
)) {
298 filp
->f_flags
|= O_NOATIME
;
299 filp
->f_mode
|= FMODE_NOCMTIME
;
302 fd_install(fd
, filp
);
/*
 * Read the target of the symlink named by the input handle of hreq.
 *
 * Visible steps: a CAP_SYS_ADMIN check, decoding the handle to a dentry
 * (the error from a failed decode is returned through PTR_ERR), a check
 * that the dentry is a symlink, reading the output length from the user
 * location hreq->ohandlen, and vfs_readlink() into hreq->ohandle with that
 * length.
 *
 * NOTE(review): the return type, the declarations of olen and error, the
 * actions taken when the checks fail, and the cleanup and return at the end
 * are missing from this extract.
 */
311 xfs_readlink_by_handle(
312 struct file
*parfilp
,
313 xfs_fsop_handlereq_t
*hreq
)
315 struct dentry
*dentry
;
319 if (!capable(CAP_SYS_ADMIN
))
322 dentry
= xfs_handlereq_to_dentry(parfilp
, hreq
);
324 return PTR_ERR(dentry
);
326 /* Restrict this handle operation to symlinks only. */
327 if (!d_is_symlink(dentry
)) {
/* ohandlen is used here as an input: the size of the user buffer. */
332 if (copy_from_user(&olen
, hreq
->ohandlen
, sizeof(__u32
))) {
/* ohandle is the user buffer that receives the link target. */
337 error
= vfs_readlink(dentry
, hreq
->ohandle
, olen
);
345 * Format an attribute and copy it out to the user's buffer.
346 * Take care to check values and protect against them changing later,
347 * we may be reading them directly out of a user buffer.
/*
 * Listent callback that formats one attribute into context->buffer.
 *
 * The buffer starts with a struct xfs_attrlist whose al_offset array is
 * indexed by context->count.  Entries (struct xfs_attrlist_ent) are placed
 * at buffer + firstu, and firstu is lowered for every entry, so the offset
 * array and the entries approach each other.  When the lowered firstu would
 * fall below the top of the offset array, the attr is not stored and
 * seen_enough is set to 1.
 *
 * NOTE(review): the return type, the remaining parameters (flags, name,
 * namelen, value, valuelen as used below), the declaration of arraytop and
 * the early returns are missing from this extract.
 */
350 xfs_ioc_attr_put_listent(
351 struct xfs_attr_list_context
*context
,
358 struct xfs_attrlist
*alist
= context
->buffer
;
359 struct xfs_attrlist_ent
*aep
;
/* Sanity checks on the listing state handed in by the caller. */
362 ASSERT(!context
->seen_enough
);
363 ASSERT(context
->count
>= 0);
364 ASSERT(context
->count
< (ATTR_MAX_VALUELEN
/8));
365 ASSERT(context
->firstu
>= sizeof(*alist
));
366 ASSERT(context
->firstu
<= context
->bufsize
);
369 * Only list entries in the right namespace.
371 if (context
->attr_filter
!= (flags
& XFS_ATTR_NSP_ONDISK_MASK
))
/* Byte offset just past the offset slots counted so far. */
374 arraytop
= sizeof(*alist
) +
375 context
->count
* sizeof(alist
->al_offset
[0]);
377 /* decrement by the actual bytes used by the attr */
/* Entry size: fixed part, name, one NUL byte, rounded up to 4 bytes. */
378 context
->firstu
-= round_up(offsetof(struct xfs_attrlist_ent
, a_name
) +
379 namelen
+ 1, sizeof(uint32_t));
/* Out of room: the new entry would overlap the offset array. */
380 if (context
->firstu
< arraytop
) {
381 trace_xfs_attr_list_full(context
);
383 context
->seen_enough
= 1;
/* Store the entry and record its offset in the next array slot. */
387 aep
= context
->buffer
+ context
->firstu
;
388 aep
->a_valuelen
= valuelen
;
389 memcpy(aep
->a_name
, name
, namelen
);
390 aep
->a_name
[namelen
] = 0;
391 alist
->al_offset
[context
->count
++] = context
->firstu
;
392 alist
->al_count
= context
->count
;
393 trace_xfs_attr_list_add(context
);
400 if (ioc_flags
& XFS_IOC_ATTR_ROOT
)
401 return XFS_ATTR_ROOT
;
402 if (ioc_flags
& XFS_IOC_ATTR_SECURE
)
403 return XFS_ATTR_SECURE
;
407 static inline enum xfs_attr_update
413 return XFS_ATTRUPDATE_REMOVE
;
414 if (ioc_flags
& XFS_IOC_ATTR_CREATE
)
415 return XFS_ATTRUPDATE_CREATE
;
416 if (ioc_flags
& XFS_IOC_ATTR_REPLACE
)
417 return XFS_ATTRUPDATE_REPLACE
;
418 return XFS_ATTRUPDATE_UPSERT
;
423 struct xfs_inode
*dp
,
427 struct xfs_attrlist_cursor __user
*ucursor
)
429 struct xfs_attr_list_context context
= { };
430 struct xfs_attrlist
*alist
;
434 if (bufsize
< sizeof(struct xfs_attrlist
) ||
435 bufsize
> XFS_XATTR_LIST_MAX
)
439 * Reject flags, only allow namespaces.
441 if (flags
& ~(XFS_IOC_ATTR_ROOT
| XFS_IOC_ATTR_SECURE
))
443 if (flags
== (XFS_IOC_ATTR_ROOT
| XFS_IOC_ATTR_SECURE
))
447 * Validate the cursor.
449 if (copy_from_user(&context
.cursor
, ucursor
, sizeof(context
.cursor
)))
451 if (context
.cursor
.pad1
|| context
.cursor
.pad2
)
453 if (!context
.cursor
.initted
&&
454 (context
.cursor
.hashval
|| context
.cursor
.blkno
||
455 context
.cursor
.offset
))
458 buffer
= kvzalloc(bufsize
, GFP_KERNEL
);
463 * Initialize the output buffer.
467 context
.attr_filter
= xfs_attr_filter(flags
);
468 context
.buffer
= buffer
;
469 context
.bufsize
= round_down(bufsize
, sizeof(uint32_t));
470 context
.firstu
= context
.bufsize
;
471 context
.put_listent
= xfs_ioc_attr_put_listent
;
473 alist
= context
.buffer
;
476 alist
->al_offset
[0] = context
.bufsize
;
478 error
= xfs_attr_list(&context
);
482 if (copy_to_user(ubuf
, buffer
, bufsize
) ||
483 copy_to_user(ucursor
, &context
.cursor
, sizeof(context
.cursor
)))
/*
 * List the attributes of the inode named by a handle.
 *
 * Visible steps: a CAP_SYS_ADMIN check, copying the request structure in
 * from userspace, decoding the embedded handle request to a dentry (a
 * failed decode is returned through PTR_ERR), and calling
 * xfs_ioc_attr_list() on the inode of that dentry with the buffer, buffer
 * length and flags from the request.
 *
 * NOTE(review): the return type, the declaration of error, the actions
 * taken when the first two checks fail, and the dentry release and final
 * return are missing from this extract.
 */
491 xfs_attrlist_by_handle(
492 struct file
*parfilp
,
493 struct xfs_fsop_attrlist_handlereq __user
*p
)
495 struct xfs_fsop_attrlist_handlereq al_hreq
;
496 struct dentry
*dentry
;
499 if (!capable(CAP_SYS_ADMIN
))
501 if (copy_from_user(&al_hreq
, p
, sizeof(al_hreq
)))
504 dentry
= xfs_handlereq_to_dentry(parfilp
, &al_hreq
.hreq
);
506 return PTR_ERR(dentry
);
/*
 * The cursor argument is the address of the pos member inside the
 * userspace request itself, not of the kernel copy in al_hreq.
 */
508 error
= xfs_ioc_attr_list(XFS_I(d_inode(dentry
)), al_hreq
.buffer
,
509 al_hreq
.buflen
, al_hreq
.flags
, &p
->pos
);
/*
 * Fetch one attribute value and copy it to the user buffer ubuf.
 *
 * The lookup arguments are built from the attribute name (length taken
 * with strlen) and the namespace filter derived from flags.  The length
 * that len points to is compared with XFS_XATTR_SIZE_MAX before the lookup;
 * after xfs_attr_get() it is overwritten with the length of the value
 * found, and that many bytes are copied to ubuf.
 *
 * NOTE(review): the return type, the other parameter lines, several
 * initializer members, the error handling and the release of the value
 * buffer are missing from this extract.
 */
515 xfs_attrmulti_attr_get(
518 unsigned char __user
*ubuf
,
522 struct xfs_da_args args
= {
524 .attr_filter
= xfs_attr_filter(flags
),
526 .namelen
= strlen(name
),
531 if (*len
> XFS_XATTR_SIZE_MAX
)
534 error
= xfs_attr_get(&args
);
/* Report the actual value length back through len. */
538 *len
= args
.valuelen
;
539 if (copy_to_user(ubuf
, args
.value
, args
.valuelen
))
/*
 * Change one attribute using a value supplied in the user buffer ubuf.
 *
 * Visible steps: a test for immutable or append-only inodes, a test of len
 * against XFS_XATTR_SIZE_MAX, duplicating the user value into kernel memory
 * with memdup_user() (its error is returned through PTR_ERR), and applying
 * the change with xfs_attr_change().
 *
 * NOTE(review): the return type, the other parameter lines, several
 * initializer members, the actions taken when the first two tests are true,
 * any condition around the value copy, and the release of the value and
 * final return are missing from this extract.
 */
548 xfs_attrmulti_attr_set(
551 const unsigned char __user
*ubuf
,
555 struct xfs_da_args args
= {
557 .attr_filter
= xfs_attr_filter(flags
),
559 .namelen
= strlen(name
),
563 if (IS_IMMUTABLE(inode
) || IS_APPEND(inode
))
567 if (len
> XFS_XATTR_SIZE_MAX
)
569 args
.value
= memdup_user(ubuf
, len
);
570 if (IS_ERR(args
.value
))
571 return PTR_ERR(args
.value
);
575 error
= xfs_attr_change(&args
, xfs_xattr_flags(flags
, args
.value
));
/*
 * After a successful change in the root namespace the ACL state for this
 * name is dropped; presumably because cached ACLs are backed by root
 * namespace attrs - TODO confirm against xfs_forget_acl().
 */
576 if (!error
&& (flags
& XFS_IOC_ATTR_ROOT
))
577 xfs_forget_acl(inode
, name
);
/*
 * Perform a single attribute operation of a multi-op request.
 *
 * Visible steps: a test for a request that sets both the ROOT and the
 * SECURE namespace flag, copying the attribute name in from userspace with
 * strndup_user() limited to MAXNAMELEN (its error is returned through
 * PTR_ERR), a call to xfs_attrmulti_attr_get(), and a call to
 * xfs_attrmulti_attr_set() that is bracketed by mnt_want_write_file() and
 * mnt_drop_write_file() on parfilp.
 *
 * NOTE(review): the return type, most parameter lines, the opcode dispatch
 * that selects between the get and set calls, the error handling and the
 * release of the name are missing from this extract.
 */
583 xfs_ioc_attrmulti_one(
584 struct file
*parfilp
,
595 if ((flags
& XFS_IOC_ATTR_ROOT
) && (flags
& XFS_IOC_ATTR_SECURE
))
598 name
= strndup_user(uname
, MAXNAMELEN
);
600 return PTR_ERR(name
);
/* The get path passes len itself so the callee can update it. */
604 error
= xfs_attrmulti_attr_get(inode
, name
, value
, len
, flags
);
611 error
= mnt_want_write_file(parfilp
);
/* The set path passes the length by value. */
614 error
= xfs_attrmulti_attr_set(inode
, name
, value
, *len
, flags
);
615 mnt_drop_write_file(parfilp
);
/*
 * Run a batch of attribute operations on the inode named by a handle.
 *
 * Visible steps: a CAP_SYS_ADMIN check, copying the request in from
 * userspace, a bound on opcount, decoding the handle to a dentry (a failed
 * decode is returned through PTR_ERR), computing and bounding the size of
 * the ops array, duplicating that array from userspace, running each op
 * through xfs_ioc_attrmulti_one() with its result stored in the am_error
 * member of that op, and copying the whole array back to userspace.
 *
 * NOTE(review): the return type, the arg parameter line, the declaration
 * of error, the actions taken when the checks fail, and the cleanup and
 * return at the end are missing from this extract.
 */
627 xfs_attrmulti_by_handle(
628 struct file
*parfilp
,
632 xfs_attr_multiop_t
*ops
;
633 xfs_fsop_attrmulti_handlereq_t am_hreq
;
634 struct dentry
*dentry
;
635 unsigned int i
, size
;
637 if (!capable(CAP_SYS_ADMIN
))
639 if (copy_from_user(&am_hreq
, arg
, sizeof(xfs_fsop_attrmulti_handlereq_t
)))
/*
 * Tested before the multiplication below: an opcount under this bound
 * keeps opcount * sizeof(xfs_attr_multiop_t) below INT_MAX.
 */
643 if (am_hreq
.opcount
>= INT_MAX
/ sizeof(xfs_attr_multiop_t
))
646 dentry
= xfs_handlereq_to_dentry(parfilp
, &am_hreq
.hreq
);
648 return PTR_ERR(dentry
);
651 size
= am_hreq
.opcount
* sizeof(xfs_attr_multiop_t
);
/* An empty array and an array above 16 pages are both tested here. */
652 if (!size
|| size
> 16 * PAGE_SIZE
)
655 ops
= memdup_user(am_hreq
.ops
, size
);
657 error
= PTR_ERR(ops
);
/* Each op records its own result; the loop does not stop on failure. */
662 for (i
= 0; i
< am_hreq
.opcount
; i
++) {
663 ops
[i
].am_error
= xfs_ioc_attrmulti_one(parfilp
,
664 d_inode(dentry
), ops
[i
].am_opcode
,
665 ops
[i
].am_attrname
, ops
[i
].am_attrvalue
,
666 &ops
[i
].am_length
, ops
[i
].am_flags
);
/* Hand the per-op results and lengths back to userspace. */
669 if (copy_to_user(am_hreq
.ops
, ops
, size
))
/*
 * State for one GETPARENTS call.
 *
 * NOTE(review): the member following the internal buffer comment, any
 * members after lastrec, and the closing brace are missing from this
 * extract; other code in this file also uses krecords and count members.
 */
678 struct xfs_getparents_ctx
{
/*
 * Attr listing context; xfs_getparents_put_listent() recovers the enclosing
 * structure from a pointer to this member with container_of().
 */
679 struct xfs_attr_list_context context
;
/* Kernel copy of the request (handle plus gph_request). */
680 struct xfs_getparents_by_handle gph
;
/* Inode whose parent pointers are being read. */
683 struct xfs_inode
*ip
;
685 /* Internal buffer where we format records */
688 /* Last record filled out */
689 struct xfs_getparents_rec
*lastrec
;
/*
 * Size of a parent pointer record that holds a name of namelen bytes:
 * the fixed record structure, the name and one extra byte, rounded up.
 *
 * NOTE(review): the alignment argument of round_up() and the braces are
 * missing from this extract.  The extra byte presumably holds the NUL that
 * xfs_getparents_put_listent() stores after the name - TODO confirm.
 */
694 static inline unsigned int
695 xfs_getparents_rec_sizeof(
696 unsigned int namelen
)
698 return round_up(sizeof(struct xfs_getparents_rec
) + namelen
+ 1,
/*
 * Listent callback that formats one parent pointer into the krecords
 * buffer of the enclosing xfs_getparents_ctx.
 *
 * Visible steps:
 *  - attrs without XFS_ATTR_PARENT are tested first;
 *  - the attr is decoded with xfs_parent_from_attr(); on the visible error
 *    path the inode is marked sick with XFS_SICK_INO_PARENT and
 *    seen_enough is set to -EFSCORRUPTED;
 *  - if the record does not fit in what is left of the buffer
 *    (firstu > bufsize - reclen), seen_enough is set to 1;
 *  - otherwise the record is written at krecords + firstu: its length, the
 *    parent handle built by xfs_filehandle_init(), and the name followed by
 *    a NUL byte; firstu is then advanced by the record length.
 *
 * Here firstu counts the bytes already filled, growing upward from zero.
 *
 * NOTE(review): the return type, the remaining parameters, the
 * declarations of ino, gen and error, the tail of the
 * xfs_parent_from_attr() call, the early returns and any statements after
 * the final firstu update are missing from this extract.
 */
703 xfs_getparents_put_listent(
704 struct xfs_attr_list_context
*context
,
711 struct xfs_getparents_ctx
*gpx
=
712 container_of(context
, struct xfs_getparents_ctx
, context
);
713 struct xfs_inode
*ip
= context
->dp
;
714 struct xfs_mount
*mp
= ip
->i_mount
;
715 struct xfs_getparents
*gp
= &gpx
->gph
.gph_request
;
/* Next free position in the record buffer. */
716 struct xfs_getparents_rec
*gpr
= gpx
->krecords
+ context
->firstu
;
717 unsigned short reclen
=
718 xfs_getparents_rec_sizeof(namelen
);
723 if (!(flags
& XFS_ATTR_PARENT
))
726 error
= xfs_parent_from_attr(mp
, flags
, name
, namelen
, value
, valuelen
,
/* A negative seen_enough carries an error code to the lister caller. */
729 xfs_inode_mark_sick(ip
, XFS_SICK_INO_PARENT
);
730 context
->seen_enough
= -EFSCORRUPTED
;
735 * We found a parent pointer, but we've filled up the buffer. Signal
736 * to the caller that we did /not/ reach the end of the parent pointer
739 if (context
->firstu
> context
->bufsize
- reclen
) {
740 context
->seen_enough
= 1;
744 /* Format the parent pointer directly into the caller buffer. */
745 gpr
->gpr_reclen
= reclen
;
746 xfs_filehandle_init(mp
, ino
, gen
, &gpr
->gpr_parent
);
747 memcpy(gpr
->gpr_name
, name
, namelen
);
748 gpr
->gpr_name
[namelen
] = 0;
750 trace_xfs_getparents_put_listent(ip
, gp
, context
, gpr
);
752 context
->firstu
+= reclen
;
757 /* Expand the last record to fill the rest of the caller's buffer. */
/*
 * The record length of gpx->lastrec is replaced by gp_bufsize minus the
 * byte offset of that record inside krecords, so the record is reported as
 * reaching the end of the requested buffer size.
 *
 * NOTE(review): the return type and the original lines between the local
 * declarations and the assignment are missing from this extract, so it
 * cannot be seen here how a NULL lastrec is handled.
 */
759 xfs_getparents_expand_lastrec(
760 struct xfs_getparents_ctx
*gpx
)
762 struct xfs_getparents
*gp
= &gpx
->gph
.gph_request
;
763 struct xfs_getparents_rec
*gpr
= gpx
->lastrec
;
/* Pointer difference in bytes: offset of the last record in the buffer. */
768 gpr
->gpr_reclen
= gp
->gp_bufsize
- ((void *)gpr
- gpx
->krecords
);
770 trace_xfs_getparents_expand_lastrec(gpx
->ip
, gp
, &gpx
->context
, gpr
);
773 /* Retrieve the parent pointers for a given inode. */
776 struct xfs_getparents_ctx
*gpx
)
778 struct xfs_getparents
*gp
= &gpx
->gph
.gph_request
;
779 struct xfs_inode
*ip
= gpx
->ip
;
780 struct xfs_mount
*mp
= ip
->i_mount
;
784 /* Check size of buffer requested by user */
785 if (gp
->gp_bufsize
> XFS_XATTR_LIST_MAX
)
787 if (gp
->gp_bufsize
< xfs_getparents_rec_sizeof(1))
790 if (gp
->gp_iflags
& ~XFS_GETPARENTS_IFLAGS_ALL
)
795 bufsize
= round_down(gp
->gp_bufsize
, sizeof(uint64_t));
796 gpx
->krecords
= kvzalloc(bufsize
, GFP_KERNEL
);
797 if (!gpx
->krecords
) {
798 bufsize
= min(bufsize
, PAGE_SIZE
);
799 gpx
->krecords
= kvzalloc(bufsize
, GFP_KERNEL
);
804 gpx
->context
.dp
= ip
;
805 gpx
->context
.resynch
= 1;
806 gpx
->context
.put_listent
= xfs_getparents_put_listent
;
807 gpx
->context
.bufsize
= bufsize
;
808 /* firstu is used to track the bytes filled in the buffer */
809 gpx
->context
.firstu
= 0;
811 /* Copy the cursor provided by caller */
812 memcpy(&gpx
->context
.cursor
, &gp
->gp_cursor
,
813 sizeof(struct xfs_attrlist_cursor
));
817 trace_xfs_getparents_begin(ip
, gp
, &gpx
->context
.cursor
);
819 error
= xfs_attr_list(&gpx
->context
);
822 if (gpx
->context
.seen_enough
< 0) {
823 error
= gpx
->context
.seen_enough
;
826 xfs_getparents_expand_lastrec(gpx
);
828 /* Update the caller with the current cursor position */
829 memcpy(&gp
->gp_cursor
, &gpx
->context
.cursor
,
830 sizeof(struct xfs_attrlist_cursor
));
832 /* Is this the root directory? */
833 if (ip
->i_ino
== mp
->m_sb
.sb_rootino
)
834 gp
->gp_oflags
|= XFS_GETPARENTS_OFLAG_ROOT
;
836 if (gpx
->context
.seen_enough
== 0) {
838 * If we did not run out of buffer space, then we reached the
839 * end of the pptr recordset, so set the DONE flag.
841 gp
->gp_oflags
|= XFS_GETPARENTS_OFLAG_DONE
;
842 } else if (gpx
->count
== 0) {
844 * If we ran out of buffer space before copying any parent
845 * pointers at all, the caller's buffer was too short. Tell
846 * userspace that, erm, the message is too long.
852 trace_xfs_getparents_end(ip
, gp
, &gpx
->context
.cursor
);
854 ASSERT(gpx
->context
.firstu
<= gpx
->gph
.gph_request
.gp_bufsize
);
856 /* Copy the records to userspace. */
857 if (copy_to_user(u64_to_user_ptr(gpx
->gph
.gph_request
.gp_buffer
),
858 gpx
->krecords
, gpx
->context
.firstu
))
862 kvfree(gpx
->krecords
);
863 gpx
->krecords
= NULL
;
867 /* Retrieve the parents of this file and pass them back to userspace. */
871 struct xfs_getparents __user
*ureq
)
873 struct xfs_getparents_ctx gpx
= {
874 .ip
= XFS_I(file_inode(file
)),
876 struct xfs_getparents
*kreq
= &gpx
.gph
.gph_request
;
877 struct xfs_mount
*mp
= gpx
.ip
->i_mount
;
880 if (!capable(CAP_SYS_ADMIN
))
882 if (!xfs_has_parent(mp
))
884 if (copy_from_user(kreq
, ureq
, sizeof(*kreq
)))
887 error
= xfs_getparents(&gpx
);
891 if (copy_to_user(ureq
, kreq
, sizeof(*kreq
)))
897 /* Retrieve the parents of this file handle and pass them back to userspace. */
/*
 * Visible steps: a CAP_SYS_ADMIN check, a check that the filesystem has
 * the parent pointer feature (xfs_has_parent), copying the whole
 * by-handle request in from userspace, resolving the embedded handle to an
 * inode with xfs_khandle_to_inode() (a failed lookup is returned through
 * PTR_ERR), running xfs_getparents() on the context, and copying the
 * updated request back to userspace.
 *
 * The kernel copy of the request lives inside the context itself (gpx.gph),
 * so kreq and handle below both point into gpx.
 *
 * NOTE(review): the return type, the file parameter line, the declaration
 * of error, the actions taken when the checks fail, and the inode release
 * and final return are missing from this extract.
 */
899 xfs_ioc_getparents_by_handle(
901 struct xfs_getparents_by_handle __user
*ureq
)
903 struct xfs_getparents_ctx gpx
= { };
/* ip is the inode of the open file, used only to reach the mount. */
904 struct xfs_inode
*ip
= XFS_I(file_inode(file
));
905 struct xfs_mount
*mp
= ip
->i_mount
;
906 struct xfs_getparents_by_handle
*kreq
= &gpx
.gph
;
907 struct xfs_handle
*handle
= &kreq
->gph_handle
;
910 if (!capable(CAP_SYS_ADMIN
))
912 if (!xfs_has_parent(mp
))
914 if (copy_from_user(kreq
, ureq
, sizeof(*kreq
)))
918 * We don't use exportfs_decode_fh because it does too much work here.
919 * If the handle refers to a directory, the exportfs code will walk
920 * upwards through the directory tree to connect the dentries to the
921 * root directory dentry. For GETPARENTS we don't care about that
922 * because we're not actually going to open a file descriptor; we only
923 * want to open an inode and read its parent pointers.
925 * Note that xfs_scrub uses GETPARENTS to log that it will try to fix a
926 * corrupted file's metadata. For this usecase we would really rather
927 * userspace single-step the path reconstruction to avoid loops or
928 * other strange things if the directory tree is corrupt.
930 gpx
.ip
= xfs_khandle_to_inode(file
, handle
);
932 return PTR_ERR(gpx
.ip
);
934 error
= xfs_getparents(&gpx
);
938 if (copy_to_user(ureq
, kreq
, sizeof(*kreq
)))