1 // SPDX-License-Identifier: GPL-2.0
3 * (C) 2001 Clemson University and The University of Chicago
5 * See COPYING in top-level directory.
8 #include "orangefs-kernel.h"
9 #include "orangefs-dev-proto.h"
10 #include "orangefs-bufmap.h"
// fsid_of_op: starting from ORANGEFS_FS_ID_NULL, selects the fs_id stored in
// the request payload that corresponds to op->upcall.type.
//  - file_io, getattr, readdir, setattr, truncate, ra_cache_flush, the four
//    xattr requests and fsync read req.<kind>.refn.fs_id;
//  - lookup, create, remove, mkdir and symlink read req.<kind>.parent_refn.fs_id;
//  - rename reads req.rename.old_parent_refn.fs_id;
//  - statfs and fs_umount read req.<kind>.fs_id directly.
// NOTE(review): this listing is missing original lines (the embedded numbering
// jumps, e.g. 19 -> 21 after each assignment). Presumably each case ends with
// `break;` and the function returns fsid; whether op is NULL-checked before the
// switch cannot be seen here -- confirm against the full file.
12 __s32
fsid_of_op(struct orangefs_kernel_op_s
*op
)
14 __s32 fsid
= ORANGEFS_FS_ID_NULL
;
17 switch (op
->upcall
.type
) {
18 case ORANGEFS_VFS_OP_FILE_IO
:
19 fsid
= op
->upcall
.req
.io
.refn
.fs_id
;
21 case ORANGEFS_VFS_OP_LOOKUP
:
22 fsid
= op
->upcall
.req
.lookup
.parent_refn
.fs_id
;
24 case ORANGEFS_VFS_OP_CREATE
:
25 fsid
= op
->upcall
.req
.create
.parent_refn
.fs_id
;
27 case ORANGEFS_VFS_OP_GETATTR
:
28 fsid
= op
->upcall
.req
.getattr
.refn
.fs_id
;
30 case ORANGEFS_VFS_OP_REMOVE
:
31 fsid
= op
->upcall
.req
.remove
.parent_refn
.fs_id
;
33 case ORANGEFS_VFS_OP_MKDIR
:
34 fsid
= op
->upcall
.req
.mkdir
.parent_refn
.fs_id
;
36 case ORANGEFS_VFS_OP_READDIR
:
37 fsid
= op
->upcall
.req
.readdir
.refn
.fs_id
;
39 case ORANGEFS_VFS_OP_SETATTR
:
40 fsid
= op
->upcall
.req
.setattr
.refn
.fs_id
;
42 case ORANGEFS_VFS_OP_SYMLINK
:
43 fsid
= op
->upcall
.req
.sym
.parent_refn
.fs_id
;
// Rename is keyed on the old parent directory reference.
45 case ORANGEFS_VFS_OP_RENAME
:
46 fsid
= op
->upcall
.req
.rename
.old_parent_refn
.fs_id
;
// statfs carries the fs_id directly, without an object reference.
48 case ORANGEFS_VFS_OP_STATFS
:
49 fsid
= op
->upcall
.req
.statfs
.fs_id
;
51 case ORANGEFS_VFS_OP_TRUNCATE
:
52 fsid
= op
->upcall
.req
.truncate
.refn
.fs_id
;
54 case ORANGEFS_VFS_OP_RA_FLUSH
:
55 fsid
= op
->upcall
.req
.ra_cache_flush
.refn
.fs_id
;
// fs_umount, like statfs, carries the fs_id directly.
57 case ORANGEFS_VFS_OP_FS_UMOUNT
:
58 fsid
= op
->upcall
.req
.fs_umount
.fs_id
;
60 case ORANGEFS_VFS_OP_GETXATTR
:
61 fsid
= op
->upcall
.req
.getxattr
.refn
.fs_id
;
63 case ORANGEFS_VFS_OP_SETXATTR
:
64 fsid
= op
->upcall
.req
.setxattr
.refn
.fs_id
;
66 case ORANGEFS_VFS_OP_LISTXATTR
:
67 fsid
= op
->upcall
.req
.listxattr
.refn
.fs_id
;
69 case ORANGEFS_VFS_OP_REMOVEXATTR
:
70 fsid
= op
->upcall
.req
.removexattr
.refn
.fs_id
;
72 case ORANGEFS_VFS_OP_FSYNC
:
73 fsid
= op
->upcall
.req
.fsync
.refn
.fs_id
;
// orangefs_inode_flags: builds a flag word from attrs->flags by testing the
// ORANGEFS_IMMUTABLE_FL, ORANGEFS_APPEND_FL and ORANGEFS_NOATIME_FL bits.
// The caller visible in this file (orangefs_inode_getattr) stores the result
// in inode->i_flags.
// NOTE(review): only one of the flag updates (`flags &= ~S_IMMUTABLE`) is
// visible in this listing; the declaration of `flags`, the remaining set/clear
// statements and the return are among the missing original lines. Presumably
// each test sets or clears the matching S_* bit -- confirm against the full file.
82 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s
*attrs
)
85 if (attrs
->flags
& ORANGEFS_IMMUTABLE_FL
)
88 flags
&= ~S_IMMUTABLE
;
89 if (attrs
->flags
& ORANGEFS_APPEND_FL
)
93 if (attrs
->flags
& ORANGEFS_NOATIME_FL
)
// orangefs_inode_perms: translates the ORANGEFS permission bits found in
// attrs->perms into the corresponding S_I* mode bits, OR-ing each match into
// perm_mode. Bits are handled in four groups: other, group, user, then the
// setgid/setuid bits.
// The caller visible in this file (orangefs_inode_getattr) ORs the result
// into inode->i_mode.
// NOTE(review): the declaration of perm_mode and the return statement are not
// visible in this listing; presumably perm_mode starts at 0 and is returned.
100 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s
*attrs
)
// "other" permission bits
104 if (attrs
->perms
& ORANGEFS_O_EXECUTE
)
105 perm_mode
|= S_IXOTH
;
106 if (attrs
->perms
& ORANGEFS_O_WRITE
)
107 perm_mode
|= S_IWOTH
;
108 if (attrs
->perms
& ORANGEFS_O_READ
)
109 perm_mode
|= S_IROTH
;
// group permission bits
111 if (attrs
->perms
& ORANGEFS_G_EXECUTE
)
112 perm_mode
|= S_IXGRP
;
113 if (attrs
->perms
& ORANGEFS_G_WRITE
)
114 perm_mode
|= S_IWGRP
;
115 if (attrs
->perms
& ORANGEFS_G_READ
)
116 perm_mode
|= S_IRGRP
;
// user (owner) permission bits
118 if (attrs
->perms
& ORANGEFS_U_EXECUTE
)
119 perm_mode
|= S_IXUSR
;
120 if (attrs
->perms
& ORANGEFS_U_WRITE
)
121 perm_mode
|= S_IWUSR
;
122 if (attrs
->perms
& ORANGEFS_U_READ
)
123 perm_mode
|= S_IRUSR
;
// setgid / setuid bits
125 if (attrs
->perms
& ORANGEFS_G_SGID
)
126 perm_mode
|= S_ISGID
;
127 if (attrs
->perms
& ORANGEFS_U_SUID
)
128 perm_mode
|= S_ISUID
;
// copy_attributes_from_inode: fills *attrs (values plus attrs->mask) from the
// fields of iattr that iattr->ia_valid marks as present.
//  - ATTR_UID / ATTR_GID: owner/group are converted with from_kuid()/from_kgid()
//    against init_user_ns and the SYS_UID / SYS_GID mask bits are set.
//  - ATTR_ATIME / ATTR_MTIME: the SYS_ATIME / SYS_MTIME mask bit is set; tv_sec
//    is copied and the *_SET mask bit added only under ATTR_ATIME_SET /
//    ATTR_MTIME_SET.
//  - ATTR_CTIME: only the SYS_CTIME mask bit is set.
//  - ATTR_MODE: sticky/setuid bits are inspected, then the mode is translated.
// A NULL iattr, inode or attrs is reported through gossip_err().
// NOTE(review): the iattr parameter declaration, the tmp_mode declaration and
// every return statement are among the original lines missing from this
// listing. The debug strings state that EINVAL is returned for a sticky bit on
// a non-root directory and for setuid -- confirm against the full file.
134 * NOTE: in kernel land, we never use the sys_attr->link_target for
135 * anything, so don't bother copying it into the sys_attr object here.
137 static inline int copy_attributes_from_inode(struct inode
*inode
,
138 struct ORANGEFS_sys_attr_s
*attrs
,
143 if (!iattr
|| !inode
|| !attrs
) {
144 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
145 "in copy_attributes_from_inode!\n",
152 * We need to be careful to only copy the attributes out of the
153 * iattr object that we know are valid.
156 if (iattr
->ia_valid
& ATTR_UID
) {
157 attrs
->owner
= from_kuid(&init_user_ns
, iattr
->ia_uid
);
158 attrs
->mask
|= ORANGEFS_ATTR_SYS_UID
;
159 gossip_debug(GOSSIP_UTILS_DEBUG
, "(UID) %d\n", attrs
->owner
);
161 if (iattr
->ia_valid
& ATTR_GID
) {
162 attrs
->group
= from_kgid(&init_user_ns
, iattr
->ia_gid
);
163 attrs
->mask
|= ORANGEFS_ATTR_SYS_GID
;
164 gossip_debug(GOSSIP_UTILS_DEBUG
, "(GID) %d\n", attrs
->group
);
167 if (iattr
->ia_valid
& ATTR_ATIME
) {
168 attrs
->mask
|= ORANGEFS_ATTR_SYS_ATIME
;
169 if (iattr
->ia_valid
& ATTR_ATIME_SET
) {
170 attrs
->atime
= (time64_t
)iattr
->ia_atime
.tv_sec
;
171 attrs
->mask
|= ORANGEFS_ATTR_SYS_ATIME_SET
;
174 if (iattr
->ia_valid
& ATTR_MTIME
) {
175 attrs
->mask
|= ORANGEFS_ATTR_SYS_MTIME
;
176 if (iattr
->ia_valid
& ATTR_MTIME_SET
) {
177 attrs
->mtime
= (time64_t
)iattr
->ia_mtime
.tv_sec
;
178 attrs
->mask
|= ORANGEFS_ATTR_SYS_MTIME_SET
;
181 if (iattr
->ia_valid
& ATTR_CTIME
)
182 attrs
->mask
|= ORANGEFS_ATTR_SYS_CTIME
;
185 * ORANGEFS cannot set size with a setattr operation. Probably not likely
186 * to be requested through the VFS, but just in case, don't worry about
// Mode changes: the sticky (S_ISVTX) and setuid (S_ISUID) bits are examined
// first, then the remaining mode is translated with
// ORANGEFS_util_translate_mode() and flagged via ORANGEFS_ATTR_SYS_PERM.
190 if (iattr
->ia_valid
& ATTR_MODE
) {
191 tmp_mode
= iattr
->ia_mode
;
192 if (tmp_mode
& (S_ISVTX
)) {
193 if (is_root_handle(inode
)) {
195 * allow sticky bit to be set on root (since
196 * it shows up that way by default anyhow),
197 * but don't show it to the server
201 gossip_debug(GOSSIP_UTILS_DEBUG
,
202 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
207 if (tmp_mode
& (S_ISUID
)) {
208 gossip_debug(GOSSIP_UTILS_DEBUG
,
209 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
213 attrs
->perms
= ORANGEFS_util_translate_mode(tmp_mode
);
214 attrs
->mask
|= ORANGEFS_ATTR_SYS_PERM
;
// orangefs_inode_type: classifies an ORANGEFS object type by comparing objtype
// against ORANGEFS_TYPE_METAFILE, ORANGEFS_TYPE_DIRECTORY and
// ORANGEFS_TYPE_SYMLINK in turn.
// NOTE(review): the values returned by each branch are not visible in this
// listing. orangefs_inode_is_stale compares the result against -1 and S_IFLNK
// and masks it against inode->i_mode, so presumably this yields an S_IF* file
// type bit for the three known types and -1 otherwise -- confirm.
220 static int orangefs_inode_type(enum orangefs_ds_type objtype
)
222 if (objtype
== ORANGEFS_TYPE_METAFILE
)
224 else if (objtype
== ORANGEFS_TYPE_DIRECTORY
)
226 else if (objtype
== ORANGEFS_TYPE_SYMLINK
)
// orangefs_inode_is_stale: checks freshly fetched attributes against the
// in-memory inode and marks the inode bad (orangefs_make_bad_inode) when
//  - the server-reported object type maps to -1 or does not match any type
//    bit in inode->i_mode, or
//  - the object is a symlink (S_IFLNK) and its link target differs from the
//    cached orangefs_inode->link_target within ORANGEFS_NAME_MAX bytes.
// NOTE(review): the return values and any use of the `new` argument are on
// original lines missing from this listing -- confirm against the full file.
232 static int orangefs_inode_is_stale(struct inode
*inode
, int new,
233 struct ORANGEFS_sys_attr_s
*attrs
, char *link_target
)
235 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
236 int type
= orangefs_inode_type(attrs
->objtype
);
239 * If the inode type or symlink target have changed then this
242 if (type
== -1 || !(inode
->i_mode
& type
)) {
243 orangefs_make_bad_inode(inode
);
// strncmp() returns non-zero when the two targets differ.
246 if (type
== S_IFLNK
&& strncmp(orangefs_inode
->link_target
,
247 link_target
, ORANGEFS_NAME_MAX
)) {
248 orangefs_make_bad_inode(inode
);
// orangefs_inode_getattr: refreshes the attributes of `inode` through an
// ORANGEFS_VFS_OP_GETATTR upcall and copies the reply into the VFS inode.
// Visible steps:
//  1. When neither `new` nor `bypass` is set, the cached attributes are
//     checked: request_mask must be covered by getattr_mask and jiffies must
//     still be before getattr_time.
//  2. An op is allocated; the requested mask is ORANGEFS_ATTR_SYS_ALL_NOHINT,
//     minus ORANGEFS_ATTR_SYS_SIZE unless STATX_SIZE was requested or `new`.
//  3. After service_operation(), the object type is derived and the reply is
//     passed to orangefs_inode_is_stale().
//  4. i_flags, size/block accounting, uid/gid, timestamps (seconds only,
//     nanoseconds zeroed) and i_mode are written into the inode.
//  5. getattr_time and getattr_mask are updated to describe the cache.
// NOTE(review): many original lines are missing from this listing (the
// request_mask parameter, local declarations, the per-type switch labels,
// else keywords, error paths, op release and returns); statements below are
// documented only as far as they are visible.
255 int orangefs_inode_getattr(struct inode
*inode
, int new, int bypass
,
258 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
259 struct orangefs_kernel_op_s
*new_op
;
260 loff_t inode_size
, rounded_up_size
;
263 gossip_debug(GOSSIP_UTILS_DEBUG
, "%s: called on inode %pU\n", __func__
,
264 get_khandle_from_ino(inode
));
// Cached-attribute path: only considered when neither `new` nor `bypass` is set.
266 if (!new && !bypass
) {
268 * Must have all the attributes in the mask and be within cache
271 if ((request_mask
& orangefs_inode
->getattr_mask
) ==
273 time_before(jiffies
, orangefs_inode
->getattr_time
))
277 new_op
= op_alloc(ORANGEFS_VFS_OP_GETATTR
);
280 new_op
->upcall
.req
.getattr
.refn
= orangefs_inode
->refn
;
282 * Size is the hardest attribute to get. The incremental cost of any
283 * other attribute is essentially zero.
285 if (request_mask
& STATX_SIZE
|| new)
286 new_op
->upcall
.req
.getattr
.mask
= ORANGEFS_ATTR_SYS_ALL_NOHINT
;
288 new_op
->upcall
.req
.getattr
.mask
=
289 ORANGEFS_ATTR_SYS_ALL_NOHINT
& ~ORANGEFS_ATTR_SYS_SIZE
;
291 ret
= service_operation(new_op
, __func__
,
292 get_interruptible_flag(inode
));
296 type
= orangefs_inode_type(new_op
->
297 downcall
.resp
.getattr
.attributes
.objtype
);
298 ret
= orangefs_inode_is_stale(inode
, new,
299 &new_op
->downcall
.resp
.getattr
.attributes
,
300 new_op
->downcall
.resp
.getattr
.link_target
);
308 inode
->i_flags
= orangefs_inode_flags(&new_op
->
309 downcall
.resp
.getattr
.attributes
);
// First size-handling section: uses the size reported by the server.
310 if (request_mask
& STATX_SIZE
|| new) {
311 inode_size
= (loff_t
)new_op
->
312 downcall
.resp
.getattr
.attributes
.size
;
// NOTE(review): the assignment target of the next expression is on a missing
// line (presumably rounded_up_size, which is divided by 512 further down).
// As written, the expression adds a full 4096 when inode_size is already a
// multiple of 4096 -- confirm that this over-rounding is intended.
314 (inode_size
+ (4096 - (inode_size
% 4096)));
315 inode
->i_size
= inode_size
;
316 orangefs_inode
->blksize
=
317 new_op
->downcall
.resp
.getattr
.attributes
.blksize
;
// i_bytes (and the block count computed from rounded_up_size / 512) are
// updated while holding inode->i_lock.
318 spin_lock(&inode
->i_lock
);
319 inode
->i_bytes
= inode_size
;
321 (unsigned long)(rounded_up_size
/ 512);
322 spin_unlock(&inode
->i_lock
);
// Second size-handling section: size is fixed at PAGE_SIZE and the block size
// comes from i_blocksize(). NOTE(review): the label selecting this section is
// not visible in this listing.
326 if (request_mask
& STATX_SIZE
|| new) {
327 inode
->i_size
= PAGE_SIZE
;
328 orangefs_inode
->blksize
= i_blocksize(inode
);
329 spin_lock(&inode
->i_lock
);
330 inode_set_bytes(inode
, inode
->i_size
);
331 spin_unlock(&inode
->i_lock
);
// Third size-handling section: size is the length of the returned link target,
// which is also copied into orangefs_inode->link_target and exposed via i_link.
// NOTE(review): the size argument of strscpy() and the handling of its return
// value are on missing lines.
337 inode
->i_size
= (loff_t
)strlen(new_op
->
338 downcall
.resp
.getattr
.link_target
);
339 orangefs_inode
->blksize
= i_blocksize(inode
);
340 ret
= strscpy(orangefs_inode
->link_target
,
341 new_op
->downcall
.resp
.getattr
.link_target
,
347 inode
->i_link
= orangefs_inode
->link_target
;
// Ownership is mapped into kernel ids relative to init_user_ns.
352 inode
->i_uid
= make_kuid(&init_user_ns
, new_op
->
353 downcall
.resp
.getattr
.attributes
.owner
);
354 inode
->i_gid
= make_kgid(&init_user_ns
, new_op
->
355 downcall
.resp
.getattr
.attributes
.group
);
// Timestamps: only whole seconds are taken from the reply; the nanosecond
// fields are zeroed below.
356 inode
->i_atime
.tv_sec
= (time64_t
)new_op
->
357 downcall
.resp
.getattr
.attributes
.atime
;
358 inode
->i_mtime
.tv_sec
= (time64_t
)new_op
->
359 downcall
.resp
.getattr
.attributes
.mtime
;
360 inode
->i_ctime
.tv_sec
= (time64_t
)new_op
->
361 downcall
.resp
.getattr
.attributes
.ctime
;
362 inode
->i_atime
.tv_nsec
= 0;
363 inode
->i_mtime
.tv_nsec
= 0;
364 inode
->i_ctime
.tv_nsec
= 0;
366 /* special case: mark the root inode as sticky */
367 inode
->i_mode
= type
| (is_root_handle(inode
) ? S_ISVTX
: 0) |
368 orangefs_inode_perms(&new_op
->downcall
.resp
.getattr
.attributes
);
// Cache expiry: now plus orangefs_getattr_timeout_msecs converted to jiffies
// (msecs * HZ / 1000).
370 orangefs_inode
->getattr_time
= jiffies
+
371 orangefs_getattr_timeout_msecs
*HZ
/1000;
// Record which attributes are cached; the second assignment (without
// STATX_SIZE) covers the case where size was not fetched.
372 if (request_mask
& STATX_SIZE
|| new)
373 orangefs_inode
->getattr_mask
= STATX_BASIC_STATS
;
375 orangefs_inode
->getattr_mask
= STATX_BASIC_STATS
& ~STATX_SIZE
;
// orangefs_inode_check_changed: asks the server only for the object type and
// link target (ORANGEFS_ATTR_SYS_TYPE | ORANGEFS_ATTR_SYS_LNK_TARGET) via an
// ORANGEFS_VFS_OP_GETATTR upcall, then hands the reply to
// orangefs_inode_is_stale() with new == 0.
// NOTE(review): allocation-failure handling, the check of the
// service_operation() result, op release and the return statement are on
// original lines missing from this listing.
382 int orangefs_inode_check_changed(struct inode
*inode
)
384 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
385 struct orangefs_kernel_op_s
*new_op
;
388 gossip_debug(GOSSIP_UTILS_DEBUG
, "%s: called on inode %pU\n", __func__
,
389 get_khandle_from_ino(inode
));
391 new_op
= op_alloc(ORANGEFS_VFS_OP_GETATTR
);
394 new_op
->upcall
.req
.getattr
.refn
= orangefs_inode
->refn
;
395 new_op
->upcall
.req
.getattr
.mask
= ORANGEFS_ATTR_SYS_TYPE
|
396 ORANGEFS_ATTR_SYS_LNK_TARGET
;
398 ret
= service_operation(new_op
, __func__
,
399 get_interruptible_flag(inode
));
403 ret
= orangefs_inode_is_stale(inode
, 0,
404 &new_op
->downcall
.resp
.getattr
.attributes
,
405 new_op
->downcall
.resp
.getattr
.link_target
);
// orangefs_inode_setattr: builds an ORANGEFS_VFS_OP_SETATTR upcall for `inode`
// (attributes filled in by copy_attributes_from_inode()), sends it with
// service_operation(), then clears the per-inode atime/mtime/ctime/mode dirty
// flags and invalidates the cached getattr result.
// NOTE(review): allocation-failure and error handling, the condition guarding
// the flag clearing, op release and the return are on original lines missing
// from this listing; the surviving comment fragments say the flags are cleared
// after a successful setattr and that the function returns 0 or -errno.
412 * issues a orangefs setattr request to make sure the new attribute values
413 * take effect if successful. returns 0 on success; -errno otherwise
415 int orangefs_inode_setattr(struct inode
*inode
, struct iattr
*iattr
)
417 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
418 struct orangefs_kernel_op_s
*new_op
;
421 new_op
= op_alloc(ORANGEFS_VFS_OP_SETATTR
);
425 new_op
->upcall
.req
.setattr
.refn
= orangefs_inode
->refn
;
426 ret
= copy_attributes_from_inode(inode
,
427 &new_op
->upcall
.req
.setattr
.attributes
,
430 ret
= service_operation(new_op
, __func__
,
431 get_interruptible_flag(inode
));
433 gossip_debug(GOSSIP_UTILS_DEBUG
,
434 "orangefs_inode_setattr: returning %d\n",
441 * successful setattr should clear the atime, mtime and
445 ClearAtimeFlag(orangefs_inode
);
446 ClearMtimeFlag(orangefs_inode
);
447 ClearCtimeFlag(orangefs_inode
);
448 ClearModeFlag(orangefs_inode
);
// Back-date getattr_time so that the time_before(jiffies, getattr_time) cache
// test in orangefs_inode_getattr no longer passes.
449 orangefs_inode
->getattr_time
= jiffies
- 1;
455 int orangefs_flush_inode(struct inode
*inode
)
458 * If it is a dirty inode, this function gets called.
459 * Gather all the information that needs to be setattr'ed
460 * Right now, this will only be used for mode, atime, mtime
// orangefs_flush_inode: writes back lazily tracked attribute changes.
// It zeroes a local iattr (wbattr), reads and then clears the per-inode
// mtime/ctime/atime/mode flags, turns them into ATTR_MTIME / ATTR_CTIME /
// ATTR_ATIME / ATTR_MODE bits in wbattr.ia_valid (with ia_mode taken from
// inode->i_mode), and calls orangefs_inode_setattr() with the result.
// When ia_valid ends up 0 a debug message reports that setattr is skipped.
// NOTE(review): the local declarations, the `if (<flag>)` tests guarding each
// ia_valid update, and the return statements are on original lines missing
// from this listing -- confirm against the full file.
469 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
471 memset(&wbattr
, 0, sizeof(wbattr
));
474 * check inode flags up front, and clear them if they are set. This
475 * will prevent multiple processes from all trying to flush the same
476 * inode if they call close() simultaneously
478 mtime_flag
= MtimeFlag(orangefs_inode
);
479 ClearMtimeFlag(orangefs_inode
);
480 ctime_flag
= CtimeFlag(orangefs_inode
);
481 ClearCtimeFlag(orangefs_inode
);
482 atime_flag
= AtimeFlag(orangefs_inode
);
483 ClearAtimeFlag(orangefs_inode
);
484 mode_flag
= ModeFlag(orangefs_inode
);
485 ClearModeFlag(orangefs_inode
);
487 /* -- Lazy atime,mtime and ctime update --
488 * Note: all times are dictated by server in the new scheme
489 * and not by the clients
491 * Also mode updates are being handled now..
495 wbattr
.ia_valid
|= ATTR_MTIME
;
497 wbattr
.ia_valid
|= ATTR_CTIME
;
499 wbattr
.ia_valid
|= ATTR_ATIME
;
502 wbattr
.ia_mode
= inode
->i_mode
;
503 wbattr
.ia_valid
|= ATTR_MODE
;
506 gossip_debug(GOSSIP_UTILS_DEBUG
,
507 "*********** orangefs_flush_inode: %pU "
509 get_khandle_from_ino(inode
),
511 if (wbattr
.ia_valid
== 0) {
512 gossip_debug(GOSSIP_UTILS_DEBUG
,
513 "orangefs_flush_inode skipping setattr()\n");
517 gossip_debug(GOSSIP_UTILS_DEBUG
,
518 "orangefs_flush_inode (%pU) writing mode %o\n",
519 get_khandle_from_ino(inode
),
522 ret
= orangefs_inode_setattr(inode
, &wbattr
);
// orangefs_make_bad_inode: marks `inode` bad via make_bad_inode(), except for
// the root handle, which is only logged ("NOT making bad root inode").
// NOTE(review): the `else` separating the two branches is on an original line
// missing from this listing; the two debug messages imply that
// make_bad_inode() is reached only for non-root inodes -- confirm.
527 void orangefs_make_bad_inode(struct inode
*inode
)
529 if (is_root_handle(inode
)) {
531 * if this occurs, the pvfs2-client-core was killed but we
532 * can't afford to lose the inode operations and such
533 * associated with the root handle in any case.
535 gossip_debug(GOSSIP_UTILS_DEBUG
,
536 "*** NOT making bad root inode %pU\n",
537 get_khandle_from_ino(inode
));
539 gossip_debug(GOSSIP_UTILS_DEBUG
,
540 "*** making bad inode %pU\n",
541 get_khandle_from_ino(inode
));
542 make_bad_inode(inode
);
547 * The following is a very dirty hack that is now a permanent part of the
548 * ORANGEFS protocol. See protocol.h for more error definitions.
551 /* The order matches include/orangefs-types.h in the OrangeFS source. */
// PINT_errno_mapping: lookup table of Linux errno constants. It is indexed in
// orangefs_normalize_to_errno() by the ORANGEFS error number left after the
// error and class bits are masked off; the selected entry is negated there.
// Entry 0 is 0. The entry order is part of the wire protocol (see the comment
// fragment preceding this table), so entries must not be reordered.
// NOTE(review): the closing `};` is on an original line missing from this
// listing, so the table may continue past ERANGE -- confirm.
552 static int PINT_errno_mapping
[] = {
553 0, EPERM
, ENOENT
, EINTR
, EIO
, ENXIO
, EBADF
, EAGAIN
, ENOMEM
,
554 EFAULT
, EBUSY
, EEXIST
, ENODEV
, ENOTDIR
, EISDIR
, EINVAL
, EMFILE
,
555 EFBIG
, ENOSPC
, EROFS
, EMLINK
, EPIPE
, EDEADLK
, ENAMETOOLONG
,
556 ENOLCK
, ENOSYS
, ENOTEMPTY
, ELOOP
, EWOULDBLOCK
, ENOMSG
, EUNATCH
,
557 EBADR
, EDEADLOCK
, ENODATA
, ETIME
, ENONET
, EREMOTE
, ECOMM
,
558 EPROTO
, EBADMSG
, EOVERFLOW
, ERESTART
, EMSGSIZE
, EPROTOTYPE
,
559 ENOPROTOOPT
, EPROTONOSUPPORT
, EOPNOTSUPP
, EADDRINUSE
,
560 EADDRNOTAVAIL
, ENETDOWN
, ENETUNREACH
, ENETRESET
, ENOBUFS
,
561 ETIMEDOUT
, ECONNREFUSED
, EHOSTDOWN
, EHOSTUNREACH
, EALREADY
,
562 EACCES
, ECONNRESET
, ERANGE
// orangefs_normalize_to_errno: rewrites an ORANGEFS protocol error code as a
// negative Linux errno value.
//  - error_code == 0 is tested first (its handling is not visible here).
//  - A positive code is logged as unexpected and negated.
//  - If ORANGEFS_NON_ERRNO_ERROR_BIT is set in -error_code: a value equal to
//    ORANGEFS_ECANCEL (after masking with the number, non-errno and error
//    bits) becomes -ETIMEDOUT; the other visible path logs the code and
//    substitutes -EINVAL.
//  - Else if ORANGEFS_ERROR_BIT is set: the error and class bits are masked
//    off and the remainder indexes PINT_errno_mapping[] when it is within the
//    table; the other visible assignment substitutes -EINVAL.
//  - The final visible path logs that the code did not come from ORANGEFS.
// NOTE(review): local declarations, `else` keywords, part of the ECANCEL test
// and the return statement are on original lines missing from this listing.
565 int orangefs_normalize_to_errno(__s32 error_code
)
570 if (error_code
== 0) {
573 * This shouldn't ever happen. If it does it should be fixed on the
576 } else if (error_code
> 0) {
577 gossip_err("orangefs: error status receieved.\n");
578 gossip_err("orangefs: assuming error code is inverted.\n");
579 error_code
= -error_code
;
583 * XXX: This is very bad since error codes from ORANGEFS may not be
584 * suitable for return into userspace.
588 * Convert ORANGEFS error values into errno values suitable for return
591 if ((-error_code
) & ORANGEFS_NON_ERRNO_ERROR_BIT
) {
593 (ORANGEFS_ERROR_NUMBER_BITS
|ORANGEFS_NON_ERRNO_ERROR_BIT
|
594 ORANGEFS_ERROR_BIT
)) == ORANGEFS_ECANCEL
) {
596 * cancellation error codes generally correspond to
597 * a timeout from the client's perspective
599 error_code
= -ETIMEDOUT
;
601 /* assume a default error code */
602 gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code
);
603 error_code
= -EINVAL
;
606 /* Convert ORANGEFS encoded errno values into regular errno values. */
607 } else if ((-error_code
) & ORANGEFS_ERROR_BIT
) {
608 i
= (-error_code
) & ~(ORANGEFS_ERROR_BIT
|ORANGEFS_ERROR_CLASS_BITS
);
// Bounds check: the index must be below the element count of the table.
609 if (i
< sizeof(PINT_errno_mapping
)/sizeof(*PINT_errno_mapping
))
610 error_code
= -PINT_errno_mapping
[i
];
612 error_code
= -EINVAL
;
615 * Only ORANGEFS protocol error codes should ever come here. Otherwise
616 * there is a bug somewhere.
619 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
// ORANGEFS_util_translate_mode: converts a Linux mode word into ORANGEFS
// permission bits using two parallel tables of NUM_MODES entries: modes[]
// holds S_I* bits and orangefs_modes[] holds the ORANGEFS_* bit at the same
// index. The loop walks both tables and ORs orangefs_modes[i] into ret.
// NOTE(review): the last row of modes[], the test guarding the OR inside the
// loop (presumably `mode & modes[i]`), the declarations of ret and i, and the
// return statement are not visible in this listing -- confirm against the
// full file. The two tables must stay in the same order.
625 __s32
ORANGEFS_util_translate_mode(int mode
)
629 static int modes
[NUM_MODES
] = {
630 S_IXOTH
, S_IWOTH
, S_IROTH
,
631 S_IXGRP
, S_IWGRP
, S_IRGRP
,
632 S_IXUSR
, S_IWUSR
, S_IRUSR
,
635 static int orangefs_modes
[NUM_MODES
] = {
636 ORANGEFS_O_EXECUTE
, ORANGEFS_O_WRITE
, ORANGEFS_O_READ
,
637 ORANGEFS_G_EXECUTE
, ORANGEFS_G_WRITE
, ORANGEFS_G_READ
,
638 ORANGEFS_U_EXECUTE
, ORANGEFS_U_WRITE
, ORANGEFS_U_READ
,
639 ORANGEFS_G_SGID
, ORANGEFS_U_SUID
642 for (i
= 0; i
< NUM_MODES
; i
++)
644 ret
|= orangefs_modes
[i
];