2 * (C) 2001 Clemson University and The University of Chicago
4 * See COPYING in top-level directory.
7 #include "orangefs-kernel.h"
8 #include "orangefs-dev-proto.h"
9 #include "orangefs-bufmap.h"
11 __s32
fsid_of_op(struct orangefs_kernel_op_s
*op
)
13 __s32 fsid
= ORANGEFS_FS_ID_NULL
;
16 switch (op
->upcall
.type
) {
17 case ORANGEFS_VFS_OP_FILE_IO
:
18 fsid
= op
->upcall
.req
.io
.refn
.fs_id
;
20 case ORANGEFS_VFS_OP_LOOKUP
:
21 fsid
= op
->upcall
.req
.lookup
.parent_refn
.fs_id
;
23 case ORANGEFS_VFS_OP_CREATE
:
24 fsid
= op
->upcall
.req
.create
.parent_refn
.fs_id
;
26 case ORANGEFS_VFS_OP_GETATTR
:
27 fsid
= op
->upcall
.req
.getattr
.refn
.fs_id
;
29 case ORANGEFS_VFS_OP_REMOVE
:
30 fsid
= op
->upcall
.req
.remove
.parent_refn
.fs_id
;
32 case ORANGEFS_VFS_OP_MKDIR
:
33 fsid
= op
->upcall
.req
.mkdir
.parent_refn
.fs_id
;
35 case ORANGEFS_VFS_OP_READDIR
:
36 fsid
= op
->upcall
.req
.readdir
.refn
.fs_id
;
38 case ORANGEFS_VFS_OP_SETATTR
:
39 fsid
= op
->upcall
.req
.setattr
.refn
.fs_id
;
41 case ORANGEFS_VFS_OP_SYMLINK
:
42 fsid
= op
->upcall
.req
.sym
.parent_refn
.fs_id
;
44 case ORANGEFS_VFS_OP_RENAME
:
45 fsid
= op
->upcall
.req
.rename
.old_parent_refn
.fs_id
;
47 case ORANGEFS_VFS_OP_STATFS
:
48 fsid
= op
->upcall
.req
.statfs
.fs_id
;
50 case ORANGEFS_VFS_OP_TRUNCATE
:
51 fsid
= op
->upcall
.req
.truncate
.refn
.fs_id
;
53 case ORANGEFS_VFS_OP_RA_FLUSH
:
54 fsid
= op
->upcall
.req
.ra_cache_flush
.refn
.fs_id
;
56 case ORANGEFS_VFS_OP_FS_UMOUNT
:
57 fsid
= op
->upcall
.req
.fs_umount
.fs_id
;
59 case ORANGEFS_VFS_OP_GETXATTR
:
60 fsid
= op
->upcall
.req
.getxattr
.refn
.fs_id
;
62 case ORANGEFS_VFS_OP_SETXATTR
:
63 fsid
= op
->upcall
.req
.setxattr
.refn
.fs_id
;
65 case ORANGEFS_VFS_OP_LISTXATTR
:
66 fsid
= op
->upcall
.req
.listxattr
.refn
.fs_id
;
68 case ORANGEFS_VFS_OP_REMOVEXATTR
:
69 fsid
= op
->upcall
.req
.removexattr
.refn
.fs_id
;
71 case ORANGEFS_VFS_OP_FSYNC
:
72 fsid
= op
->upcall
.req
.fsync
.refn
.fs_id
;
81 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s
*attrs
)
84 if (attrs
->flags
& ORANGEFS_IMMUTABLE_FL
)
87 flags
&= ~S_IMMUTABLE
;
88 if (attrs
->flags
& ORANGEFS_APPEND_FL
)
92 if (attrs
->flags
& ORANGEFS_NOATIME_FL
)
99 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s
*attrs
)
103 if (attrs
->perms
& ORANGEFS_O_EXECUTE
)
104 perm_mode
|= S_IXOTH
;
105 if (attrs
->perms
& ORANGEFS_O_WRITE
)
106 perm_mode
|= S_IWOTH
;
107 if (attrs
->perms
& ORANGEFS_O_READ
)
108 perm_mode
|= S_IROTH
;
110 if (attrs
->perms
& ORANGEFS_G_EXECUTE
)
111 perm_mode
|= S_IXGRP
;
112 if (attrs
->perms
& ORANGEFS_G_WRITE
)
113 perm_mode
|= S_IWGRP
;
114 if (attrs
->perms
& ORANGEFS_G_READ
)
115 perm_mode
|= S_IRGRP
;
117 if (attrs
->perms
& ORANGEFS_U_EXECUTE
)
118 perm_mode
|= S_IXUSR
;
119 if (attrs
->perms
& ORANGEFS_U_WRITE
)
120 perm_mode
|= S_IWUSR
;
121 if (attrs
->perms
& ORANGEFS_U_READ
)
122 perm_mode
|= S_IRUSR
;
124 if (attrs
->perms
& ORANGEFS_G_SGID
)
125 perm_mode
|= S_ISGID
;
126 if (attrs
->perms
& ORANGEFS_U_SUID
)
127 perm_mode
|= S_ISUID
;
133 * NOTE: in kernel land, we never use the sys_attr->link_target for
134 * anything, so don't bother copying it into the sys_attr object here.
136 static inline int copy_attributes_from_inode(struct inode
*inode
,
137 struct ORANGEFS_sys_attr_s
*attrs
,
142 if (!iattr
|| !inode
|| !attrs
) {
143 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
144 "in copy_attributes_from_inode!\n",
151 * We need to be careful to only copy the attributes out of the
152 * iattr object that we know are valid.
155 if (iattr
->ia_valid
& ATTR_UID
) {
156 attrs
->owner
= from_kuid(&init_user_ns
, iattr
->ia_uid
);
157 attrs
->mask
|= ORANGEFS_ATTR_SYS_UID
;
158 gossip_debug(GOSSIP_UTILS_DEBUG
, "(UID) %d\n", attrs
->owner
);
160 if (iattr
->ia_valid
& ATTR_GID
) {
161 attrs
->group
= from_kgid(&init_user_ns
, iattr
->ia_gid
);
162 attrs
->mask
|= ORANGEFS_ATTR_SYS_GID
;
163 gossip_debug(GOSSIP_UTILS_DEBUG
, "(GID) %d\n", attrs
->group
);
166 if (iattr
->ia_valid
& ATTR_ATIME
) {
167 attrs
->mask
|= ORANGEFS_ATTR_SYS_ATIME
;
168 if (iattr
->ia_valid
& ATTR_ATIME_SET
) {
169 attrs
->atime
= (time64_t
)iattr
->ia_atime
.tv_sec
;
170 attrs
->mask
|= ORANGEFS_ATTR_SYS_ATIME_SET
;
173 if (iattr
->ia_valid
& ATTR_MTIME
) {
174 attrs
->mask
|= ORANGEFS_ATTR_SYS_MTIME
;
175 if (iattr
->ia_valid
& ATTR_MTIME_SET
) {
176 attrs
->mtime
= (time64_t
)iattr
->ia_mtime
.tv_sec
;
177 attrs
->mask
|= ORANGEFS_ATTR_SYS_MTIME_SET
;
180 if (iattr
->ia_valid
& ATTR_CTIME
)
181 attrs
->mask
|= ORANGEFS_ATTR_SYS_CTIME
;
184 * ORANGEFS cannot set size with a setattr operation. Probably not likely
185 * to be requested through the VFS, but just in case, don't worry about
189 if (iattr
->ia_valid
& ATTR_MODE
) {
190 tmp_mode
= iattr
->ia_mode
;
191 if (tmp_mode
& (S_ISVTX
)) {
192 if (is_root_handle(inode
)) {
194 * allow sticky bit to be set on root (since
195 * it shows up that way by default anyhow),
196 * but don't show it to the server
200 gossip_debug(GOSSIP_UTILS_DEBUG
,
201 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
206 if (tmp_mode
& (S_ISUID
)) {
207 gossip_debug(GOSSIP_UTILS_DEBUG
,
208 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
212 attrs
->perms
= ORANGEFS_util_translate_mode(tmp_mode
);
213 attrs
->mask
|= ORANGEFS_ATTR_SYS_PERM
;
219 static int orangefs_inode_type(enum orangefs_ds_type objtype
)
221 if (objtype
== ORANGEFS_TYPE_METAFILE
)
223 else if (objtype
== ORANGEFS_TYPE_DIRECTORY
)
225 else if (objtype
== ORANGEFS_TYPE_SYMLINK
)
/*
 * orangefs_inode_is_stale() - compare an inode with freshly fetched
 * attributes.
 *
 * Derives a file type from attrs->objtype and calls
 * orangefs_make_bad_inode() when that type is unknown (-1) or none of its
 * bits are set in inode->i_mode, and again when the type is S_IFLNK and the
 * cached orangefs_inode->link_target differs from link_target (strncmp over
 * at most ORANGEFS_NAME_MAX bytes).
 *
 * NOTE(review): this listing is missing lines (braces, return statements
 * and any use of the `new` argument), so the return values and the effect
 * of `new` cannot be stated from here - confirm against the original file.
 */
231 static int orangefs_inode_is_stale(struct inode
*inode
, int new,
232 struct ORANGEFS_sys_attr_s
*attrs
, char *link_target
)
234 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
235 int type
= orangefs_inode_type(attrs
->objtype
);
238 * If the inode type or symlink target have changed then this
/* unknown object type, or type bits not present in the cached i_mode */
241 if (type
== -1 || !(inode
->i_mode
& type
)) {
242 orangefs_make_bad_inode(inode
);
/* symlink whose cached target no longer matches the fetched one */
245 if (type
== S_IFLNK
&& strncmp(orangefs_inode
->link_target
,
246 link_target
, ORANGEFS_NAME_MAX
)) {
247 orangefs_make_bad_inode(inode
);
/*
 * orangefs_inode_getattr() - fetch attributes with a GETATTR upcall and
 * store them in the inode.
 *
 * Visible steps: when both `new` and `bypass` are zero, jiffies is compared
 * with orangefs_inode->getattr_time; a GETATTR op is allocated, given the
 * inode's refn and the mask ORANGEFS_ATTR_SYS_ALL_NOHINT, and passed to
 * service_operation(); the reply is checked with orangefs_inode_is_stale();
 * size, block, link, ownership, time and mode fields of the inode are then
 * filled from the reply and getattr_time is moved into the future.
 *
 * NOTE(review): this listing is missing lines (declarations of ret/type,
 * the statement taken when the cached attributes are still fresh, the
 * per-type dispatch, error handling and the release of new_op).  Comments
 * below describe only what is visible - confirm the rest against the
 * original file.
 */
254 int orangefs_inode_getattr(struct inode
*inode
, int new, int bypass
)
256 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
257 struct orangefs_kernel_op_s
*new_op
;
258 loff_t inode_size
, rounded_up_size
;
261 gossip_debug(GOSSIP_UTILS_DEBUG
, "%s: called on inode %pU\n", __func__
,
262 get_khandle_from_ino(inode
));
/* presumably skips the upcall while getattr_time has not passed - TODO confirm */
264 if (!new && !bypass
) {
265 if (time_before(jiffies
, orangefs_inode
->getattr_time
))
269 new_op
= op_alloc(ORANGEFS_VFS_OP_GETATTR
);
272 new_op
->upcall
.req
.getattr
.refn
= orangefs_inode
->refn
;
273 new_op
->upcall
.req
.getattr
.mask
= ORANGEFS_ATTR_SYS_ALL_NOHINT
;
275 ret
= service_operation(new_op
, __func__
,
276 get_interruptible_flag(inode
));
280 type
= orangefs_inode_type(new_op
->
281 downcall
.resp
.getattr
.attributes
.objtype
);
282 ret
= orangefs_inode_is_stale(inode
, new,
283 &new_op
->downcall
.resp
.getattr
.attributes
,
284 new_op
->downcall
.resp
.getattr
.link_target
);
/*
 * Size handling taken from the reply.  The rounding expression adds
 * 4096 - (size % 4096), so a size that is already a multiple of 4096 is
 * still increased by a full 4096.  The assignment targets of the two bare
 * expressions are missing from the listing (presumably rounded_up_size and
 * the inode block count - TODO confirm).
 */
292 inode
->i_flags
= orangefs_inode_flags(&new_op
->
293 downcall
.resp
.getattr
.attributes
);
294 inode_size
= (loff_t
)new_op
->
295 downcall
.resp
.getattr
.attributes
.size
;
297 (inode_size
+ (4096 - (inode_size
% 4096)));
298 inode
->i_size
= inode_size
;
299 orangefs_inode
->blksize
=
300 new_op
->downcall
.resp
.getattr
.attributes
.blksize
;
301 spin_lock(&inode
->i_lock
);
302 inode
->i_bytes
= inode_size
;
304 (unsigned long)(rounded_up_size
/ 512);
305 spin_unlock(&inode
->i_lock
);
/* fixed PAGE_SIZE size; which object type selects this is not visible here */
308 inode
->i_size
= PAGE_SIZE
;
309 orangefs_inode
->blksize
= (1 << inode
->i_blkbits
);
310 spin_lock(&inode
->i_lock
);
311 inode_set_bytes(inode
, inode
->i_size
);
312 spin_unlock(&inode
->i_lock
);
/* size is the length of the returned link target, which is also cached */
317 inode
->i_size
= (loff_t
)strlen(new_op
->
318 downcall
.resp
.getattr
.link_target
);
319 orangefs_inode
->blksize
= (1 << inode
->i_blkbits
);
320 ret
= strscpy(orangefs_inode
->link_target
,
321 new_op
->downcall
.resp
.getattr
.link_target
,
327 inode
->i_link
= orangefs_inode
->link_target
;
/* ownership and timestamps; only whole seconds are taken from the reply */
332 inode
->i_uid
= make_kuid(&init_user_ns
, new_op
->
333 downcall
.resp
.getattr
.attributes
.owner
);
334 inode
->i_gid
= make_kgid(&init_user_ns
, new_op
->
335 downcall
.resp
.getattr
.attributes
.group
);
336 inode
->i_atime
.tv_sec
= (time64_t
)new_op
->
337 downcall
.resp
.getattr
.attributes
.atime
;
338 inode
->i_mtime
.tv_sec
= (time64_t
)new_op
->
339 downcall
.resp
.getattr
.attributes
.mtime
;
340 inode
->i_ctime
.tv_sec
= (time64_t
)new_op
->
341 downcall
.resp
.getattr
.attributes
.ctime
;
342 inode
->i_atime
.tv_nsec
= 0;
343 inode
->i_mtime
.tv_nsec
= 0;
344 inode
->i_ctime
.tv_nsec
= 0;
346 /* special case: mark the root inode as sticky */
347 inode
->i_mode
= type
| (is_root_handle(inode
) ? S_ISVTX
: 0) |
348 orangefs_inode_perms(&new_op
->downcall
.resp
.getattr
.attributes
);
/* next refresh is due orangefs_getattr_timeout_msecs from now, in jiffies */
350 orangefs_inode
->getattr_time
= jiffies
+
351 orangefs_getattr_timeout_msecs
*HZ
/1000;
/*
 * orangefs_inode_check_changed() - ask the server for the object type and
 * link target only, and run the staleness check on the reply.
 *
 * Allocates a GETATTR op for the inode's refn with the mask
 * ORANGEFS_ATTR_SYS_TYPE | ORANGEFS_ATTR_SYS_LNK_TARGET, services it, and
 * passes the returned attributes and link target to
 * orangefs_inode_is_stale() with `new` set to 0.
 *
 * NOTE(review): this listing is missing lines (declaration of ret, error
 * handling, release of new_op and the return statement), so the return
 * value cannot be stated from here - confirm against the original file.
 */
358 int orangefs_inode_check_changed(struct inode
*inode
)
360 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
361 struct orangefs_kernel_op_s
*new_op
;
364 gossip_debug(GOSSIP_UTILS_DEBUG
, "%s: called on inode %pU\n", __func__
,
365 get_khandle_from_ino(inode
));
367 new_op
= op_alloc(ORANGEFS_VFS_OP_GETATTR
);
/* request only the fields the staleness check reads */
370 new_op
->upcall
.req
.getattr
.refn
= orangefs_inode
->refn
;
371 new_op
->upcall
.req
.getattr
.mask
= ORANGEFS_ATTR_SYS_TYPE
|
372 ORANGEFS_ATTR_SYS_LNK_TARGET
;
374 ret
= service_operation(new_op
, __func__
,
375 get_interruptible_flag(inode
));
379 ret
= orangefs_inode_is_stale(inode
, 0,
380 &new_op
->downcall
.resp
.getattr
.attributes
,
381 new_op
->downcall
.resp
.getattr
.link_target
);
388 * Issues an ORANGEFS setattr request so that the new attribute values
389 * take effect on success. Returns 0 on success; -errno otherwise.
/*
 * orangefs_inode_setattr() - send the attributes in @iattr to the server
 * with a SETATTR upcall.
 *
 * Allocates a SETATTR op for the inode's refn, fills its attribute object
 * through copy_attributes_from_inode(), and services the op.  The visible
 * tail clears the atime/mtime/ctime/mode flags on the orangefs inode and
 * sets getattr_time to jiffies - 1, a time already in the past, so the
 * time_before(jiffies, getattr_time) test in orangefs_inode_getattr() no
 * longer holds.
 *
 * NOTE(review): this listing is missing lines (declaration of ret, the
 * third argument of copy_attributes_from_inode(), error handling, release
 * of new_op, the condition guarding the flag clearing and the return
 * statement) - confirm against the original file.
 */
391 int orangefs_inode_setattr(struct inode
*inode
, struct iattr
*iattr
)
393 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
394 struct orangefs_kernel_op_s
*new_op
;
397 new_op
= op_alloc(ORANGEFS_VFS_OP_SETATTR
);
401 new_op
->upcall
.req
.setattr
.refn
= orangefs_inode
->refn
;
402 ret
= copy_attributes_from_inode(inode
,
403 &new_op
->upcall
.req
.setattr
.attributes
,
406 ret
= service_operation(new_op
, __func__
,
407 get_interruptible_flag(inode
));
409 gossip_debug(GOSSIP_UTILS_DEBUG
,
410 "orangefs_inode_setattr: returning %d\n",
417 * successful setattr should clear the atime, mtime and
421 ClearAtimeFlag(orangefs_inode
);
422 ClearMtimeFlag(orangefs_inode
);
423 ClearCtimeFlag(orangefs_inode
);
424 ClearModeFlag(orangefs_inode
);
/* expire the cached attributes so the next getattr goes to the server */
425 orangefs_inode
->getattr_time
= jiffies
- 1;
/*
 * orangefs_flush_inode() - write back pending mode/time changes of an
 * inode through orangefs_inode_setattr().
 *
 * Visible steps: wbattr is zeroed; the mtime, ctime, atime and mode flags
 * of the orangefs inode are each read into a local and then cleared;
 * ATTR_MTIME, ATTR_CTIME, ATTR_ATIME and (together with ia_mode taken from
 * inode->i_mode) ATTR_MODE are OR-ed into wbattr.ia_valid; a debug message
 * about skipping setattr() is logged inside the ia_valid == 0 branch; and
 * orangefs_inode_setattr(inode, &wbattr) is called.
 *
 * NOTE(review): this listing is missing lines (the declarations of wbattr,
 * ret and the four flag locals, the conditions that guard each ia_valid
 * update - presumably the saved flag values - and the return statements).
 * It also contains a comment opener whose terminator is missing, so no
 * further comments are added below it.  Confirm against the original file.
 */
431 int orangefs_flush_inode(struct inode
*inode
)
434 * If it is a dirty inode, this function gets called.
435 * Gather all the information that needs to be setattr'ed
436 * Right now, this will only be used for mode, atime, mtime
445 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
447 memset(&wbattr
, 0, sizeof(wbattr
));
450 * check inode flags up front, and clear them if they are set. This
451 * will prevent multiple processes from all trying to flush the same
452 * inode if they call close() simultaneously
454 mtime_flag
= MtimeFlag(orangefs_inode
);
455 ClearMtimeFlag(orangefs_inode
);
456 ctime_flag
= CtimeFlag(orangefs_inode
);
457 ClearCtimeFlag(orangefs_inode
);
458 atime_flag
= AtimeFlag(orangefs_inode
);
459 ClearAtimeFlag(orangefs_inode
);
460 mode_flag
= ModeFlag(orangefs_inode
);
461 ClearModeFlag(orangefs_inode
);
463 /* -- Lazy atime,mtime and ctime update --
464 * Note: all times are dictated by server in the new scheme
465 * and not by the clients
467 * Also mode updates are being handled now..
471 wbattr
.ia_valid
|= ATTR_MTIME
;
473 wbattr
.ia_valid
|= ATTR_CTIME
;
475 wbattr
.ia_valid
|= ATTR_ATIME
;
478 wbattr
.ia_mode
= inode
->i_mode
;
479 wbattr
.ia_valid
|= ATTR_MODE
;
482 gossip_debug(GOSSIP_UTILS_DEBUG
,
483 "*********** orangefs_flush_inode: %pU "
485 get_khandle_from_ino(inode
),
487 if (wbattr
.ia_valid
== 0) {
488 gossip_debug(GOSSIP_UTILS_DEBUG
,
489 "orangefs_flush_inode skipping setattr()\n");
493 gossip_debug(GOSSIP_UTILS_DEBUG
,
494 "orangefs_flush_inode (%pU) writing mode %o\n",
495 get_khandle_from_ino(inode
),
498 ret
= orangefs_inode_setattr(inode
, &wbattr
);
/*
 * orangefs_unmount_sb() - tell the client core that a superblock is being
 * unmounted.
 *
 * Allocates an FS_UMOUNT op, copies the id, fs_id and device name from
 * ORANGEFS_SB(sb) into the request, services the op under the name
 * "orangefs_fs_umount" with flags 0, logs the result, and sets
 * ORANGEFS_SB(sb)->mount_pending to 1.
 *
 * NOTE(review): this listing is missing lines (declaration of ret, the
 * allocation failure path, the condition under which mount_pending is set,
 * release of new_op and the return statement) - confirm against the
 * original file.
 */
503 int orangefs_unmount_sb(struct super_block
*sb
)
506 struct orangefs_kernel_op_s
*new_op
= NULL
;
508 gossip_debug(GOSSIP_UTILS_DEBUG
,
509 "orangefs_unmount_sb called on sb %p\n",
512 new_op
= op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT
);
515 new_op
->upcall
.req
.fs_umount
.id
= ORANGEFS_SB(sb
)->id
;
516 new_op
->upcall
.req
.fs_umount
.fs_id
= ORANGEFS_SB(sb
)->fs_id
;
/*
 * NOTE(review): strncpy() does not NUL-terminate the destination when the
 * source is at least ORANGEFS_MAX_SERVER_ADDR_LEN bytes long; the string is
 * printed with %s just below.  Presumably the request buffer is zeroed and
 * larger than any devname - verify, or switch to a terminating copy.
 */
517 strncpy(new_op
->upcall
.req
.fs_umount
.orangefs_config_server
,
518 ORANGEFS_SB(sb
)->devname
,
519 ORANGEFS_MAX_SERVER_ADDR_LEN
);
521 gossip_debug(GOSSIP_UTILS_DEBUG
,
522 "Attempting ORANGEFS Unmount via host %s\n",
523 new_op
->upcall
.req
.fs_umount
.orangefs_config_server
);
525 ret
= service_operation(new_op
, "orangefs_fs_umount", 0);
527 gossip_debug(GOSSIP_UTILS_DEBUG
,
528 "orangefs_unmount: got return value of %d\n", ret
);
532 ORANGEFS_SB(sb
)->mount_pending
= 1;
538 void orangefs_make_bad_inode(struct inode
*inode
)
540 if (is_root_handle(inode
)) {
542 * if this occurs, the pvfs2-client-core was killed but we
543 * can't afford to lose the inode operations and such
544 * associated with the root handle in any case.
546 gossip_debug(GOSSIP_UTILS_DEBUG
,
547 "*** NOT making bad root inode %pU\n",
548 get_khandle_from_ino(inode
));
550 gossip_debug(GOSSIP_UTILS_DEBUG
,
551 "*** making bad inode %pU\n",
552 get_khandle_from_ino(inode
));
553 make_bad_inode(inode
);
/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 *
 * The table is indexed by the error number carried in an ORANGEFS error
 * code and yields the matching errno value.  It is read-only, hence const.
 */

/* The order matches include/orangefs-types.h in the OrangeFS source. */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
576 int orangefs_normalize_to_errno(__s32 error_code
)
581 if (error_code
== 0) {
584 * This shouldn't ever happen. If it does it should be fixed on the
587 } else if (error_code
> 0) {
588 gossip_err("orangefs: error status receieved.\n");
589 gossip_err("orangefs: assuming error code is inverted.\n");
590 error_code
= -error_code
;
594 * XXX: This is very bad since error codes from ORANGEFS may not be
595 * suitable for return into userspace.
599 * Convert ORANGEFS error values into errno values suitable for return
602 if ((-error_code
) & ORANGEFS_NON_ERRNO_ERROR_BIT
) {
604 (ORANGEFS_ERROR_NUMBER_BITS
|ORANGEFS_NON_ERRNO_ERROR_BIT
|
605 ORANGEFS_ERROR_BIT
)) == ORANGEFS_ECANCEL
) {
607 * cancellation error codes generally correspond to
608 * a timeout from the client's perspective
610 error_code
= -ETIMEDOUT
;
612 /* assume a default error code */
613 gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code
);
614 error_code
= -EINVAL
;
617 /* Convert ORANGEFS encoded errno values into regular errno values. */
618 } else if ((-error_code
) & ORANGEFS_ERROR_BIT
) {
619 i
= (-error_code
) & ~(ORANGEFS_ERROR_BIT
|ORANGEFS_ERROR_CLASS_BITS
);
620 if (i
< sizeof(PINT_errno_mapping
)/sizeof(*PINT_errno_mapping
))
621 error_code
= -PINT_errno_mapping
[i
];
623 error_code
= -EINVAL
;
626 * Only ORANGEFS protocol error codes should ever come here. Otherwise
627 * there is a bug somewhere.
630 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
636 __s32
ORANGEFS_util_translate_mode(int mode
)
640 static int modes
[NUM_MODES
] = {
641 S_IXOTH
, S_IWOTH
, S_IROTH
,
642 S_IXGRP
, S_IWGRP
, S_IRGRP
,
643 S_IXUSR
, S_IWUSR
, S_IRUSR
,
646 static int orangefs_modes
[NUM_MODES
] = {
647 ORANGEFS_O_EXECUTE
, ORANGEFS_O_WRITE
, ORANGEFS_O_READ
,
648 ORANGEFS_G_EXECUTE
, ORANGEFS_G_WRITE
, ORANGEFS_G_READ
,
649 ORANGEFS_U_EXECUTE
, ORANGEFS_U_WRITE
, ORANGEFS_U_READ
,
650 ORANGEFS_G_SGID
, ORANGEFS_U_SUID
653 for (i
= 0; i
< NUM_MODES
; i
++)
655 ret
|= orangefs_modes
[i
];