// SPDX-License-Identifier: GPL-2.0
/*
 * (C) 2001 Clemson University and The University of Chicago
 * Copyright 2018 Omnibond Systems, L.L.C.
 *
 * See COPYING in top-level directory.
 */

/*
 *  Linux VFS inode operations.
 */

#include <linux/blkdev.h>
#include <linux/fileattr.h>
#include "orangefs-kernel.h"
#include "orangefs-bufmap.h"
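/*
 * A note on the write tracking used throughout this file: buffered writes
 * are tracked per page/folio by a struct orangefs_write_range attached as
 * the page private data.  Its definition lives in orangefs-kernel.h; the
 * shape below is only a sketch inferred from how it is used here:
 *
 *	struct orangefs_write_range {
 *		loff_t pos;	// file offset of the dirtied span
 *		size_t len;	// length of the dirtied span
 *		kuid_t uid;	// fsuid that dirtied it
 *		kgid_t gid;	// fsgid that dirtied it
 *	};
 *
 * Writeback converts each range into a synchronous server I/O through
 * wait_for_direct_io().
 */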
static int orangefs_writepage_locked(struct page *page,
	struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
	struct orangefs_write_range *wr = NULL;
	struct iov_iter iter;
	struct bio_vec bv;
	size_t wlen;
	ssize_t ret;
	loff_t len, off;

	set_page_writeback(page);

	len = i_size_read(inode);
	if (PagePrivate(page)) {
		wr = (struct orangefs_write_range *)page_private(page);
		WARN_ON(wr->pos >= len);
		off = wr->pos;
		/* write until the end of the range or the end of the file */
		if (off + wr->len > len)
			wlen = len - off;
		else
			wlen = wr->len;
	} else {
		WARN_ON(1);
		off = page_offset(page);
		if (off + PAGE_SIZE > len)
			wlen = len - off;
		else
			wlen = PAGE_SIZE;
	}
	/* Should've been handled in orangefs_invalidate_folio. */
	WARN_ON(off == len || off + wlen > len);

	bvec_set_page(&bv, page, wlen, off % PAGE_SIZE);
	iov_iter_bvec(&iter, ITER_SOURCE, &bv, 1, wlen);

	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter, wlen,
	    len, wr, NULL, NULL);
	if (ret < 0)
		mapping_set_error(page->mapping, ret);
	else
		ret = 0;

	kfree(detach_page_private(page));
	return ret;
}
static int orangefs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = orangefs_writepage_locked(page, wbc);
	unlock_page(page);
	end_page_writeback(page);
	return ret;
}
struct orangefs_writepages {
	loff_t off;
	size_t len;
	kuid_t uid;
	kgid_t gid;
	int maxpages;
	int npages;
	struct page **pages;
	struct bio_vec *bv;
};
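/*
 * Batch lifecycle (a summary of the code below): orangefs_writepages()
 * allocates one of these, write_cache_pages() feeds folios into
 * orangefs_writepages_callback(), which either extends the current batch
 * or flushes it through orangefs_writepages_work(), and a final flush
 * catches whatever is left when iteration ends.
 */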
static int orangefs_writepages_work(struct orangefs_writepages *ow,
	struct writeback_control *wbc)
{
	struct inode *inode = ow->pages[0]->mapping->host;
	struct orangefs_write_range *wrp, wr;
	struct iov_iter iter;
	ssize_t ret;
	size_t len;
	loff_t off;
	int i;

	len = i_size_read(inode);

	for (i = 0; i < ow->npages; i++) {
		set_page_writeback(ow->pages[i]);
		bvec_set_page(&ow->bv[i], ow->pages[i],
		    min(page_offset(ow->pages[i]) + PAGE_SIZE,
		        ow->off + ow->len) -
		    max(ow->off, page_offset(ow->pages[i])),
		    i == 0 ? ow->off - page_offset(ow->pages[i]) : 0);
	}
	iov_iter_bvec(&iter, ITER_SOURCE, ow->bv, ow->npages, ow->len);

	WARN_ON(ow->off >= len);
	if (ow->off + ow->len > len)
		ow->len = len - ow->off;

	off = ow->off;
	wr.uid = ow->uid;
	wr.gid = ow->gid;
	ret = wait_for_direct_io(ORANGEFS_IO_WRITE, inode, &off, &iter,
	    ow->len, 0, &wr, NULL, NULL);
	if (ret < 0) {
		for (i = 0; i < ow->npages; i++) {
			mapping_set_error(ow->pages[i]->mapping, ret);
			if (PagePrivate(ow->pages[i])) {
				wrp = (struct orangefs_write_range *)
				    page_private(ow->pages[i]);
				ClearPagePrivate(ow->pages[i]);
				put_page(ow->pages[i]);
				kfree(wrp);
			}
			end_page_writeback(ow->pages[i]);
			unlock_page(ow->pages[i]);
		}
	} else {
		ret = 0;
		for (i = 0; i < ow->npages; i++) {
			if (PagePrivate(ow->pages[i])) {
				wrp = (struct orangefs_write_range *)
				    page_private(ow->pages[i]);
				ClearPagePrivate(ow->pages[i]);
				put_page(ow->pages[i]);
				kfree(wrp);
			}
			end_page_writeback(ow->pages[i]);
			unlock_page(ow->pages[i]);
		}
	}
	return ret;
}
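/*
 * Worked example of the bvec length math above, assuming PAGE_SIZE is 4096:
 * for a batch with off=4000 and len=1000 (bytes 4000..4999), the page at
 * offset 0 contributes min(4096, 5000) - max(4000, 0) = 96 bytes starting
 * at in-page offset 4000, and the page at offset 4096 contributes
 * min(8192, 5000) - max(4000, 4096) = 904 bytes starting at offset 0.
 */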
static int orangefs_writepages_callback(struct folio *folio,
		struct writeback_control *wbc, void *data)
{
	struct orangefs_writepages *ow = data;
	struct orangefs_write_range *wr = folio->private;
	int ret;

	if (!wr) {
		folio_unlock(folio);
		/* It's not private so there's nothing to write, right? */
		printk("writepages_callback not private!\n");
		BUG();
		return 0;
	}

	ret = -1;
	if (ow->npages == 0) {
		ow->off = wr->pos;
		ow->len = wr->len;
		ow->uid = wr->uid;
		ow->gid = wr->gid;
		ow->pages[ow->npages++] = &folio->page;
		ret = 0;
		goto done;
	}
	if (!uid_eq(ow->uid, wr->uid) || !gid_eq(ow->gid, wr->gid)) {
		orangefs_writepages_work(ow, wbc);
		ow->npages = 0;
		ret = -1;
		goto done;
	}
	if (ow->off + ow->len == wr->pos) {
		ow->len += wr->len;
		ow->pages[ow->npages++] = &folio->page;
		ret = 0;
		goto done;
	}
done:
	if (ret == -1) {
		if (ow->npages) {
			orangefs_writepages_work(ow, wbc);
			ow->npages = 0;
		}
		ret = orangefs_writepage_locked(&folio->page, wbc);
		mapping_set_error(folio->mapping, ret);
		folio_unlock(folio);
		folio_end_writeback(folio);
	} else {
		if (ow->npages == ow->maxpages) {
			orangefs_writepages_work(ow, wbc);
			ow->npages = 0;
		}
	}
	return ret;
}
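/*
 * Summary of the callback's decision logic: the first folio seeds the
 * batch; a folio whose write range starts exactly at off+len extends it;
 * a uid/gid mismatch flushes the batch; and a folio that cannot be merged
 * (ret stays -1) is written out on its own via orangefs_writepage_locked().
 */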
static int orangefs_writepages(struct address_space *mapping,
	struct writeback_control *wbc)
{
	struct orangefs_writepages *ow;
	struct blk_plug plug;
	int ret;

	ow = kzalloc(sizeof(struct orangefs_writepages), GFP_KERNEL);
	if (!ow)
		return -ENOMEM;
	ow->maxpages = orangefs_bufmap_size_query()/PAGE_SIZE;
	ow->pages = kcalloc(ow->maxpages, sizeof(struct page *), GFP_KERNEL);
	if (!ow->pages) {
		kfree(ow);
		return -ENOMEM;
	}
	ow->bv = kcalloc(ow->maxpages, sizeof(struct bio_vec), GFP_KERNEL);
	if (!ow->bv) {
		kfree(ow->pages);
		kfree(ow);
		return -ENOMEM;
	}
	blk_start_plug(&plug);
	ret = write_cache_pages(mapping, wbc, orangefs_writepages_callback, ow);
	if (ow->npages)
		ret = orangefs_writepages_work(ow, wbc);
	blk_finish_plug(&plug);
	kfree(ow->pages);
	kfree(ow->bv);
	kfree(ow);
	return ret;
}
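/*
 * Sizing note: maxpages mirrors the shared buffer map, so one batch never
 * exceeds what wait_for_direct_io() can push in a single operation.  With
 * a 4 MB buffer (the figure the 4194304 constant in orangefs_readahead()
 * below suggests, though this file does not guarantee it) and 4 KB pages,
 * a batch holds up to 1024 pages.
 */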
static int orangefs_launder_folio(struct folio *);
static void orangefs_readahead(struct readahead_control *rac)
{
	loff_t offset;
	struct iov_iter iter;
	struct inode *inode = rac->mapping->host;
	struct xarray *i_pages;
	struct folio *folio;
	loff_t new_start = readahead_pos(rac);
	int ret;
	size_t new_len = 0;

	loff_t bytes_remaining = inode->i_size - readahead_pos(rac);
	loff_t pages_remaining = bytes_remaining / PAGE_SIZE;

	if (pages_remaining >= 1024)
		new_len = 4194304;
	else if (pages_remaining > readahead_count(rac))
		new_len = bytes_remaining;

	if (new_len)
		readahead_expand(rac, new_start, new_len);

	offset = readahead_pos(rac);
	i_pages = &rac->mapping->i_pages;

	iov_iter_xarray(&iter, ITER_DEST, i_pages, offset, readahead_length(rac));

	/* read in the pages. */
	if ((ret = wait_for_direct_io(ORANGEFS_IO_READ, inode,
			&offset, &iter, readahead_length(rac),
			inode->i_size, NULL, NULL, rac->file)) < 0)
		gossip_debug(GOSSIP_FILE_DEBUG,
			"%s: wait_for_direct_io failed. \n", __func__);
	else
		ret = 0;

	/* clean up. */
	while ((folio = readahead_folio(rac))) {
		if (!ret)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
}
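/*
 * Readahead sizing, worked through: when at least 1024 pages remain past
 * the readahead start, the window is expanded to the 4194304-byte transfer
 * size; a smaller file is expanded only to EOF; otherwise the window the
 * VFS chose is kept as-is.  The whole window is then read with a single
 * xarray-backed iov_iter rather than page by page.
 */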
static int orangefs_read_folio(struct file *file, struct folio *folio)
{
	struct inode *inode = folio->mapping->host;
	struct iov_iter iter;
	struct bio_vec bv;
	ssize_t ret;
	loff_t off; /* offset of this folio in the file */

	if (folio_test_dirty(folio))
		orangefs_launder_folio(folio);

	off = folio_pos(folio);
	bvec_set_folio(&bv, folio, folio_size(folio), 0);
	iov_iter_bvec(&iter, ITER_DEST, &bv, 1, folio_size(folio));

	ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
	    folio_size(folio), inode->i_size, NULL, NULL, file);
	/* this will only zero remaining unread portions of the folio data */
	iov_iter_zero(~0U, &iter);
	/* takes care of potential aliasing */
	flush_dcache_folio(folio);
	if (ret > 0)
		ret = 0;
	folio_end_read(folio, ret == 0);
	return ret;
}
static int orangefs_write_begin(struct file *file,
		struct address_space *mapping, loff_t pos, unsigned len,
		struct folio **foliop, void **fsdata)
{
	struct orangefs_write_range *wr;
	struct folio *folio;
	int ret;

	folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
			mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	*foliop = folio;

	if (folio_test_dirty(folio) && !folio_test_private(folio)) {
		/*
		 * Should be impossible.  If it happens, launder the page
		 * since we don't know what's dirty.  This will WARN in
		 * orangefs_writepage_locked.
		 */
		ret = orangefs_launder_folio(folio);
		if (ret)
			return ret;
	}
	if (folio_test_private(folio)) {
		struct orangefs_write_range *wr;
		wr = folio_get_private(folio);
		if (wr->pos + wr->len == pos &&
		    uid_eq(wr->uid, current_fsuid()) &&
		    gid_eq(wr->gid, current_fsgid())) {
			wr->len += len;
			goto okay;
		} else {
			ret = orangefs_launder_folio(folio);
			if (ret)
				return ret;
		}
	}

	wr = kmalloc(sizeof *wr, GFP_KERNEL);
	if (!wr)
		return -ENOMEM;

	wr->pos = pos;
	wr->len = len;
	wr->uid = current_fsuid();
	wr->gid = current_fsgid();
	folio_attach_private(folio, wr);
okay:
	return 0;
}
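/*
 * Effect of the merge test above: sequential writes by the same user grow
 * one write range instead of forcing a launder.  For example, two adjacent
 * 512-byte writes at pos 0 and pos 512 leave a single range {pos 0,
 * len 1024} attached to the folio.
 */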
static int orangefs_write_end(struct file *file, struct address_space *mapping,
    loff_t pos, unsigned len, unsigned copied, struct folio *folio,
    void *fsdata)
{
	struct inode *inode = folio->mapping->host;
	loff_t last_pos = pos + copied;

	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold the i_mutex.
	 */
	if (last_pos > inode->i_size)
		i_size_write(inode, last_pos);

	/* zero the stale part of the folio if we did a short copy */
	if (!folio_test_uptodate(folio)) {
		unsigned from = pos & (PAGE_SIZE - 1);
		if (copied < len)
			folio_zero_range(folio, from + copied, len - copied);
		/* Set fully written pages uptodate. */
		if (pos == folio_pos(folio) &&
		    (len == PAGE_SIZE || pos + len == inode->i_size)) {
			folio_zero_segment(folio, from + copied, PAGE_SIZE);
			folio_mark_uptodate(folio);
		}
	}

	folio_mark_dirty(folio);
	folio_unlock(folio);
	folio_put(folio);

	mark_inode_dirty_sync(file_inode(file));
	return copied;
}
static void orangefs_invalidate_folio(struct folio *folio,
				 size_t offset, size_t length)
{
	struct orangefs_write_range *wr = folio_get_private(folio);

	if (offset == 0 && length == PAGE_SIZE) {
		kfree(folio_detach_private(folio));
		return;
	/* write range entirely within invalidate range (or equal) */
	} else if (folio_pos(folio) + offset <= wr->pos &&
	    wr->pos + wr->len <= folio_pos(folio) + offset + length) {
		kfree(folio_detach_private(folio));
		/* XXX is this right? only caller in fs */
		folio_cancel_dirty(folio);
		return;
	/* invalidate range chops off end of write range */
	} else if (wr->pos < folio_pos(folio) + offset &&
	    wr->pos + wr->len <= folio_pos(folio) + offset + length &&
	    folio_pos(folio) + offset < wr->pos + wr->len) {
		size_t x;
		x = wr->pos + wr->len - (folio_pos(folio) + offset);
		WARN_ON(x > wr->len);
		wr->len -= x;
		wr->uid = current_fsuid();
		wr->gid = current_fsgid();
	/* invalidate range chops off beginning of write range */
	} else if (folio_pos(folio) + offset <= wr->pos &&
	    folio_pos(folio) + offset + length < wr->pos + wr->len &&
	    wr->pos < folio_pos(folio) + offset + length) {
		size_t x;
		x = folio_pos(folio) + offset + length - wr->pos;
		WARN_ON(x > wr->len);
		wr->pos += x;
		wr->len -= x;
		wr->uid = current_fsuid();
		wr->gid = current_fsgid();
	/* invalidate range entirely within write range (punch hole) */
	} else if (wr->pos < folio_pos(folio) + offset &&
	    folio_pos(folio) + offset + length < wr->pos + wr->len) {
		/* XXX what do we do here... should not WARN_ON */
		WARN_ON(1);
		/* punch hole */
		/*
		 * should we just ignore this and write it out anyway?
		 * it hardly makes sense
		 */
		return;
	/* non-overlapping ranges */
	} else {
		/* WARN if they do overlap */
		if (!((folio_pos(folio) + offset + length <= wr->pos) ^
		    (wr->pos + wr->len <= folio_pos(folio) + offset))) {
			WARN_ON(1);
			printk("invalidate range offset %llu length %zu\n",
			    folio_pos(folio) + offset, length);
			printk("write range offset %llu length %zu\n",
			    wr->pos, wr->len);
		}
		return;
	}

	/*
	 * Above there are returns where wr is freed or where we WARN.
	 * Thus the following runs if wr was modified above.
	 */

	orangefs_launder_folio(folio);
}
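/*
 * The cases above, with W the tracked write range and I the invalidated
 * range within this folio:
 *
 *	I: |--------|	whole folio invalidated: drop W
 *	W:   |--|
 *
 *	I: |------|	W contained in I: drop W, cancel dirty
 *	W:   |--|
 *
 *	I:     |----|	I chops off the tail of W: shrink wr->len
 *	W: |----|
 *
 *	I: |----|	I chops off the head of W: advance wr->pos
 *	W:    |----|
 *
 *	I:   |--|	hole punched inside W: unhandled (WARNs above)
 *	W: |------|
 *
 * Non-overlapping ranges leave W alone; overlap there would be a bug and
 * is what the final WARN_ON checks for.
 */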
static bool orangefs_release_folio(struct folio *folio, gfp_t foo)
{
	return !folio_test_private(folio);
}
static void orangefs_free_folio(struct folio *folio)
{
	kfree(folio_detach_private(folio));
}
static int orangefs_launder_folio(struct folio *folio)
{
	int r = 0;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
	};
	folio_wait_writeback(folio);
	if (folio_clear_dirty_for_io(folio)) {
		r = orangefs_writepage_locked(&folio->page, &wbc);
		folio_end_writeback(folio);
	}
	return r;
}
static ssize_t orangefs_direct_IO(struct kiocb *iocb,
				  struct iov_iter *iter)
{
	/*
	 * Comment from original do_readv_writev:
	 * Common entry point for read/write/readv/writev
	 * This function will dispatch it to either the direct I/O
	 * or buffered I/O path depending on the mount options and/or
	 * augmented/extended metadata attached to the file.
	 * Note: File extended attributes override any mount options.
	 */
	struct file *file = iocb->ki_filp;
	loff_t pos = iocb->ki_pos;
	enum ORANGEFS_io_type type = iov_iter_rw(iter) == WRITE ?
	    ORANGEFS_IO_WRITE : ORANGEFS_IO_READ;
	loff_t *offset = &pos;
	struct inode *inode = file->f_mapping->host;
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
	size_t count = iov_iter_count(iter);
	ssize_t total_count = 0;
	ssize_t ret = -EINVAL;

	gossip_debug(GOSSIP_FILE_DEBUG,
		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
		__func__,
		handle,
		(int)count);

	if (type == ORANGEFS_IO_WRITE) {
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): proceeding with offset : %llu, "
			     "size %d\n",
			     __func__,
			     handle,
			     llu(*offset),
			     (int)count);
	}

	if (count == 0) {
		ret = 0;
		goto out;
	}

	while (iov_iter_count(iter)) {
		size_t each_count = iov_iter_count(iter);
		size_t amt_complete;

		/* how much to transfer in this loop iteration */
		if (each_count > orangefs_bufmap_size_query())
			each_count = orangefs_bufmap_size_query();

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): size of each_count(%d)\n",
			     __func__,
			     handle,
			     (int)each_count);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): BEFORE wait_for_io: offset is %d\n",
			     __func__,
			     handle,
			     (int)*offset);

		ret = wait_for_direct_io(type, inode, offset, iter,
				each_count, 0, NULL, NULL, file);
		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): return from wait_for_io:%d\n",
			     __func__,
			     handle,
			     (int)ret);

		if (ret < 0)
			goto out;

		*offset += ret;
		total_count += ret;
		amt_complete = ret;

		gossip_debug(GOSSIP_FILE_DEBUG,
			     "%s(%pU): AFTER wait_for_io: offset is %d\n",
			     __func__,
			     handle,
			     (int)*offset);

		/*
		 * if we got a short I/O operations,
		 * fall out and return what we got so far
		 */
		if (amt_complete < each_count)
			break;
	} /*end while */

out:
	if (total_count > 0)
		ret = total_count;
	if (ret > 0) {
		if (type == ORANGEFS_IO_READ) {
			file_accessed(file);
		} else {
			file_update_time(file);
			if (*offset > i_size_read(inode))
				i_size_write(inode, *offset);
		}
	}

	gossip_debug(GOSSIP_FILE_DEBUG,
		     "%s(%pU): Value(%d) returned.\n",
		     __func__,
		     handle,
		     (int)ret);

	return ret;
}
/** ORANGEFS2 implementation of address space operations */
static const struct address_space_operations orangefs_address_operations = {
	.writepage = orangefs_writepage,
	.readahead = orangefs_readahead,
	.read_folio = orangefs_read_folio,
	.writepages = orangefs_writepages,
	.dirty_folio = filemap_dirty_folio,
	.write_begin = orangefs_write_begin,
	.write_end = orangefs_write_end,
	.invalidate_folio = orangefs_invalidate_folio,
	.release_folio = orangefs_release_folio,
	.free_folio = orangefs_free_folio,
	.launder_folio = orangefs_launder_folio,
	.direct_IO = orangefs_direct_IO,
};
vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf)
{
	struct folio *folio = page_folio(vmf->page);
	struct inode *inode = file_inode(vmf->vma->vm_file);
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	unsigned long *bitlock = &orangefs_inode->bitlock;
	vm_fault_t ret;
	struct orangefs_write_range *wr;

	sb_start_pagefault(inode->i_sb);

	if (wait_on_bit(bitlock, 1, TASK_KILLABLE)) {
		ret = VM_FAULT_RETRY;
		goto out;
	}

	folio_lock(folio);
	if (folio_test_dirty(folio) && !folio_test_private(folio)) {
		/*
		 * Should be impossible.  If it happens, launder the folio
		 * since we don't know what's dirty.  This will WARN in
		 * orangefs_writepage_locked.
		 */
		if (orangefs_launder_folio(folio)) {
			ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
			goto out;
		}
	}
	if (folio_test_private(folio)) {
		wr = folio_get_private(folio);
		if (uid_eq(wr->uid, current_fsuid()) &&
		    gid_eq(wr->gid, current_fsgid())) {
			wr->pos = page_offset(vmf->page);
			wr->len = PAGE_SIZE;
			goto okay;
		} else {
			if (orangefs_launder_folio(folio)) {
				ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
				goto out;
			}
		}
	}
	wr = kmalloc(sizeof *wr, GFP_KERNEL);
	if (!wr) {
		ret = VM_FAULT_LOCKED|VM_FAULT_RETRY;
		goto out;
	}
	wr->pos = page_offset(vmf->page);
	wr->len = PAGE_SIZE;
	wr->uid = current_fsuid();
	wr->gid = current_fsgid();
	folio_attach_private(folio, wr);
okay:

	file_update_time(vmf->vma->vm_file);
	if (folio->mapping != inode->i_mapping) {
		folio_unlock(folio);
		ret = VM_FAULT_LOCKED|VM_FAULT_NOPAGE;
		goto out;
	}

	/*
	 * We mark the folio dirty already here so that when freeze is in
	 * progress, we are guaranteed that writeback during freezing will
	 * see the dirty folio and writeprotect it again.
	 */
	folio_mark_dirty(folio);
	folio_wait_stable(folio);
	ret = VM_FAULT_LOCKED;
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}
static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
{
	struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
	struct orangefs_kernel_op_s *new_op;
	loff_t orig_size;
	int ret = -EINVAL;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
		     __func__,
		     get_khandle_from_ino(inode),
		     &orangefs_inode->refn.khandle,
		     orangefs_inode->refn.fs_id,
		     iattr->ia_size);

	/* Ensure that we have a up to date size, so we know if it changed. */
	ret = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_SIZE);
	if (ret) {
		gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
		    __func__, ret);
		return ret;
	}
	orig_size = i_size_read(inode);

	/* This is truncate_setsize in a different order. */
	truncate_pagecache(inode, iattr->ia_size);
	i_size_write(inode, iattr->ia_size);
	if (iattr->ia_size > orig_size)
		pagecache_isize_extended(inode, orig_size, iattr->ia_size);

	new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
	if (!new_op)
		return -ENOMEM;

	new_op->upcall.req.truncate.refn = orangefs_inode->refn;
	new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;

	ret = service_operation(new_op,
		__func__,
		get_interruptible_flag(inode));

	/*
	 * the truncate has no downcall members to retrieve, but
	 * the status value tells us if it went through ok or not
	 */
	gossip_debug(GOSSIP_INODE_DEBUG, "%s: ret:%d:\n", __func__, ret);

	op_release(new_op);

	if (ret != 0)
		return ret;

	if (orig_size != i_size_read(inode))
		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;

	return ret;
}
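/*
 * Ordering note: truncate_setsize() writes i_size before truncating the
 * pagecache; the code above does it the other way around, which is what
 * the "different order" comment refers to.  Either way the server only
 * learns the new size from the ORANGEFS_VFS_OP_TRUNCATE upcall.
 */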
int __orangefs_setattr(struct inode *inode, struct iattr *iattr)
{
	int ret;

	if (iattr->ia_valid & ATTR_MODE) {
		if (iattr->ia_mode & (S_ISVTX)) {
			if (is_root_handle(inode)) {
				/*
				 * allow sticky bit to be set on root (since
				 * it shows up that way by default anyhow),
				 * but don't show it to the server
				 */
				iattr->ia_mode -= S_ISVTX;
			} else {
				gossip_debug(GOSSIP_UTILS_DEBUG,
					     "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
				ret = -EINVAL;
				goto out;
			}
		}
		if (iattr->ia_mode & (S_ISUID)) {
			gossip_debug(GOSSIP_UTILS_DEBUG,
				     "Attempting to set setuid bit (not supported); returning EINVAL.\n");
			ret = -EINVAL;
			goto out;
		}
	}

	if (iattr->ia_valid & ATTR_SIZE) {
		ret = orangefs_setattr_size(inode, iattr);
		if (ret)
			goto out;
	}

again:
	spin_lock(&inode->i_lock);
	if (ORANGEFS_I(inode)->attr_valid) {
		if (uid_eq(ORANGEFS_I(inode)->attr_uid, current_fsuid()) &&
		    gid_eq(ORANGEFS_I(inode)->attr_gid, current_fsgid())) {
			ORANGEFS_I(inode)->attr_valid |= iattr->ia_valid;
		} else {
			spin_unlock(&inode->i_lock);
			write_inode_now(inode, 1);
			goto again;
		}
	} else {
		ORANGEFS_I(inode)->attr_valid = iattr->ia_valid;
		ORANGEFS_I(inode)->attr_uid = current_fsuid();
		ORANGEFS_I(inode)->attr_gid = current_fsgid();
	}
	setattr_copy(&nop_mnt_idmap, inode, iattr);
	spin_unlock(&inode->i_lock);
	mark_inode_dirty(inode);

	ret = 0;
out:
	return ret;
}
int __orangefs_setattr_mode(struct dentry *dentry, struct iattr *iattr)
{
	int ret;
	struct inode *inode = d_inode(dentry);

	ret = __orangefs_setattr(inode, iattr);
	/* change mode on a file that has ACLs */
	if (!ret && (iattr->ia_valid & ATTR_MODE))
		ret = posix_acl_chmod(&nop_mnt_idmap, dentry, inode->i_mode);

	return ret;
}
/*
 * Change attributes of an object referenced by dentry.
 */
int orangefs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		     struct iattr *iattr)
{
	int ret;

	gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n",
	    dentry);
	ret = setattr_prepare(&nop_mnt_idmap, dentry, iattr);
	if (ret)
		goto out;
	ret = __orangefs_setattr_mode(dentry, iattr);
	sync_inode_metadata(d_inode(dentry), 1);
out:
	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n",
	    ret);
	return ret;
}

/*
 * Obtain attributes of an object given a dentry
 */
int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
		     struct kstat *stat, u32 request_mask, unsigned int flags)
{
	int ret;
	struct inode *inode = path->dentry->d_inode;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "orangefs_getattr: called on %pd mask %u\n",
		     path->dentry, request_mask);

	ret = orangefs_inode_getattr(inode,
	    request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
	if (ret == 0) {
		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);

		/* override block size reported to stat */
		if (!(request_mask & STATX_SIZE))
			stat->result_mask &= ~STATX_SIZE;

		generic_fill_statx_attr(inode, stat);
	}
	return ret;
}

int orangefs_permission(struct mnt_idmap *idmap,
			struct inode *inode, int mask)
{
	int ret;

	if (mask & MAY_NOT_BLOCK)
		return -ECHILD;

	gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);

	/* Make sure the permission (and other common attrs) are up to date. */
	ret = orangefs_inode_getattr(inode, 0);
	if (ret < 0)
		return ret;

	return generic_permission(&nop_mnt_idmap, inode, mask);
}

int orangefs_update_time(struct inode *inode, int flags)
{
	struct iattr iattr;

	gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
	    get_khandle_from_ino(inode));
	flags = generic_update_time(inode, flags);
	memset(&iattr, 0, sizeof iattr);
	if (flags & S_ATIME)
		iattr.ia_valid |= ATTR_ATIME;
	if (flags & S_CTIME)
		iattr.ia_valid |= ATTR_CTIME;
	if (flags & S_MTIME)
		iattr.ia_valid |= ATTR_MTIME;
	return __orangefs_setattr(inode, &iattr);
}

static int orangefs_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	u64 val = 0;
	int ret;

	gossip_debug(GOSSIP_FILE_DEBUG, "%s: called on %pd\n", __func__,
		     dentry);
	ret = orangefs_inode_getxattr(d_inode(dentry),
				      "user.pvfs2.meta_hint",
				      &val, sizeof(val));
	if (ret < 0 && ret != -ENODATA)
		return ret;

	gossip_debug(GOSSIP_FILE_DEBUG, "%s: flags=%u\n", __func__, (u32) val);

	fileattr_fill_flags(fa, val);
	return 0;
}

static int orangefs_fileattr_set(struct mnt_idmap *idmap,
				 struct dentry *dentry, struct fileattr *fa)
{
	u64 val = 0;

	gossip_debug(GOSSIP_FILE_DEBUG, "%s: called on %pd\n", __func__,
		     dentry);
	/*
	 * ORANGEFS_MIRROR_FL is set internally when the mirroring mode is
	 * turned on for a file. The user is not allowed to turn on this bit,
	 * but the bit is present if the user first gets the flags and then
	 * updates the flags with some new settings. So, we ignore it in the
	 * following edit. bligon.
	 */
	if (fileattr_has_fsx(fa) ||
	    (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL |
			   ORANGEFS_MIRROR_FL))) {
		gossip_err("%s: only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n",
			   __func__);
		return -EOPNOTSUPP;
	}
	val = fa->flags;
	gossip_debug(GOSSIP_FILE_DEBUG, "%s: flags=%u\n", __func__, (u32) val);
	return orangefs_inode_setxattr(d_inode(dentry),
				       "user.pvfs2.meta_hint",
				       &val, sizeof(val), 0);
}
/* ORANGEFS2 implementation of VFS inode operations for files */
static const struct inode_operations orangefs_file_inode_operations = {
	.get_inode_acl = orangefs_get_acl,
	.set_acl = orangefs_set_acl,
	.setattr = orangefs_setattr,
	.getattr = orangefs_getattr,
	.listxattr = orangefs_listxattr,
	.permission = orangefs_permission,
	.update_time = orangefs_update_time,
	.fileattr_get = orangefs_fileattr_get,
	.fileattr_set = orangefs_fileattr_set,
};
static int orangefs_init_iops(struct inode *inode)
{
	inode->i_mapping->a_ops = &orangefs_address_operations;

	switch (inode->i_mode & S_IFMT) {
	case S_IFREG:
		inode->i_op = &orangefs_file_inode_operations;
		inode->i_fop = &orangefs_file_operations;
		break;
	case S_IFLNK:
		inode->i_op = &orangefs_symlink_inode_operations;
		break;
	case S_IFDIR:
		inode->i_op = &orangefs_dir_inode_operations;
		inode->i_fop = &orangefs_dir_operations;
		break;
	default:
		gossip_debug(GOSSIP_INODE_DEBUG,
			     "%s: unsupported mode\n",
			     __func__);
		return -EINVAL;
	}

	return 0;
}
/*
 * Given an ORANGEFS object identifier (fsid, handle), convert it into
 * a ino_t type that will be used as a hash-index from where the handle will
 * be searched for in the VFS hash table of inodes.
 */
static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
{
	if (!ref)
		return 0;
	return orangefs_khandle_to_ino(&(ref->khandle));
}
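/*
 * Example of the lookup flow built on this hash: orangefs_iget() below
 * passes it to iget5_locked() as the hash key, and orangefs_test_inode()
 * resolves hash collisions by comparing the full (khandle, fs_id) pair.
 */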
/*
 * Called to set up an inode from iget5_locked.
 */
static int orangefs_set_inode(struct inode *inode, void *data)
{
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	ORANGEFS_I(inode)->refn.fs_id = ref->fs_id;
	ORANGEFS_I(inode)->refn.khandle = ref->khandle;
	ORANGEFS_I(inode)->attr_valid = 0;
	hash_init(ORANGEFS_I(inode)->xattr_cache);
	ORANGEFS_I(inode)->mapping_time = jiffies - 1;
	ORANGEFS_I(inode)->bitlock = 0;
	return 0;
}
/*
 * Called to determine if handles match.
 */
static int orangefs_test_inode(struct inode *inode, void *data)
{
	struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
	struct orangefs_inode_s *orangefs_inode = NULL;

	orangefs_inode = ORANGEFS_I(inode);
	/* test handles and fs_ids... */
	return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle),
				&(ref->khandle)) &&
			orangefs_inode->refn.fs_id == ref->fs_id);
}
/*
 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
 * file handle.
 *
 * @sb: the file system super block instance.
 * @ref: The ORANGEFS object for which we are trying to locate an inode.
 */
struct inode *orangefs_iget(struct super_block *sb,
		struct orangefs_object_kref *ref)
{
	struct inode *inode = NULL;
	unsigned long hash;
	int error;

	hash = orangefs_handle_hash(ref);
	inode = iget5_locked(sb,
			hash,
			orangefs_test_inode,
			orangefs_set_inode,
			ref);

	if (!inode)
		return ERR_PTR(-ENOMEM);

	if (!(inode->i_state & I_NEW))
		return inode;

	error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW);
	if (error) {
		iget_failed(inode);
		return ERR_PTR(error);
	}

	inode->i_ino = hash;	/* needed for stat etc */
	orangefs_init_iops(inode);
	unlock_new_inode(inode);

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
		     &ref->khandle,
		     ref->fs_id,
		     hash,
		     inode->i_ino);

	return inode;
}
/*
 * Allocate an inode for a newly created file and insert it into the inode hash.
 */
struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
		umode_t mode, dev_t dev, struct orangefs_object_kref *ref)
{
	struct posix_acl *acl = NULL, *default_acl = NULL;
	unsigned long hash = orangefs_handle_hash(ref);
	struct inode *inode;
	int error;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
		     __func__,
		     sb,
		     MAJOR(dev),
		     MINOR(dev),
		     mode);

	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	error = posix_acl_create(dir, &mode, &default_acl, &acl);
	if (error)
		goto out_iput;

	orangefs_set_inode(inode, ref);
	inode->i_ino = hash;	/* needed for stat etc */

	error = orangefs_inode_getattr(inode, ORANGEFS_GETATTR_NEW);
	if (error)
		goto out_iput;

	orangefs_init_iops(inode);
	inode->i_rdev = dev;

	if (default_acl) {
		error = __orangefs_set_acl(inode, default_acl,
					   ACL_TYPE_DEFAULT);
		if (error)
			goto out_iput;
	}

	if (acl) {
		error = __orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS);
		if (error)
			goto out_iput;
	}

	error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
	if (error < 0)
		goto out_iput;

	gossip_debug(GOSSIP_INODE_DEBUG,
		     "Initializing ACL's for inode %pU\n",
		     get_khandle_from_ino(inode));
	if (mode != inode->i_mode) {
		struct iattr iattr = {
			.ia_valid = ATTR_MODE,
			.ia_mode = mode
		};
		inode->i_mode = mode;
		__orangefs_setattr(inode, &iattr);
		__posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
	}
	posix_acl_release(acl);
	posix_acl_release(default_acl);
	return inode;

out_iput:
	iput(inode);
	posix_acl_release(acl);
	posix_acl_release(default_acl);
	return ERR_PTR(error);
}