1 // SPDX-License-Identifier: GPL-2.0
/*
3 * (C) 2001 Clemson University and The University of Chicago
4 * Copyright 2018 Omnibond Systems, L.L.C.
6 * See COPYING in top-level directory.
10 * Linux VFS inode operations.
*/
13 #include <linux/bvec.h>
15 #include "orangefs-kernel.h"
16 #include "orangefs-bufmap.h"
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below; do not treat this text as compilable as-is.
 * Writes one locked page back to the server via wait_for_direct_io().
 * If the page carries an orangefs_write_range (page_private), the write is
 * bounded by that range; otherwise the whole page is written, clipped to
 * i_size. On error the mapping's error is recorded; the page's private
 * write-range state is cleared afterwards.
 */
18 static int orangefs_writepage_locked(struct page
*page
,
19 struct writeback_control
*wbc
)
21 struct inode
*inode
= page
->mapping
->host
;
22 struct orangefs_write_range
*wr
= NULL
;
29 set_page_writeback(page
);
/* Snapshot i_size once; used to clip the write length below. */
31 len
= i_size_read(inode
);
32 if (PagePrivate(page
)) {
33 wr
= (struct orangefs_write_range
*)page_private(page
);
34 WARN_ON(wr
->pos
>= len
);
36 if (off
+ wr
->len
> len
)
/* No private range: write the full page, clipped to EOF. */
42 off
= page_offset(page
);
43 if (off
+ PAGE_SIZE
> len
)
48 /* Should've been handled in orangefs_invalidatepage. */
49 WARN_ON(off
== len
|| off
+ wlen
> len
);
53 bv
.bv_offset
= off
% PAGE_SIZE
;
55 iov_iter_bvec(&iter
, WRITE
, &bv
, 1, wlen
);
57 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, wlen
,
61 mapping_set_error(page
->mapping
, ret
);
/* Drop the per-page write-range bookkeeping once the write is issued. */
67 set_page_private(page
, 0);
68 ClearPagePrivate(page
);
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * ->writepage: delegate to orangefs_writepage_locked() and complete
 * writeback on the page.
 */
74 static int orangefs_writepage(struct page
*page
, struct writeback_control
*wbc
)
77 ret
= orangefs_writepage_locked(page
, wbc
);
79 end_page_writeback(page
);
/*
 * Batch state carried through write_cache_pages(); the member list is not
 * visible in this extraction (fields such as pages/npages/maxpages/bv/off/
 * len/uid/gid are referenced by the functions below — TODO confirm).
 */
83 struct orangefs_writepages
{
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * Flushes the batch of pages accumulated in *ow as one contiguous write:
 * builds a bio_vec array over ow->pages, clips ow->len to i_size, then
 * issues a single wait_for_direct_io(). Afterwards each page's private
 * write-range is freed/cleared and writeback is completed, on both the
 * error and success paths.
 */
94 static int orangefs_writepages_work(struct orangefs_writepages
*ow
,
95 struct writeback_control
*wbc
)
97 struct inode
*inode
= ow
->pages
[0]->mapping
->host
;
98 struct orangefs_write_range
*wrp
, wr
;
105 len
= i_size_read(inode
);
/* Build one bio_vec per batched page covering the byte range of the write. */
107 for (i
= 0; i
< ow
->npages
; i
++) {
108 set_page_writeback(ow
->pages
[i
]);
109 ow
->bv
[i
].bv_page
= ow
->pages
[i
];
110 ow
->bv
[i
].bv_len
= min(page_offset(ow
->pages
[i
]) + PAGE_SIZE
,
112 max(ow
->off
, page_offset(ow
->pages
[i
]));
114 ow
->bv
[i
].bv_offset
= ow
->off
-
115 page_offset(ow
->pages
[i
]);
117 ow
->bv
[i
].bv_offset
= 0;
119 iov_iter_bvec(&iter
, WRITE
, ow
->bv
, ow
->npages
, ow
->len
);
/* Clip the batched range to EOF; writing past i_size would be a bug. */
121 WARN_ON(ow
->off
>= len
);
122 if (ow
->off
+ ow
->len
> len
)
123 ow
->len
= len
- ow
->off
;
128 ret
= wait_for_direct_io(ORANGEFS_IO_WRITE
, inode
, &off
, &iter
, ow
->len
,
/* Error path: mark pages in error and record it on the mapping. */
131 for (i
= 0; i
< ow
->npages
; i
++) {
132 SetPageError(ow
->pages
[i
]);
133 mapping_set_error(ow
->pages
[i
]->mapping
, ret
);
134 if (PagePrivate(ow
->pages
[i
])) {
135 wrp
= (struct orangefs_write_range
*)
136 page_private(ow
->pages
[i
]);
137 ClearPagePrivate(ow
->pages
[i
]);
138 put_page(ow
->pages
[i
]);
141 end_page_writeback(ow
->pages
[i
]);
142 unlock_page(ow
->pages
[i
]);
/* Success path: same per-page cleanup without the error marking. */
146 for (i
= 0; i
< ow
->npages
; i
++) {
147 if (PagePrivate(ow
->pages
[i
])) {
148 wrp
= (struct orangefs_write_range
*)
149 page_private(ow
->pages
[i
]);
150 ClearPagePrivate(ow
->pages
[i
]);
151 put_page(ow
->pages
[i
]);
154 end_page_writeback(ow
->pages
[i
]);
155 unlock_page(ow
->pages
[i
]);
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * write_cache_pages() callback: accumulates contiguous, same-credential
 * (uid/gid) pages into *ow. A page that cannot be batched flushes the
 * current batch and is either started as a new batch or written alone via
 * orangefs_writepage_locked(). A full batch (npages == maxpages) is
 * flushed immediately.
 */
161 static int orangefs_writepages_callback(struct page
*page
,
162 struct writeback_control
*wbc
, void *data
)
164 struct orangefs_writepages
*ow
= data
;
165 struct orangefs_write_range
*wr
;
168 if (!PagePrivate(page
)) {
170 /* It's not private so there's nothing to write, right? */
171 printk("writepages_callback not private!\n");
175 wr
= (struct orangefs_write_range
*)page_private(page
);
/* Empty batch: this page starts it. */
178 if (ow
->npages
== 0) {
183 ow
->pages
[ow
->npages
++] = page
;
/* Credential mismatch: flush what we have before considering this page. */
187 if (!uid_eq(ow
->uid
, wr
->uid
) || !gid_eq(ow
->gid
, wr
->gid
)) {
188 orangefs_writepages_work(ow
, wbc
);
/* Contiguous with the batch: append. */
193 if (ow
->off
+ ow
->len
== wr
->pos
) {
195 ow
->pages
[ow
->npages
++] = page
;
202 orangefs_writepages_work(ow
, wbc
);
/* Not batchable: write this page on its own. */
205 ret
= orangefs_writepage_locked(page
, wbc
);
206 mapping_set_error(page
->mapping
, ret
);
208 end_page_writeback(page
);
210 if (ow
->npages
== ow
->maxpages
) {
211 orangefs_writepages_work(ow
, wbc
);
/*
 * NOTE(review): incomplete extraction — allocation-failure handling and
 * cleanup lines are not visible here.
 * ->writepages: allocates batch state sized to the bufmap
 * (orangefs_bufmap_size_query()/PAGE_SIZE pages), walks dirty pages with
 * write_cache_pages() under a block plug, then flushes any remaining
 * batched pages.
 */
218 static int orangefs_writepages(struct address_space
*mapping
,
219 struct writeback_control
*wbc
)
221 struct orangefs_writepages
*ow
;
222 struct blk_plug plug
;
224 ow
= kzalloc(sizeof(struct orangefs_writepages
), GFP_KERNEL
);
227 ow
->maxpages
= orangefs_bufmap_size_query()/PAGE_SIZE
;
228 ow
->pages
= kcalloc(ow
->maxpages
, sizeof(struct page
*), GFP_KERNEL
);
233 ow
->bv
= kcalloc(ow
->maxpages
, sizeof(struct bio_vec
), GFP_KERNEL
);
239 blk_start_plug(&plug
);
240 ret
= write_cache_pages(mapping
, wbc
, orangefs_writepages_callback
, ow
);
/* Flush any partially-filled batch left over from the walk. */
242 ret
= orangefs_writepages_work(ow
, wbc
);
243 blk_finish_plug(&plug
);
250 static int orangefs_launder_page(struct page
*);
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * ->readpage: chooses a read size from the caller's hinted block size
 * (ro->blksiz, rounded to a page multiple and clipped to i_size), launders
 * the page, reads via wait_for_direct_io(), zeroes any unread tail, and
 * then opportunistically fills following pages straight from the shared
 * bufmap slot (buffer_index) while data remains.
 */
252 static int orangefs_readpage(struct file
*file
, struct page
*page
)
254 struct inode
*inode
= page
->mapping
->host
;
255 struct iov_iter iter
;
258 loff_t off
; /* offset into this page */
259 pgoff_t index
; /* which page */
260 struct page
*next_page
;
262 struct orangefs_read_options
*ro
= file
->private_data
;
265 int buffer_index
= -1; /* orangefs shared memory slot */
266 int slot_index
; /* index into slot */
270 * If they set some miniscule size for "count" in read(2)
271 * (for example) then let's try to read a page, or the whole file
272 * if it is smaller than a page. Once "count" goes over a page
273 * then lets round up to the highest page size multiple that is
274 * less than or equal to "count" and do that much orangefs IO and
275 * try to fill as many pages as we can from it.
277 * "count" should be represented in ro->blksiz.
279 * inode->i_size = file size.
282 if (ro
->blksiz
< PAGE_SIZE
) {
283 if (inode
->i_size
< PAGE_SIZE
)
284 read_size
= inode
->i_size
;
286 read_size
= PAGE_SIZE
;
/* Round blksiz up to the next page multiple unless already aligned. */
288 roundedup
= ((PAGE_SIZE
- 1) & ro
->blksiz
) ?
289 ((ro
->blksiz
+ PAGE_SIZE
) & ~(PAGE_SIZE
-1)) :
291 if (roundedup
> inode
->i_size
)
292 read_size
= inode
->i_size
;
294 read_size
= roundedup
;
298 read_size
= PAGE_SIZE
;
301 read_size
= PAGE_SIZE
;
/* Write back any dirty private range before overwriting the page. */
304 orangefs_launder_page(page
);
306 off
= page_offset(page
);
307 index
= off
>> PAGE_SHIFT
;
309 bv
.bv_len
= PAGE_SIZE
;
311 iov_iter_bvec(&iter
, READ
, &bv
, 1, PAGE_SIZE
);
313 ret
= wait_for_direct_io(ORANGEFS_IO_READ
, inode
, &off
, &iter
,
314 read_size
, inode
->i_size
, NULL
, &buffer_index
);
316 /* this will only zero remaining unread portions of the page data */
317 iov_iter_zero(~0U, &iter
);
318 /* takes care of potential aliasing */
319 flush_dcache_page(page
);
325 SetPageUptodate(page
);
327 ClearPageError(page
);
330 /* unlock the page after the ->readpage() routine completes */
/* Readahead-style fill: spread leftover bufmap data into later pages. */
333 if (remaining
> PAGE_SIZE
) {
335 while ((remaining
- PAGE_SIZE
) >= PAGE_SIZE
) {
336 remaining
-= PAGE_SIZE
;
338 * It is an optimization to try and fill more than one
339 * page... by now we've already gotten the single
340 * page we were after, if stuff doesn't seem to
341 * be going our way at this point just return
342 * and hope for the best.
344 * If we look for pages and they're already there is
345 * one reason to give up, and if they're not there
346 * and we can't create them is another reason.
351 next_page
= find_get_page(inode
->i_mapping
, index
);
353 gossip_debug(GOSSIP_FILE_DEBUG
,
354 "%s: found next page, quitting\n",
359 next_page
= find_or_create_page(inode
->i_mapping
,
363 * I've never hit this, leave it as a printk for
364 * now so it will be obvious.
367 printk("%s: can't create next page, quitting\n",
371 kaddr
= kmap_atomic(next_page
);
372 orangefs_bufmap_page_fill(kaddr
,
375 kunmap_atomic(kaddr
);
376 SetPageUptodate(next_page
);
377 unlock_page(next_page
);
/* Release the shared-memory slot if one was handed back to us. */
383 if (buffer_index
!= -1)
384 orangefs_bufmap_put(buffer_index
);
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * ->write_begin: grabs/locks the cache page and attaches (or extends) an
 * orangefs_write_range recording the write's position, length and the
 * writer's fsuid/fsgid. A page whose existing private range cannot be
 * extended (non-adjacent write, or different credentials) is laundered
 * first so a fresh range can be attached.
 */
388 static int orangefs_write_begin(struct file
*file
,
389 struct address_space
*mapping
,
390 loff_t pos
, unsigned len
, unsigned flags
, struct page
**pagep
,
393 struct orangefs_write_range
*wr
;
398 index
= pos
>> PAGE_SHIFT
;
400 page
= grab_cache_page_write_begin(mapping
, index
, flags
);
406 if (PageDirty(page
) && !PagePrivate(page
)) {
408 * Should be impossible. If it happens, launder the page
409 * since we don't know what's dirty. This will WARN in
410 * orangefs_writepage_locked.
412 ret
= orangefs_launder_page(page
);
416 if (PagePrivate(page
)) {
417 struct orangefs_write_range
*wr
;
418 wr
= (struct orangefs_write_range
*)page_private(page
);
/* Adjacent write by the same fsuid/fsgid: reuse the existing range. */
419 if (wr
->pos
+ wr
->len
== pos
&&
420 uid_eq(wr
->uid
, current_fsuid()) &&
421 gid_eq(wr
->gid
, current_fsgid())) {
425 ret
= orangefs_launder_page(page
);
/* Attach a fresh write range carrying the current credentials. */
431 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
437 wr
->uid
= current_fsuid();
438 wr
->gid
= current_fsgid();
439 SetPagePrivate(page
);
440 set_page_private(page
, (unsigned long)wr
);
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * ->write_end: extends i_size if the write went past it, zeroes the stale
 * tail of a not-uptodate page after a short copy, marks fully-written
 * pages uptodate, dirties the page, and syncs inode metadata dirtiness.
 */
446 static int orangefs_write_end(struct file
*file
, struct address_space
*mapping
,
447 loff_t pos
, unsigned len
, unsigned copied
, struct page
*page
, void *fsdata
)
449 struct inode
*inode
= page
->mapping
->host
;
450 loff_t last_pos
= pos
+ copied
;
453 * No need to use i_size_read() here, the i_size
454 * cannot change under us because we hold the i_mutex.
456 if (last_pos
> inode
->i_size
)
457 i_size_write(inode
, last_pos
);
459 /* zero the stale part of the page if we did a short copy */
460 if (!PageUptodate(page
)) {
461 unsigned from
= pos
& (PAGE_SIZE
- 1);
463 zero_user(page
, from
+ copied
, len
- copied
);
465 /* Set fully written pages uptodate. */
466 if (pos
== page_offset(page
) &&
467 (len
== PAGE_SIZE
|| pos
+ len
== inode
->i_size
)) {
468 zero_user_segment(page
, from
+ copied
, PAGE_SIZE
);
469 SetPageUptodate(page
);
473 set_page_dirty(page
);
477 mark_inode_dirty_sync(file_inode(file
));
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * ->invalidatepage: reconciles the page's private write range with the
 * invalidated byte span [offset, offset+length). Cases: whole page or a
 * range fully covering the write range frees it; partial overlap trims the
 * range (taking the current fsuid/fsgid for what remains); a punch-hole
 * inside the range and a genuine overlap in the "non-overlapping" branch
 * are flagged as unexpected. A modified (trimmed) range falls through to
 * orangefs_launder_page() to write it out.
 */
481 static void orangefs_invalidatepage(struct page
*page
,
485 struct orangefs_write_range
*wr
;
486 wr
= (struct orangefs_write_range
*)page_private(page
);
/* Whole-page invalidate: drop the write range entirely. */
488 if (offset
== 0 && length
== PAGE_SIZE
) {
489 kfree((struct orangefs_write_range
*)page_private(page
));
490 set_page_private(page
, 0);
491 ClearPagePrivate(page
);
494 /* write range entirely within invalidate range (or equal) */
495 } else if (page_offset(page
) + offset
<= wr
->pos
&&
496 wr
->pos
+ wr
->len
<= page_offset(page
) + offset
+ length
) {
497 kfree((struct orangefs_write_range
*)page_private(page
));
498 set_page_private(page
, 0);
499 ClearPagePrivate(page
);
501 /* XXX is this right? only caller in fs */
502 cancel_dirty_page(page
);
504 /* invalidate range chops off end of write range */
505 } else if (wr
->pos
< page_offset(page
) + offset
&&
506 wr
->pos
+ wr
->len
<= page_offset(page
) + offset
+ length
&&
507 page_offset(page
) + offset
< wr
->pos
+ wr
->len
) {
509 x
= wr
->pos
+ wr
->len
- (page_offset(page
) + offset
);
510 WARN_ON(x
> wr
->len
);
/* Surviving range now belongs to the current writer's credentials. */
512 wr
->uid
= current_fsuid();
513 wr
->gid
= current_fsgid();
514 /* invalidate range chops off beginning of write range */
515 } else if (page_offset(page
) + offset
<= wr
->pos
&&
516 page_offset(page
) + offset
+ length
< wr
->pos
+ wr
->len
&&
517 wr
->pos
< page_offset(page
) + offset
+ length
) {
519 x
= page_offset(page
) + offset
+ length
- wr
->pos
;
520 WARN_ON(x
> wr
->len
);
523 wr
->uid
= current_fsuid();
524 wr
->gid
= current_fsgid();
525 /* invalidate range entirely within write range (punch hole) */
526 } else if (wr
->pos
< page_offset(page
) + offset
&&
527 page_offset(page
) + offset
+ length
< wr
->pos
+ wr
->len
) {
528 /* XXX what do we do here... should not WARN_ON */
532 * should we just ignore this and write it out anyway?
533 * it hardly makes sense
536 /* non-overlapping ranges */
538 /* WARN if they do overlap */
539 if (!((page_offset(page
) + offset
+ length
<= wr
->pos
) ^
540 (wr
->pos
+ wr
->len
<= page_offset(page
) + offset
))) {
542 printk("invalidate range offset %llu length %u\n",
543 page_offset(page
) + offset
, length
);
544 printk("write range offset %llu length %zu\n",
551 * Above there are returns where wr is freed or where we WARN.
552 * Thus the following runs if wr was modified above.
555 orangefs_launder_page(page
);
/*
 * ->releasepage: the page can be released only when no private write
 * range is attached.
 */
558 static int orangefs_releasepage(struct page
*page
, gfp_t foo
)
560 return !PagePrivate(page
);
/*
 * ->freepage: free and detach the page's private orangefs_write_range,
 * if any, as the page leaves the page cache.
 */
563 static void orangefs_freepage(struct page
*page
)
565 if (PagePrivate(page
)) {
566 kfree((struct orangefs_write_range
*)page_private(page
));
567 set_page_private(page
, 0);
568 ClearPagePrivate(page
);
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * ->launder_page: wait out any in-flight writeback, then synchronously
 * (WB_SYNC_ALL) write the page via orangefs_writepage_locked() if it was
 * dirty, completing writeback afterwards.
 */
573 static int orangefs_launder_page(struct page
*page
)
576 struct writeback_control wbc
= {
577 .sync_mode
= WB_SYNC_ALL
,
580 wait_on_page_writeback(page
);
581 if (clear_page_dirty_for_io(page
)) {
582 r
= orangefs_writepage_locked(page
, &wbc
);
583 end_page_writeback(page
);
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * ->direct_IO: transfers the iov_iter to/from the server in chunks of at
 * most orangefs_bufmap_size_query() bytes, looping until the iter is
 * drained or a short transfer occurs. Direction comes from
 * iov_iter_rw(iter). For writes, file times are updated and i_size is
 * extended if the final offset passed it.
 */
588 static ssize_t
orangefs_direct_IO(struct kiocb
*iocb
,
589 struct iov_iter
*iter
)
592 * Comment from original do_readv_writev:
593 * Common entry point for read/write/readv/writev
594 * This function will dispatch it to either the direct I/O
595 * or buffered I/O path depending on the mount options and/or
596 * augmented/extended metadata attached to the file.
597 * Note: File extended attributes override any mount options.
599 struct file
*file
= iocb
->ki_filp
;
600 loff_t pos
= iocb
->ki_pos
;
601 enum ORANGEFS_io_type type
= iov_iter_rw(iter
) == WRITE
?
602 ORANGEFS_IO_WRITE
: ORANGEFS_IO_READ
;
603 loff_t
*offset
= &pos
;
604 struct inode
*inode
= file
->f_mapping
->host
;
605 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
606 struct orangefs_khandle
*handle
= &orangefs_inode
->refn
.khandle
;
607 size_t count
= iov_iter_count(iter
);
608 ssize_t total_count
= 0;
609 ssize_t ret
= -EINVAL
;
612 gossip_debug(GOSSIP_FILE_DEBUG
,
613 "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
618 if (type
== ORANGEFS_IO_WRITE
) {
619 gossip_debug(GOSSIP_FILE_DEBUG
,
620 "%s(%pU): proceeding with offset : %llu, "
/* Chunked transfer loop: one bufmap-sized I/O per iteration. */
633 while (iov_iter_count(iter
)) {
634 size_t each_count
= iov_iter_count(iter
);
638 /* how much to transfer in this loop iteration */
639 if (each_count
> orangefs_bufmap_size_query())
640 each_count
= orangefs_bufmap_size_query();
642 gossip_debug(GOSSIP_FILE_DEBUG
,
643 "%s(%pU): size of each_count(%d)\n",
647 gossip_debug(GOSSIP_FILE_DEBUG
,
648 "%s(%pU): BEFORE wait_for_io: offset is %d\n",
653 ret
= wait_for_direct_io(type
, inode
, offset
, iter
,
654 each_count
, 0, NULL
, NULL
);
655 gossip_debug(GOSSIP_FILE_DEBUG
,
656 "%s(%pU): return from wait_for_io:%d\n",
668 gossip_debug(GOSSIP_FILE_DEBUG
,
669 "%s(%pU): AFTER wait_for_io: offset is %d\n",
675 * if we got a short I/O operations,
676 * fall out and return what we got so far
678 if (amt_complete
< each_count
)
686 if (type
== ORANGEFS_IO_READ
) {
/* Write path: bump times and extend i_size past the final offset. */
689 file_update_time(file
);
690 if (*offset
> i_size_read(inode
))
691 i_size_write(inode
, *offset
);
695 gossip_debug(GOSSIP_FILE_DEBUG
,
696 "%s(%pU): Value(%d) returned.\n",
704 /** ORANGEFS2 implementation of address space operations */
705 static const struct address_space_operations orangefs_address_operations
= {
706 .writepage
= orangefs_writepage
,
707 .readpage
= orangefs_readpage
,
708 .writepages
= orangefs_writepages
,
709 .set_page_dirty
= __set_page_dirty_nobuffers
,
710 .write_begin
= orangefs_write_begin
,
711 .write_end
= orangefs_write_end
,
712 .invalidatepage
= orangefs_invalidatepage
,
713 .releasepage
= orangefs_releasepage
,
714 .freepage
= orangefs_freepage
,
715 .launder_page
= orangefs_launder_page
,
716 .direct_IO
= orangefs_direct_IO
,
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * ->page_mkwrite: makes a mapped page writable. Inside a pagefault section
 * it waits (killably) on the inode's bitlock, then ensures the page has a
 * whole-page orangefs_write_range owned by the current fsuid/fsgid —
 * reusing a matching one, or laundering and allocating a fresh one.
 * Finally it revalidates the mapping, dirties the page (so writeback
 * during a freeze will see it), and waits for it to be stable.
 */
719 vm_fault_t
orangefs_page_mkwrite(struct vm_fault
*vmf
)
721 struct page
*page
= vmf
->page
;
722 struct inode
*inode
= file_inode(vmf
->vma
->vm_file
);
723 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
724 unsigned long *bitlock
= &orangefs_inode
->bitlock
;
726 struct orangefs_write_range
*wr
;
728 sb_start_pagefault(inode
->i_sb
);
730 if (wait_on_bit(bitlock
, 1, TASK_KILLABLE
)) {
731 ret
= VM_FAULT_RETRY
;
736 if (PageDirty(page
) && !PagePrivate(page
)) {
738 * Should be impossible. If it happens, launder the page
739 * since we don't know what's dirty. This will WARN in
740 * orangefs_writepage_locked.
742 if (orangefs_launder_page(page
)) {
743 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
747 if (PagePrivate(page
)) {
748 wr
= (struct orangefs_write_range
*)page_private(page
);
/* Same credentials: widen the existing range to the whole page. */
749 if (uid_eq(wr
->uid
, current_fsuid()) &&
750 gid_eq(wr
->gid
, current_fsgid())) {
751 wr
->pos
= page_offset(page
);
/* Different credentials: flush the old range before replacing it. */
755 if (orangefs_launder_page(page
)) {
756 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
761 wr
= kmalloc(sizeof *wr
, GFP_KERNEL
);
763 ret
= VM_FAULT_LOCKED
|VM_FAULT_RETRY
;
766 wr
->pos
= page_offset(page
);
768 wr
->uid
= current_fsuid();
769 wr
->gid
= current_fsgid();
770 SetPagePrivate(page
);
771 set_page_private(page
, (unsigned long)wr
);
775 file_update_time(vmf
->vma
->vm_file
);
/* Page was truncated/migrated out from under us: tell the VM. */
776 if (page
->mapping
!= inode
->i_mapping
) {
778 ret
= VM_FAULT_LOCKED
|VM_FAULT_NOPAGE
;
783 * We mark the page dirty already here so that when freeze is in
784 * progress, we are guaranteed that writeback during freezing will
785 * see the dirty page and writeprotect it again.
787 set_page_dirty(page
);
788 wait_for_stable_page(page
);
789 ret
= VM_FAULT_LOCKED
;
791 sb_end_pagefault(inode
->i_sb
);
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * Truncate helper: refreshes the server-side size, updates the local page
 * cache and i_size (truncate_setsize done in a different order, with
 * pagecache_isize_extended() for growth), then sends a TRUNCATE upcall to
 * the server. If the size actually changed, ctime/mtime are flagged for
 * update in iattr->ia_valid.
 */
795 static int orangefs_setattr_size(struct inode
*inode
, struct iattr
*iattr
)
797 struct orangefs_inode_s
*orangefs_inode
= ORANGEFS_I(inode
);
798 struct orangefs_kernel_op_s
*new_op
;
802 gossip_debug(GOSSIP_INODE_DEBUG
,
803 "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
805 get_khandle_from_ino(inode
),
806 &orangefs_inode
->refn
.khandle
,
807 orangefs_inode
->refn
.fs_id
,
810 /* Ensure that we have a up to date size, so we know if it changed. */
811 ret
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_SIZE
);
815 gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
819 orig_size
= i_size_read(inode
);
821 /* This is truncate_setsize in a different order. */
822 truncate_pagecache(inode
, iattr
->ia_size
);
823 i_size_write(inode
, iattr
->ia_size
);
824 if (iattr
->ia_size
> orig_size
)
825 pagecache_isize_extended(inode
, orig_size
, iattr
->ia_size
);
/* Ask the server to perform the actual truncate. */
827 new_op
= op_alloc(ORANGEFS_VFS_OP_TRUNCATE
);
831 new_op
->upcall
.req
.truncate
.refn
= orangefs_inode
->refn
;
832 new_op
->upcall
.req
.truncate
.size
= (__s64
) iattr
->ia_size
;
834 ret
= service_operation(new_op
,
836 get_interruptible_flag(inode
));
839 * the truncate has no downcall members to retrieve, but
840 * the status value tells us if it went through ok or not
842 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: ret:%d:\n", __func__
, ret
);
849 if (orig_size
!= i_size_read(inode
))
850 iattr
->ia_valid
|= ATTR_CTIME
| ATTR_MTIME
;
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * Core setattr: rejects setuid; allows the sticky bit only on the root
 * handle (and strips it before it reaches the server); handles ATTR_SIZE
 * via orangefs_setattr_size(); then records the pending attribute mask
 * under i_lock (merging with a same-credential pending set, or flushing
 * first via write_inode_now() on a credential mismatch), applies the
 * change locally with setattr_copy(), and chains into posix_acl_chmod()
 * for mode changes.
 */
855 int __orangefs_setattr(struct inode
*inode
, struct iattr
*iattr
)
859 if (iattr
->ia_valid
& ATTR_MODE
) {
860 if (iattr
->ia_mode
& (S_ISVTX
)) {
861 if (is_root_handle(inode
)) {
863 * allow sticky bit to be set on root (since
864 * it shows up that way by default anyhow),
865 * but don't show it to the server
867 iattr
->ia_mode
-= S_ISVTX
;
869 gossip_debug(GOSSIP_UTILS_DEBUG
,
870 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
875 if (iattr
->ia_mode
& (S_ISUID
)) {
876 gossip_debug(GOSSIP_UTILS_DEBUG
,
877 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
883 if (iattr
->ia_valid
& ATTR_SIZE
) {
884 ret
= orangefs_setattr_size(inode
, iattr
);
890 spin_lock(&inode
->i_lock
);
891 if (ORANGEFS_I(inode
)->attr_valid
) {
892 if (uid_eq(ORANGEFS_I(inode
)->attr_uid
, current_fsuid()) &&
893 gid_eq(ORANGEFS_I(inode
)->attr_gid
, current_fsgid())) {
894 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
/* Pending attrs belong to someone else: flush them out first. */
896 spin_unlock(&inode
->i_lock
);
897 write_inode_now(inode
, 1);
901 ORANGEFS_I(inode
)->attr_valid
= iattr
->ia_valid
;
902 ORANGEFS_I(inode
)->attr_uid
= current_fsuid();
903 ORANGEFS_I(inode
)->attr_gid
= current_fsgid();
905 setattr_copy(inode
, iattr
);
906 spin_unlock(&inode
->i_lock
);
907 mark_inode_dirty(inode
);
909 if (iattr
->ia_valid
& ATTR_MODE
)
910 /* change mod on a file that has ACLs */
911 ret
= posix_acl_chmod(inode
, inode
->i_mode
);
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * VFS entry point: validate with setattr_prepare(), delegate to
 * __orangefs_setattr(), then sync inode metadata.
 */
919 * Change attributes of an object referenced by dentry.
921 int orangefs_setattr(struct dentry
*dentry
, struct iattr
*iattr
)
924 gossip_debug(GOSSIP_INODE_DEBUG
, "__orangefs_setattr: called on %pd\n",
926 ret
= setattr_prepare(dentry
, iattr
);
929 ret
= __orangefs_setattr(d_inode(dentry
), iattr
);
930 sync_inode_metadata(d_inode(dentry
), 1);
932 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_setattr: returning %d\n",
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * ->getattr: refreshes attributes from the server (fetching size only when
 * STATX_SIZE was requested), fills *stat with generic_fillattr(), drops
 * STATX_SIZE from result_mask when it was not asked for, and reports the
 * immutable/append flags.
 */
938 * Obtain attributes of an object given a dentry
940 int orangefs_getattr(const struct path
*path
, struct kstat
*stat
,
941 u32 request_mask
, unsigned int flags
)
944 struct inode
*inode
= path
->dentry
->d_inode
;
946 gossip_debug(GOSSIP_INODE_DEBUG
,
947 "orangefs_getattr: called on %pd mask %u\n",
948 path
->dentry
, request_mask
);
950 ret
= orangefs_inode_getattr(inode
,
951 request_mask
& STATX_SIZE
? ORANGEFS_GETATTR_SIZE
: 0);
953 generic_fillattr(inode
, stat
);
955 /* override block size reported to stat */
956 if (!(request_mask
& STATX_SIZE
))
957 stat
->result_mask
&= ~STATX_SIZE
;
959 stat
->attributes_mask
= STATX_ATTR_IMMUTABLE
|
961 if (inode
->i_flags
& S_IMMUTABLE
)
962 stat
->attributes
|= STATX_ATTR_IMMUTABLE
;
963 if (inode
->i_flags
& S_APPEND
)
964 stat
->attributes
|= STATX_ATTR_APPEND
;
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * ->permission: cannot block in RCU walk (MAY_NOT_BLOCK), otherwise
 * refreshes attributes from the server and defers the actual check to
 * generic_permission().
 */
969 int orangefs_permission(struct inode
*inode
, int mask
)
973 if (mask
& MAY_NOT_BLOCK
)
976 gossip_debug(GOSSIP_INODE_DEBUG
, "%s: refreshing\n", __func__
);
978 /* Make sure the permission (and other common attrs) are up to date. */
979 ret
= orangefs_inode_getattr(inode
, 0);
983 return generic_permission(inode
, mask
);
/*
 * NOTE(review): incomplete extraction — the flag tests selecting which
 * ATTR_* bits to set are missing between the fragments below.
 * ->update_time: apply the time update locally via generic_update_time(),
 * then build a minimal iattr carrying the corresponding ATTR_ATIME/
 * ATTR_CTIME/ATTR_MTIME bits and push it through __orangefs_setattr().
 */
986 int orangefs_update_time(struct inode
*inode
, struct timespec64
*time
, int flags
)
989 gossip_debug(GOSSIP_INODE_DEBUG
, "orangefs_update_time: %pU\n",
990 get_khandle_from_ino(inode
));
991 generic_update_time(inode
, time
, flags
);
992 memset(&iattr
, 0, sizeof iattr
);
994 iattr
.ia_valid
|= ATTR_ATIME
;
996 iattr
.ia_valid
|= ATTR_CTIME
;
998 iattr
.ia_valid
|= ATTR_MTIME
;
999 return __orangefs_setattr(inode
, &iattr
);
1002 /* ORANGEFS2 implementation of VFS inode operations for files */
1003 static const struct inode_operations orangefs_file_inode_operations
= {
1004 .get_acl
= orangefs_get_acl
,
1005 .set_acl
= orangefs_set_acl
,
1006 .setattr
= orangefs_setattr
,
1007 .getattr
= orangefs_getattr
,
1008 .listxattr
= orangefs_listxattr
,
1009 .permission
= orangefs_permission
,
1010 .update_time
= orangefs_update_time
,
/*
 * NOTE(review): incomplete extraction — the case labels of this switch are
 * missing; branch ordering suggests regular file / symlink / directory /
 * default, respectively — TODO confirm.
 * Wires up a_ops and the per-file-type inode/file operation tables for a
 * freshly created or looked-up inode.
 */
1013 static int orangefs_init_iops(struct inode
*inode
)
1015 inode
->i_mapping
->a_ops
= &orangefs_address_operations
;
1017 switch (inode
->i_mode
& S_IFMT
) {
1019 inode
->i_op
= &orangefs_file_inode_operations
;
1020 inode
->i_fop
= &orangefs_file_operations
;
1023 inode
->i_op
= &orangefs_symlink_inode_operations
;
1026 inode
->i_op
= &orangefs_dir_inode_operations
;
1027 inode
->i_fop
= &orangefs_dir_operations
;
1030 gossip_debug(GOSSIP_INODE_DEBUG
,
1031 "%s: unsupported mode\n",
/*
 * NOTE(review): fragmented extraction; logic is a single delegation.
 */
1040 * Given an ORANGEFS object identifier (fsid, handle), convert it into
1041 * a ino_t type that will be used as a hash-index from where the handle will
1042 * be searched for in the VFS hash table of inodes.
1044 static inline ino_t
orangefs_handle_hash(struct orangefs_object_kref
*ref
)
1048 return orangefs_khandle_to_ino(&(ref
->khandle
));
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * iget5_locked() "set" callback: copies the object reference into the
 * orangefs inode and resets its per-inode bookkeeping (pending-attr state,
 * xattr cache, mapping timestamp, bitlock).
 */
1052 * Called to set up an inode from iget5_locked.
1054 static int orangefs_set_inode(struct inode
*inode
, void *data
)
1056 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1057 ORANGEFS_I(inode
)->refn
.fs_id
= ref
->fs_id
;
1058 ORANGEFS_I(inode
)->refn
.khandle
= ref
->khandle
;
1059 ORANGEFS_I(inode
)->attr_valid
= 0;
1060 hash_init(ORANGEFS_I(inode
)->xattr_cache
);
/* jiffies - 1 makes the cached mapping look already stale. */
1061 ORANGEFS_I(inode
)->mapping_time
= jiffies
- 1;
1062 ORANGEFS_I(inode
)->bitlock
= 0;
/*
 * NOTE(review): incomplete extraction — some lines missing.
 * iget5_locked() "test" callback: an inode matches when both the khandle
 * and the fs_id agree with the lookup reference.
 */
1067 * Called to determine if handles match.
1069 static int orangefs_test_inode(struct inode
*inode
, void *data
)
1071 struct orangefs_object_kref
*ref
= (struct orangefs_object_kref
*) data
;
1072 struct orangefs_inode_s
*orangefs_inode
= NULL
;
1074 orangefs_inode
= ORANGEFS_I(inode
);
1075 /* test handles and fs_ids... */
1076 return (!ORANGEFS_khandle_cmp(&(orangefs_inode
->refn
.khandle
),
1078 orangefs_inode
->refn
.fs_id
== ref
->fs_id
);
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * numbered fragments below.
 * Looks up (or creates) the VFS inode for an ORANGEFS object via
 * iget5_locked() keyed by orangefs_handle_hash(). A pre-existing inode is
 * returned as-is; a new one is populated from the server
 * (ORANGEFS_GETATTR_NEW), given its i_ino and operation tables, and
 * unlocked. Errors come back as ERR_PTR().
 */
1082 * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
1085 * @sb: the file system super block instance.
1086 * @ref: The ORANGEFS object for which we are trying to locate an inode.
1088 struct inode
*orangefs_iget(struct super_block
*sb
,
1089 struct orangefs_object_kref
*ref
)
1091 struct inode
*inode
= NULL
;
1095 hash
= orangefs_handle_hash(ref
);
1096 inode
= iget5_locked(sb
,
1098 orangefs_test_inode
,
1103 return ERR_PTR(-ENOMEM
);
1105 if (!(inode
->i_state
& I_NEW
))
1108 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1111 return ERR_PTR(error
);
1114 inode
->i_ino
= hash
; /* needed for stat etc */
1115 orangefs_init_iops(inode
);
1116 unlock_new_inode(inode
);
1118 gossip_debug(GOSSIP_INODE_DEBUG
,
1119 "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
/*
 * NOTE(review): incomplete extraction — statements are missing between the
 * fragments below and the function is cut off at the end of this view.
 * Allocates a fresh VFS inode for a just-created object: new_inode(),
 * orangefs_set_inode() with the object reference, a getattr to populate it
 * (ORANGEFS_GETATTR_NEW), operation-table setup, insertion into the inode
 * hash via insert_inode_locked4(), and ACL initialization from the parent
 * directory. Errors are returned as ERR_PTR().
 */
1129 * Allocate an inode for a newly created file and insert it into the inode hash.
1131 struct inode
*orangefs_new_inode(struct super_block
*sb
, struct inode
*dir
,
1132 int mode
, dev_t dev
, struct orangefs_object_kref
*ref
)
1134 unsigned long hash
= orangefs_handle_hash(ref
);
1135 struct inode
*inode
;
1138 gossip_debug(GOSSIP_INODE_DEBUG
,
1139 "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
1146 inode
= new_inode(sb
);
1148 return ERR_PTR(-ENOMEM
);
1150 orangefs_set_inode(inode
, ref
);
1151 inode
->i_ino
= hash
; /* needed for stat etc */
1153 error
= orangefs_inode_getattr(inode
, ORANGEFS_GETATTR_NEW
);
1157 orangefs_init_iops(inode
);
1158 inode
->i_rdev
= dev
;
1160 error
= insert_inode_locked4(inode
, hash
, orangefs_test_inode
, ref
);
1164 gossip_debug(GOSSIP_INODE_DEBUG
,
1165 "Initializing ACL's for inode %pU\n",
1166 get_khandle_from_ino(inode
));
1167 orangefs_init_acl(inode
, dir
);
1172 return ERR_PTR(error
);