2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
5 This program can be distributed under the terms of the GNU GPL.
11 #include <linux/pagemap.h>
12 #include <linux/slab.h>
13 #include <linux/file.h>
14 #include <linux/seq_file.h>
15 #include <linux/init.h>
16 #include <linux/module.h>
17 #include <linux/moduleparam.h>
18 #include <linux/fs_context.h>
19 #include <linux/fs_parser.h>
20 #include <linux/statfs.h>
21 #include <linux/random.h>
22 #include <linux/sched.h>
23 #include <linux/exportfs.h>
24 #include <linux/posix_acl.h>
25 #include <linux/pid_namespace.h>
27 MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
28 MODULE_DESCRIPTION("Filesystem in Userspace");
29 MODULE_LICENSE("GPL");
31 static struct kmem_cache
*fuse_inode_cachep
;
32 struct list_head fuse_conn_list
;
33 DEFINE_MUTEX(fuse_mutex
);
35 static int set_global_limit(const char *val
, const struct kernel_param
*kp
);
37 unsigned max_user_bgreq
;
38 module_param_call(max_user_bgreq
, set_global_limit
, param_get_uint
,
39 &max_user_bgreq
, 0644);
40 __MODULE_PARM_TYPE(max_user_bgreq
, "uint");
41 MODULE_PARM_DESC(max_user_bgreq
,
42 "Global limit for the maximum number of backgrounded requests an "
43 "unprivileged user can set");
45 unsigned max_user_congthresh
;
46 module_param_call(max_user_congthresh
, set_global_limit
, param_get_uint
,
47 &max_user_congthresh
, 0644);
48 __MODULE_PARM_TYPE(max_user_congthresh
, "uint");
49 MODULE_PARM_DESC(max_user_congthresh
,
50 "Global limit for the maximum congestion threshold an "
51 "unprivileged user can set");
53 #define FUSE_SUPER_MAGIC 0x65735546
55 #define FUSE_DEFAULT_BLKSIZE 512
57 /** Maximum number of outstanding background requests */
58 #define FUSE_DEFAULT_MAX_BACKGROUND 12
60 /** Congestion starts at 75% of maximum */
61 #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)
64 static struct file_system_type fuseblk_fs_type
;
67 struct fuse_forget_link
*fuse_alloc_forget(void)
69 return kzalloc(sizeof(struct fuse_forget_link
), GFP_KERNEL_ACCOUNT
);
72 static struct inode
*fuse_alloc_inode(struct super_block
*sb
)
74 struct fuse_inode
*fi
;
76 fi
= kmem_cache_alloc(fuse_inode_cachep
, GFP_KERNEL
);
87 mutex_init(&fi
->mutex
);
88 spin_lock_init(&fi
->lock
);
89 fi
->forget
= fuse_alloc_forget();
91 kmem_cache_free(fuse_inode_cachep
, fi
);
98 static void fuse_free_inode(struct inode
*inode
)
100 struct fuse_inode
*fi
= get_fuse_inode(inode
);
102 mutex_destroy(&fi
->mutex
);
104 kmem_cache_free(fuse_inode_cachep
, fi
);
107 static void fuse_evict_inode(struct inode
*inode
)
109 struct fuse_inode
*fi
= get_fuse_inode(inode
);
111 truncate_inode_pages_final(&inode
->i_data
);
113 if (inode
->i_sb
->s_flags
& SB_ACTIVE
) {
114 struct fuse_conn
*fc
= get_fuse_conn(inode
);
115 fuse_queue_forget(fc
, fi
->forget
, fi
->nodeid
, fi
->nlookup
);
118 if (S_ISREG(inode
->i_mode
) && !is_bad_inode(inode
)) {
119 WARN_ON(!list_empty(&fi
->write_files
));
120 WARN_ON(!list_empty(&fi
->queued_writes
));
124 static int fuse_remount_fs(struct super_block
*sb
, int *flags
, char *data
)
127 if (*flags
& SB_MANDLOCK
)
134 * ino_t is 32 bits wide on 32-bit architectures. We have to squash the 64-bit value down
135 * so that it will fit.
137 static ino_t
fuse_squash_ino(u64 ino64
)
139 ino_t ino
= (ino_t
) ino64
;
140 if (sizeof(ino_t
) < sizeof(u64
))
141 ino
^= ino64
>> (sizeof(u64
) - sizeof(ino_t
)) * 8;
145 void fuse_change_attributes_common(struct inode
*inode
, struct fuse_attr
*attr
,
148 struct fuse_conn
*fc
= get_fuse_conn(inode
);
149 struct fuse_inode
*fi
= get_fuse_inode(inode
);
151 lockdep_assert_held(&fi
->lock
);
153 fi
->attr_version
= atomic64_inc_return(&fc
->attr_version
);
154 fi
->i_time
= attr_valid
;
155 WRITE_ONCE(fi
->inval_mask
, 0);
157 inode
->i_ino
= fuse_squash_ino(attr
->ino
);
158 inode
->i_mode
= (inode
->i_mode
& S_IFMT
) | (attr
->mode
& 07777);
159 set_nlink(inode
, attr
->nlink
);
160 inode
->i_uid
= make_kuid(fc
->user_ns
, attr
->uid
);
161 inode
->i_gid
= make_kgid(fc
->user_ns
, attr
->gid
);
162 inode
->i_blocks
= attr
->blocks
;
163 inode
->i_atime
.tv_sec
= attr
->atime
;
164 inode
->i_atime
.tv_nsec
= attr
->atimensec
;
165 /* mtime from server may be stale due to local buffered write */
166 if (!fc
->writeback_cache
|| !S_ISREG(inode
->i_mode
)) {
167 inode
->i_mtime
.tv_sec
= attr
->mtime
;
168 inode
->i_mtime
.tv_nsec
= attr
->mtimensec
;
169 inode
->i_ctime
.tv_sec
= attr
->ctime
;
170 inode
->i_ctime
.tv_nsec
= attr
->ctimensec
;
173 if (attr
->blksize
!= 0)
174 inode
->i_blkbits
= ilog2(attr
->blksize
);
176 inode
->i_blkbits
= inode
->i_sb
->s_blocksize_bits
;
179 * Don't set the sticky bit in i_mode, unless we want the VFS
180 * to check permissions. This prevents failures due to the
181 * check in may_delete().
183 fi
->orig_i_mode
= inode
->i_mode
;
184 if (!fc
->default_permissions
)
185 inode
->i_mode
&= ~S_ISVTX
;
187 fi
->orig_ino
= attr
->ino
;
190 void fuse_change_attributes(struct inode
*inode
, struct fuse_attr
*attr
,
191 u64 attr_valid
, u64 attr_version
)
193 struct fuse_conn
*fc
= get_fuse_conn(inode
);
194 struct fuse_inode
*fi
= get_fuse_inode(inode
);
195 bool is_wb
= fc
->writeback_cache
;
197 struct timespec64 old_mtime
;
199 spin_lock(&fi
->lock
);
200 if ((attr_version
!= 0 && fi
->attr_version
> attr_version
) ||
201 test_bit(FUSE_I_SIZE_UNSTABLE
, &fi
->state
)) {
202 spin_unlock(&fi
->lock
);
206 old_mtime
= inode
->i_mtime
;
207 fuse_change_attributes_common(inode
, attr
, attr_valid
);
209 oldsize
= inode
->i_size
;
211 * When writeback_cache is enabled, cached writes beyond EOF extend the
212 * local i_size without keeping the userspace server in sync, so the
213 * attr->size coming from the server can be stale and cannot be trusted.
215 if (!is_wb
|| !S_ISREG(inode
->i_mode
))
216 i_size_write(inode
, attr
->size
);
217 spin_unlock(&fi
->lock
);
219 if (!is_wb
&& S_ISREG(inode
->i_mode
)) {
222 if (oldsize
!= attr
->size
) {
223 truncate_pagecache(inode
, attr
->size
);
224 if (!fc
->explicit_inval_data
)
226 } else if (fc
->auto_inval_data
) {
227 struct timespec64 new_mtime
= {
228 .tv_sec
= attr
->mtime
,
229 .tv_nsec
= attr
->mtimensec
,
233 * Auto inval mode also checks and invalidates if mtime
236 if (!timespec64_equal(&old_mtime
, &new_mtime
))
241 invalidate_inode_pages2(inode
->i_mapping
);
245 static void fuse_init_inode(struct inode
*inode
, struct fuse_attr
*attr
)
247 inode
->i_mode
= attr
->mode
& S_IFMT
;
248 inode
->i_size
= attr
->size
;
249 inode
->i_mtime
.tv_sec
= attr
->mtime
;
250 inode
->i_mtime
.tv_nsec
= attr
->mtimensec
;
251 inode
->i_ctime
.tv_sec
= attr
->ctime
;
252 inode
->i_ctime
.tv_nsec
= attr
->ctimensec
;
253 if (S_ISREG(inode
->i_mode
)) {
254 fuse_init_common(inode
);
255 fuse_init_file_inode(inode
);
256 } else if (S_ISDIR(inode
->i_mode
))
257 fuse_init_dir(inode
);
258 else if (S_ISLNK(inode
->i_mode
))
259 fuse_init_symlink(inode
);
260 else if (S_ISCHR(inode
->i_mode
) || S_ISBLK(inode
->i_mode
) ||
261 S_ISFIFO(inode
->i_mode
) || S_ISSOCK(inode
->i_mode
)) {
262 fuse_init_common(inode
);
263 init_special_inode(inode
, inode
->i_mode
,
264 new_decode_dev(attr
->rdev
));
269 int fuse_inode_eq(struct inode
*inode
, void *_nodeidp
)
271 u64 nodeid
= *(u64
*) _nodeidp
;
272 if (get_node_id(inode
) == nodeid
)
278 static int fuse_inode_set(struct inode
*inode
, void *_nodeidp
)
280 u64 nodeid
= *(u64
*) _nodeidp
;
281 get_fuse_inode(inode
)->nodeid
= nodeid
;
285 struct inode
*fuse_iget(struct super_block
*sb
, u64 nodeid
,
286 int generation
, struct fuse_attr
*attr
,
287 u64 attr_valid
, u64 attr_version
)
290 struct fuse_inode
*fi
;
291 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
294 inode
= iget5_locked(sb
, nodeid
, fuse_inode_eq
, fuse_inode_set
, &nodeid
);
298 if ((inode
->i_state
& I_NEW
)) {
299 inode
->i_flags
|= S_NOATIME
;
300 if (!fc
->writeback_cache
|| !S_ISREG(attr
->mode
))
301 inode
->i_flags
|= S_NOCMTIME
;
302 inode
->i_generation
= generation
;
303 fuse_init_inode(inode
, attr
);
304 unlock_new_inode(inode
);
305 } else if ((inode
->i_mode
^ attr
->mode
) & S_IFMT
) {
306 /* Inode has changed type, any I/O on the old should fail */
307 make_bad_inode(inode
);
312 fi
= get_fuse_inode(inode
);
313 spin_lock(&fi
->lock
);
315 spin_unlock(&fi
->lock
);
316 fuse_change_attributes(inode
, attr
, attr_valid
, attr_version
);
321 int fuse_reverse_inval_inode(struct super_block
*sb
, u64 nodeid
,
322 loff_t offset
, loff_t len
)
328 inode
= ilookup5(sb
, nodeid
, fuse_inode_eq
, &nodeid
);
332 fuse_invalidate_attr(inode
);
333 forget_all_cached_acls(inode
);
335 pg_start
= offset
>> PAGE_SHIFT
;
339 pg_end
= (offset
+ len
- 1) >> PAGE_SHIFT
;
340 invalidate_inode_pages2_range(inode
->i_mapping
,
347 bool fuse_lock_inode(struct inode
*inode
)
351 if (!get_fuse_conn(inode
)->parallel_dirops
) {
352 mutex_lock(&get_fuse_inode(inode
)->mutex
);
359 void fuse_unlock_inode(struct inode
*inode
, bool locked
)
362 mutex_unlock(&get_fuse_inode(inode
)->mutex
);
365 static void fuse_umount_begin(struct super_block
*sb
)
367 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
369 if (!fc
->no_force_umount
)
373 static void fuse_send_destroy(struct fuse_conn
*fc
)
378 args
.opcode
= FUSE_DESTROY
;
381 fuse_simple_request(fc
, &args
);
385 static void fuse_put_super(struct super_block
*sb
)
387 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
389 mutex_lock(&fuse_mutex
);
390 list_del(&fc
->entry
);
391 fuse_ctl_remove_conn(fc
);
392 mutex_unlock(&fuse_mutex
);
397 static void convert_fuse_statfs(struct kstatfs
*stbuf
, struct fuse_kstatfs
*attr
)
399 stbuf
->f_type
= FUSE_SUPER_MAGIC
;
400 stbuf
->f_bsize
= attr
->bsize
;
401 stbuf
->f_frsize
= attr
->frsize
;
402 stbuf
->f_blocks
= attr
->blocks
;
403 stbuf
->f_bfree
= attr
->bfree
;
404 stbuf
->f_bavail
= attr
->bavail
;
405 stbuf
->f_files
= attr
->files
;
406 stbuf
->f_ffree
= attr
->ffree
;
407 stbuf
->f_namelen
= attr
->namelen
;
408 /* fsid is left zero */
411 static int fuse_statfs(struct dentry
*dentry
, struct kstatfs
*buf
)
413 struct super_block
*sb
= dentry
->d_sb
;
414 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
416 struct fuse_statfs_out outarg
;
419 if (!fuse_allow_current_process(fc
)) {
420 buf
->f_type
= FUSE_SUPER_MAGIC
;
424 memset(&outarg
, 0, sizeof(outarg
));
426 args
.opcode
= FUSE_STATFS
;
427 args
.nodeid
= get_node_id(d_inode(dentry
));
428 args
.out_numargs
= 1;
429 args
.out_args
[0].size
= sizeof(outarg
);
430 args
.out_args
[0].value
= &outarg
;
431 err
= fuse_simple_request(fc
, &args
);
433 convert_fuse_statfs(buf
, &outarg
.st
);
444 OPT_DEFAULT_PERMISSIONS
,
451 static const struct fs_parameter_spec fuse_fs_parameters
[] = {
452 fsparam_string ("source", OPT_SOURCE
),
453 fsparam_u32 ("fd", OPT_FD
),
454 fsparam_u32oct ("rootmode", OPT_ROOTMODE
),
455 fsparam_u32 ("user_id", OPT_USER_ID
),
456 fsparam_u32 ("group_id", OPT_GROUP_ID
),
457 fsparam_flag ("default_permissions", OPT_DEFAULT_PERMISSIONS
),
458 fsparam_flag ("allow_other", OPT_ALLOW_OTHER
),
459 fsparam_u32 ("max_read", OPT_MAX_READ
),
460 fsparam_u32 ("blksize", OPT_BLKSIZE
),
461 fsparam_string ("subtype", OPT_SUBTYPE
),
465 static int fuse_parse_param(struct fs_context
*fc
, struct fs_parameter
*param
)
467 struct fs_parse_result result
;
468 struct fuse_fs_context
*ctx
= fc
->fs_private
;
471 opt
= fs_parse(fc
, fuse_fs_parameters
, param
, &result
);
478 return invalfc(fc
, "Multiple sources specified");
479 fc
->source
= param
->string
;
480 param
->string
= NULL
;
485 return invalfc(fc
, "Multiple subtypes specified");
486 ctx
->subtype
= param
->string
;
487 param
->string
= NULL
;
491 ctx
->fd
= result
.uint_32
;
492 ctx
->fd_present
= true;
496 if (!fuse_valid_type(result
.uint_32
))
497 return invalfc(fc
, "Invalid rootmode");
498 ctx
->rootmode
= result
.uint_32
;
499 ctx
->rootmode_present
= true;
503 ctx
->user_id
= make_kuid(fc
->user_ns
, result
.uint_32
);
504 if (!uid_valid(ctx
->user_id
))
505 return invalfc(fc
, "Invalid user_id");
506 ctx
->user_id_present
= true;
510 ctx
->group_id
= make_kgid(fc
->user_ns
, result
.uint_32
);
511 if (!gid_valid(ctx
->group_id
))
512 return invalfc(fc
, "Invalid group_id");
513 ctx
->group_id_present
= true;
516 case OPT_DEFAULT_PERMISSIONS
:
517 ctx
->default_permissions
= true;
520 case OPT_ALLOW_OTHER
:
521 ctx
->allow_other
= true;
525 ctx
->max_read
= result
.uint_32
;
530 return invalfc(fc
, "blksize only supported for fuseblk");
531 ctx
->blksize
= result
.uint_32
;
541 static void fuse_free_fc(struct fs_context
*fc
)
543 struct fuse_fs_context
*ctx
= fc
->fs_private
;
551 static int fuse_show_options(struct seq_file
*m
, struct dentry
*root
)
553 struct super_block
*sb
= root
->d_sb
;
554 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
556 if (fc
->no_mount_options
)
559 seq_printf(m
, ",user_id=%u", from_kuid_munged(fc
->user_ns
, fc
->user_id
));
560 seq_printf(m
, ",group_id=%u", from_kgid_munged(fc
->user_ns
, fc
->group_id
));
561 if (fc
->default_permissions
)
562 seq_puts(m
, ",default_permissions");
564 seq_puts(m
, ",allow_other");
565 if (fc
->max_read
!= ~0)
566 seq_printf(m
, ",max_read=%u", fc
->max_read
);
567 if (sb
->s_bdev
&& sb
->s_blocksize
!= FUSE_DEFAULT_BLKSIZE
)
568 seq_printf(m
, ",blksize=%lu", sb
->s_blocksize
);
572 static void fuse_iqueue_init(struct fuse_iqueue
*fiq
,
573 const struct fuse_iqueue_ops
*ops
,
576 memset(fiq
, 0, sizeof(struct fuse_iqueue
));
577 spin_lock_init(&fiq
->lock
);
578 init_waitqueue_head(&fiq
->waitq
);
579 INIT_LIST_HEAD(&fiq
->pending
);
580 INIT_LIST_HEAD(&fiq
->interrupts
);
581 fiq
->forget_list_tail
= &fiq
->forget_list_head
;
587 static void fuse_pqueue_init(struct fuse_pqueue
*fpq
)
591 spin_lock_init(&fpq
->lock
);
592 for (i
= 0; i
< FUSE_PQ_HASH_SIZE
; i
++)
593 INIT_LIST_HEAD(&fpq
->processing
[i
]);
594 INIT_LIST_HEAD(&fpq
->io
);
598 void fuse_conn_init(struct fuse_conn
*fc
, struct user_namespace
*user_ns
,
599 const struct fuse_iqueue_ops
*fiq_ops
, void *fiq_priv
)
601 memset(fc
, 0, sizeof(*fc
));
602 spin_lock_init(&fc
->lock
);
603 spin_lock_init(&fc
->bg_lock
);
604 init_rwsem(&fc
->killsb
);
605 refcount_set(&fc
->count
, 1);
606 atomic_set(&fc
->dev_count
, 1);
607 init_waitqueue_head(&fc
->blocked_waitq
);
608 fuse_iqueue_init(&fc
->iq
, fiq_ops
, fiq_priv
);
609 INIT_LIST_HEAD(&fc
->bg_queue
);
610 INIT_LIST_HEAD(&fc
->entry
);
611 INIT_LIST_HEAD(&fc
->devices
);
612 atomic_set(&fc
->num_waiting
, 0);
613 fc
->max_background
= FUSE_DEFAULT_MAX_BACKGROUND
;
614 fc
->congestion_threshold
= FUSE_DEFAULT_CONGESTION_THRESHOLD
;
615 atomic64_set(&fc
->khctr
, 0);
616 fc
->polled_files
= RB_ROOT
;
620 atomic64_set(&fc
->attr_version
, 1);
621 get_random_bytes(&fc
->scramble_key
, sizeof(fc
->scramble_key
));
622 fc
->pid_ns
= get_pid_ns(task_active_pid_ns(current
));
623 fc
->user_ns
= get_user_ns(user_ns
);
624 fc
->max_pages
= FUSE_DEFAULT_MAX_PAGES_PER_REQ
;
626 EXPORT_SYMBOL_GPL(fuse_conn_init
);
628 void fuse_conn_put(struct fuse_conn
*fc
)
630 if (refcount_dec_and_test(&fc
->count
)) {
631 struct fuse_iqueue
*fiq
= &fc
->iq
;
633 if (fiq
->ops
->release
)
634 fiq
->ops
->release(fiq
);
635 put_pid_ns(fc
->pid_ns
);
636 put_user_ns(fc
->user_ns
);
640 EXPORT_SYMBOL_GPL(fuse_conn_put
);
642 struct fuse_conn
*fuse_conn_get(struct fuse_conn
*fc
)
644 refcount_inc(&fc
->count
);
647 EXPORT_SYMBOL_GPL(fuse_conn_get
);
649 static struct inode
*fuse_get_root_inode(struct super_block
*sb
, unsigned mode
)
651 struct fuse_attr attr
;
652 memset(&attr
, 0, sizeof(attr
));
655 attr
.ino
= FUSE_ROOT_ID
;
657 return fuse_iget(sb
, 1, 0, &attr
, 0, 0);
660 struct fuse_inode_handle
{
665 static struct dentry
*fuse_get_dentry(struct super_block
*sb
,
666 struct fuse_inode_handle
*handle
)
668 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
670 struct dentry
*entry
;
673 if (handle
->nodeid
== 0)
676 inode
= ilookup5(sb
, handle
->nodeid
, fuse_inode_eq
, &handle
->nodeid
);
678 struct fuse_entry_out outarg
;
679 const struct qstr name
= QSTR_INIT(".", 1);
681 if (!fc
->export_support
)
684 err
= fuse_lookup_name(sb
, handle
->nodeid
, &name
, &outarg
,
686 if (err
&& err
!= -ENOENT
)
693 if (get_node_id(inode
) != handle
->nodeid
)
697 if (inode
->i_generation
!= handle
->generation
)
700 entry
= d_obtain_alias(inode
);
701 if (!IS_ERR(entry
) && get_node_id(inode
) != FUSE_ROOT_ID
)
702 fuse_invalidate_entry_cache(entry
);
712 static int fuse_encode_fh(struct inode
*inode
, u32
*fh
, int *max_len
,
713 struct inode
*parent
)
715 int len
= parent
? 6 : 3;
719 if (*max_len
< len
) {
721 return FILEID_INVALID
;
724 nodeid
= get_fuse_inode(inode
)->nodeid
;
725 generation
= inode
->i_generation
;
727 fh
[0] = (u32
)(nodeid
>> 32);
728 fh
[1] = (u32
)(nodeid
& 0xffffffff);
732 nodeid
= get_fuse_inode(parent
)->nodeid
;
733 generation
= parent
->i_generation
;
735 fh
[3] = (u32
)(nodeid
>> 32);
736 fh
[4] = (u32
)(nodeid
& 0xffffffff);
741 return parent
? 0x82 : 0x81;
744 static struct dentry
*fuse_fh_to_dentry(struct super_block
*sb
,
745 struct fid
*fid
, int fh_len
, int fh_type
)
747 struct fuse_inode_handle handle
;
749 if ((fh_type
!= 0x81 && fh_type
!= 0x82) || fh_len
< 3)
752 handle
.nodeid
= (u64
) fid
->raw
[0] << 32;
753 handle
.nodeid
|= (u64
) fid
->raw
[1];
754 handle
.generation
= fid
->raw
[2];
755 return fuse_get_dentry(sb
, &handle
);
758 static struct dentry
*fuse_fh_to_parent(struct super_block
*sb
,
759 struct fid
*fid
, int fh_len
, int fh_type
)
761 struct fuse_inode_handle parent
;
763 if (fh_type
!= 0x82 || fh_len
< 6)
766 parent
.nodeid
= (u64
) fid
->raw
[3] << 32;
767 parent
.nodeid
|= (u64
) fid
->raw
[4];
768 parent
.generation
= fid
->raw
[5];
769 return fuse_get_dentry(sb
, &parent
);
772 static struct dentry
*fuse_get_parent(struct dentry
*child
)
774 struct inode
*child_inode
= d_inode(child
);
775 struct fuse_conn
*fc
= get_fuse_conn(child_inode
);
777 struct dentry
*parent
;
778 struct fuse_entry_out outarg
;
779 const struct qstr name
= QSTR_INIT("..", 2);
782 if (!fc
->export_support
)
783 return ERR_PTR(-ESTALE
);
785 err
= fuse_lookup_name(child_inode
->i_sb
, get_node_id(child_inode
),
786 &name
, &outarg
, &inode
);
789 return ERR_PTR(-ESTALE
);
793 parent
= d_obtain_alias(inode
);
794 if (!IS_ERR(parent
) && get_node_id(inode
) != FUSE_ROOT_ID
)
795 fuse_invalidate_entry_cache(parent
);
800 static const struct export_operations fuse_export_operations
= {
801 .fh_to_dentry
= fuse_fh_to_dentry
,
802 .fh_to_parent
= fuse_fh_to_parent
,
803 .encode_fh
= fuse_encode_fh
,
804 .get_parent
= fuse_get_parent
,
807 static const struct super_operations fuse_super_operations
= {
808 .alloc_inode
= fuse_alloc_inode
,
809 .free_inode
= fuse_free_inode
,
810 .evict_inode
= fuse_evict_inode
,
811 .write_inode
= fuse_write_inode
,
812 .drop_inode
= generic_delete_inode
,
813 .remount_fs
= fuse_remount_fs
,
814 .put_super
= fuse_put_super
,
815 .umount_begin
= fuse_umount_begin
,
816 .statfs
= fuse_statfs
,
817 .show_options
= fuse_show_options
,
820 static void sanitize_global_limit(unsigned *limit
)
823 * The default maximum number of async requests is calculated to consume
824 * 1/2^13 of the total memory, assuming 392 bytes per request.
827 *limit
= ((totalram_pages() << PAGE_SHIFT
) >> 13) / 392;
829 if (*limit
>= 1 << 16)
830 *limit
= (1 << 16) - 1;
833 static int set_global_limit(const char *val
, const struct kernel_param
*kp
)
837 rv
= param_set_uint(val
, kp
);
841 sanitize_global_limit((unsigned *)kp
->arg
);
846 static void process_init_limits(struct fuse_conn
*fc
, struct fuse_init_out
*arg
)
848 int cap_sys_admin
= capable(CAP_SYS_ADMIN
);
853 sanitize_global_limit(&max_user_bgreq
);
854 sanitize_global_limit(&max_user_congthresh
);
856 spin_lock(&fc
->bg_lock
);
857 if (arg
->max_background
) {
858 fc
->max_background
= arg
->max_background
;
860 if (!cap_sys_admin
&& fc
->max_background
> max_user_bgreq
)
861 fc
->max_background
= max_user_bgreq
;
863 if (arg
->congestion_threshold
) {
864 fc
->congestion_threshold
= arg
->congestion_threshold
;
866 if (!cap_sys_admin
&&
867 fc
->congestion_threshold
> max_user_congthresh
)
868 fc
->congestion_threshold
= max_user_congthresh
;
870 spin_unlock(&fc
->bg_lock
);
873 struct fuse_init_args
{
874 struct fuse_args args
;
875 struct fuse_init_in in
;
876 struct fuse_init_out out
;
879 static void process_init_reply(struct fuse_conn
*fc
, struct fuse_args
*args
,
882 struct fuse_init_args
*ia
= container_of(args
, typeof(*ia
), args
);
883 struct fuse_init_out
*arg
= &ia
->out
;
885 if (error
|| arg
->major
!= FUSE_KERNEL_VERSION
)
888 unsigned long ra_pages
;
890 process_init_limits(fc
, arg
);
892 if (arg
->minor
>= 6) {
893 ra_pages
= arg
->max_readahead
/ PAGE_SIZE
;
894 if (arg
->flags
& FUSE_ASYNC_READ
)
896 if (!(arg
->flags
& FUSE_POSIX_LOCKS
))
898 if (arg
->minor
>= 17) {
899 if (!(arg
->flags
& FUSE_FLOCK_LOCKS
))
902 if (!(arg
->flags
& FUSE_POSIX_LOCKS
))
905 if (arg
->flags
& FUSE_ATOMIC_O_TRUNC
)
906 fc
->atomic_o_trunc
= 1;
907 if (arg
->minor
>= 9) {
908 /* LOOKUP has dependency on proto version */
909 if (arg
->flags
& FUSE_EXPORT_SUPPORT
)
910 fc
->export_support
= 1;
912 if (arg
->flags
& FUSE_BIG_WRITES
)
914 if (arg
->flags
& FUSE_DONT_MASK
)
916 if (arg
->flags
& FUSE_AUTO_INVAL_DATA
)
917 fc
->auto_inval_data
= 1;
918 else if (arg
->flags
& FUSE_EXPLICIT_INVAL_DATA
)
919 fc
->explicit_inval_data
= 1;
920 if (arg
->flags
& FUSE_DO_READDIRPLUS
) {
921 fc
->do_readdirplus
= 1;
922 if (arg
->flags
& FUSE_READDIRPLUS_AUTO
)
923 fc
->readdirplus_auto
= 1;
925 if (arg
->flags
& FUSE_ASYNC_DIO
)
927 if (arg
->flags
& FUSE_WRITEBACK_CACHE
)
928 fc
->writeback_cache
= 1;
929 if (arg
->flags
& FUSE_PARALLEL_DIROPS
)
930 fc
->parallel_dirops
= 1;
931 if (arg
->flags
& FUSE_HANDLE_KILLPRIV
)
932 fc
->handle_killpriv
= 1;
933 if (arg
->time_gran
&& arg
->time_gran
<= 1000000000)
934 fc
->sb
->s_time_gran
= arg
->time_gran
;
935 if ((arg
->flags
& FUSE_POSIX_ACL
)) {
936 fc
->default_permissions
= 1;
938 fc
->sb
->s_xattr
= fuse_acl_xattr_handlers
;
940 if (arg
->flags
& FUSE_CACHE_SYMLINKS
)
941 fc
->cache_symlinks
= 1;
942 if (arg
->flags
& FUSE_ABORT_ERROR
)
944 if (arg
->flags
& FUSE_MAX_PAGES
) {
946 min_t(unsigned int, FUSE_MAX_MAX_PAGES
,
947 max_t(unsigned int, arg
->max_pages
, 1));
950 ra_pages
= fc
->max_read
/ PAGE_SIZE
;
955 fc
->sb
->s_bdi
->ra_pages
=
956 min(fc
->sb
->s_bdi
->ra_pages
, ra_pages
);
957 fc
->minor
= arg
->minor
;
958 fc
->max_write
= arg
->minor
< 5 ? 4096 : arg
->max_write
;
959 fc
->max_write
= max_t(unsigned, 4096, fc
->max_write
);
964 fuse_set_initialized(fc
);
965 wake_up_all(&fc
->blocked_waitq
);
968 void fuse_send_init(struct fuse_conn
*fc
)
970 struct fuse_init_args
*ia
;
972 ia
= kzalloc(sizeof(*ia
), GFP_KERNEL
| __GFP_NOFAIL
);
974 ia
->in
.major
= FUSE_KERNEL_VERSION
;
975 ia
->in
.minor
= FUSE_KERNEL_MINOR_VERSION
;
976 ia
->in
.max_readahead
= fc
->sb
->s_bdi
->ra_pages
* PAGE_SIZE
;
978 FUSE_ASYNC_READ
| FUSE_POSIX_LOCKS
| FUSE_ATOMIC_O_TRUNC
|
979 FUSE_EXPORT_SUPPORT
| FUSE_BIG_WRITES
| FUSE_DONT_MASK
|
980 FUSE_SPLICE_WRITE
| FUSE_SPLICE_MOVE
| FUSE_SPLICE_READ
|
981 FUSE_FLOCK_LOCKS
| FUSE_HAS_IOCTL_DIR
| FUSE_AUTO_INVAL_DATA
|
982 FUSE_DO_READDIRPLUS
| FUSE_READDIRPLUS_AUTO
| FUSE_ASYNC_DIO
|
983 FUSE_WRITEBACK_CACHE
| FUSE_NO_OPEN_SUPPORT
|
984 FUSE_PARALLEL_DIROPS
| FUSE_HANDLE_KILLPRIV
| FUSE_POSIX_ACL
|
985 FUSE_ABORT_ERROR
| FUSE_MAX_PAGES
| FUSE_CACHE_SYMLINKS
|
986 FUSE_NO_OPENDIR_SUPPORT
| FUSE_EXPLICIT_INVAL_DATA
;
987 ia
->args
.opcode
= FUSE_INIT
;
988 ia
->args
.in_numargs
= 1;
989 ia
->args
.in_args
[0].size
= sizeof(ia
->in
);
990 ia
->args
.in_args
[0].value
= &ia
->in
;
991 ia
->args
.out_numargs
= 1;
992 /* Variable length argument used for backward compatibility
993 with interface version < 7.5. Rest of init_out is zeroed
994 by do_get_request(), so a short reply is not a problem */
995 ia
->args
.out_argvar
= true;
996 ia
->args
.out_args
[0].size
= sizeof(ia
->out
);
997 ia
->args
.out_args
[0].value
= &ia
->out
;
998 ia
->args
.force
= true;
999 ia
->args
.nocreds
= true;
1000 ia
->args
.end
= process_init_reply
;
1002 if (fuse_simple_background(fc
, &ia
->args
, GFP_KERNEL
) != 0)
1003 process_init_reply(fc
, &ia
->args
, -ENOTCONN
);
1005 EXPORT_SYMBOL_GPL(fuse_send_init
);
1007 void fuse_free_conn(struct fuse_conn
*fc
)
1009 WARN_ON(!list_empty(&fc
->devices
));
1012 EXPORT_SYMBOL_GPL(fuse_free_conn
);
1014 static int fuse_bdi_init(struct fuse_conn
*fc
, struct super_block
*sb
)
1020 suffix
= "-fuseblk";
1022 * sb->s_bdi points to the blkdev's bdi; however, we want to redirect
1023 * it to our private bdi...
1026 sb
->s_bdi
= &noop_backing_dev_info
;
1028 err
= super_setup_bdi_name(sb
, "%u:%u%s", MAJOR(fc
->dev
),
1029 MINOR(fc
->dev
), suffix
);
1033 sb
->s_bdi
->ra_pages
= VM_READAHEAD_PAGES
;
1034 /* fuse does its own writeback accounting */
1035 sb
->s_bdi
->capabilities
= BDI_CAP_NO_ACCT_WB
| BDI_CAP_STRICTLIMIT
;
1038 * For a single fuse filesystem use max 1% of dirty +
1039 * writeback threshold.
1041 * This gives about 1M of write buffer for memory maps on a
1042 * machine with 1G and 10% dirty_ratio, which should be more
1045 * Privileged users can raise it by writing to
1047 * /sys/class/bdi/<bdi>/max_ratio
1049 bdi_set_max_ratio(sb
->s_bdi
, 1);
1054 struct fuse_dev
*fuse_dev_alloc(void)
1056 struct fuse_dev
*fud
;
1057 struct list_head
*pq
;
1059 fud
= kzalloc(sizeof(struct fuse_dev
), GFP_KERNEL
);
1063 pq
= kcalloc(FUSE_PQ_HASH_SIZE
, sizeof(struct list_head
), GFP_KERNEL
);
1069 fud
->pq
.processing
= pq
;
1070 fuse_pqueue_init(&fud
->pq
);
1074 EXPORT_SYMBOL_GPL(fuse_dev_alloc
);
1076 void fuse_dev_install(struct fuse_dev
*fud
, struct fuse_conn
*fc
)
1078 fud
->fc
= fuse_conn_get(fc
);
1079 spin_lock(&fc
->lock
);
1080 list_add_tail(&fud
->entry
, &fc
->devices
);
1081 spin_unlock(&fc
->lock
);
1083 EXPORT_SYMBOL_GPL(fuse_dev_install
);
1085 struct fuse_dev
*fuse_dev_alloc_install(struct fuse_conn
*fc
)
1087 struct fuse_dev
*fud
;
1089 fud
= fuse_dev_alloc();
1093 fuse_dev_install(fud
, fc
);
1096 EXPORT_SYMBOL_GPL(fuse_dev_alloc_install
);
1098 void fuse_dev_free(struct fuse_dev
*fud
)
1100 struct fuse_conn
*fc
= fud
->fc
;
1103 spin_lock(&fc
->lock
);
1104 list_del(&fud
->entry
);
1105 spin_unlock(&fc
->lock
);
1109 kfree(fud
->pq
.processing
);
1112 EXPORT_SYMBOL_GPL(fuse_dev_free
);
1114 int fuse_fill_super_common(struct super_block
*sb
, struct fuse_fs_context
*ctx
)
1116 struct fuse_dev
*fud
;
1117 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
1119 struct dentry
*root_dentry
;
1123 if (sb
->s_flags
& SB_MANDLOCK
)
1126 sb
->s_flags
&= ~(SB_NOSEC
| SB_I_VERSION
);
1131 if (!sb_set_blocksize(sb
, ctx
->blksize
))
1135 sb
->s_blocksize
= PAGE_SIZE
;
1136 sb
->s_blocksize_bits
= PAGE_SHIFT
;
1139 sb
->s_subtype
= ctx
->subtype
;
1140 ctx
->subtype
= NULL
;
1141 sb
->s_magic
= FUSE_SUPER_MAGIC
;
1142 sb
->s_op
= &fuse_super_operations
;
1143 sb
->s_xattr
= fuse_xattr_handlers
;
1144 sb
->s_maxbytes
= MAX_LFS_FILESIZE
;
1145 sb
->s_time_gran
= 1;
1146 sb
->s_export_op
= &fuse_export_operations
;
1147 sb
->s_iflags
|= SB_I_IMA_UNVERIFIABLE_SIGNATURE
;
1148 if (sb
->s_user_ns
!= &init_user_ns
)
1149 sb
->s_iflags
|= SB_I_UNTRUSTED_MOUNTER
;
1152 * If we are not in the initial user namespace, POSIX
1153 * ACLs must be translated.
1155 if (sb
->s_user_ns
!= &init_user_ns
)
1156 sb
->s_xattr
= fuse_no_acl_xattr_handlers
;
1158 fud
= fuse_dev_alloc_install(fc
);
1162 fc
->dev
= sb
->s_dev
;
1164 err
= fuse_bdi_init(fc
, sb
);
1168 /* Handle umasking inside the fuse code */
1169 if (sb
->s_flags
& SB_POSIXACL
)
1171 sb
->s_flags
|= SB_POSIXACL
;
1173 fc
->default_permissions
= ctx
->default_permissions
;
1174 fc
->allow_other
= ctx
->allow_other
;
1175 fc
->user_id
= ctx
->user_id
;
1176 fc
->group_id
= ctx
->group_id
;
1177 fc
->max_read
= max_t(unsigned, 4096, ctx
->max_read
);
1178 fc
->destroy
= ctx
->destroy
;
1179 fc
->no_control
= ctx
->no_control
;
1180 fc
->no_force_umount
= ctx
->no_force_umount
;
1181 fc
->no_mount_options
= ctx
->no_mount_options
;
1184 root
= fuse_get_root_inode(sb
, ctx
->rootmode
);
1185 sb
->s_d_op
= &fuse_root_dentry_operations
;
1186 root_dentry
= d_make_root(root
);
1189 /* Root dentry doesn't have .d_revalidate */
1190 sb
->s_d_op
= &fuse_dentry_operations
;
1192 mutex_lock(&fuse_mutex
);
1197 err
= fuse_ctl_add_conn(fc
);
1201 list_add_tail(&fc
->entry
, &fuse_conn_list
);
1202 sb
->s_root
= root_dentry
;
1204 mutex_unlock(&fuse_mutex
);
1208 mutex_unlock(&fuse_mutex
);
1215 EXPORT_SYMBOL_GPL(fuse_fill_super_common
);
1217 static int fuse_fill_super(struct super_block
*sb
, struct fs_context
*fsc
)
1219 struct fuse_fs_context
*ctx
= fsc
->fs_private
;
1222 struct fuse_conn
*fc
;
1225 file
= fget(ctx
->fd
);
1230 * Require mount to happen from the same user namespace which
1231 * opened /dev/fuse to prevent potential attacks.
1233 if ((file
->f_op
!= &fuse_dev_operations
) ||
1234 (file
->f_cred
->user_ns
!= sb
->s_user_ns
))
1236 ctx
->fudptr
= &file
->private_data
;
1238 fc
= kmalloc(sizeof(*fc
), GFP_KERNEL
);
1243 fuse_conn_init(fc
, sb
->s_user_ns
, &fuse_dev_fiq_ops
, NULL
);
1244 fc
->release
= fuse_free_conn
;
1247 err
= fuse_fill_super_common(sb
, ctx
);
1251 * atomic_dec_and_test() in fput() provides the necessary
1252 * memory barrier for file->private_data to be visible on all
1256 fuse_send_init(get_fuse_conn_super(sb
));
1261 sb
->s_fs_info
= NULL
;
1268 static int fuse_get_tree(struct fs_context
*fc
)
1270 struct fuse_fs_context
*ctx
= fc
->fs_private
;
1272 if (!ctx
->fd_present
|| !ctx
->rootmode_present
||
1273 !ctx
->user_id_present
|| !ctx
->group_id_present
)
1278 return get_tree_bdev(fc
, fuse_fill_super
);
1281 return get_tree_nodev(fc
, fuse_fill_super
);
1284 static const struct fs_context_operations fuse_context_ops
= {
1285 .free
= fuse_free_fc
,
1286 .parse_param
= fuse_parse_param
,
1287 .get_tree
= fuse_get_tree
,
1291 * Set up the filesystem mount context.
1293 static int fuse_init_fs_context(struct fs_context
*fc
)
1295 struct fuse_fs_context
*ctx
;
1297 ctx
= kzalloc(sizeof(struct fuse_fs_context
), GFP_KERNEL
);
1302 ctx
->blksize
= FUSE_DEFAULT_BLKSIZE
;
1305 if (fc
->fs_type
== &fuseblk_fs_type
) {
1306 ctx
->is_bdev
= true;
1307 ctx
->destroy
= true;
1311 fc
->fs_private
= ctx
;
1312 fc
->ops
= &fuse_context_ops
;
1316 static void fuse_sb_destroy(struct super_block
*sb
)
1318 struct fuse_conn
*fc
= get_fuse_conn_super(sb
);
1322 fuse_send_destroy(fc
);
1324 fuse_abort_conn(fc
);
1325 fuse_wait_aborted(fc
);
1327 down_write(&fc
->killsb
);
1329 up_write(&fc
->killsb
);
1333 void fuse_kill_sb_anon(struct super_block
*sb
)
1335 fuse_sb_destroy(sb
);
1336 kill_anon_super(sb
);
1338 EXPORT_SYMBOL_GPL(fuse_kill_sb_anon
);
1340 static struct file_system_type fuse_fs_type
= {
1341 .owner
= THIS_MODULE
,
1343 .fs_flags
= FS_HAS_SUBTYPE
| FS_USERNS_MOUNT
,
1344 .init_fs_context
= fuse_init_fs_context
,
1345 .parameters
= fuse_fs_parameters
,
1346 .kill_sb
= fuse_kill_sb_anon
,
1348 MODULE_ALIAS_FS("fuse");
1351 static void fuse_kill_sb_blk(struct super_block
*sb
)
1353 fuse_sb_destroy(sb
);
1354 kill_block_super(sb
);
1357 static struct file_system_type fuseblk_fs_type
= {
1358 .owner
= THIS_MODULE
,
1360 .init_fs_context
= fuse_init_fs_context
,
1361 .parameters
= fuse_fs_parameters
,
1362 .kill_sb
= fuse_kill_sb_blk
,
1363 .fs_flags
= FS_REQUIRES_DEV
| FS_HAS_SUBTYPE
,
1365 MODULE_ALIAS_FS("fuseblk");
1367 static inline int register_fuseblk(void)
1369 return register_filesystem(&fuseblk_fs_type
);
1372 static inline void unregister_fuseblk(void)
1374 unregister_filesystem(&fuseblk_fs_type
);
1377 static inline int register_fuseblk(void)
1382 static inline void unregister_fuseblk(void)
1387 static void fuse_inode_init_once(void *foo
)
1389 struct inode
*inode
= foo
;
1391 inode_init_once(inode
);
1394 static int __init
fuse_fs_init(void)
1398 fuse_inode_cachep
= kmem_cache_create("fuse_inode",
1399 sizeof(struct fuse_inode
), 0,
1400 SLAB_HWCACHE_ALIGN
|SLAB_ACCOUNT
|SLAB_RECLAIM_ACCOUNT
,
1401 fuse_inode_init_once
);
1403 if (!fuse_inode_cachep
)
1406 err
= register_fuseblk();
1410 err
= register_filesystem(&fuse_fs_type
);
1417 unregister_fuseblk();
1419 kmem_cache_destroy(fuse_inode_cachep
);
1424 static void fuse_fs_cleanup(void)
1426 unregister_filesystem(&fuse_fs_type
);
1427 unregister_fuseblk();
1430 * Make sure all delayed rcu free inodes are flushed before we
1434 kmem_cache_destroy(fuse_inode_cachep
);
1437 static struct kobject
*fuse_kobj
;
1439 static int fuse_sysfs_init(void)
1443 fuse_kobj
= kobject_create_and_add("fuse", fs_kobj
);
1449 err
= sysfs_create_mount_point(fuse_kobj
, "connections");
1451 goto out_fuse_unregister
;
1455 out_fuse_unregister
:
1456 kobject_put(fuse_kobj
);
1461 static void fuse_sysfs_cleanup(void)
1463 sysfs_remove_mount_point(fuse_kobj
, "connections");
1464 kobject_put(fuse_kobj
);
1467 static int __init
fuse_init(void)
1471 pr_info("init (API version %i.%i)\n",
1472 FUSE_KERNEL_VERSION
, FUSE_KERNEL_MINOR_VERSION
);
1474 INIT_LIST_HEAD(&fuse_conn_list
);
1475 res
= fuse_fs_init();
1479 res
= fuse_dev_init();
1481 goto err_fs_cleanup
;
1483 res
= fuse_sysfs_init();
1485 goto err_dev_cleanup
;
1487 res
= fuse_ctl_init();
1489 goto err_sysfs_cleanup
;
1491 sanitize_global_limit(&max_user_bgreq
);
1492 sanitize_global_limit(&max_user_congthresh
);
1497 fuse_sysfs_cleanup();
1506 static void __exit
fuse_exit(void)
1511 fuse_sysfs_cleanup();
1516 module_init(fuse_init
);
1517 module_exit(fuse_exit
);