1 // SPDX-License-Identifier: GPL-2.0-only
3 #include <linux/ceph/ceph_debug.h>
5 #include <linux/backing-dev.h>
6 #include <linux/ctype.h>
8 #include <linux/inet.h>
10 #include <linux/module.h>
11 #include <linux/mount.h>
12 #include <linux/fs_context.h>
13 #include <linux/fs_parser.h>
14 #include <linux/sched.h>
15 #include <linux/seq_file.h>
16 #include <linux/slab.h>
17 #include <linux/statfs.h>
18 #include <linux/string.h>
21 #include "mds_client.h"
25 #include <linux/ceph/ceph_features.h>
26 #include <linux/ceph/decode.h>
27 #include <linux/ceph/mon_client.h>
28 #include <linux/ceph/auth.h>
29 #include <linux/ceph/debugfs.h>
31 #include <uapi/linux/magic.h>
33 static DEFINE_SPINLOCK(ceph_fsc_lock
);
34 static LIST_HEAD(ceph_fsc_list
);
37 * Ceph superblock operations
39 * Handle the basics of mounting, unmounting.
45 static void ceph_put_super(struct super_block
*s
)
47 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(s
);
49 doutc(fsc
->client
, "begin\n");
50 ceph_fscrypt_free_dummy_policy(fsc
);
51 ceph_mdsc_close_sessions(fsc
->mdsc
);
52 doutc(fsc
->client
, "done\n");
55 static int ceph_statfs(struct dentry
*dentry
, struct kstatfs
*buf
)
57 struct ceph_fs_client
*fsc
= ceph_inode_to_fs_client(d_inode(dentry
));
58 struct ceph_mon_client
*monc
= &fsc
->client
->monc
;
59 struct ceph_statfs st
;
63 doutc(fsc
->client
, "begin\n");
64 if (fsc
->mdsc
->mdsmap
->m_num_data_pg_pools
== 1) {
65 data_pool
= fsc
->mdsc
->mdsmap
->m_data_pg_pools
[0];
67 data_pool
= CEPH_NOPOOL
;
70 err
= ceph_monc_do_statfs(monc
, data_pool
, &st
);
75 buf
->f_type
= CEPH_SUPER_MAGIC
; /* ?? */
78 * Express utilization in terms of large blocks to avoid
79 * overflow on 32-bit machines.
81 buf
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
84 * By default use root quota for stats; fallback to overall filesystem
85 * usage if using 'noquotadf' mount option or if the root dir doesn't
86 * have max_bytes quota set.
88 if (ceph_test_mount_opt(fsc
, NOQUOTADF
) ||
89 !ceph_quota_update_statfs(fsc
, buf
)) {
90 buf
->f_blocks
= le64_to_cpu(st
.kb
) >> (CEPH_BLOCK_SHIFT
-10);
91 buf
->f_bfree
= le64_to_cpu(st
.kb_avail
) >> (CEPH_BLOCK_SHIFT
-10);
92 buf
->f_bavail
= le64_to_cpu(st
.kb_avail
) >> (CEPH_BLOCK_SHIFT
-10);
96 * NOTE: for the time being, we make bsize == frsize to humor
97 * not-yet-ancient versions of glibc that are broken.
98 * Someday, we will probably want to report a real block
99 * size... whatever that may mean for a network file system!
101 buf
->f_bsize
= buf
->f_frsize
;
103 buf
->f_files
= le64_to_cpu(st
.num_objects
);
105 buf
->f_namelen
= NAME_MAX
;
107 /* Must convert the fsid, for consistent values across arches */
108 buf
->f_fsid
.val
[0] = 0;
109 mutex_lock(&monc
->mutex
);
110 for (i
= 0 ; i
< sizeof(monc
->monmap
->fsid
) / sizeof(__le32
) ; ++i
)
111 buf
->f_fsid
.val
[0] ^= le32_to_cpu(((__le32
*)&monc
->monmap
->fsid
)[i
]);
112 mutex_unlock(&monc
->mutex
);
114 /* fold the fs_cluster_id into the upper bits */
115 buf
->f_fsid
.val
[1] = monc
->fs_cluster_id
;
117 doutc(fsc
->client
, "done\n");
121 static int ceph_sync_fs(struct super_block
*sb
, int wait
)
123 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
124 struct ceph_client
*cl
= fsc
->client
;
127 doutc(cl
, "(non-blocking)\n");
128 ceph_flush_dirty_caps(fsc
->mdsc
);
129 ceph_flush_cap_releases(fsc
->mdsc
);
130 doutc(cl
, "(non-blocking) done\n");
134 doutc(cl
, "(blocking)\n");
135 ceph_osdc_sync(&fsc
->client
->osdc
);
136 ceph_mdsc_sync(fsc
->mdsc
);
137 doutc(cl
, "(blocking) done\n");
148 Opt_caps_wanted_delay_min
,
149 Opt_caps_wanted_delay_max
,
151 Opt_readdir_max_entries
,
152 Opt_readdir_max_bytes
,
160 Opt_test_dummy_encryption
,
161 /* string args above */
169 Opt_require_active_mds
,
178 enum ceph_recover_session_mode
{
179 ceph_recover_session_no
,
180 ceph_recover_session_clean
183 static const struct constant_table ceph_param_recover
[] = {
184 { "no", ceph_recover_session_no
},
185 { "clean", ceph_recover_session_clean
},
189 static const struct fs_parameter_spec ceph_mount_parameters
[] = {
190 fsparam_flag_no ("acl", Opt_acl
),
191 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir
),
192 fsparam_s32 ("caps_max", Opt_caps_max
),
193 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max
),
194 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min
),
195 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb
),
196 fsparam_flag_no ("copyfrom", Opt_copyfrom
),
197 fsparam_flag_no ("dcache", Opt_dcache
),
198 fsparam_flag_no ("dirstat", Opt_dirstat
),
199 fsparam_flag_no ("fsc", Opt_fscache
), // fsc|nofsc
200 fsparam_string ("fsc", Opt_fscache
), // fsc=...
201 fsparam_flag_no ("ino32", Opt_ino32
),
202 fsparam_string ("mds_namespace", Opt_mds_namespace
),
203 fsparam_string ("mon_addr", Opt_mon_addr
),
204 fsparam_flag_no ("poolperm", Opt_poolperm
),
205 fsparam_flag_no ("quotadf", Opt_quotadf
),
206 fsparam_u32 ("rasize", Opt_rasize
),
207 fsparam_flag_no ("rbytes", Opt_rbytes
),
208 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes
),
209 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries
),
210 fsparam_enum ("recover_session", Opt_recover_session
, ceph_param_recover
),
211 fsparam_flag_no ("require_active_mds", Opt_require_active_mds
),
212 fsparam_u32 ("rsize", Opt_rsize
),
213 fsparam_string ("snapdirname", Opt_snapdirname
),
214 fsparam_string ("source", Opt_source
),
215 fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption
),
216 fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption
),
217 fsparam_u32 ("wsize", Opt_wsize
),
218 fsparam_flag_no ("wsync", Opt_wsync
),
219 fsparam_flag_no ("pagecache", Opt_pagecache
),
220 fsparam_flag_no ("sparseread", Opt_sparseread
),
224 struct ceph_parse_opts_ctx
{
225 struct ceph_options
*copts
;
226 struct ceph_mount_options
*opts
;
230 * Remove adjacent slashes and then the trailing slash, unless it is
231 * the only remaining character.
233 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
235 static void canonicalize_path(char *path
)
239 for (i
= 0; path
[i
] != '\0'; i
++) {
240 if (path
[i
] != '/' || j
< 1 || path
[j
- 1] != '/')
244 if (j
> 1 && path
[j
- 1] == '/')
250 * Check if the mds namespace in ceph_mount_options matches
251 * the passed in namespace string. First time match (when
252 * ->mds_namespace is NULL) is treated specially, since
253 * ->mds_namespace needs to be initialized by the caller.
255 static int namespace_equals(struct ceph_mount_options
*fsopt
,
256 const char *namespace, size_t len
)
258 return !(fsopt
->mds_namespace
&&
259 (strlen(fsopt
->mds_namespace
) != len
||
260 strncmp(fsopt
->mds_namespace
, namespace, len
)));
263 static int ceph_parse_old_source(const char *dev_name
, const char *dev_name_end
,
264 struct fs_context
*fc
)
267 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
268 struct ceph_mount_options
*fsopt
= pctx
->opts
;
270 if (*dev_name_end
!= ':')
271 return invalfc(fc
, "separator ':' missing in source");
273 r
= ceph_parse_mon_ips(dev_name
, dev_name_end
- dev_name
,
274 pctx
->copts
, fc
->log
.log
, ',');
278 fsopt
->new_dev_syntax
= false;
282 static int ceph_parse_new_source(const char *dev_name
, const char *dev_name_end
,
283 struct fs_context
*fc
)
286 struct ceph_fsid fsid
;
287 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
288 struct ceph_options
*opts
= pctx
->copts
;
289 struct ceph_mount_options
*fsopt
= pctx
->opts
;
290 const char *name_start
= dev_name
;
291 const char *fsid_start
, *fs_name_start
;
293 if (*dev_name_end
!= '=') {
294 dout("separator '=' missing in source");
298 fsid_start
= strchr(dev_name
, '@');
300 return invalfc(fc
, "missing cluster fsid");
301 len
= fsid_start
- name_start
;
303 opts
->name
= kstrndup(name_start
, len
, GFP_KERNEL
);
306 dout("using %s entity name", opts
->name
);
308 ++fsid_start
; /* start of cluster fsid */
309 fs_name_start
= strchr(fsid_start
, '.');
311 return invalfc(fc
, "missing file system name");
313 if (ceph_parse_fsid(fsid_start
, &fsid
))
314 return invalfc(fc
, "Invalid FSID");
316 ++fs_name_start
; /* start of file system name */
317 len
= dev_name_end
- fs_name_start
;
319 if (!namespace_equals(fsopt
, fs_name_start
, len
))
320 return invalfc(fc
, "Mismatching mds_namespace");
321 kfree(fsopt
->mds_namespace
);
322 fsopt
->mds_namespace
= kstrndup(fs_name_start
, len
, GFP_KERNEL
);
323 if (!fsopt
->mds_namespace
)
325 dout("file system (mds namespace) '%s'\n", fsopt
->mds_namespace
);
327 fsopt
->new_dev_syntax
= true;
332 * Parse the source parameter for new device format. Distinguish the device
333 * spec from the path. Try parsing new device format and fallback to old
336 * New device syntax will looks like:
337 * <device_spec>=/<path>
339 * <device_spec> is name@fsid.fsname
340 * <path> is optional, but if present must begin with '/'
341 * (monitor addresses are passed via mount option)
343 * Old device syntax is:
344 * <server_spec>[,<server_spec>...]:[<path>]
346 * <server_spec> is <ip>[:<port>]
347 * <path> is optional, but if present must begin with '/'
349 static int ceph_parse_source(struct fs_parameter
*param
, struct fs_context
*fc
)
351 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
352 struct ceph_mount_options
*fsopt
= pctx
->opts
;
353 char *dev_name
= param
->string
, *dev_name_end
;
356 dout("'%s'\n", dev_name
);
357 if (!dev_name
|| !*dev_name
)
358 return invalfc(fc
, "Empty source");
360 dev_name_end
= strchr(dev_name
, '/');
363 * The server_path will include the whole chars from userland
364 * including the leading '/'.
366 kfree(fsopt
->server_path
);
367 fsopt
->server_path
= kstrdup(dev_name_end
, GFP_KERNEL
);
368 if (!fsopt
->server_path
)
371 canonicalize_path(fsopt
->server_path
);
373 dev_name_end
= dev_name
+ strlen(dev_name
);
376 dev_name_end
--; /* back up to separator */
377 if (dev_name_end
< dev_name
)
378 return invalfc(fc
, "Path missing in source");
380 dout("device name '%.*s'\n", (int)(dev_name_end
- dev_name
), dev_name
);
381 if (fsopt
->server_path
)
382 dout("server path '%s'\n", fsopt
->server_path
);
384 dout("trying new device syntax");
385 ret
= ceph_parse_new_source(dev_name
, dev_name_end
, fc
);
389 dout("trying old device syntax");
390 ret
= ceph_parse_old_source(dev_name
, dev_name_end
, fc
);
395 fc
->source
= param
->string
;
396 param
->string
= NULL
;
400 static int ceph_parse_mon_addr(struct fs_parameter
*param
,
401 struct fs_context
*fc
)
403 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
404 struct ceph_mount_options
*fsopt
= pctx
->opts
;
406 kfree(fsopt
->mon_addr
);
407 fsopt
->mon_addr
= param
->string
;
408 param
->string
= NULL
;
410 return ceph_parse_mon_ips(fsopt
->mon_addr
, strlen(fsopt
->mon_addr
),
411 pctx
->copts
, fc
->log
.log
, '/');
414 static int ceph_parse_mount_param(struct fs_context
*fc
,
415 struct fs_parameter
*param
)
417 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
418 struct ceph_mount_options
*fsopt
= pctx
->opts
;
419 struct fs_parse_result result
;
423 ret
= ceph_parse_param(param
, pctx
->copts
, fc
->log
.log
);
424 if (ret
!= -ENOPARAM
)
427 token
= fs_parse(fc
, ceph_mount_parameters
, param
, &result
);
428 dout("%s: fs_parse '%s' token %d\n",__func__
, param
->key
, token
);
433 case Opt_snapdirname
:
434 if (strlen(param
->string
) > NAME_MAX
)
435 return invalfc(fc
, "snapdirname too long");
436 kfree(fsopt
->snapdir_name
);
437 fsopt
->snapdir_name
= param
->string
;
438 param
->string
= NULL
;
440 case Opt_mds_namespace
:
441 if (!namespace_equals(fsopt
, param
->string
, strlen(param
->string
)))
442 return invalfc(fc
, "Mismatching mds_namespace");
443 kfree(fsopt
->mds_namespace
);
444 fsopt
->mds_namespace
= param
->string
;
445 param
->string
= NULL
;
447 case Opt_recover_session
:
448 mode
= result
.uint_32
;
449 if (mode
== ceph_recover_session_no
)
450 fsopt
->flags
&= ~CEPH_MOUNT_OPT_CLEANRECOVER
;
451 else if (mode
== ceph_recover_session_clean
)
452 fsopt
->flags
|= CEPH_MOUNT_OPT_CLEANRECOVER
;
458 return invalfc(fc
, "Multiple sources specified");
459 return ceph_parse_source(param
, fc
);
461 return ceph_parse_mon_addr(param
, fc
);
463 if (result
.uint_32
< PAGE_SIZE
||
464 result
.uint_32
> CEPH_MAX_WRITE_SIZE
)
466 fsopt
->wsize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
469 if (result
.uint_32
< PAGE_SIZE
||
470 result
.uint_32
> CEPH_MAX_READ_SIZE
)
472 fsopt
->rsize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
475 fsopt
->rasize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
477 case Opt_caps_wanted_delay_min
:
478 if (result
.uint_32
< 1)
480 fsopt
->caps_wanted_delay_min
= result
.uint_32
;
482 case Opt_caps_wanted_delay_max
:
483 if (result
.uint_32
< 1)
485 fsopt
->caps_wanted_delay_max
= result
.uint_32
;
488 if (result
.int_32
< 0)
490 fsopt
->caps_max
= result
.int_32
;
492 case Opt_readdir_max_entries
:
493 if (result
.uint_32
< 1)
495 fsopt
->max_readdir
= result
.uint_32
;
497 case Opt_readdir_max_bytes
:
498 if (result
.uint_32
< PAGE_SIZE
&& result
.uint_32
!= 0)
500 fsopt
->max_readdir_bytes
= result
.uint_32
;
502 case Opt_congestion_kb
:
503 if (result
.uint_32
< 1024) /* at least 1M */
505 fsopt
->congestion_kb
= result
.uint_32
;
509 fsopt
->flags
|= CEPH_MOUNT_OPT_DIRSTAT
;
511 fsopt
->flags
&= ~CEPH_MOUNT_OPT_DIRSTAT
;
515 fsopt
->flags
|= CEPH_MOUNT_OPT_RBYTES
;
517 fsopt
->flags
&= ~CEPH_MOUNT_OPT_RBYTES
;
519 case Opt_asyncreaddir
:
521 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOASYNCREADDIR
;
523 fsopt
->flags
|= CEPH_MOUNT_OPT_NOASYNCREADDIR
;
527 fsopt
->flags
|= CEPH_MOUNT_OPT_DCACHE
;
529 fsopt
->flags
&= ~CEPH_MOUNT_OPT_DCACHE
;
533 fsopt
->flags
|= CEPH_MOUNT_OPT_INO32
;
535 fsopt
->flags
&= ~CEPH_MOUNT_OPT_INO32
;
539 #ifdef CONFIG_CEPH_FSCACHE
540 kfree(fsopt
->fscache_uniq
);
541 fsopt
->fscache_uniq
= NULL
;
542 if (result
.negated
) {
543 fsopt
->flags
&= ~CEPH_MOUNT_OPT_FSCACHE
;
545 fsopt
->flags
|= CEPH_MOUNT_OPT_FSCACHE
;
546 fsopt
->fscache_uniq
= param
->string
;
547 param
->string
= NULL
;
551 return invalfc(fc
, "fscache support is disabled");
555 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOPOOLPERM
;
557 fsopt
->flags
|= CEPH_MOUNT_OPT_NOPOOLPERM
;
559 case Opt_require_active_mds
:
561 fsopt
->flags
&= ~CEPH_MOUNT_OPT_MOUNTWAIT
;
563 fsopt
->flags
|= CEPH_MOUNT_OPT_MOUNTWAIT
;
567 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOQUOTADF
;
569 fsopt
->flags
|= CEPH_MOUNT_OPT_NOQUOTADF
;
573 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOCOPYFROM
;
575 fsopt
->flags
|= CEPH_MOUNT_OPT_NOCOPYFROM
;
578 if (!result
.negated
) {
579 #ifdef CONFIG_CEPH_FS_POSIX_ACL
580 fc
->sb_flags
|= SB_POSIXACL
;
582 return invalfc(fc
, "POSIX ACL support is disabled");
585 fc
->sb_flags
&= ~SB_POSIXACL
;
590 fsopt
->flags
&= ~CEPH_MOUNT_OPT_ASYNC_DIROPS
;
592 fsopt
->flags
|= CEPH_MOUNT_OPT_ASYNC_DIROPS
;
596 fsopt
->flags
|= CEPH_MOUNT_OPT_NOPAGECACHE
;
598 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOPAGECACHE
;
602 fsopt
->flags
&= ~CEPH_MOUNT_OPT_SPARSEREAD
;
604 fsopt
->flags
|= CEPH_MOUNT_OPT_SPARSEREAD
;
606 case Opt_test_dummy_encryption
:
607 #ifdef CONFIG_FS_ENCRYPTION
608 fscrypt_free_dummy_policy(&fsopt
->dummy_enc_policy
);
609 ret
= fscrypt_parse_test_dummy_encryption(param
,
610 &fsopt
->dummy_enc_policy
);
611 if (ret
== -EINVAL
) {
612 warnfc(fc
, "Value of option \"%s\" is unrecognized",
614 } else if (ret
== -EEXIST
) {
615 warnfc(fc
, "Conflicting test_dummy_encryption options");
620 "FS encryption not supported: test_dummy_encryption mount option ignored");
629 return invalfc(fc
, "%s out of range", param
->key
);
632 static void destroy_mount_options(struct ceph_mount_options
*args
)
634 dout("destroy_mount_options %p\n", args
);
638 kfree(args
->snapdir_name
);
639 kfree(args
->mds_namespace
);
640 kfree(args
->server_path
);
641 kfree(args
->fscache_uniq
);
642 kfree(args
->mon_addr
);
643 fscrypt_free_dummy_policy(&args
->dummy_enc_policy
);
647 static int strcmp_null(const char *s1
, const char *s2
)
655 return strcmp(s1
, s2
);
658 static int compare_mount_options(struct ceph_mount_options
*new_fsopt
,
659 struct ceph_options
*new_opt
,
660 struct ceph_fs_client
*fsc
)
662 struct ceph_mount_options
*fsopt1
= new_fsopt
;
663 struct ceph_mount_options
*fsopt2
= fsc
->mount_options
;
664 int ofs
= offsetof(struct ceph_mount_options
, snapdir_name
);
667 ret
= memcmp(fsopt1
, fsopt2
, ofs
);
671 ret
= strcmp_null(fsopt1
->snapdir_name
, fsopt2
->snapdir_name
);
675 ret
= strcmp_null(fsopt1
->mds_namespace
, fsopt2
->mds_namespace
);
679 ret
= strcmp_null(fsopt1
->server_path
, fsopt2
->server_path
);
683 ret
= strcmp_null(fsopt1
->fscache_uniq
, fsopt2
->fscache_uniq
);
687 ret
= strcmp_null(fsopt1
->mon_addr
, fsopt2
->mon_addr
);
691 return ceph_compare_options(new_opt
, fsc
->client
);
695 * ceph_show_options - Show mount options in /proc/mounts
696 * @m: seq_file to write to
697 * @root: root of that (sub)tree
699 static int ceph_show_options(struct seq_file
*m
, struct dentry
*root
)
701 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(root
->d_sb
);
702 struct ceph_mount_options
*fsopt
= fsc
->mount_options
;
706 /* a comma between MNT/MS and client options */
710 ret
= ceph_print_client_options(m
, fsc
->client
, false);
714 /* retract our comma if no client options */
718 if (fsopt
->flags
& CEPH_MOUNT_OPT_DIRSTAT
)
719 seq_puts(m
, ",dirstat");
720 if ((fsopt
->flags
& CEPH_MOUNT_OPT_RBYTES
))
721 seq_puts(m
, ",rbytes");
722 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOASYNCREADDIR
)
723 seq_puts(m
, ",noasyncreaddir");
724 if ((fsopt
->flags
& CEPH_MOUNT_OPT_DCACHE
) == 0)
725 seq_puts(m
, ",nodcache");
726 if (fsopt
->flags
& CEPH_MOUNT_OPT_INO32
)
727 seq_puts(m
, ",ino32");
728 if (fsopt
->flags
& CEPH_MOUNT_OPT_FSCACHE
) {
729 seq_show_option(m
, "fsc", fsopt
->fscache_uniq
);
731 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOPOOLPERM
)
732 seq_puts(m
, ",nopoolperm");
733 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOQUOTADF
)
734 seq_puts(m
, ",noquotadf");
736 #ifdef CONFIG_CEPH_FS_POSIX_ACL
737 if (root
->d_sb
->s_flags
& SB_POSIXACL
)
740 seq_puts(m
, ",noacl");
743 if ((fsopt
->flags
& CEPH_MOUNT_OPT_NOCOPYFROM
) == 0)
744 seq_puts(m
, ",copyfrom");
746 /* dump mds_namespace when old device syntax is in use */
747 if (fsopt
->mds_namespace
&& !fsopt
->new_dev_syntax
)
748 seq_show_option(m
, "mds_namespace", fsopt
->mds_namespace
);
751 seq_printf(m
, ",mon_addr=%s", fsopt
->mon_addr
);
753 if (fsopt
->flags
& CEPH_MOUNT_OPT_CLEANRECOVER
)
754 seq_show_option(m
, "recover_session", "clean");
756 if (!(fsopt
->flags
& CEPH_MOUNT_OPT_ASYNC_DIROPS
))
757 seq_puts(m
, ",wsync");
758 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOPAGECACHE
)
759 seq_puts(m
, ",nopagecache");
760 if (fsopt
->flags
& CEPH_MOUNT_OPT_SPARSEREAD
)
761 seq_puts(m
, ",sparseread");
763 fscrypt_show_test_dummy_encryption(m
, ',', root
->d_sb
);
765 if (fsopt
->wsize
!= CEPH_MAX_WRITE_SIZE
)
766 seq_printf(m
, ",wsize=%u", fsopt
->wsize
);
767 if (fsopt
->rsize
!= CEPH_MAX_READ_SIZE
)
768 seq_printf(m
, ",rsize=%u", fsopt
->rsize
);
769 if (fsopt
->rasize
!= CEPH_RASIZE_DEFAULT
)
770 seq_printf(m
, ",rasize=%u", fsopt
->rasize
);
771 if (fsopt
->congestion_kb
!= default_congestion_kb())
772 seq_printf(m
, ",write_congestion_kb=%u", fsopt
->congestion_kb
);
774 seq_printf(m
, ",caps_max=%d", fsopt
->caps_max
);
775 if (fsopt
->caps_wanted_delay_min
!= CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT
)
776 seq_printf(m
, ",caps_wanted_delay_min=%u",
777 fsopt
->caps_wanted_delay_min
);
778 if (fsopt
->caps_wanted_delay_max
!= CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT
)
779 seq_printf(m
, ",caps_wanted_delay_max=%u",
780 fsopt
->caps_wanted_delay_max
);
781 if (fsopt
->max_readdir
!= CEPH_MAX_READDIR_DEFAULT
)
782 seq_printf(m
, ",readdir_max_entries=%u", fsopt
->max_readdir
);
783 if (fsopt
->max_readdir_bytes
!= CEPH_MAX_READDIR_BYTES_DEFAULT
)
784 seq_printf(m
, ",readdir_max_bytes=%u", fsopt
->max_readdir_bytes
);
785 if (strcmp(fsopt
->snapdir_name
, CEPH_SNAPDIRNAME_DEFAULT
))
786 seq_show_option(m
, "snapdirname", fsopt
->snapdir_name
);
792 * handle any mon messages the standard library doesn't understand.
793 * return error if we don't either.
795 static int extra_mon_dispatch(struct ceph_client
*client
, struct ceph_msg
*msg
)
797 struct ceph_fs_client
*fsc
= client
->private;
798 int type
= le16_to_cpu(msg
->hdr
.type
);
801 case CEPH_MSG_MDS_MAP
:
802 ceph_mdsc_handle_mdsmap(fsc
->mdsc
, msg
);
804 case CEPH_MSG_FS_MAP_USER
:
805 ceph_mdsc_handle_fsmap(fsc
->mdsc
, msg
);
813 * create a new fs client
815 * Success or not, this function consumes @fsopt and @opt.
817 static struct ceph_fs_client
*create_fs_client(struct ceph_mount_options
*fsopt
,
818 struct ceph_options
*opt
)
820 struct ceph_fs_client
*fsc
;
823 fsc
= kzalloc(sizeof(*fsc
), GFP_KERNEL
);
829 fsc
->client
= ceph_create_client(opt
, fsc
);
830 if (IS_ERR(fsc
->client
)) {
831 err
= PTR_ERR(fsc
->client
);
834 opt
= NULL
; /* fsc->client now owns this */
836 fsc
->client
->extra_mon_dispatch
= extra_mon_dispatch
;
837 ceph_set_opt(fsc
->client
, ABORT_ON_FULL
);
839 if (!fsopt
->mds_namespace
) {
840 ceph_monc_want_map(&fsc
->client
->monc
, CEPH_SUB_MDSMAP
,
843 ceph_monc_want_map(&fsc
->client
->monc
, CEPH_SUB_FSMAP
,
847 fsc
->mount_options
= fsopt
;
850 fsc
->mount_state
= CEPH_MOUNT_MOUNTING
;
852 fsc
->have_copy_from2
= true;
854 atomic_long_set(&fsc
->writeback_count
, 0);
855 fsc
->write_congested
= false;
859 * The number of concurrent works can be high but they don't need
860 * to be processed in parallel, limit concurrency.
862 fsc
->inode_wq
= alloc_workqueue("ceph-inode", WQ_UNBOUND
, 0);
865 fsc
->cap_wq
= alloc_workqueue("ceph-cap", 0, 1);
869 hash_init(fsc
->async_unlink_conflict
);
870 spin_lock_init(&fsc
->async_unlink_conflict_lock
);
872 spin_lock(&ceph_fsc_lock
);
873 list_add_tail(&fsc
->metric_wakeup
, &ceph_fsc_list
);
874 spin_unlock(&ceph_fsc_lock
);
879 destroy_workqueue(fsc
->inode_wq
);
881 ceph_destroy_client(fsc
->client
);
885 ceph_destroy_options(opt
);
886 destroy_mount_options(fsopt
);
890 static void flush_fs_workqueues(struct ceph_fs_client
*fsc
)
892 flush_workqueue(fsc
->inode_wq
);
893 flush_workqueue(fsc
->cap_wq
);
896 static void destroy_fs_client(struct ceph_fs_client
*fsc
)
898 doutc(fsc
->client
, "%p\n", fsc
);
900 spin_lock(&ceph_fsc_lock
);
901 list_del(&fsc
->metric_wakeup
);
902 spin_unlock(&ceph_fsc_lock
);
904 ceph_mdsc_destroy(fsc
);
905 destroy_workqueue(fsc
->inode_wq
);
906 destroy_workqueue(fsc
->cap_wq
);
908 destroy_mount_options(fsc
->mount_options
);
910 ceph_destroy_client(fsc
->client
);
913 dout("%s: %p done\n", __func__
, fsc
);
919 struct kmem_cache
*ceph_inode_cachep
;
920 struct kmem_cache
*ceph_cap_cachep
;
921 struct kmem_cache
*ceph_cap_snap_cachep
;
922 struct kmem_cache
*ceph_cap_flush_cachep
;
923 struct kmem_cache
*ceph_dentry_cachep
;
924 struct kmem_cache
*ceph_file_cachep
;
925 struct kmem_cache
*ceph_dir_file_cachep
;
926 struct kmem_cache
*ceph_mds_request_cachep
;
927 mempool_t
*ceph_wb_pagevec_pool
;
929 static void ceph_inode_init_once(void *foo
)
931 struct ceph_inode_info
*ci
= foo
;
932 inode_init_once(&ci
->netfs
.inode
);
935 static int __init
init_caches(void)
939 ceph_inode_cachep
= kmem_cache_create("ceph_inode_info",
940 sizeof(struct ceph_inode_info
),
941 __alignof__(struct ceph_inode_info
),
942 SLAB_RECLAIM_ACCOUNT
| SLAB_ACCOUNT
,
943 ceph_inode_init_once
);
944 if (!ceph_inode_cachep
)
947 ceph_cap_cachep
= KMEM_CACHE(ceph_cap
, 0);
948 if (!ceph_cap_cachep
)
950 ceph_cap_snap_cachep
= KMEM_CACHE(ceph_cap_snap
, 0);
951 if (!ceph_cap_snap_cachep
)
953 ceph_cap_flush_cachep
= KMEM_CACHE(ceph_cap_flush
,
954 SLAB_RECLAIM_ACCOUNT
);
955 if (!ceph_cap_flush_cachep
)
958 ceph_dentry_cachep
= KMEM_CACHE(ceph_dentry_info
,
959 SLAB_RECLAIM_ACCOUNT
);
960 if (!ceph_dentry_cachep
)
963 ceph_file_cachep
= KMEM_CACHE(ceph_file_info
, 0);
964 if (!ceph_file_cachep
)
967 ceph_dir_file_cachep
= KMEM_CACHE(ceph_dir_file_info
, 0);
968 if (!ceph_dir_file_cachep
)
971 ceph_mds_request_cachep
= KMEM_CACHE(ceph_mds_request
, 0);
972 if (!ceph_mds_request_cachep
)
975 ceph_wb_pagevec_pool
= mempool_create_kmalloc_pool(10,
976 (CEPH_MAX_WRITE_SIZE
>> PAGE_SHIFT
) * sizeof(struct page
*));
977 if (!ceph_wb_pagevec_pool
)
978 goto bad_pagevec_pool
;
983 kmem_cache_destroy(ceph_mds_request_cachep
);
985 kmem_cache_destroy(ceph_dir_file_cachep
);
987 kmem_cache_destroy(ceph_file_cachep
);
989 kmem_cache_destroy(ceph_dentry_cachep
);
991 kmem_cache_destroy(ceph_cap_flush_cachep
);
993 kmem_cache_destroy(ceph_cap_snap_cachep
);
995 kmem_cache_destroy(ceph_cap_cachep
);
997 kmem_cache_destroy(ceph_inode_cachep
);
1001 static void destroy_caches(void)
1004 * Make sure all delayed rcu free inodes are flushed before we
1009 kmem_cache_destroy(ceph_inode_cachep
);
1010 kmem_cache_destroy(ceph_cap_cachep
);
1011 kmem_cache_destroy(ceph_cap_snap_cachep
);
1012 kmem_cache_destroy(ceph_cap_flush_cachep
);
1013 kmem_cache_destroy(ceph_dentry_cachep
);
1014 kmem_cache_destroy(ceph_file_cachep
);
1015 kmem_cache_destroy(ceph_dir_file_cachep
);
1016 kmem_cache_destroy(ceph_mds_request_cachep
);
1017 mempool_destroy(ceph_wb_pagevec_pool
);
1020 static void __ceph_umount_begin(struct ceph_fs_client
*fsc
)
1022 ceph_osdc_abort_requests(&fsc
->client
->osdc
, -EIO
);
1023 ceph_mdsc_force_umount(fsc
->mdsc
);
1024 fsc
->filp_gen
++; // invalidate open files
1028 * ceph_umount_begin - initiate forced umount. Tear down the
1029 * mount, skipping steps that may hang while waiting for server(s).
1031 void ceph_umount_begin(struct super_block
*sb
)
1033 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1035 doutc(fsc
->client
, "starting forced umount\n");
1038 fsc
->mount_state
= CEPH_MOUNT_SHUTDOWN
;
1039 __ceph_umount_begin(fsc
);
1042 static const struct super_operations ceph_super_ops
= {
1043 .alloc_inode
= ceph_alloc_inode
,
1044 .free_inode
= ceph_free_inode
,
1045 .write_inode
= ceph_write_inode
,
1046 .drop_inode
= generic_delete_inode
,
1047 .evict_inode
= ceph_evict_inode
,
1048 .sync_fs
= ceph_sync_fs
,
1049 .put_super
= ceph_put_super
,
1050 .show_options
= ceph_show_options
,
1051 .statfs
= ceph_statfs
,
1052 .umount_begin
= ceph_umount_begin
,
1056 * Bootstrap mount by opening the root directory. Note the mount
1057 * @started time from caller, and time out if this takes too long.
1059 static struct dentry
*open_root_dentry(struct ceph_fs_client
*fsc
,
1061 unsigned long started
)
1063 struct ceph_client
*cl
= fsc
->client
;
1064 struct ceph_mds_client
*mdsc
= fsc
->mdsc
;
1065 struct ceph_mds_request
*req
= NULL
;
1067 struct dentry
*root
;
1070 doutc(cl
, "opening '%s'\n", path
);
1071 req
= ceph_mdsc_create_request(mdsc
, CEPH_MDS_OP_GETATTR
, USE_ANY_MDS
);
1073 return ERR_CAST(req
);
1074 req
->r_path1
= kstrdup(path
, GFP_NOFS
);
1075 if (!req
->r_path1
) {
1076 root
= ERR_PTR(-ENOMEM
);
1080 req
->r_ino1
.ino
= CEPH_INO_ROOT
;
1081 req
->r_ino1
.snap
= CEPH_NOSNAP
;
1082 req
->r_started
= started
;
1083 req
->r_timeout
= fsc
->client
->options
->mount_timeout
;
1084 req
->r_args
.getattr
.mask
= cpu_to_le32(CEPH_STAT_CAP_INODE
);
1085 req
->r_num_caps
= 2;
1086 err
= ceph_mdsc_do_request(mdsc
, NULL
, req
);
1088 struct inode
*inode
= req
->r_target_inode
;
1089 req
->r_target_inode
= NULL
;
1090 doutc(cl
, "success\n");
1091 root
= d_make_root(inode
);
1093 root
= ERR_PTR(-ENOMEM
);
1096 doutc(cl
, "success, root dentry is %p\n", root
);
1098 root
= ERR_PTR(err
);
1101 ceph_mdsc_put_request(req
);
1105 #ifdef CONFIG_FS_ENCRYPTION
1106 static int ceph_apply_test_dummy_encryption(struct super_block
*sb
,
1107 struct fs_context
*fc
,
1108 struct ceph_mount_options
*fsopt
)
1110 struct ceph_fs_client
*fsc
= sb
->s_fs_info
;
1112 if (!fscrypt_is_dummy_policy_set(&fsopt
->dummy_enc_policy
))
1115 /* No changing encryption context on remount. */
1116 if (fc
->purpose
== FS_CONTEXT_FOR_RECONFIGURE
&&
1117 !fscrypt_is_dummy_policy_set(&fsc
->fsc_dummy_enc_policy
)) {
1118 if (fscrypt_dummy_policies_equal(&fsopt
->dummy_enc_policy
,
1119 &fsc
->fsc_dummy_enc_policy
))
1121 errorfc(fc
, "Can't set test_dummy_encryption on remount");
1125 /* Also make sure fsopt doesn't contain a conflicting value. */
1126 if (fscrypt_is_dummy_policy_set(&fsc
->fsc_dummy_enc_policy
)) {
1127 if (fscrypt_dummy_policies_equal(&fsopt
->dummy_enc_policy
,
1128 &fsc
->fsc_dummy_enc_policy
))
1130 errorfc(fc
, "Conflicting test_dummy_encryption options");
1134 fsc
->fsc_dummy_enc_policy
= fsopt
->dummy_enc_policy
;
1135 memset(&fsopt
->dummy_enc_policy
, 0, sizeof(fsopt
->dummy_enc_policy
));
1137 warnfc(fc
, "test_dummy_encryption mode enabled");
1141 static int ceph_apply_test_dummy_encryption(struct super_block
*sb
,
1142 struct fs_context
*fc
,
1143 struct ceph_mount_options
*fsopt
)
1150 * mount: join the ceph cluster, and open root directory.
1152 static struct dentry
*ceph_real_mount(struct ceph_fs_client
*fsc
,
1153 struct fs_context
*fc
)
1155 struct ceph_client
*cl
= fsc
->client
;
1157 unsigned long started
= jiffies
; /* note the start time */
1158 struct dentry
*root
;
1160 doutc(cl
, "mount start %p\n", fsc
);
1161 mutex_lock(&fsc
->client
->mount_mutex
);
1163 if (!fsc
->sb
->s_root
) {
1164 const char *path
= fsc
->mount_options
->server_path
?
1165 fsc
->mount_options
->server_path
+ 1 : "";
1167 err
= __ceph_open_session(fsc
->client
, started
);
1172 if (fsc
->mount_options
->flags
& CEPH_MOUNT_OPT_FSCACHE
) {
1173 err
= ceph_fscache_register_fs(fsc
, fc
);
1178 err
= ceph_apply_test_dummy_encryption(fsc
->sb
, fc
,
1179 fsc
->mount_options
);
1183 doutc(cl
, "mount opening path '%s'\n", path
);
1185 ceph_fs_debugfs_init(fsc
);
1187 root
= open_root_dentry(fsc
, path
, started
);
1189 err
= PTR_ERR(root
);
1192 fsc
->sb
->s_root
= dget(root
);
1194 root
= dget(fsc
->sb
->s_root
);
1197 fsc
->mount_state
= CEPH_MOUNT_MOUNTED
;
1198 doutc(cl
, "mount success\n");
1199 mutex_unlock(&fsc
->client
->mount_mutex
);
1203 mutex_unlock(&fsc
->client
->mount_mutex
);
1204 ceph_fscrypt_free_dummy_policy(fsc
);
1205 return ERR_PTR(err
);
1208 static int ceph_set_super(struct super_block
*s
, struct fs_context
*fc
)
1210 struct ceph_fs_client
*fsc
= s
->s_fs_info
;
1211 struct ceph_client
*cl
= fsc
->client
;
1214 doutc(cl
, "%p\n", s
);
1216 s
->s_maxbytes
= MAX_LFS_FILESIZE
;
1218 s
->s_xattr
= ceph_xattr_handlers
;
1220 fsc
->max_file_size
= 1ULL << 40; /* temp value until we get mdsmap */
1222 s
->s_op
= &ceph_super_ops
;
1223 s
->s_d_op
= &ceph_dentry_ops
;
1224 s
->s_export_op
= &ceph_export_ops
;
1228 s
->s_time_max
= U32_MAX
;
1229 s
->s_flags
|= SB_NODIRATIME
| SB_NOATIME
;
1231 ceph_fscrypt_set_ops(s
);
1233 ret
= set_anon_super_fc(s
, fc
);
1240 * share superblock if same fs AND options
1242 static int ceph_compare_super(struct super_block
*sb
, struct fs_context
*fc
)
1244 struct ceph_fs_client
*new = fc
->s_fs_info
;
1245 struct ceph_mount_options
*fsopt
= new->mount_options
;
1246 struct ceph_options
*opt
= new->client
->options
;
1247 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1248 struct ceph_client
*cl
= fsc
->client
;
1250 doutc(cl
, "%p\n", sb
);
1252 if (compare_mount_options(fsopt
, opt
, fsc
)) {
1253 doutc(cl
, "monitor(s)/mount options don't match\n");
1256 if ((opt
->flags
& CEPH_OPT_FSID
) &&
1257 ceph_fsid_compare(&opt
->fsid
, &fsc
->client
->fsid
)) {
1258 doutc(cl
, "fsid doesn't match\n");
1261 if (fc
->sb_flags
!= (sb
->s_flags
& ~SB_BORN
)) {
1262 doutc(cl
, "flags differ\n");
1266 if (fsc
->blocklisted
&& !ceph_test_mount_opt(fsc
, CLEANRECOVER
)) {
1267 doutc(cl
, "client is blocklisted (and CLEANRECOVER is not set)\n");
1271 if (fsc
->mount_state
== CEPH_MOUNT_SHUTDOWN
) {
1272 doutc(cl
, "client has been forcibly unmounted\n");
1280 * construct our own bdi so we can control readahead, etc.
1282 static atomic_long_t bdi_seq
= ATOMIC_LONG_INIT(0);
1284 static int ceph_setup_bdi(struct super_block
*sb
, struct ceph_fs_client
*fsc
)
1288 err
= super_setup_bdi_name(sb
, "ceph-%ld",
1289 atomic_long_inc_return(&bdi_seq
));
1293 /* set ra_pages based on rasize mount option? */
1294 sb
->s_bdi
->ra_pages
= fsc
->mount_options
->rasize
>> PAGE_SHIFT
;
1296 /* set io_pages based on max osd read size */
1297 sb
->s_bdi
->io_pages
= fsc
->mount_options
->rsize
>> PAGE_SHIFT
;
1302 static int ceph_get_tree(struct fs_context
*fc
)
1304 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1305 struct ceph_mount_options
*fsopt
= pctx
->opts
;
1306 struct super_block
*sb
;
1307 struct ceph_fs_client
*fsc
;
1309 int (*compare_super
)(struct super_block
*, struct fs_context
*) =
1313 dout("ceph_get_tree\n");
1316 return invalfc(fc
, "No source");
1317 if (fsopt
->new_dev_syntax
&& !fsopt
->mon_addr
)
1318 return invalfc(fc
, "No monitor address");
1320 /* create client (which we may/may not use) */
1321 fsc
= create_fs_client(pctx
->opts
, pctx
->copts
);
1329 err
= ceph_mdsc_init(fsc
);
1333 if (ceph_test_opt(fsc
->client
, NOSHARE
))
1334 compare_super
= NULL
;
1336 fc
->s_fs_info
= fsc
;
1337 sb
= sget_fc(fc
, compare_super
, ceph_set_super
);
1338 fc
->s_fs_info
= NULL
;
1344 if (ceph_sb_to_fs_client(sb
) != fsc
) {
1345 destroy_fs_client(fsc
);
1346 fsc
= ceph_sb_to_fs_client(sb
);
1347 dout("get_sb got existing client %p\n", fsc
);
1349 dout("get_sb using new client %p\n", fsc
);
1350 err
= ceph_setup_bdi(sb
, fsc
);
1355 res
= ceph_real_mount(fsc
, fc
);
1361 doutc(fsc
->client
, "root %p inode %p ino %llx.%llx\n", res
,
1362 d_inode(res
), ceph_vinop(d_inode(res
)));
1363 fc
->root
= fsc
->sb
->s_root
;
1367 if (!ceph_mdsmap_is_cluster_available(fsc
->mdsc
->mdsmap
)) {
1368 pr_info("No mds server is up or the cluster is laggy\n");
1369 err
= -EHOSTUNREACH
;
1372 ceph_mdsc_close_sessions(fsc
->mdsc
);
1373 deactivate_locked_super(sb
);
1377 destroy_fs_client(fsc
);
1379 dout("ceph_get_tree fail %d\n", err
);
1383 static void ceph_free_fc(struct fs_context
*fc
)
1385 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1388 destroy_mount_options(pctx
->opts
);
1389 ceph_destroy_options(pctx
->copts
);
1394 static int ceph_reconfigure_fc(struct fs_context
*fc
)
1397 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1398 struct ceph_mount_options
*fsopt
= pctx
->opts
;
1399 struct super_block
*sb
= fc
->root
->d_sb
;
1400 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1402 err
= ceph_apply_test_dummy_encryption(sb
, fc
, fsopt
);
1406 if (fsopt
->flags
& CEPH_MOUNT_OPT_ASYNC_DIROPS
)
1407 ceph_set_mount_opt(fsc
, ASYNC_DIROPS
);
1409 ceph_clear_mount_opt(fsc
, ASYNC_DIROPS
);
1411 if (fsopt
->flags
& CEPH_MOUNT_OPT_SPARSEREAD
)
1412 ceph_set_mount_opt(fsc
, SPARSEREAD
);
1414 ceph_clear_mount_opt(fsc
, SPARSEREAD
);
1416 if (strcmp_null(fsc
->mount_options
->mon_addr
, fsopt
->mon_addr
)) {
1417 kfree(fsc
->mount_options
->mon_addr
);
1418 fsc
->mount_options
->mon_addr
= fsopt
->mon_addr
;
1419 fsopt
->mon_addr
= NULL
;
1420 pr_notice_client(fsc
->client
,
1421 "monitor addresses recorded, but not used for reconnection");
1424 sync_filesystem(sb
);
1428 static const struct fs_context_operations ceph_context_ops
= {
1429 .free
= ceph_free_fc
,
1430 .parse_param
= ceph_parse_mount_param
,
1431 .get_tree
= ceph_get_tree
,
1432 .reconfigure
= ceph_reconfigure_fc
,
1436 * Set up the filesystem mount context.
1438 static int ceph_init_fs_context(struct fs_context
*fc
)
1440 struct ceph_parse_opts_ctx
*pctx
;
1441 struct ceph_mount_options
*fsopt
;
1443 pctx
= kzalloc(sizeof(*pctx
), GFP_KERNEL
);
1447 pctx
->copts
= ceph_alloc_options();
1451 pctx
->opts
= kzalloc(sizeof(*pctx
->opts
), GFP_KERNEL
);
1456 fsopt
->flags
= CEPH_MOUNT_OPT_DEFAULT
;
1458 fsopt
->wsize
= CEPH_MAX_WRITE_SIZE
;
1459 fsopt
->rsize
= CEPH_MAX_READ_SIZE
;
1460 fsopt
->rasize
= CEPH_RASIZE_DEFAULT
;
1461 fsopt
->snapdir_name
= kstrdup(CEPH_SNAPDIRNAME_DEFAULT
, GFP_KERNEL
);
1462 if (!fsopt
->snapdir_name
)
1465 fsopt
->caps_wanted_delay_min
= CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT
;
1466 fsopt
->caps_wanted_delay_max
= CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT
;
1467 fsopt
->max_readdir
= CEPH_MAX_READDIR_DEFAULT
;
1468 fsopt
->max_readdir_bytes
= CEPH_MAX_READDIR_BYTES_DEFAULT
;
1469 fsopt
->congestion_kb
= default_congestion_kb();
1471 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1472 fc
->sb_flags
|= SB_POSIXACL
;
1475 fc
->fs_private
= pctx
;
1476 fc
->ops
= &ceph_context_ops
;
1480 destroy_mount_options(pctx
->opts
);
1481 ceph_destroy_options(pctx
->copts
);
1487 * Return true if it successfully increases the blocker counter,
1488 * or false if the mdsc is in stopping and flushed state.
1490 static bool __inc_stopping_blocker(struct ceph_mds_client
*mdsc
)
1492 spin_lock(&mdsc
->stopping_lock
);
1493 if (mdsc
->stopping
>= CEPH_MDSC_STOPPING_FLUSHING
) {
1494 spin_unlock(&mdsc
->stopping_lock
);
1497 atomic_inc(&mdsc
->stopping_blockers
);
1498 spin_unlock(&mdsc
->stopping_lock
);
1502 static void __dec_stopping_blocker(struct ceph_mds_client
*mdsc
)
1504 spin_lock(&mdsc
->stopping_lock
);
1505 if (!atomic_dec_return(&mdsc
->stopping_blockers
) &&
1506 mdsc
->stopping
>= CEPH_MDSC_STOPPING_FLUSHING
)
1507 complete_all(&mdsc
->stopping_waiter
);
1508 spin_unlock(&mdsc
->stopping_lock
);
1511 /* For metadata IO requests */
1512 bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client
*mdsc
,
1513 struct ceph_mds_session
*session
)
1515 mutex_lock(&session
->s_mutex
);
1516 inc_session_sequence(session
);
1517 mutex_unlock(&session
->s_mutex
);
1519 return __inc_stopping_blocker(mdsc
);
/* Release a blocker taken with ceph_inc_mds_stopping_blocker(). */
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1527 /* For data IO requests */
1528 bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client
*mdsc
)
1530 return __inc_stopping_blocker(mdsc
);
/* Release a blocker taken with ceph_inc_osd_stopping_blocker(). */
void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1538 static void ceph_kill_sb(struct super_block
*s
)
1540 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(s
);
1541 struct ceph_client
*cl
= fsc
->client
;
1542 struct ceph_mds_client
*mdsc
= fsc
->mdsc
;
1545 doutc(cl
, "%p\n", s
);
1547 ceph_mdsc_pre_umount(mdsc
);
1548 flush_fs_workqueues(fsc
);
1551 * Though the kill_anon_super() will finally trigger the
1552 * sync_filesystem() anyway, we still need to do it here and
1553 * then bump the stage of shutdown. This will allow us to
1554 * drop any further message, which will increase the inodes'
1555 * i_count reference counters but makes no sense any more,
1558 * Without this when evicting the inodes it may fail in the
1559 * kill_anon_super(), which will trigger a warning when
1560 * destroying the fscrypt keyring and then possibly trigger
1561 * a further crash in ceph module when the iput() tries to
1562 * evict the inodes later.
1566 spin_lock(&mdsc
->stopping_lock
);
1567 mdsc
->stopping
= CEPH_MDSC_STOPPING_FLUSHING
;
1568 wait
= !!atomic_read(&mdsc
->stopping_blockers
);
1569 spin_unlock(&mdsc
->stopping_lock
);
1571 if (wait
&& atomic_read(&mdsc
->stopping_blockers
)) {
1572 long timeleft
= wait_for_completion_killable_timeout(
1573 &mdsc
->stopping_waiter
,
1574 fsc
->client
->options
->mount_timeout
);
1575 if (!timeleft
) /* timed out */
1576 pr_warn_client(cl
, "umount timed out, %ld\n", timeleft
);
1577 else if (timeleft
< 0) /* killed */
1578 pr_warn_client(cl
, "umount was killed, %ld\n", timeleft
);
1581 mdsc
->stopping
= CEPH_MDSC_STOPPING_FLUSHED
;
1584 fsc
->client
->extra_mon_dispatch
= NULL
;
1585 ceph_fs_debugfs_cleanup(fsc
);
1587 ceph_fscache_unregister_fs(fsc
);
1589 destroy_fs_client(fsc
);
1592 static struct file_system_type ceph_fs_type
= {
1593 .owner
= THIS_MODULE
,
1595 .init_fs_context
= ceph_init_fs_context
,
1596 .kill_sb
= ceph_kill_sb
,
1597 .fs_flags
= FS_RENAME_DOES_D_MOVE
| FS_ALLOW_IDMAP
,
1599 MODULE_ALIAS_FS("ceph");
1601 int ceph_force_reconnect(struct super_block
*sb
)
1603 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1606 fsc
->mount_state
= CEPH_MOUNT_RECOVER
;
1607 __ceph_umount_begin(fsc
);
1609 /* Make sure all page caches get invalidated.
1610 * see remove_session_caps_cb() */
1611 flush_workqueue(fsc
->inode_wq
);
1613 /* In case that we were blocklisted. This also reset
1614 * all mon/osd connections */
1615 ceph_reset_client_addr(fsc
->client
);
1617 ceph_osdc_clear_abort_err(&fsc
->client
->osdc
);
1619 fsc
->blocklisted
= false;
1620 fsc
->mount_state
= CEPH_MOUNT_MOUNTED
;
1623 err
= __ceph_do_getattr(d_inode(sb
->s_root
), NULL
,
1624 CEPH_STAT_CAP_INODE
, true);
1629 static int __init
init_ceph(void)
1631 int ret
= init_caches();
1636 ret
= register_filesystem(&ceph_fs_type
);
1640 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL
);
1650 static void __exit
exit_ceph(void)
1652 dout("exit_ceph\n");
1653 unregister_filesystem(&ceph_fs_type
);
1657 static int param_set_metrics(const char *val
, const struct kernel_param
*kp
)
1659 struct ceph_fs_client
*fsc
;
1662 ret
= param_set_bool(val
, kp
);
1664 pr_err("Failed to parse sending metrics switch value '%s'\n",
1667 } else if (!disable_send_metrics
) {
1668 // wake up all the mds clients
1669 spin_lock(&ceph_fsc_lock
);
1670 list_for_each_entry(fsc
, &ceph_fsc_list
, metric_wakeup
) {
1671 metric_schedule_delayed(&fsc
->mdsc
->metric
);
1673 spin_unlock(&ceph_fsc_lock
);
1679 static const struct kernel_param_ops param_ops_metrics
= {
1680 .set
= param_set_metrics
,
1681 .get
= param_get_bool
,
1684 bool disable_send_metrics
= false;
1685 module_param_cb(disable_send_metrics
, ¶m_ops_metrics
, &disable_send_metrics
, 0644);
1686 MODULE_PARM_DESC(disable_send_metrics
, "Enable sending perf metrics to ceph cluster (default: on)");
1688 /* for both v1 and v2 syntax */
1689 static bool mount_support
= true;
1690 static const struct kernel_param_ops param_ops_mount_syntax
= {
1691 .get
= param_get_bool
,
1693 module_param_cb(mount_syntax_v1
, ¶m_ops_mount_syntax
, &mount_support
, 0444);
1694 module_param_cb(mount_syntax_v2
, ¶m_ops_mount_syntax
, &mount_support
, 0444);
1696 bool enable_unsafe_idmap
= false;
1697 module_param(enable_unsafe_idmap
, bool, 0644);
1698 MODULE_PARM_DESC(enable_unsafe_idmap
,
1699 "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID");
1701 module_init(init_ceph
);
1702 module_exit(exit_ceph
);
1704 MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
1705 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
1706 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
1707 MODULE_DESCRIPTION("Ceph filesystem for Linux");
1708 MODULE_LICENSE("GPL");