1 // SPDX-License-Identifier: GPL-2.0-only
3 #include <linux/ceph/ceph_debug.h>
5 #include <linux/backing-dev.h>
6 #include <linux/ctype.h>
8 #include <linux/inet.h>
10 #include <linux/module.h>
11 #include <linux/mount.h>
12 #include <linux/fs_context.h>
13 #include <linux/fs_parser.h>
14 #include <linux/sched.h>
15 #include <linux/seq_file.h>
16 #include <linux/slab.h>
17 #include <linux/statfs.h>
18 #include <linux/string.h>
21 #include "mds_client.h"
25 #include <linux/ceph/ceph_features.h>
26 #include <linux/ceph/decode.h>
27 #include <linux/ceph/mon_client.h>
28 #include <linux/ceph/auth.h>
29 #include <linux/ceph/debugfs.h>
31 #include <uapi/linux/magic.h>
/*
 * Global list of ceph fs clients (linked through ->metric_wakeup).
 * Additions and removals are done with ceph_fsc_lock held.
 */
static LIST_HEAD(ceph_fsc_list);
static DEFINE_SPINLOCK(ceph_fsc_lock);
/*
 * Ceph superblock operations
 *
 * Handle the basics of mounting, unmounting.
 */
45 static void ceph_put_super(struct super_block
*s
)
47 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(s
);
49 doutc(fsc
->client
, "begin\n");
50 ceph_fscrypt_free_dummy_policy(fsc
);
51 ceph_mdsc_close_sessions(fsc
->mdsc
);
52 doutc(fsc
->client
, "done\n");
55 static int ceph_statfs(struct dentry
*dentry
, struct kstatfs
*buf
)
57 struct ceph_fs_client
*fsc
= ceph_inode_to_fs_client(d_inode(dentry
));
58 struct ceph_mon_client
*monc
= &fsc
->client
->monc
;
59 struct ceph_statfs st
;
63 doutc(fsc
->client
, "begin\n");
64 if (fsc
->mdsc
->mdsmap
->m_num_data_pg_pools
== 1) {
65 data_pool
= fsc
->mdsc
->mdsmap
->m_data_pg_pools
[0];
67 data_pool
= CEPH_NOPOOL
;
70 err
= ceph_monc_do_statfs(monc
, data_pool
, &st
);
75 buf
->f_type
= CEPH_SUPER_MAGIC
; /* ?? */
78 * Express utilization in terms of large blocks to avoid
79 * overflow on 32-bit machines.
81 buf
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
84 * By default use root quota for stats; fallback to overall filesystem
85 * usage if using 'noquotadf' mount option or if the root dir doesn't
86 * have max_bytes quota set.
88 if (ceph_test_mount_opt(fsc
, NOQUOTADF
) ||
89 !ceph_quota_update_statfs(fsc
, buf
)) {
90 buf
->f_blocks
= le64_to_cpu(st
.kb
) >> (CEPH_BLOCK_SHIFT
-10);
91 buf
->f_bfree
= le64_to_cpu(st
.kb_avail
) >> (CEPH_BLOCK_SHIFT
-10);
92 buf
->f_bavail
= le64_to_cpu(st
.kb_avail
) >> (CEPH_BLOCK_SHIFT
-10);
96 * NOTE: for the time being, we make bsize == frsize to humor
97 * not-yet-ancient versions of glibc that are broken.
98 * Someday, we will probably want to report a real block
99 * size... whatever that may mean for a network file system!
101 buf
->f_bsize
= buf
->f_frsize
;
103 buf
->f_files
= le64_to_cpu(st
.num_objects
);
105 buf
->f_namelen
= NAME_MAX
;
107 /* Must convert the fsid, for consistent values across arches */
108 buf
->f_fsid
.val
[0] = 0;
109 mutex_lock(&monc
->mutex
);
110 for (i
= 0 ; i
< sizeof(monc
->monmap
->fsid
) / sizeof(__le32
) ; ++i
)
111 buf
->f_fsid
.val
[0] ^= le32_to_cpu(((__le32
*)&monc
->monmap
->fsid
)[i
]);
112 mutex_unlock(&monc
->mutex
);
114 /* fold the fs_cluster_id into the upper bits */
115 buf
->f_fsid
.val
[1] = monc
->fs_cluster_id
;
117 doutc(fsc
->client
, "done\n");
121 static int ceph_sync_fs(struct super_block
*sb
, int wait
)
123 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
124 struct ceph_client
*cl
= fsc
->client
;
127 doutc(cl
, "(non-blocking)\n");
128 ceph_flush_dirty_caps(fsc
->mdsc
);
129 ceph_flush_cap_releases(fsc
->mdsc
);
130 doutc(cl
, "(non-blocking) done\n");
134 doutc(cl
, "(blocking)\n");
135 ceph_osdc_sync(&fsc
->client
->osdc
);
136 ceph_mdsc_sync(fsc
->mdsc
);
137 doutc(cl
, "(blocking) done\n");
148 Opt_caps_wanted_delay_min
,
149 Opt_caps_wanted_delay_max
,
151 Opt_readdir_max_entries
,
152 Opt_readdir_max_bytes
,
160 Opt_test_dummy_encryption
,
161 /* string args above */
169 Opt_require_active_mds
,
/* Values selectable through the "recover_session" mount option. */
enum ceph_recover_session_mode {
	ceph_recover_session_no,
	ceph_recover_session_clean
};
183 static const struct constant_table ceph_param_recover
[] = {
184 { "no", ceph_recover_session_no
},
185 { "clean", ceph_recover_session_clean
},
189 static const struct fs_parameter_spec ceph_mount_parameters
[] = {
190 fsparam_flag_no ("acl", Opt_acl
),
191 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir
),
192 fsparam_s32 ("caps_max", Opt_caps_max
),
193 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max
),
194 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min
),
195 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb
),
196 fsparam_flag_no ("copyfrom", Opt_copyfrom
),
197 fsparam_flag_no ("dcache", Opt_dcache
),
198 fsparam_flag_no ("dirstat", Opt_dirstat
),
199 fsparam_flag_no ("fsc", Opt_fscache
), // fsc|nofsc
200 fsparam_string ("fsc", Opt_fscache
), // fsc=...
201 fsparam_flag_no ("ino32", Opt_ino32
),
202 fsparam_string ("mds_namespace", Opt_mds_namespace
),
203 fsparam_string ("mon_addr", Opt_mon_addr
),
204 fsparam_flag_no ("poolperm", Opt_poolperm
),
205 fsparam_flag_no ("quotadf", Opt_quotadf
),
206 fsparam_u32 ("rasize", Opt_rasize
),
207 fsparam_flag_no ("rbytes", Opt_rbytes
),
208 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes
),
209 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries
),
210 fsparam_enum ("recover_session", Opt_recover_session
, ceph_param_recover
),
211 fsparam_flag_no ("require_active_mds", Opt_require_active_mds
),
212 fsparam_u32 ("rsize", Opt_rsize
),
213 fsparam_string ("snapdirname", Opt_snapdirname
),
214 fsparam_string ("source", Opt_source
),
215 fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption
),
216 fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption
),
217 fsparam_u32 ("wsize", Opt_wsize
),
218 fsparam_flag_no ("wsync", Opt_wsync
),
219 fsparam_flag_no ("pagecache", Opt_pagecache
),
220 fsparam_flag_no ("sparseread", Opt_sparseread
),
/*
 * Option-parsing state stored in fs_context::fs_private while the mount
 * parameters are being processed.
 */
struct ceph_parse_opts_ctx {
	/* options passed to ceph_parse_param() / ceph_parse_mon_ips() */
	struct ceph_options		*copts;
	/* ceph filesystem mount options */
	struct ceph_mount_options	*opts;
};
230 * Remove adjacent slashes and then the trailing slash, unless it is
231 * the only remaining character.
233 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
235 static void canonicalize_path(char *path
)
239 for (i
= 0; path
[i
] != '\0'; i
++) {
240 if (path
[i
] != '/' || j
< 1 || path
[j
- 1] != '/')
244 if (j
> 1 && path
[j
- 1] == '/')
250 * Check if the mds namespace in ceph_mount_options matches
251 * the passed in namespace string. First time match (when
252 * ->mds_namespace is NULL) is treated specially, since
253 * ->mds_namespace needs to be initialized by the caller.
255 static int namespace_equals(struct ceph_mount_options
*fsopt
,
256 const char *namespace, size_t len
)
258 return !(fsopt
->mds_namespace
&&
259 (strlen(fsopt
->mds_namespace
) != len
||
260 strncmp(fsopt
->mds_namespace
, namespace, len
)));
263 static int ceph_parse_old_source(const char *dev_name
, const char *dev_name_end
,
264 struct fs_context
*fc
)
267 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
268 struct ceph_mount_options
*fsopt
= pctx
->opts
;
270 if (*dev_name_end
!= ':')
271 return invalfc(fc
, "separator ':' missing in source");
273 r
= ceph_parse_mon_ips(dev_name
, dev_name_end
- dev_name
,
274 pctx
->copts
, fc
->log
.log
, ',');
278 fsopt
->new_dev_syntax
= false;
282 static int ceph_parse_new_source(const char *dev_name
, const char *dev_name_end
,
283 struct fs_context
*fc
)
286 struct ceph_fsid fsid
;
287 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
288 struct ceph_options
*opts
= pctx
->copts
;
289 struct ceph_mount_options
*fsopt
= pctx
->opts
;
290 const char *name_start
= dev_name
;
291 const char *fsid_start
, *fs_name_start
;
293 if (*dev_name_end
!= '=') {
294 dout("separator '=' missing in source");
298 fsid_start
= strchr(dev_name
, '@');
300 return invalfc(fc
, "missing cluster fsid");
301 len
= fsid_start
- name_start
;
303 opts
->name
= kstrndup(name_start
, len
, GFP_KERNEL
);
306 dout("using %s entity name", opts
->name
);
308 ++fsid_start
; /* start of cluster fsid */
309 fs_name_start
= strchr(fsid_start
, '.');
311 return invalfc(fc
, "missing file system name");
313 if (ceph_parse_fsid(fsid_start
, &fsid
))
314 return invalfc(fc
, "Invalid FSID");
316 ++fs_name_start
; /* start of file system name */
317 len
= dev_name_end
- fs_name_start
;
319 if (!namespace_equals(fsopt
, fs_name_start
, len
))
320 return invalfc(fc
, "Mismatching mds_namespace");
321 kfree(fsopt
->mds_namespace
);
322 fsopt
->mds_namespace
= kstrndup(fs_name_start
, len
, GFP_KERNEL
);
323 if (!fsopt
->mds_namespace
)
325 dout("file system (mds namespace) '%s'\n", fsopt
->mds_namespace
);
327 fsopt
->new_dev_syntax
= true;
/*
 * Parse the source parameter for new device format. Distinguish the device
 * spec from the path. Try parsing new device format and fall back to the
 * old device format.
 *
 * New device syntax looks like:
 *     <device_spec>=/<path>
 * where
 *     <device_spec> is name@fsid.fsname
 *     <path> is optional, but if present must begin with '/'
 * (monitor addresses are passed via mount option)
 *
 * Old device syntax is:
 *     <server_spec>[,<server_spec>...]:[<path>]
 * where
 *     <server_spec> is <ip>[:<port>]
 *     <path> is optional, but if present must begin with '/'
 */
349 static int ceph_parse_source(struct fs_parameter
*param
, struct fs_context
*fc
)
351 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
352 struct ceph_mount_options
*fsopt
= pctx
->opts
;
353 char *dev_name
= param
->string
, *dev_name_end
;
356 dout("'%s'\n", dev_name
);
357 if (!dev_name
|| !*dev_name
)
358 return invalfc(fc
, "Empty source");
360 dev_name_end
= strchr(dev_name
, '/');
363 * The server_path will include the whole chars from userland
364 * including the leading '/'.
366 kfree(fsopt
->server_path
);
367 fsopt
->server_path
= kstrdup(dev_name_end
, GFP_KERNEL
);
368 if (!fsopt
->server_path
)
371 canonicalize_path(fsopt
->server_path
);
373 dev_name_end
= dev_name
+ strlen(dev_name
);
376 dev_name_end
--; /* back up to separator */
377 if (dev_name_end
< dev_name
)
378 return invalfc(fc
, "Path missing in source");
380 dout("device name '%.*s'\n", (int)(dev_name_end
- dev_name
), dev_name
);
381 if (fsopt
->server_path
)
382 dout("server path '%s'\n", fsopt
->server_path
);
384 dout("trying new device syntax");
385 ret
= ceph_parse_new_source(dev_name
, dev_name_end
, fc
);
389 dout("trying old device syntax");
390 ret
= ceph_parse_old_source(dev_name
, dev_name_end
, fc
);
395 fc
->source
= param
->string
;
396 param
->string
= NULL
;
400 static int ceph_parse_mon_addr(struct fs_parameter
*param
,
401 struct fs_context
*fc
)
403 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
404 struct ceph_mount_options
*fsopt
= pctx
->opts
;
406 kfree(fsopt
->mon_addr
);
407 fsopt
->mon_addr
= param
->string
;
408 param
->string
= NULL
;
410 return ceph_parse_mon_ips(fsopt
->mon_addr
, strlen(fsopt
->mon_addr
),
411 pctx
->copts
, fc
->log
.log
, '/');
414 static int ceph_parse_mount_param(struct fs_context
*fc
,
415 struct fs_parameter
*param
)
417 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
418 struct ceph_mount_options
*fsopt
= pctx
->opts
;
419 struct fs_parse_result result
;
423 ret
= ceph_parse_param(param
, pctx
->copts
, fc
->log
.log
);
424 if (ret
!= -ENOPARAM
)
427 token
= fs_parse(fc
, ceph_mount_parameters
, param
, &result
);
428 dout("%s: fs_parse '%s' token %d\n",__func__
, param
->key
, token
);
433 case Opt_snapdirname
:
434 kfree(fsopt
->snapdir_name
);
435 fsopt
->snapdir_name
= param
->string
;
436 param
->string
= NULL
;
438 case Opt_mds_namespace
:
439 if (!namespace_equals(fsopt
, param
->string
, strlen(param
->string
)))
440 return invalfc(fc
, "Mismatching mds_namespace");
441 kfree(fsopt
->mds_namespace
);
442 fsopt
->mds_namespace
= param
->string
;
443 param
->string
= NULL
;
445 case Opt_recover_session
:
446 mode
= result
.uint_32
;
447 if (mode
== ceph_recover_session_no
)
448 fsopt
->flags
&= ~CEPH_MOUNT_OPT_CLEANRECOVER
;
449 else if (mode
== ceph_recover_session_clean
)
450 fsopt
->flags
|= CEPH_MOUNT_OPT_CLEANRECOVER
;
456 return invalfc(fc
, "Multiple sources specified");
457 return ceph_parse_source(param
, fc
);
459 return ceph_parse_mon_addr(param
, fc
);
461 if (result
.uint_32
< PAGE_SIZE
||
462 result
.uint_32
> CEPH_MAX_WRITE_SIZE
)
464 fsopt
->wsize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
467 if (result
.uint_32
< PAGE_SIZE
||
468 result
.uint_32
> CEPH_MAX_READ_SIZE
)
470 fsopt
->rsize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
473 fsopt
->rasize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
475 case Opt_caps_wanted_delay_min
:
476 if (result
.uint_32
< 1)
478 fsopt
->caps_wanted_delay_min
= result
.uint_32
;
480 case Opt_caps_wanted_delay_max
:
481 if (result
.uint_32
< 1)
483 fsopt
->caps_wanted_delay_max
= result
.uint_32
;
486 if (result
.int_32
< 0)
488 fsopt
->caps_max
= result
.int_32
;
490 case Opt_readdir_max_entries
:
491 if (result
.uint_32
< 1)
493 fsopt
->max_readdir
= result
.uint_32
;
495 case Opt_readdir_max_bytes
:
496 if (result
.uint_32
< PAGE_SIZE
&& result
.uint_32
!= 0)
498 fsopt
->max_readdir_bytes
= result
.uint_32
;
500 case Opt_congestion_kb
:
501 if (result
.uint_32
< 1024) /* at least 1M */
503 fsopt
->congestion_kb
= result
.uint_32
;
507 fsopt
->flags
|= CEPH_MOUNT_OPT_DIRSTAT
;
509 fsopt
->flags
&= ~CEPH_MOUNT_OPT_DIRSTAT
;
513 fsopt
->flags
|= CEPH_MOUNT_OPT_RBYTES
;
515 fsopt
->flags
&= ~CEPH_MOUNT_OPT_RBYTES
;
517 case Opt_asyncreaddir
:
519 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOASYNCREADDIR
;
521 fsopt
->flags
|= CEPH_MOUNT_OPT_NOASYNCREADDIR
;
525 fsopt
->flags
|= CEPH_MOUNT_OPT_DCACHE
;
527 fsopt
->flags
&= ~CEPH_MOUNT_OPT_DCACHE
;
531 fsopt
->flags
|= CEPH_MOUNT_OPT_INO32
;
533 fsopt
->flags
&= ~CEPH_MOUNT_OPT_INO32
;
537 #ifdef CONFIG_CEPH_FSCACHE
538 kfree(fsopt
->fscache_uniq
);
539 fsopt
->fscache_uniq
= NULL
;
540 if (result
.negated
) {
541 fsopt
->flags
&= ~CEPH_MOUNT_OPT_FSCACHE
;
543 fsopt
->flags
|= CEPH_MOUNT_OPT_FSCACHE
;
544 fsopt
->fscache_uniq
= param
->string
;
545 param
->string
= NULL
;
549 return invalfc(fc
, "fscache support is disabled");
553 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOPOOLPERM
;
555 fsopt
->flags
|= CEPH_MOUNT_OPT_NOPOOLPERM
;
557 case Opt_require_active_mds
:
559 fsopt
->flags
&= ~CEPH_MOUNT_OPT_MOUNTWAIT
;
561 fsopt
->flags
|= CEPH_MOUNT_OPT_MOUNTWAIT
;
565 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOQUOTADF
;
567 fsopt
->flags
|= CEPH_MOUNT_OPT_NOQUOTADF
;
571 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOCOPYFROM
;
573 fsopt
->flags
|= CEPH_MOUNT_OPT_NOCOPYFROM
;
576 if (!result
.negated
) {
577 #ifdef CONFIG_CEPH_FS_POSIX_ACL
578 fc
->sb_flags
|= SB_POSIXACL
;
580 return invalfc(fc
, "POSIX ACL support is disabled");
583 fc
->sb_flags
&= ~SB_POSIXACL
;
588 fsopt
->flags
&= ~CEPH_MOUNT_OPT_ASYNC_DIROPS
;
590 fsopt
->flags
|= CEPH_MOUNT_OPT_ASYNC_DIROPS
;
594 fsopt
->flags
|= CEPH_MOUNT_OPT_NOPAGECACHE
;
596 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOPAGECACHE
;
600 fsopt
->flags
&= ~CEPH_MOUNT_OPT_SPARSEREAD
;
602 fsopt
->flags
|= CEPH_MOUNT_OPT_SPARSEREAD
;
604 case Opt_test_dummy_encryption
:
605 #ifdef CONFIG_FS_ENCRYPTION
606 fscrypt_free_dummy_policy(&fsopt
->dummy_enc_policy
);
607 ret
= fscrypt_parse_test_dummy_encryption(param
,
608 &fsopt
->dummy_enc_policy
);
609 if (ret
== -EINVAL
) {
610 warnfc(fc
, "Value of option \"%s\" is unrecognized",
612 } else if (ret
== -EEXIST
) {
613 warnfc(fc
, "Conflicting test_dummy_encryption options");
618 "FS encryption not supported: test_dummy_encryption mount option ignored");
627 return invalfc(fc
, "%s out of range", param
->key
);
/*
 * Release what a ceph_mount_options owns: the snapdir_name,
 * mds_namespace, server_path, fscache_uniq and mon_addr strings and the
 * dummy encryption policy.
 */
630 static void destroy_mount_options(struct ceph_mount_options
*args
)
632 dout("destroy_mount_options %p\n", args
);
/* kfree() each heap-allocated option string */
636 kfree(args
->snapdir_name
);
637 kfree(args
->mds_namespace
);
638 kfree(args
->server_path
);
639 kfree(args
->fscache_uniq
);
640 kfree(args
->mon_addr
);
641 fscrypt_free_dummy_policy(&args
->dummy_enc_policy
);
645 static int strcmp_null(const char *s1
, const char *s2
)
653 return strcmp(s1
, s2
);
656 static int compare_mount_options(struct ceph_mount_options
*new_fsopt
,
657 struct ceph_options
*new_opt
,
658 struct ceph_fs_client
*fsc
)
660 struct ceph_mount_options
*fsopt1
= new_fsopt
;
661 struct ceph_mount_options
*fsopt2
= fsc
->mount_options
;
662 int ofs
= offsetof(struct ceph_mount_options
, snapdir_name
);
665 ret
= memcmp(fsopt1
, fsopt2
, ofs
);
669 ret
= strcmp_null(fsopt1
->snapdir_name
, fsopt2
->snapdir_name
);
673 ret
= strcmp_null(fsopt1
->mds_namespace
, fsopt2
->mds_namespace
);
677 ret
= strcmp_null(fsopt1
->server_path
, fsopt2
->server_path
);
681 ret
= strcmp_null(fsopt1
->fscache_uniq
, fsopt2
->fscache_uniq
);
685 ret
= strcmp_null(fsopt1
->mon_addr
, fsopt2
->mon_addr
);
689 return ceph_compare_options(new_opt
, fsc
->client
);
693 * ceph_show_options - Show mount options in /proc/mounts
694 * @m: seq_file to write to
695 * @root: root of that (sub)tree
697 static int ceph_show_options(struct seq_file
*m
, struct dentry
*root
)
699 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(root
->d_sb
);
700 struct ceph_mount_options
*fsopt
= fsc
->mount_options
;
704 /* a comma between MNT/MS and client options */
708 ret
= ceph_print_client_options(m
, fsc
->client
, false);
712 /* retract our comma if no client options */
716 if (fsopt
->flags
& CEPH_MOUNT_OPT_DIRSTAT
)
717 seq_puts(m
, ",dirstat");
718 if ((fsopt
->flags
& CEPH_MOUNT_OPT_RBYTES
))
719 seq_puts(m
, ",rbytes");
720 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOASYNCREADDIR
)
721 seq_puts(m
, ",noasyncreaddir");
722 if ((fsopt
->flags
& CEPH_MOUNT_OPT_DCACHE
) == 0)
723 seq_puts(m
, ",nodcache");
724 if (fsopt
->flags
& CEPH_MOUNT_OPT_INO32
)
725 seq_puts(m
, ",ino32");
726 if (fsopt
->flags
& CEPH_MOUNT_OPT_FSCACHE
) {
727 seq_show_option(m
, "fsc", fsopt
->fscache_uniq
);
729 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOPOOLPERM
)
730 seq_puts(m
, ",nopoolperm");
731 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOQUOTADF
)
732 seq_puts(m
, ",noquotadf");
734 #ifdef CONFIG_CEPH_FS_POSIX_ACL
735 if (root
->d_sb
->s_flags
& SB_POSIXACL
)
738 seq_puts(m
, ",noacl");
741 if ((fsopt
->flags
& CEPH_MOUNT_OPT_NOCOPYFROM
) == 0)
742 seq_puts(m
, ",copyfrom");
744 /* dump mds_namespace when old device syntax is in use */
745 if (fsopt
->mds_namespace
&& !fsopt
->new_dev_syntax
)
746 seq_show_option(m
, "mds_namespace", fsopt
->mds_namespace
);
749 seq_printf(m
, ",mon_addr=%s", fsopt
->mon_addr
);
751 if (fsopt
->flags
& CEPH_MOUNT_OPT_CLEANRECOVER
)
752 seq_show_option(m
, "recover_session", "clean");
754 if (!(fsopt
->flags
& CEPH_MOUNT_OPT_ASYNC_DIROPS
))
755 seq_puts(m
, ",wsync");
756 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOPAGECACHE
)
757 seq_puts(m
, ",nopagecache");
758 if (fsopt
->flags
& CEPH_MOUNT_OPT_SPARSEREAD
)
759 seq_puts(m
, ",sparseread");
761 fscrypt_show_test_dummy_encryption(m
, ',', root
->d_sb
);
763 if (fsopt
->wsize
!= CEPH_MAX_WRITE_SIZE
)
764 seq_printf(m
, ",wsize=%u", fsopt
->wsize
);
765 if (fsopt
->rsize
!= CEPH_MAX_READ_SIZE
)
766 seq_printf(m
, ",rsize=%u", fsopt
->rsize
);
767 if (fsopt
->rasize
!= CEPH_RASIZE_DEFAULT
)
768 seq_printf(m
, ",rasize=%u", fsopt
->rasize
);
769 if (fsopt
->congestion_kb
!= default_congestion_kb())
770 seq_printf(m
, ",write_congestion_kb=%u", fsopt
->congestion_kb
);
772 seq_printf(m
, ",caps_max=%d", fsopt
->caps_max
);
773 if (fsopt
->caps_wanted_delay_min
!= CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT
)
774 seq_printf(m
, ",caps_wanted_delay_min=%u",
775 fsopt
->caps_wanted_delay_min
);
776 if (fsopt
->caps_wanted_delay_max
!= CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT
)
777 seq_printf(m
, ",caps_wanted_delay_max=%u",
778 fsopt
->caps_wanted_delay_max
);
779 if (fsopt
->max_readdir
!= CEPH_MAX_READDIR_DEFAULT
)
780 seq_printf(m
, ",readdir_max_entries=%u", fsopt
->max_readdir
);
781 if (fsopt
->max_readdir_bytes
!= CEPH_MAX_READDIR_BYTES_DEFAULT
)
782 seq_printf(m
, ",readdir_max_bytes=%u", fsopt
->max_readdir_bytes
);
783 if (strcmp(fsopt
->snapdir_name
, CEPH_SNAPDIRNAME_DEFAULT
))
784 seq_show_option(m
, "snapdirname", fsopt
->snapdir_name
);
790 * handle any mon messages the standard library doesn't understand.
791 * return error if we don't either.
/*
 * Dispatch on the message type: MDS map messages go to
 * ceph_mdsc_handle_mdsmap() and user fs map messages go to
 * ceph_mdsc_handle_fsmap(), both on the fs client's mdsc.  The fs client
 * is taken from client->private.
 */
793 static int extra_mon_dispatch(struct ceph_client
*client
, struct ceph_msg
*msg
)
795 struct ceph_fs_client
*fsc
= client
->private;
/* header type is little-endian on the wire */
796 int type
= le16_to_cpu(msg
->hdr
.type
);
799 case CEPH_MSG_MDS_MAP
:
800 ceph_mdsc_handle_mdsmap(fsc
->mdsc
, msg
);
802 case CEPH_MSG_FS_MAP_USER
:
803 ceph_mdsc_handle_fsmap(fsc
->mdsc
, msg
);
811 * create a new fs client
813 * Success or not, this function consumes @fsopt and @opt.
815 static struct ceph_fs_client
*create_fs_client(struct ceph_mount_options
*fsopt
,
816 struct ceph_options
*opt
)
818 struct ceph_fs_client
*fsc
;
821 fsc
= kzalloc(sizeof(*fsc
), GFP_KERNEL
);
827 fsc
->client
= ceph_create_client(opt
, fsc
);
828 if (IS_ERR(fsc
->client
)) {
829 err
= PTR_ERR(fsc
->client
);
832 opt
= NULL
; /* fsc->client now owns this */
834 fsc
->client
->extra_mon_dispatch
= extra_mon_dispatch
;
835 ceph_set_opt(fsc
->client
, ABORT_ON_FULL
);
837 if (!fsopt
->mds_namespace
) {
838 ceph_monc_want_map(&fsc
->client
->monc
, CEPH_SUB_MDSMAP
,
841 ceph_monc_want_map(&fsc
->client
->monc
, CEPH_SUB_FSMAP
,
845 fsc
->mount_options
= fsopt
;
848 fsc
->mount_state
= CEPH_MOUNT_MOUNTING
;
850 fsc
->have_copy_from2
= true;
852 atomic_long_set(&fsc
->writeback_count
, 0);
853 fsc
->write_congested
= false;
857 * The number of concurrent works can be high but they don't need
858 * to be processed in parallel, limit concurrency.
860 fsc
->inode_wq
= alloc_workqueue("ceph-inode", WQ_UNBOUND
, 0);
863 fsc
->cap_wq
= alloc_workqueue("ceph-cap", 0, 1);
867 hash_init(fsc
->async_unlink_conflict
);
868 spin_lock_init(&fsc
->async_unlink_conflict_lock
);
870 spin_lock(&ceph_fsc_lock
);
871 list_add_tail(&fsc
->metric_wakeup
, &ceph_fsc_list
);
872 spin_unlock(&ceph_fsc_lock
);
877 destroy_workqueue(fsc
->inode_wq
);
879 ceph_destroy_client(fsc
->client
);
883 ceph_destroy_options(opt
);
884 destroy_mount_options(fsopt
);
888 static void flush_fs_workqueues(struct ceph_fs_client
*fsc
)
890 flush_workqueue(fsc
->inode_wq
);
891 flush_workqueue(fsc
->cap_wq
);
894 static void destroy_fs_client(struct ceph_fs_client
*fsc
)
896 doutc(fsc
->client
, "%p\n", fsc
);
898 spin_lock(&ceph_fsc_lock
);
899 list_del(&fsc
->metric_wakeup
);
900 spin_unlock(&ceph_fsc_lock
);
902 ceph_mdsc_destroy(fsc
);
903 destroy_workqueue(fsc
->inode_wq
);
904 destroy_workqueue(fsc
->cap_wq
);
906 destroy_mount_options(fsc
->mount_options
);
908 ceph_destroy_client(fsc
->client
);
911 dout("%s: %p done\n", __func__
, fsc
);
917 struct kmem_cache
*ceph_inode_cachep
;
918 struct kmem_cache
*ceph_cap_cachep
;
919 struct kmem_cache
*ceph_cap_snap_cachep
;
920 struct kmem_cache
*ceph_cap_flush_cachep
;
921 struct kmem_cache
*ceph_dentry_cachep
;
922 struct kmem_cache
*ceph_file_cachep
;
923 struct kmem_cache
*ceph_dir_file_cachep
;
924 struct kmem_cache
*ceph_mds_request_cachep
;
925 mempool_t
*ceph_wb_pagevec_pool
;
927 static void ceph_inode_init_once(void *foo
)
929 struct ceph_inode_info
*ci
= foo
;
930 inode_init_once(&ci
->netfs
.inode
);
933 static int __init
init_caches(void)
937 ceph_inode_cachep
= kmem_cache_create("ceph_inode_info",
938 sizeof(struct ceph_inode_info
),
939 __alignof__(struct ceph_inode_info
),
940 SLAB_RECLAIM_ACCOUNT
| SLAB_ACCOUNT
,
941 ceph_inode_init_once
);
942 if (!ceph_inode_cachep
)
945 ceph_cap_cachep
= KMEM_CACHE(ceph_cap
, 0);
946 if (!ceph_cap_cachep
)
948 ceph_cap_snap_cachep
= KMEM_CACHE(ceph_cap_snap
, 0);
949 if (!ceph_cap_snap_cachep
)
951 ceph_cap_flush_cachep
= KMEM_CACHE(ceph_cap_flush
,
952 SLAB_RECLAIM_ACCOUNT
);
953 if (!ceph_cap_flush_cachep
)
956 ceph_dentry_cachep
= KMEM_CACHE(ceph_dentry_info
,
957 SLAB_RECLAIM_ACCOUNT
);
958 if (!ceph_dentry_cachep
)
961 ceph_file_cachep
= KMEM_CACHE(ceph_file_info
, 0);
962 if (!ceph_file_cachep
)
965 ceph_dir_file_cachep
= KMEM_CACHE(ceph_dir_file_info
, 0);
966 if (!ceph_dir_file_cachep
)
969 ceph_mds_request_cachep
= KMEM_CACHE(ceph_mds_request
, 0);
970 if (!ceph_mds_request_cachep
)
973 ceph_wb_pagevec_pool
= mempool_create_kmalloc_pool(10,
974 (CEPH_MAX_WRITE_SIZE
>> PAGE_SHIFT
) * sizeof(struct page
*));
975 if (!ceph_wb_pagevec_pool
)
976 goto bad_pagevec_pool
;
981 kmem_cache_destroy(ceph_mds_request_cachep
);
983 kmem_cache_destroy(ceph_dir_file_cachep
);
985 kmem_cache_destroy(ceph_file_cachep
);
987 kmem_cache_destroy(ceph_dentry_cachep
);
989 kmem_cache_destroy(ceph_cap_flush_cachep
);
991 kmem_cache_destroy(ceph_cap_snap_cachep
);
993 kmem_cache_destroy(ceph_cap_cachep
);
995 kmem_cache_destroy(ceph_inode_cachep
);
999 static void destroy_caches(void)
1002 * Make sure all delayed rcu free inodes are flushed before we
1007 kmem_cache_destroy(ceph_inode_cachep
);
1008 kmem_cache_destroy(ceph_cap_cachep
);
1009 kmem_cache_destroy(ceph_cap_snap_cachep
);
1010 kmem_cache_destroy(ceph_cap_flush_cachep
);
1011 kmem_cache_destroy(ceph_dentry_cachep
);
1012 kmem_cache_destroy(ceph_file_cachep
);
1013 kmem_cache_destroy(ceph_dir_file_cachep
);
1014 kmem_cache_destroy(ceph_mds_request_cachep
);
1015 mempool_destroy(ceph_wb_pagevec_pool
);
1018 static void __ceph_umount_begin(struct ceph_fs_client
*fsc
)
1020 ceph_osdc_abort_requests(&fsc
->client
->osdc
, -EIO
);
1021 ceph_mdsc_force_umount(fsc
->mdsc
);
1022 fsc
->filp_gen
++; // invalidate open files
1026 * ceph_umount_begin - initiate forced umount. Tear down the
1027 * mount, skipping steps that may hang while waiting for server(s).
/*
 * ->umount_begin handler: mark the fs client CEPH_MOUNT_SHUTDOWN and run
 * the common forced-umount steps in __ceph_umount_begin().
 */
1029 void ceph_umount_begin(struct super_block
*sb
)
1031 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
/*
 * NOTE(review): fsc->client is dereferenced here before any check of
 * fsc visible in this view -- confirm fsc cannot be NULL at this point.
 */
1033 doutc(fsc
->client
, "starting forced umount\n");
1036 fsc
->mount_state
= CEPH_MOUNT_SHUTDOWN
;
1037 __ceph_umount_begin(fsc
);
1040 static const struct super_operations ceph_super_ops
= {
1041 .alloc_inode
= ceph_alloc_inode
,
1042 .free_inode
= ceph_free_inode
,
1043 .write_inode
= ceph_write_inode
,
1044 .drop_inode
= generic_delete_inode
,
1045 .evict_inode
= ceph_evict_inode
,
1046 .sync_fs
= ceph_sync_fs
,
1047 .put_super
= ceph_put_super
,
1048 .show_options
= ceph_show_options
,
1049 .statfs
= ceph_statfs
,
1050 .umount_begin
= ceph_umount_begin
,
1054 * Bootstrap mount by opening the root directory. Note the mount
1055 * @started time from caller, and time out if this takes too long.
1057 static struct dentry
*open_root_dentry(struct ceph_fs_client
*fsc
,
1059 unsigned long started
)
1061 struct ceph_client
*cl
= fsc
->client
;
1062 struct ceph_mds_client
*mdsc
= fsc
->mdsc
;
1063 struct ceph_mds_request
*req
= NULL
;
1065 struct dentry
*root
;
1068 doutc(cl
, "opening '%s'\n", path
);
1069 req
= ceph_mdsc_create_request(mdsc
, CEPH_MDS_OP_GETATTR
, USE_ANY_MDS
);
1071 return ERR_CAST(req
);
1072 req
->r_path1
= kstrdup(path
, GFP_NOFS
);
1073 if (!req
->r_path1
) {
1074 root
= ERR_PTR(-ENOMEM
);
1078 req
->r_ino1
.ino
= CEPH_INO_ROOT
;
1079 req
->r_ino1
.snap
= CEPH_NOSNAP
;
1080 req
->r_started
= started
;
1081 req
->r_timeout
= fsc
->client
->options
->mount_timeout
;
1082 req
->r_args
.getattr
.mask
= cpu_to_le32(CEPH_STAT_CAP_INODE
);
1083 req
->r_num_caps
= 2;
1084 err
= ceph_mdsc_do_request(mdsc
, NULL
, req
);
1086 struct inode
*inode
= req
->r_target_inode
;
1087 req
->r_target_inode
= NULL
;
1088 doutc(cl
, "success\n");
1089 root
= d_make_root(inode
);
1091 root
= ERR_PTR(-ENOMEM
);
1094 doutc(cl
, "success, root dentry is %p\n", root
);
1096 root
= ERR_PTR(err
);
1099 ceph_mdsc_put_request(req
);
1103 #ifdef CONFIG_FS_ENCRYPTION
1104 static int ceph_apply_test_dummy_encryption(struct super_block
*sb
,
1105 struct fs_context
*fc
,
1106 struct ceph_mount_options
*fsopt
)
1108 struct ceph_fs_client
*fsc
= sb
->s_fs_info
;
1110 if (!fscrypt_is_dummy_policy_set(&fsopt
->dummy_enc_policy
))
1113 /* No changing encryption context on remount. */
1114 if (fc
->purpose
== FS_CONTEXT_FOR_RECONFIGURE
&&
1115 !fscrypt_is_dummy_policy_set(&fsc
->fsc_dummy_enc_policy
)) {
1116 if (fscrypt_dummy_policies_equal(&fsopt
->dummy_enc_policy
,
1117 &fsc
->fsc_dummy_enc_policy
))
1119 errorfc(fc
, "Can't set test_dummy_encryption on remount");
1123 /* Also make sure fsopt doesn't contain a conflicting value. */
1124 if (fscrypt_is_dummy_policy_set(&fsc
->fsc_dummy_enc_policy
)) {
1125 if (fscrypt_dummy_policies_equal(&fsopt
->dummy_enc_policy
,
1126 &fsc
->fsc_dummy_enc_policy
))
1128 errorfc(fc
, "Conflicting test_dummy_encryption options");
1132 fsc
->fsc_dummy_enc_policy
= fsopt
->dummy_enc_policy
;
1133 memset(&fsopt
->dummy_enc_policy
, 0, sizeof(fsopt
->dummy_enc_policy
));
1135 warnfc(fc
, "test_dummy_encryption mode enabled");
1139 static int ceph_apply_test_dummy_encryption(struct super_block
*sb
,
1140 struct fs_context
*fc
,
1141 struct ceph_mount_options
*fsopt
)
1148 * mount: join the ceph cluster, and open root directory.
1150 static struct dentry
*ceph_real_mount(struct ceph_fs_client
*fsc
,
1151 struct fs_context
*fc
)
1153 struct ceph_client
*cl
= fsc
->client
;
1155 unsigned long started
= jiffies
; /* note the start time */
1156 struct dentry
*root
;
1158 doutc(cl
, "mount start %p\n", fsc
);
1159 mutex_lock(&fsc
->client
->mount_mutex
);
1161 if (!fsc
->sb
->s_root
) {
1162 const char *path
= fsc
->mount_options
->server_path
?
1163 fsc
->mount_options
->server_path
+ 1 : "";
1165 err
= __ceph_open_session(fsc
->client
, started
);
1170 if (fsc
->mount_options
->flags
& CEPH_MOUNT_OPT_FSCACHE
) {
1171 err
= ceph_fscache_register_fs(fsc
, fc
);
1176 err
= ceph_apply_test_dummy_encryption(fsc
->sb
, fc
,
1177 fsc
->mount_options
);
1181 doutc(cl
, "mount opening path '%s'\n", path
);
1183 ceph_fs_debugfs_init(fsc
);
1185 root
= open_root_dentry(fsc
, path
, started
);
1187 err
= PTR_ERR(root
);
1190 fsc
->sb
->s_root
= dget(root
);
1192 root
= dget(fsc
->sb
->s_root
);
1195 fsc
->mount_state
= CEPH_MOUNT_MOUNTED
;
1196 doutc(cl
, "mount success\n");
1197 mutex_unlock(&fsc
->client
->mount_mutex
);
1201 mutex_unlock(&fsc
->client
->mount_mutex
);
1202 ceph_fscrypt_free_dummy_policy(fsc
);
1203 return ERR_PTR(err
);
1206 static int ceph_set_super(struct super_block
*s
, struct fs_context
*fc
)
1208 struct ceph_fs_client
*fsc
= s
->s_fs_info
;
1209 struct ceph_client
*cl
= fsc
->client
;
1212 doutc(cl
, "%p\n", s
);
1214 s
->s_maxbytes
= MAX_LFS_FILESIZE
;
1216 s
->s_xattr
= ceph_xattr_handlers
;
1218 fsc
->max_file_size
= 1ULL << 40; /* temp value until we get mdsmap */
1220 s
->s_op
= &ceph_super_ops
;
1221 s
->s_d_op
= &ceph_dentry_ops
;
1222 s
->s_export_op
= &ceph_export_ops
;
1226 s
->s_time_max
= U32_MAX
;
1227 s
->s_flags
|= SB_NODIRATIME
| SB_NOATIME
;
1229 ceph_fscrypt_set_ops(s
);
1231 ret
= set_anon_super_fc(s
, fc
);
1238 * share superblock if same fs AND options
1240 static int ceph_compare_super(struct super_block
*sb
, struct fs_context
*fc
)
1242 struct ceph_fs_client
*new = fc
->s_fs_info
;
1243 struct ceph_mount_options
*fsopt
= new->mount_options
;
1244 struct ceph_options
*opt
= new->client
->options
;
1245 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1246 struct ceph_client
*cl
= fsc
->client
;
1248 doutc(cl
, "%p\n", sb
);
1250 if (compare_mount_options(fsopt
, opt
, fsc
)) {
1251 doutc(cl
, "monitor(s)/mount options don't match\n");
1254 if ((opt
->flags
& CEPH_OPT_FSID
) &&
1255 ceph_fsid_compare(&opt
->fsid
, &fsc
->client
->fsid
)) {
1256 doutc(cl
, "fsid doesn't match\n");
1259 if (fc
->sb_flags
!= (sb
->s_flags
& ~SB_BORN
)) {
1260 doutc(cl
, "flags differ\n");
1264 if (fsc
->blocklisted
&& !ceph_test_mount_opt(fsc
, CLEANRECOVER
)) {
1265 doutc(cl
, "client is blocklisted (and CLEANRECOVER is not set)\n");
1269 if (fsc
->mount_state
== CEPH_MOUNT_SHUTDOWN
) {
1270 doutc(cl
, "client has been forcibly unmounted\n");
1278 * construct our own bdi so we can control readahead, etc.
/* Sequence number used to build a distinct "ceph-%ld" bdi name per call. */
1280 static atomic_long_t bdi_seq
= ATOMIC_LONG_INIT(0);
/*
 * Set up a backing-dev-info for @sb, named from the incremented bdi_seq,
 * then derive its ra_pages and io_pages from the rasize and rsize mount
 * options of @fsc, each shifted right by PAGE_SHIFT.
 */
1282 static int ceph_setup_bdi(struct super_block
*sb
, struct ceph_fs_client
*fsc
)
1286 err
= super_setup_bdi_name(sb
, "ceph-%ld",
1287 atomic_long_inc_return(&bdi_seq
));
1291 /* set ra_pages based on rasize mount option? */
1292 sb
->s_bdi
->ra_pages
= fsc
->mount_options
->rasize
>> PAGE_SHIFT
;
1294 /* set io_pages based on max osd read size */
1295 sb
->s_bdi
->io_pages
= fsc
->mount_options
->rsize
>> PAGE_SHIFT
;
/*
 * ceph_get_tree - fs_context ->get_tree handler: find or create the ceph
 * superblock for this mount and set fc->root.
 *
 * @fc: fs_context; fc->fs_private carries the ceph_parse_opts_ctx that holds
 *      the parsed mount options (->opts) and libceph options (->copts)
 *
 * Visible flow: validate the source/monitor address, create a new
 * ceph_fs_client and initialise its MDS client, ask sget_fc() for a
 * superblock, then either switch to the client already attached to the
 * returned superblock (destroying the new one) or set up a bdi for the new
 * client, and finally perform the real mount.
 *
 * NOTE(review): several lines (local declarations, error checks, goto labels
 * and return statements) are not part of this listing, so the exact error
 * unwinding order cannot be confirmed from here.
 */
1300 static int ceph_get_tree(struct fs_context
*fc
)
1302 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1303 struct ceph_mount_options
*fsopt
= pctx
->opts
;
1304 struct super_block
*sb
;
1305 struct ceph_fs_client
*fsc
;
/* NOTE(review): the initial value of compare_super is on a line missing from this listing */
1307 int (*compare_super
)(struct super_block
*, struct fs_context
*) =
1311 dout("ceph_get_tree\n");
1314 return invalfc(fc
, "No source");
/* the new device syntax requires an explicit monitor address */
1315 if (fsopt
->new_dev_syntax
&& !fsopt
->mon_addr
)
1316 return invalfc(fc
, "No monitor address");
1318 /* create client (which we may/may not use) */
1319 fsc
= create_fs_client(pctx
->opts
, pctx
->copts
);
1327 err
= ceph_mdsc_init(fsc
);
/* with NOSHARE no comparison callback is handed to sget_fc() */
1331 if (ceph_test_opt(fsc
->client
, NOSHARE
))
1332 compare_super
= NULL
;
/* s_fs_info is set only for the duration of the sget_fc() call */
1334 fc
->s_fs_info
= fsc
;
1335 sb
= sget_fc(fc
, compare_super
, ceph_set_super
);
1336 fc
->s_fs_info
= NULL
;
/* a different client on the returned sb means an existing superblock was found */
1342 if (ceph_sb_to_fs_client(sb
) != fsc
) {
1343 destroy_fs_client(fsc
);
1344 fsc
= ceph_sb_to_fs_client(sb
);
1345 dout("get_sb got existing client %p\n", fsc
);
1347 dout("get_sb using new client %p\n", fsc
);
1348 err
= ceph_setup_bdi(sb
, fsc
);
1353 res
= ceph_real_mount(fsc
, fc
);
1359 doutc(fsc
->client
, "root %p inode %p ino %llx.%llx\n", res
,
1360 d_inode(res
), ceph_vinop(d_inode(res
)));
1361 fc
->root
= fsc
->sb
->s_root
;
/* err is set to -EHOSTUNREACH when the mdsmap reports the cluster as unavailable */
1365 if (!ceph_mdsmap_is_cluster_available(fsc
->mdsc
->mdsmap
)) {
1366 pr_info("No mds server is up or the cluster is laggy\n");
1367 err
= -EHOSTUNREACH
;
1370 ceph_mdsc_close_sessions(fsc
->mdsc
);
1371 deactivate_locked_super(sb
);
1375 destroy_fs_client(fsc
);
1377 dout("ceph_get_tree fail %d\n", err
);
/*
 * ceph_free_fc - fs_context ->free handler: release the parse context stored
 * in fc->fs_private.
 *
 * @fc: fs_context being torn down
 *
 * The visible lines destroy the ceph mount options (->opts) and the libceph
 * options (->copts).
 *
 * NOTE(review): lines around these calls are missing from this listing
 * (presumably a NULL check and the release of pctx itself) - confirm against
 * the complete file.
 */
1381 static void ceph_free_fc(struct fs_context
*fc
)
1383 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1386 destroy_mount_options(pctx
->opts
);
1387 ceph_destroy_options(pctx
->copts
);
/*
 * ceph_reconfigure_fc - fs_context ->reconfigure (remount) handler.
 *
 * @fc: fs_context holding the newly parsed options in fc->fs_private and the
 *      mounted tree in fc->root
 *
 * Visible effects: applies the test-dummy-encryption setting, mirrors the
 * ASYNC_DIROPS and SPARSEREAD flags from the new options onto the live
 * client, records a changed monitor address string, and syncs the
 * filesystem.
 *
 * NOTE(review): the declaration of "err", its check, the "else" lines and the
 * return statement are not part of this listing.
 */
1392 static int ceph_reconfigure_fc(struct fs_context
*fc
)
1395 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1396 struct ceph_mount_options
*fsopt
= pctx
->opts
;
1397 struct super_block
*sb
= fc
->root
->d_sb
;
1398 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1400 err
= ceph_apply_test_dummy_encryption(sb
, fc
, fsopt
);
/* ASYNC_DIROPS on the live client follows the flag in the new options */
1404 if (fsopt
->flags
& CEPH_MOUNT_OPT_ASYNC_DIROPS
)
1405 ceph_set_mount_opt(fsc
, ASYNC_DIROPS
);
1407 ceph_clear_mount_opt(fsc
, ASYNC_DIROPS
);
/* SPARSEREAD is handled the same way */
1409 if (fsopt
->flags
& CEPH_MOUNT_OPT_SPARSEREAD
)
1410 ceph_set_mount_opt(fsc
, SPARSEREAD
);
1412 ceph_clear_mount_opt(fsc
, SPARSEREAD
);
/*
 * On a differing mon_addr the old string is freed and the new one is moved
 * over; fsopt->mon_addr is cleared so the string has a single owner.
 */
1414 if (strcmp_null(fsc
->mount_options
->mon_addr
, fsopt
->mon_addr
)) {
1415 kfree(fsc
->mount_options
->mon_addr
);
1416 fsc
->mount_options
->mon_addr
= fsopt
->mon_addr
;
1417 fsopt
->mon_addr
= NULL
;
1418 pr_notice_client(fsc
->client
,
1419 "monitor addresses recorded, but not used for reconnection");
1422 sync_filesystem(sb
);
1426 static const struct fs_context_operations ceph_context_ops
= {
1427 .free
= ceph_free_fc
,
1428 .parse_param
= ceph_parse_mount_param
,
1429 .get_tree
= ceph_get_tree
,
1430 .reconfigure
= ceph_reconfigure_fc
,
1434 * Set up the filesystem mount context.
/*
 * ceph_init_fs_context - allocate the option-parsing context for a new mount
 * and fill in the default ceph mount options.
 *
 * @fc: fs_context to initialise; on the visible success path fc->fs_private
 *      is set to the new ceph_parse_opts_ctx and fc->ops to ceph_context_ops
 *
 * The visible cleanup lines at the end destroy pctx->opts and pctx->copts.
 *
 * NOTE(review): the allocation-failure checks, the assignment of "fsopt",
 * the goto labels and the return statements are not part of this listing.
 */
1436 static int ceph_init_fs_context(struct fs_context
*fc
)
1438 struct ceph_parse_opts_ctx
*pctx
;
1439 struct ceph_mount_options
*fsopt
;
1441 pctx
= kzalloc(sizeof(*pctx
), GFP_KERNEL
);
/* libceph-level options */
1445 pctx
->copts
= ceph_alloc_options();
/* ceph filesystem mount options, zero-initialised */
1449 pctx
->opts
= kzalloc(sizeof(*pctx
->opts
), GFP_KERNEL
);
/* defaults, set before any mount parameter is parsed */
1454 fsopt
->flags
= CEPH_MOUNT_OPT_DEFAULT
;
1456 fsopt
->wsize
= CEPH_MAX_WRITE_SIZE
;
1457 fsopt
->rsize
= CEPH_MAX_READ_SIZE
;
1458 fsopt
->rasize
= CEPH_RASIZE_DEFAULT
;
/* snapdir_name is a heap copy of the default name, hence the allocation check */
1459 fsopt
->snapdir_name
= kstrdup(CEPH_SNAPDIRNAME_DEFAULT
, GFP_KERNEL
);
1460 if (!fsopt
->snapdir_name
)
1463 fsopt
->caps_wanted_delay_min
= CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT
;
1464 fsopt
->caps_wanted_delay_max
= CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT
;
1465 fsopt
->max_readdir
= CEPH_MAX_READDIR_DEFAULT
;
1466 fsopt
->max_readdir_bytes
= CEPH_MAX_READDIR_BYTES_DEFAULT
;
1467 fsopt
->congestion_kb
= default_congestion_kb();
/* SB_POSIXACL is requested only when POSIX ACL support is compiled in */
1469 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1470 fc
->sb_flags
|= SB_POSIXACL
;
1473 fc
->fs_private
= pctx
;
1474 fc
->ops
= &ceph_context_ops
;
/* cleanup of the two option structures (NOTE(review): the label for this path is not shown) */
1478 destroy_mount_options(pctx
->opts
);
1479 ceph_destroy_options(pctx
->copts
);
1485 * Return true if it successfully increases the blocker counter,
1486 * or false if the mdsc is in stopping and flushed state.
/*
 * __inc_stopping_blocker - try to register one more in-flight request that
 * must finish before unmount proceeds.
 *
 * @mdsc: MDS client whose stopping state and blocker counter are used
 *
 * Under mdsc->stopping_lock: if the stopping stage has reached
 * CEPH_MDSC_STOPPING_FLUSHING the lock is dropped without touching the
 * counter; otherwise stopping_blockers is incremented.
 *
 * NOTE(review): the two return statements are not part of this listing;
 * presumably false on the early-unlock path and true after the increment -
 * confirm against the complete file.
 */
1488 static bool __inc_stopping_blocker(struct ceph_mds_client
*mdsc
)
1490 spin_lock(&mdsc
->stopping_lock
);
/* flushing has started (or finished): no new blocker is registered */
1491 if (mdsc
->stopping
>= CEPH_MDSC_STOPPING_FLUSHING
) {
1492 spin_unlock(&mdsc
->stopping_lock
);
1495 atomic_inc(&mdsc
->stopping_blockers
);
1496 spin_unlock(&mdsc
->stopping_lock
);
1500 static void __dec_stopping_blocker(struct ceph_mds_client
*mdsc
)
1502 spin_lock(&mdsc
->stopping_lock
);
1503 if (!atomic_dec_return(&mdsc
->stopping_blockers
) &&
1504 mdsc
->stopping
>= CEPH_MDSC_STOPPING_FLUSHING
)
1505 complete_all(&mdsc
->stopping_waiter
);
1506 spin_unlock(&mdsc
->stopping_lock
);
1509 /* For metadata IO requests */
1510 bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client
*mdsc
,
1511 struct ceph_mds_session
*session
)
1513 mutex_lock(&session
->s_mutex
);
1514 inc_session_sequence(session
);
1515 mutex_unlock(&session
->s_mutex
);
1517 return __inc_stopping_blocker(mdsc
);
/*
 * ceph_dec_mds_stopping_blocker - release the unmount blocker held by a
 * metadata request.
 *
 * @mdsc: MDS client whose blocker counter is decremented
 */
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1525 /* For data IO requests */
1526 bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client
*mdsc
)
1528 return __inc_stopping_blocker(mdsc
);
/*
 * ceph_dec_osd_stopping_blocker - release the unmount blocker held by a data
 * I/O request.
 *
 * @mdsc: MDS client whose blocker counter is decremented
 */
void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
/*
 * ceph_kill_sb - file_system_type ->kill_sb handler: tear down a ceph
 * superblock and its filesystem client.
 *
 * @s: superblock being killed
 *
 * Visible sequence: pre-umount on the MDS client and flush of the fs
 * workqueues; move the stopping stage to FLUSHING and, if blockers are still
 * registered, wait (killable, bounded by the mount_timeout option) for them
 * to drain; move the stage to FLUSHED; then clear the extra monitor dispatch
 * hook, clean up debugfs and fscache state and destroy the client.
 *
 * NOTE(review): some lines of this function are missing from this listing
 * (the declaration of "wait" and the statements at listing lines 1562 and
 * 1580, among others), so the full teardown order cannot be confirmed here.
 */
1536 static void ceph_kill_sb(struct super_block
*s
)
1538 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(s
);
1539 struct ceph_client
*cl
= fsc
->client
;
1540 struct ceph_mds_client
*mdsc
= fsc
->mdsc
;
1543 doutc(cl
, "%p\n", s
);
1545 ceph_mdsc_pre_umount(mdsc
);
1546 flush_fs_workqueues(fsc
);
1549 * Though the kill_anon_super() will finally trigger the
1550 * sync_filesystem() anyway, we still need to do it here and
1551 * then bump the stage of shutdown. This will allow us to
1552 * drop any further message, which will increase the inodes'
1553 * i_count reference counters but makes no sense any more,
1556 * Without this when evicting the inodes it may fail in the
1557 * kill_anon_super(), which will trigger a warning when
1558 * destroying the fscrypt keyring and then possibly trigger
1559 * a further crash in ceph module when the iput() tries to
1560 * evict the inodes later.
/* stage change and blocker snapshot are taken together under stopping_lock */
1564 spin_lock(&mdsc
->stopping_lock
);
1565 mdsc
->stopping
= CEPH_MDSC_STOPPING_FLUSHING
;
1566 wait
= !!atomic_read(&mdsc
->stopping_blockers
);
1567 spin_unlock(&mdsc
->stopping_lock
);
/* the counter is re-read outside the lock so the wait is skipped if it already hit zero */
1569 if (wait
&& atomic_read(&mdsc
->stopping_blockers
)) {
1570 long timeleft
= wait_for_completion_killable_timeout(
1571 &mdsc
->stopping_waiter
,
1572 fsc
->client
->options
->mount_timeout
);
1573 if (!timeleft
) /* timed out */
1574 pr_warn_client(cl
, "umount timed out, %ld\n", timeleft
);
1575 else if (timeleft
< 0) /* killed */
1576 pr_warn_client(cl
, "umount was killed, %ld\n", timeleft
);
1579 mdsc
->stopping
= CEPH_MDSC_STOPPING_FLUSHED
;
1582 fsc
->client
->extra_mon_dispatch
= NULL
;
1583 ceph_fs_debugfs_cleanup(fsc
);
1585 ceph_fscache_unregister_fs(fsc
);
1587 destroy_fs_client(fsc
);
/*
 * Filesystem type descriptor registered by this module.  New mounts start in
 * ceph_init_fs_context() and superblocks are torn down by ceph_kill_sb().
 *
 * NOTE(review): one initializer line (listing line 1592) is missing from
 * this listing, as is the closing of the initializer.
 */
1590 static struct file_system_type ceph_fs_type
= {
1591 .owner
= THIS_MODULE
,
1593 .init_fs_context
= ceph_init_fs_context
,
1594 .kill_sb
= ceph_kill_sb
,
1595 .fs_flags
= FS_RENAME_DOES_D_MOVE
| FS_ALLOW_IDMAP
,
/* module alias for the "ceph" filesystem name */
1597 MODULE_ALIAS_FS("ceph");
/*
 * ceph_force_reconnect - force the client behind @sb through a recovery
 * cycle and bring it back to the mounted state.
 *
 * @sb: superblock whose ceph_fs_client is to be reconnected
 *
 * Visible sequence: mark the client CEPH_MOUNT_RECOVER and run
 * __ceph_umount_begin(); flush the inode workqueue; reset the client address;
 * clear the OSD client abort error; clear the blocklisted flag and mark the
 * client CEPH_MOUNT_MOUNTED; finally issue a getattr on the root inode.
 *
 * NOTE(review): the declaration of "err", any guard around the final
 * getattr and the return statement are not part of this listing.
 */
1599 int ceph_force_reconnect(struct super_block
*sb
)
1601 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1604 fsc
->mount_state
= CEPH_MOUNT_RECOVER
;
1605 __ceph_umount_begin(fsc
);
1607 /* Make sure all page caches get invalidated.
1608 * see remove_session_caps_cb() */
1609 flush_workqueue(fsc
->inode_wq
);
1611 /* In case that we were blocklisted. This also reset
1612 * all mon/osd connections */
1613 ceph_reset_client_addr(fsc
->client
);
1615 ceph_osdc_clear_abort_err(&fsc
->client
->osdc
);
/* recovery steps done: client is no longer treated as blocklisted and is usable again */
1617 fsc
->blocklisted
= false;
1618 fsc
->mount_state
= CEPH_MOUNT_MOUNTED
;
/* getattr on the root inode requesting CEPH_STAT_CAP_INODE (last argument true) */
1621 err
= __ceph_do_getattr(d_inode(sb
->s_root
), NULL
,
1622 CEPH_STAT_CAP_INODE
, true);
/*
 * init_ceph - module init: set up the caches, register the ceph filesystem
 * type and log the MDS protocol version.
 *
 * NOTE(review): the error checks, any unwinding and the return statement are
 * not part of this listing.
 */
1627 static int __init
init_ceph(void)
1629 int ret
= init_caches();
1634 ret
= register_filesystem(&ceph_fs_type
);
1638 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL
);
/*
 * exit_ceph - module exit: unregister the ceph filesystem type.
 *
 * NOTE(review): at least one line following unregister_filesystem() is
 * missing from this listing (presumably the cache teardown matching
 * init_caches()) - confirm against the complete file.
 */
1648 static void __exit
exit_ceph(void)
1650 dout("exit_ceph\n");
1651 unregister_filesystem(&ceph_fs_type
);
/*
 * param_set_metrics - setter for the disable_send_metrics module parameter.
 *
 * @val: textual value written to the parameter
 * @kp:  kernel_param descriptor, passed on to param_set_bool()
 *
 * After param_set_bool() parses the value, a failure is reported with
 * pr_err(); otherwise, when metrics sending is now enabled
 * (!disable_send_metrics), the metric work of every client on ceph_fsc_list
 * is scheduled while holding ceph_fsc_lock.
 *
 * NOTE(review): the declaration of "ret", the failure test and the return
 * statements are not part of this listing.
 */
1655 static int param_set_metrics(const char *val
, const struct kernel_param
*kp
)
1657 struct ceph_fs_client
*fsc
;
1660 ret
= param_set_bool(val
, kp
);
1662 pr_err("Failed to parse sending metrics switch value '%s'\n",
1665 } else if (!disable_send_metrics
) {
1666 // wake up all the mds clients
/* clients are linked into ceph_fsc_list through their metric_wakeup member */
1667 spin_lock(&ceph_fsc_lock
);
1668 list_for_each_entry(fsc
, &ceph_fsc_list
, metric_wakeup
) {
1669 metric_schedule_delayed(&fsc
->mdsc
->metric
);
1671 spin_unlock(&ceph_fsc_lock
);
1677 static const struct kernel_param_ops param_ops_metrics
= {
1678 .set
= param_set_metrics
,
1679 .get
= param_get_bool
,
1682 bool disable_send_metrics
= false;
1683 module_param_cb(disable_send_metrics
, ¶m_ops_metrics
, &disable_send_metrics
, 0644);
1684 MODULE_PARM_DESC(disable_send_metrics
, "Enable sending perf metrics to ceph cluster (default: on)");
1686 /* for both v1 and v2 syntax */
1687 static bool mount_support
= true;
1688 static const struct kernel_param_ops param_ops_mount_syntax
= {
1689 .get
= param_get_bool
,
1691 module_param_cb(mount_syntax_v1
, ¶m_ops_mount_syntax
, &mount_support
, 0444);
1692 module_param_cb(mount_syntax_v2
, ¶m_ops_mount_syntax
, &mount_support
, 0444);
1694 bool enable_unsafe_idmap
= false;
1695 module_param(enable_unsafe_idmap
, bool, 0644);
1696 MODULE_PARM_DESC(enable_unsafe_idmap
,
1697 "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID");
1699 module_init(init_ceph
);
1700 module_exit(exit_ceph
);
1702 MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
1703 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
1704 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
1705 MODULE_DESCRIPTION("Ceph filesystem for Linux");
1706 MODULE_LICENSE("GPL");