1 // SPDX-License-Identifier: GPL-2.0-only
3 #include <linux/ceph/ceph_debug.h>
5 #include <linux/backing-dev.h>
6 #include <linux/ctype.h>
8 #include <linux/inet.h>
10 #include <linux/module.h>
11 #include <linux/mount.h>
12 #include <linux/fs_context.h>
13 #include <linux/fs_parser.h>
14 #include <linux/sched.h>
15 #include <linux/seq_file.h>
16 #include <linux/slab.h>
17 #include <linux/statfs.h>
18 #include <linux/string.h>
21 #include "mds_client.h"
25 #include <linux/ceph/ceph_features.h>
26 #include <linux/ceph/decode.h>
27 #include <linux/ceph/mon_client.h>
28 #include <linux/ceph/auth.h>
29 #include <linux/ceph/debugfs.h>
31 #include <uapi/linux/magic.h>
33 static DEFINE_SPINLOCK(ceph_fsc_lock
);
34 static LIST_HEAD(ceph_fsc_list
);
37 * Ceph superblock operations
39 * Handle the basics of mounting, unmounting.
45 static void ceph_put_super(struct super_block
*s
)
47 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(s
);
49 doutc(fsc
->client
, "begin\n");
50 ceph_fscrypt_free_dummy_policy(fsc
);
51 ceph_mdsc_close_sessions(fsc
->mdsc
);
52 doutc(fsc
->client
, "done\n");
55 static int ceph_statfs(struct dentry
*dentry
, struct kstatfs
*buf
)
57 struct ceph_fs_client
*fsc
= ceph_inode_to_fs_client(d_inode(dentry
));
58 struct ceph_mon_client
*monc
= &fsc
->client
->monc
;
59 struct ceph_statfs st
;
63 doutc(fsc
->client
, "begin\n");
64 if (fsc
->mdsc
->mdsmap
->m_num_data_pg_pools
== 1) {
65 data_pool
= fsc
->mdsc
->mdsmap
->m_data_pg_pools
[0];
67 data_pool
= CEPH_NOPOOL
;
70 err
= ceph_monc_do_statfs(monc
, data_pool
, &st
);
75 buf
->f_type
= CEPH_SUPER_MAGIC
; /* ?? */
78 * Express utilization in terms of large blocks to avoid
79 * overflow on 32-bit machines.
81 buf
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
84 * By default use root quota for stats; fall back to overall filesystem
85 * usage if using 'noquotadf' mount option or if the root dir doesn't
86 * have max_bytes quota set.
88 if (ceph_test_mount_opt(fsc
, NOQUOTADF
) ||
89 !ceph_quota_update_statfs(fsc
, buf
)) {
90 buf
->f_blocks
= le64_to_cpu(st
.kb
) >> (CEPH_BLOCK_SHIFT
-10);
91 buf
->f_bfree
= le64_to_cpu(st
.kb_avail
) >> (CEPH_BLOCK_SHIFT
-10);
92 buf
->f_bavail
= le64_to_cpu(st
.kb_avail
) >> (CEPH_BLOCK_SHIFT
-10);
96 * NOTE: for the time being, we make bsize == frsize to humor
97 * not-yet-ancient versions of glibc that are broken.
98 * Someday, we will probably want to report a real block
99 * size... whatever that may mean for a network file system!
101 buf
->f_bsize
= buf
->f_frsize
;
103 buf
->f_files
= le64_to_cpu(st
.num_objects
);
105 buf
->f_namelen
= NAME_MAX
;
107 /* Must convert the fsid, for consistent values across arches */
108 buf
->f_fsid
.val
[0] = 0;
109 mutex_lock(&monc
->mutex
);
110 for (i
= 0 ; i
< sizeof(monc
->monmap
->fsid
) / sizeof(__le32
) ; ++i
)
111 buf
->f_fsid
.val
[0] ^= le32_to_cpu(((__le32
*)&monc
->monmap
->fsid
)[i
]);
112 mutex_unlock(&monc
->mutex
);
114 /* fold the fs_cluster_id into the upper bits */
115 buf
->f_fsid
.val
[1] = monc
->fs_cluster_id
;
117 doutc(fsc
->client
, "done\n");
121 static int ceph_sync_fs(struct super_block
*sb
, int wait
)
123 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
124 struct ceph_client
*cl
= fsc
->client
;
127 doutc(cl
, "(non-blocking)\n");
128 ceph_flush_dirty_caps(fsc
->mdsc
);
129 ceph_flush_cap_releases(fsc
->mdsc
);
130 doutc(cl
, "(non-blocking) done\n");
134 doutc(cl
, "(blocking)\n");
135 ceph_osdc_sync(&fsc
->client
->osdc
);
136 ceph_mdsc_sync(fsc
->mdsc
);
137 doutc(cl
, "(blocking) done\n");
148 Opt_caps_wanted_delay_min
,
149 Opt_caps_wanted_delay_max
,
151 Opt_readdir_max_entries
,
152 Opt_readdir_max_bytes
,
160 Opt_test_dummy_encryption
,
161 /* string args above */
169 Opt_require_active_mds
,
178 enum ceph_recover_session_mode
{
179 ceph_recover_session_no
,
180 ceph_recover_session_clean
183 static const struct constant_table ceph_param_recover
[] = {
184 { "no", ceph_recover_session_no
},
185 { "clean", ceph_recover_session_clean
},
189 static const struct fs_parameter_spec ceph_mount_parameters
[] = {
190 fsparam_flag_no ("acl", Opt_acl
),
191 fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir
),
192 fsparam_s32 ("caps_max", Opt_caps_max
),
193 fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max
),
194 fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min
),
195 fsparam_u32 ("write_congestion_kb", Opt_congestion_kb
),
196 fsparam_flag_no ("copyfrom", Opt_copyfrom
),
197 fsparam_flag_no ("dcache", Opt_dcache
),
198 fsparam_flag_no ("dirstat", Opt_dirstat
),
199 fsparam_flag_no ("fsc", Opt_fscache
), // fsc|nofsc
200 fsparam_string ("fsc", Opt_fscache
), // fsc=...
201 fsparam_flag_no ("ino32", Opt_ino32
),
202 fsparam_string ("mds_namespace", Opt_mds_namespace
),
203 fsparam_string ("mon_addr", Opt_mon_addr
),
204 fsparam_flag_no ("poolperm", Opt_poolperm
),
205 fsparam_flag_no ("quotadf", Opt_quotadf
),
206 fsparam_u32 ("rasize", Opt_rasize
),
207 fsparam_flag_no ("rbytes", Opt_rbytes
),
208 fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes
),
209 fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries
),
210 fsparam_enum ("recover_session", Opt_recover_session
, ceph_param_recover
),
211 fsparam_flag_no ("require_active_mds", Opt_require_active_mds
),
212 fsparam_u32 ("rsize", Opt_rsize
),
213 fsparam_string ("snapdirname", Opt_snapdirname
),
214 fsparam_string ("source", Opt_source
),
215 fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption
),
216 fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption
),
217 fsparam_u32 ("wsize", Opt_wsize
),
218 fsparam_flag_no ("wsync", Opt_wsync
),
219 fsparam_flag_no ("pagecache", Opt_pagecache
),
220 fsparam_flag_no ("sparseread", Opt_sparseread
),
224 struct ceph_parse_opts_ctx
{
225 struct ceph_options
*copts
;
226 struct ceph_mount_options
*opts
;
230 * Remove adjacent slashes and then the trailing slash, unless it is
231 * the only remaining character.
233 * E.g. "//dir1////dir2///" --> "/dir1/dir2", "///" --> "/".
235 static void canonicalize_path(char *path
)
239 for (i
= 0; path
[i
] != '\0'; i
++) {
240 if (path
[i
] != '/' || j
< 1 || path
[j
- 1] != '/')
244 if (j
> 1 && path
[j
- 1] == '/')
250 * Check if the mds namespace in ceph_mount_options matches
251 * the passed in namespace string. First time match (when
252 * ->mds_namespace is NULL) is treated specially, since
253 * ->mds_namespace needs to be initialized by the caller.
255 static int namespace_equals(struct ceph_mount_options
*fsopt
,
256 const char *namespace, size_t len
)
258 return !(fsopt
->mds_namespace
&&
259 (strlen(fsopt
->mds_namespace
) != len
||
260 strncmp(fsopt
->mds_namespace
, namespace, len
)));
263 static int ceph_parse_old_source(const char *dev_name
, const char *dev_name_end
,
264 struct fs_context
*fc
)
267 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
268 struct ceph_mount_options
*fsopt
= pctx
->opts
;
270 if (*dev_name_end
!= ':')
271 return invalfc(fc
, "separator ':' missing in source");
273 r
= ceph_parse_mon_ips(dev_name
, dev_name_end
- dev_name
,
274 pctx
->copts
, fc
->log
.log
, ',');
278 fsopt
->new_dev_syntax
= false;
282 static int ceph_parse_new_source(const char *dev_name
, const char *dev_name_end
,
283 struct fs_context
*fc
)
286 struct ceph_fsid fsid
;
287 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
288 struct ceph_mount_options
*fsopt
= pctx
->opts
;
289 char *fsid_start
, *fs_name_start
;
291 if (*dev_name_end
!= '=') {
292 dout("separator '=' missing in source");
296 fsid_start
= strchr(dev_name
, '@');
298 return invalfc(fc
, "missing cluster fsid");
299 ++fsid_start
; /* start of cluster fsid */
301 fs_name_start
= strchr(fsid_start
, '.');
303 return invalfc(fc
, "missing file system name");
305 if (ceph_parse_fsid(fsid_start
, &fsid
))
306 return invalfc(fc
, "Invalid FSID");
308 ++fs_name_start
; /* start of file system name */
309 len
= dev_name_end
- fs_name_start
;
311 if (!namespace_equals(fsopt
, fs_name_start
, len
))
312 return invalfc(fc
, "Mismatching mds_namespace");
313 kfree(fsopt
->mds_namespace
);
314 fsopt
->mds_namespace
= kstrndup(fs_name_start
, len
, GFP_KERNEL
);
315 if (!fsopt
->mds_namespace
)
317 dout("file system (mds namespace) '%s'\n", fsopt
->mds_namespace
);
319 fsopt
->new_dev_syntax
= true;
324 * Parse the source parameter for new device format. Distinguish the device
325 * spec from the path. Try parsing new device format and fall back to old
328 * New device syntax will look like:
329 * <device_spec>=/<path>
331 * <device_spec> is name@fsid.fsname
332 * <path> is optional, but if present must begin with '/'
333 * (monitor addresses are passed via mount option)
335 * Old device syntax is:
336 * <server_spec>[,<server_spec>...]:[<path>]
338 * <server_spec> is <ip>[:<port>]
339 * <path> is optional, but if present must begin with '/'
341 static int ceph_parse_source(struct fs_parameter
*param
, struct fs_context
*fc
)
343 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
344 struct ceph_mount_options
*fsopt
= pctx
->opts
;
345 char *dev_name
= param
->string
, *dev_name_end
;
348 dout("'%s'\n", dev_name
);
349 if (!dev_name
|| !*dev_name
)
350 return invalfc(fc
, "Empty source");
352 dev_name_end
= strchr(dev_name
, '/');
355 * The server_path will include the whole chars from userland
356 * including the leading '/'.
358 kfree(fsopt
->server_path
);
359 fsopt
->server_path
= kstrdup(dev_name_end
, GFP_KERNEL
);
360 if (!fsopt
->server_path
)
363 canonicalize_path(fsopt
->server_path
);
365 dev_name_end
= dev_name
+ strlen(dev_name
);
368 dev_name_end
--; /* back up to separator */
369 if (dev_name_end
< dev_name
)
370 return invalfc(fc
, "Path missing in source");
372 dout("device name '%.*s'\n", (int)(dev_name_end
- dev_name
), dev_name
);
373 if (fsopt
->server_path
)
374 dout("server path '%s'\n", fsopt
->server_path
);
376 dout("trying new device syntax");
377 ret
= ceph_parse_new_source(dev_name
, dev_name_end
, fc
);
381 dout("trying old device syntax");
382 ret
= ceph_parse_old_source(dev_name
, dev_name_end
, fc
);
387 fc
->source
= param
->string
;
388 param
->string
= NULL
;
392 static int ceph_parse_mon_addr(struct fs_parameter
*param
,
393 struct fs_context
*fc
)
395 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
396 struct ceph_mount_options
*fsopt
= pctx
->opts
;
398 kfree(fsopt
->mon_addr
);
399 fsopt
->mon_addr
= param
->string
;
400 param
->string
= NULL
;
402 return ceph_parse_mon_ips(fsopt
->mon_addr
, strlen(fsopt
->mon_addr
),
403 pctx
->copts
, fc
->log
.log
, '/');
406 static int ceph_parse_mount_param(struct fs_context
*fc
,
407 struct fs_parameter
*param
)
409 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
410 struct ceph_mount_options
*fsopt
= pctx
->opts
;
411 struct fs_parse_result result
;
415 ret
= ceph_parse_param(param
, pctx
->copts
, fc
->log
.log
);
416 if (ret
!= -ENOPARAM
)
419 token
= fs_parse(fc
, ceph_mount_parameters
, param
, &result
);
420 dout("%s: fs_parse '%s' token %d\n",__func__
, param
->key
, token
);
425 case Opt_snapdirname
:
426 kfree(fsopt
->snapdir_name
);
427 fsopt
->snapdir_name
= param
->string
;
428 param
->string
= NULL
;
430 case Opt_mds_namespace
:
431 if (!namespace_equals(fsopt
, param
->string
, strlen(param
->string
)))
432 return invalfc(fc
, "Mismatching mds_namespace");
433 kfree(fsopt
->mds_namespace
);
434 fsopt
->mds_namespace
= param
->string
;
435 param
->string
= NULL
;
437 case Opt_recover_session
:
438 mode
= result
.uint_32
;
439 if (mode
== ceph_recover_session_no
)
440 fsopt
->flags
&= ~CEPH_MOUNT_OPT_CLEANRECOVER
;
441 else if (mode
== ceph_recover_session_clean
)
442 fsopt
->flags
|= CEPH_MOUNT_OPT_CLEANRECOVER
;
448 return invalfc(fc
, "Multiple sources specified");
449 return ceph_parse_source(param
, fc
);
451 return ceph_parse_mon_addr(param
, fc
);
453 if (result
.uint_32
< PAGE_SIZE
||
454 result
.uint_32
> CEPH_MAX_WRITE_SIZE
)
456 fsopt
->wsize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
459 if (result
.uint_32
< PAGE_SIZE
||
460 result
.uint_32
> CEPH_MAX_READ_SIZE
)
462 fsopt
->rsize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
465 fsopt
->rasize
= ALIGN(result
.uint_32
, PAGE_SIZE
);
467 case Opt_caps_wanted_delay_min
:
468 if (result
.uint_32
< 1)
470 fsopt
->caps_wanted_delay_min
= result
.uint_32
;
472 case Opt_caps_wanted_delay_max
:
473 if (result
.uint_32
< 1)
475 fsopt
->caps_wanted_delay_max
= result
.uint_32
;
478 if (result
.int_32
< 0)
480 fsopt
->caps_max
= result
.int_32
;
482 case Opt_readdir_max_entries
:
483 if (result
.uint_32
< 1)
485 fsopt
->max_readdir
= result
.uint_32
;
487 case Opt_readdir_max_bytes
:
488 if (result
.uint_32
< PAGE_SIZE
&& result
.uint_32
!= 0)
490 fsopt
->max_readdir_bytes
= result
.uint_32
;
492 case Opt_congestion_kb
:
493 if (result
.uint_32
< 1024) /* at least 1M */
495 fsopt
->congestion_kb
= result
.uint_32
;
499 fsopt
->flags
|= CEPH_MOUNT_OPT_DIRSTAT
;
501 fsopt
->flags
&= ~CEPH_MOUNT_OPT_DIRSTAT
;
505 fsopt
->flags
|= CEPH_MOUNT_OPT_RBYTES
;
507 fsopt
->flags
&= ~CEPH_MOUNT_OPT_RBYTES
;
509 case Opt_asyncreaddir
:
511 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOASYNCREADDIR
;
513 fsopt
->flags
|= CEPH_MOUNT_OPT_NOASYNCREADDIR
;
517 fsopt
->flags
|= CEPH_MOUNT_OPT_DCACHE
;
519 fsopt
->flags
&= ~CEPH_MOUNT_OPT_DCACHE
;
523 fsopt
->flags
|= CEPH_MOUNT_OPT_INO32
;
525 fsopt
->flags
&= ~CEPH_MOUNT_OPT_INO32
;
529 #ifdef CONFIG_CEPH_FSCACHE
530 kfree(fsopt
->fscache_uniq
);
531 fsopt
->fscache_uniq
= NULL
;
532 if (result
.negated
) {
533 fsopt
->flags
&= ~CEPH_MOUNT_OPT_FSCACHE
;
535 fsopt
->flags
|= CEPH_MOUNT_OPT_FSCACHE
;
536 fsopt
->fscache_uniq
= param
->string
;
537 param
->string
= NULL
;
541 return invalfc(fc
, "fscache support is disabled");
545 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOPOOLPERM
;
547 fsopt
->flags
|= CEPH_MOUNT_OPT_NOPOOLPERM
;
549 case Opt_require_active_mds
:
551 fsopt
->flags
&= ~CEPH_MOUNT_OPT_MOUNTWAIT
;
553 fsopt
->flags
|= CEPH_MOUNT_OPT_MOUNTWAIT
;
557 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOQUOTADF
;
559 fsopt
->flags
|= CEPH_MOUNT_OPT_NOQUOTADF
;
563 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOCOPYFROM
;
565 fsopt
->flags
|= CEPH_MOUNT_OPT_NOCOPYFROM
;
568 if (!result
.negated
) {
569 #ifdef CONFIG_CEPH_FS_POSIX_ACL
570 fc
->sb_flags
|= SB_POSIXACL
;
572 return invalfc(fc
, "POSIX ACL support is disabled");
575 fc
->sb_flags
&= ~SB_POSIXACL
;
580 fsopt
->flags
&= ~CEPH_MOUNT_OPT_ASYNC_DIROPS
;
582 fsopt
->flags
|= CEPH_MOUNT_OPT_ASYNC_DIROPS
;
586 fsopt
->flags
|= CEPH_MOUNT_OPT_NOPAGECACHE
;
588 fsopt
->flags
&= ~CEPH_MOUNT_OPT_NOPAGECACHE
;
592 fsopt
->flags
&= ~CEPH_MOUNT_OPT_SPARSEREAD
;
594 fsopt
->flags
|= CEPH_MOUNT_OPT_SPARSEREAD
;
596 case Opt_test_dummy_encryption
:
597 #ifdef CONFIG_FS_ENCRYPTION
598 fscrypt_free_dummy_policy(&fsopt
->dummy_enc_policy
);
599 ret
= fscrypt_parse_test_dummy_encryption(param
,
600 &fsopt
->dummy_enc_policy
);
601 if (ret
== -EINVAL
) {
602 warnfc(fc
, "Value of option \"%s\" is unrecognized",
604 } else if (ret
== -EEXIST
) {
605 warnfc(fc
, "Conflicting test_dummy_encryption options");
610 "FS encryption not supported: test_dummy_encryption mount option ignored");
619 return invalfc(fc
, "%s out of range", param
->key
);
622 static void destroy_mount_options(struct ceph_mount_options
*args
)
624 dout("destroy_mount_options %p\n", args
);
628 kfree(args
->snapdir_name
);
629 kfree(args
->mds_namespace
);
630 kfree(args
->server_path
);
631 kfree(args
->fscache_uniq
);
632 kfree(args
->mon_addr
);
633 fscrypt_free_dummy_policy(&args
->dummy_enc_policy
);
637 static int strcmp_null(const char *s1
, const char *s2
)
645 return strcmp(s1
, s2
);
648 static int compare_mount_options(struct ceph_mount_options
*new_fsopt
,
649 struct ceph_options
*new_opt
,
650 struct ceph_fs_client
*fsc
)
652 struct ceph_mount_options
*fsopt1
= new_fsopt
;
653 struct ceph_mount_options
*fsopt2
= fsc
->mount_options
;
654 int ofs
= offsetof(struct ceph_mount_options
, snapdir_name
);
657 ret
= memcmp(fsopt1
, fsopt2
, ofs
);
661 ret
= strcmp_null(fsopt1
->snapdir_name
, fsopt2
->snapdir_name
);
665 ret
= strcmp_null(fsopt1
->mds_namespace
, fsopt2
->mds_namespace
);
669 ret
= strcmp_null(fsopt1
->server_path
, fsopt2
->server_path
);
673 ret
= strcmp_null(fsopt1
->fscache_uniq
, fsopt2
->fscache_uniq
);
677 ret
= strcmp_null(fsopt1
->mon_addr
, fsopt2
->mon_addr
);
681 return ceph_compare_options(new_opt
, fsc
->client
);
685 * ceph_show_options - Show mount options in /proc/mounts
686 * @m: seq_file to write to
687 * @root: root of that (sub)tree
689 static int ceph_show_options(struct seq_file
*m
, struct dentry
*root
)
691 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(root
->d_sb
);
692 struct ceph_mount_options
*fsopt
= fsc
->mount_options
;
696 /* a comma between MNT/MS and client options */
700 ret
= ceph_print_client_options(m
, fsc
->client
, false);
704 /* retract our comma if no client options */
708 if (fsopt
->flags
& CEPH_MOUNT_OPT_DIRSTAT
)
709 seq_puts(m
, ",dirstat");
710 if ((fsopt
->flags
& CEPH_MOUNT_OPT_RBYTES
))
711 seq_puts(m
, ",rbytes");
712 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOASYNCREADDIR
)
713 seq_puts(m
, ",noasyncreaddir");
714 if ((fsopt
->flags
& CEPH_MOUNT_OPT_DCACHE
) == 0)
715 seq_puts(m
, ",nodcache");
716 if (fsopt
->flags
& CEPH_MOUNT_OPT_INO32
)
717 seq_puts(m
, ",ino32");
718 if (fsopt
->flags
& CEPH_MOUNT_OPT_FSCACHE
) {
719 seq_show_option(m
, "fsc", fsopt
->fscache_uniq
);
721 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOPOOLPERM
)
722 seq_puts(m
, ",nopoolperm");
723 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOQUOTADF
)
724 seq_puts(m
, ",noquotadf");
726 #ifdef CONFIG_CEPH_FS_POSIX_ACL
727 if (root
->d_sb
->s_flags
& SB_POSIXACL
)
730 seq_puts(m
, ",noacl");
733 if ((fsopt
->flags
& CEPH_MOUNT_OPT_NOCOPYFROM
) == 0)
734 seq_puts(m
, ",copyfrom");
736 /* dump mds_namespace when old device syntax is in use */
737 if (fsopt
->mds_namespace
&& !fsopt
->new_dev_syntax
)
738 seq_show_option(m
, "mds_namespace", fsopt
->mds_namespace
);
741 seq_printf(m
, ",mon_addr=%s", fsopt
->mon_addr
);
743 if (fsopt
->flags
& CEPH_MOUNT_OPT_CLEANRECOVER
)
744 seq_show_option(m
, "recover_session", "clean");
746 if (!(fsopt
->flags
& CEPH_MOUNT_OPT_ASYNC_DIROPS
))
747 seq_puts(m
, ",wsync");
748 if (fsopt
->flags
& CEPH_MOUNT_OPT_NOPAGECACHE
)
749 seq_puts(m
, ",nopagecache");
750 if (fsopt
->flags
& CEPH_MOUNT_OPT_SPARSEREAD
)
751 seq_puts(m
, ",sparseread");
753 fscrypt_show_test_dummy_encryption(m
, ',', root
->d_sb
);
755 if (fsopt
->wsize
!= CEPH_MAX_WRITE_SIZE
)
756 seq_printf(m
, ",wsize=%u", fsopt
->wsize
);
757 if (fsopt
->rsize
!= CEPH_MAX_READ_SIZE
)
758 seq_printf(m
, ",rsize=%u", fsopt
->rsize
);
759 if (fsopt
->rasize
!= CEPH_RASIZE_DEFAULT
)
760 seq_printf(m
, ",rasize=%u", fsopt
->rasize
);
761 if (fsopt
->congestion_kb
!= default_congestion_kb())
762 seq_printf(m
, ",write_congestion_kb=%u", fsopt
->congestion_kb
);
764 seq_printf(m
, ",caps_max=%d", fsopt
->caps_max
);
765 if (fsopt
->caps_wanted_delay_min
!= CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT
)
766 seq_printf(m
, ",caps_wanted_delay_min=%u",
767 fsopt
->caps_wanted_delay_min
);
768 if (fsopt
->caps_wanted_delay_max
!= CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT
)
769 seq_printf(m
, ",caps_wanted_delay_max=%u",
770 fsopt
->caps_wanted_delay_max
);
771 if (fsopt
->max_readdir
!= CEPH_MAX_READDIR_DEFAULT
)
772 seq_printf(m
, ",readdir_max_entries=%u", fsopt
->max_readdir
);
773 if (fsopt
->max_readdir_bytes
!= CEPH_MAX_READDIR_BYTES_DEFAULT
)
774 seq_printf(m
, ",readdir_max_bytes=%u", fsopt
->max_readdir_bytes
);
775 if (strcmp(fsopt
->snapdir_name
, CEPH_SNAPDIRNAME_DEFAULT
))
776 seq_show_option(m
, "snapdirname", fsopt
->snapdir_name
);
782 * handle any mon messages the standard library doesn't understand.
783 * return error if we don't either.
785 static int extra_mon_dispatch(struct ceph_client
*client
, struct ceph_msg
*msg
)
787 struct ceph_fs_client
*fsc
= client
->private;
788 int type
= le16_to_cpu(msg
->hdr
.type
);
791 case CEPH_MSG_MDS_MAP
:
792 ceph_mdsc_handle_mdsmap(fsc
->mdsc
, msg
);
794 case CEPH_MSG_FS_MAP_USER
:
795 ceph_mdsc_handle_fsmap(fsc
->mdsc
, msg
);
803 * create a new fs client
805 * Success or not, this function consumes @fsopt and @opt.
807 static struct ceph_fs_client
*create_fs_client(struct ceph_mount_options
*fsopt
,
808 struct ceph_options
*opt
)
810 struct ceph_fs_client
*fsc
;
813 fsc
= kzalloc(sizeof(*fsc
), GFP_KERNEL
);
819 fsc
->client
= ceph_create_client(opt
, fsc
);
820 if (IS_ERR(fsc
->client
)) {
821 err
= PTR_ERR(fsc
->client
);
824 opt
= NULL
; /* fsc->client now owns this */
826 fsc
->client
->extra_mon_dispatch
= extra_mon_dispatch
;
827 ceph_set_opt(fsc
->client
, ABORT_ON_FULL
);
829 if (!fsopt
->mds_namespace
) {
830 ceph_monc_want_map(&fsc
->client
->monc
, CEPH_SUB_MDSMAP
,
833 ceph_monc_want_map(&fsc
->client
->monc
, CEPH_SUB_FSMAP
,
837 fsc
->mount_options
= fsopt
;
840 fsc
->mount_state
= CEPH_MOUNT_MOUNTING
;
842 fsc
->have_copy_from2
= true;
844 atomic_long_set(&fsc
->writeback_count
, 0);
845 fsc
->write_congested
= false;
849 * The number of concurrent works can be high but they don't need
850 * to be processed in parallel, limit concurrency.
852 fsc
->inode_wq
= alloc_workqueue("ceph-inode", WQ_UNBOUND
, 0);
855 fsc
->cap_wq
= alloc_workqueue("ceph-cap", 0, 1);
859 hash_init(fsc
->async_unlink_conflict
);
860 spin_lock_init(&fsc
->async_unlink_conflict_lock
);
862 spin_lock(&ceph_fsc_lock
);
863 list_add_tail(&fsc
->metric_wakeup
, &ceph_fsc_list
);
864 spin_unlock(&ceph_fsc_lock
);
869 destroy_workqueue(fsc
->inode_wq
);
871 ceph_destroy_client(fsc
->client
);
875 ceph_destroy_options(opt
);
876 destroy_mount_options(fsopt
);
880 static void flush_fs_workqueues(struct ceph_fs_client
*fsc
)
882 flush_workqueue(fsc
->inode_wq
);
883 flush_workqueue(fsc
->cap_wq
);
886 static void destroy_fs_client(struct ceph_fs_client
*fsc
)
888 doutc(fsc
->client
, "%p\n", fsc
);
890 spin_lock(&ceph_fsc_lock
);
891 list_del(&fsc
->metric_wakeup
);
892 spin_unlock(&ceph_fsc_lock
);
894 ceph_mdsc_destroy(fsc
);
895 destroy_workqueue(fsc
->inode_wq
);
896 destroy_workqueue(fsc
->cap_wq
);
898 destroy_mount_options(fsc
->mount_options
);
900 ceph_destroy_client(fsc
->client
);
903 dout("%s: %p done\n", __func__
, fsc
);
909 struct kmem_cache
*ceph_inode_cachep
;
910 struct kmem_cache
*ceph_cap_cachep
;
911 struct kmem_cache
*ceph_cap_snap_cachep
;
912 struct kmem_cache
*ceph_cap_flush_cachep
;
913 struct kmem_cache
*ceph_dentry_cachep
;
914 struct kmem_cache
*ceph_file_cachep
;
915 struct kmem_cache
*ceph_dir_file_cachep
;
916 struct kmem_cache
*ceph_mds_request_cachep
;
917 mempool_t
*ceph_wb_pagevec_pool
;
919 static void ceph_inode_init_once(void *foo
)
921 struct ceph_inode_info
*ci
= foo
;
922 inode_init_once(&ci
->netfs
.inode
);
925 static int __init
init_caches(void)
929 ceph_inode_cachep
= kmem_cache_create("ceph_inode_info",
930 sizeof(struct ceph_inode_info
),
931 __alignof__(struct ceph_inode_info
),
932 SLAB_RECLAIM_ACCOUNT
| SLAB_ACCOUNT
,
933 ceph_inode_init_once
);
934 if (!ceph_inode_cachep
)
937 ceph_cap_cachep
= KMEM_CACHE(ceph_cap
, 0);
938 if (!ceph_cap_cachep
)
940 ceph_cap_snap_cachep
= KMEM_CACHE(ceph_cap_snap
, 0);
941 if (!ceph_cap_snap_cachep
)
943 ceph_cap_flush_cachep
= KMEM_CACHE(ceph_cap_flush
,
944 SLAB_RECLAIM_ACCOUNT
);
945 if (!ceph_cap_flush_cachep
)
948 ceph_dentry_cachep
= KMEM_CACHE(ceph_dentry_info
,
949 SLAB_RECLAIM_ACCOUNT
);
950 if (!ceph_dentry_cachep
)
953 ceph_file_cachep
= KMEM_CACHE(ceph_file_info
, 0);
954 if (!ceph_file_cachep
)
957 ceph_dir_file_cachep
= KMEM_CACHE(ceph_dir_file_info
, 0);
958 if (!ceph_dir_file_cachep
)
961 ceph_mds_request_cachep
= KMEM_CACHE(ceph_mds_request
, 0);
962 if (!ceph_mds_request_cachep
)
965 ceph_wb_pagevec_pool
= mempool_create_kmalloc_pool(10,
966 (CEPH_MAX_WRITE_SIZE
>> PAGE_SHIFT
) * sizeof(struct page
*));
967 if (!ceph_wb_pagevec_pool
)
968 goto bad_pagevec_pool
;
973 kmem_cache_destroy(ceph_mds_request_cachep
);
975 kmem_cache_destroy(ceph_dir_file_cachep
);
977 kmem_cache_destroy(ceph_file_cachep
);
979 kmem_cache_destroy(ceph_dentry_cachep
);
981 kmem_cache_destroy(ceph_cap_flush_cachep
);
983 kmem_cache_destroy(ceph_cap_snap_cachep
);
985 kmem_cache_destroy(ceph_cap_cachep
);
987 kmem_cache_destroy(ceph_inode_cachep
);
991 static void destroy_caches(void)
994 * Make sure all delayed rcu free inodes are flushed before we
999 kmem_cache_destroy(ceph_inode_cachep
);
1000 kmem_cache_destroy(ceph_cap_cachep
);
1001 kmem_cache_destroy(ceph_cap_snap_cachep
);
1002 kmem_cache_destroy(ceph_cap_flush_cachep
);
1003 kmem_cache_destroy(ceph_dentry_cachep
);
1004 kmem_cache_destroy(ceph_file_cachep
);
1005 kmem_cache_destroy(ceph_dir_file_cachep
);
1006 kmem_cache_destroy(ceph_mds_request_cachep
);
1007 mempool_destroy(ceph_wb_pagevec_pool
);
1010 static void __ceph_umount_begin(struct ceph_fs_client
*fsc
)
1012 ceph_osdc_abort_requests(&fsc
->client
->osdc
, -EIO
);
1013 ceph_mdsc_force_umount(fsc
->mdsc
);
1014 fsc
->filp_gen
++; // invalidate open files
1018 * ceph_umount_begin - initiate forced umount. Tear down the
1019 * mount, skipping steps that may hang while waiting for server(s).
1021 void ceph_umount_begin(struct super_block
*sb
)
1023 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1025 doutc(fsc
->client
, "starting forced umount\n");
1028 fsc
->mount_state
= CEPH_MOUNT_SHUTDOWN
;
1029 __ceph_umount_begin(fsc
);
1032 static const struct super_operations ceph_super_ops
= {
1033 .alloc_inode
= ceph_alloc_inode
,
1034 .free_inode
= ceph_free_inode
,
1035 .write_inode
= ceph_write_inode
,
1036 .drop_inode
= generic_delete_inode
,
1037 .evict_inode
= ceph_evict_inode
,
1038 .sync_fs
= ceph_sync_fs
,
1039 .put_super
= ceph_put_super
,
1040 .show_options
= ceph_show_options
,
1041 .statfs
= ceph_statfs
,
1042 .umount_begin
= ceph_umount_begin
,
1046 * Bootstrap mount by opening the root directory. Note the mount
1047 * @started time from caller, and time out if this takes too long.
1049 static struct dentry
*open_root_dentry(struct ceph_fs_client
*fsc
,
1051 unsigned long started
)
1053 struct ceph_client
*cl
= fsc
->client
;
1054 struct ceph_mds_client
*mdsc
= fsc
->mdsc
;
1055 struct ceph_mds_request
*req
= NULL
;
1057 struct dentry
*root
;
1060 doutc(cl
, "opening '%s'\n", path
);
1061 req
= ceph_mdsc_create_request(mdsc
, CEPH_MDS_OP_GETATTR
, USE_ANY_MDS
);
1063 return ERR_CAST(req
);
1064 req
->r_path1
= kstrdup(path
, GFP_NOFS
);
1065 if (!req
->r_path1
) {
1066 root
= ERR_PTR(-ENOMEM
);
1070 req
->r_ino1
.ino
= CEPH_INO_ROOT
;
1071 req
->r_ino1
.snap
= CEPH_NOSNAP
;
1072 req
->r_started
= started
;
1073 req
->r_timeout
= fsc
->client
->options
->mount_timeout
;
1074 req
->r_args
.getattr
.mask
= cpu_to_le32(CEPH_STAT_CAP_INODE
);
1075 req
->r_num_caps
= 2;
1076 err
= ceph_mdsc_do_request(mdsc
, NULL
, req
);
1078 struct inode
*inode
= req
->r_target_inode
;
1079 req
->r_target_inode
= NULL
;
1080 doutc(cl
, "success\n");
1081 root
= d_make_root(inode
);
1083 root
= ERR_PTR(-ENOMEM
);
1086 doutc(cl
, "success, root dentry is %p\n", root
);
1088 root
= ERR_PTR(err
);
1091 ceph_mdsc_put_request(req
);
1095 #ifdef CONFIG_FS_ENCRYPTION
1096 static int ceph_apply_test_dummy_encryption(struct super_block
*sb
,
1097 struct fs_context
*fc
,
1098 struct ceph_mount_options
*fsopt
)
1100 struct ceph_fs_client
*fsc
= sb
->s_fs_info
;
1102 if (!fscrypt_is_dummy_policy_set(&fsopt
->dummy_enc_policy
))
1105 /* No changing encryption context on remount. */
1106 if (fc
->purpose
== FS_CONTEXT_FOR_RECONFIGURE
&&
1107 !fscrypt_is_dummy_policy_set(&fsc
->fsc_dummy_enc_policy
)) {
1108 if (fscrypt_dummy_policies_equal(&fsopt
->dummy_enc_policy
,
1109 &fsc
->fsc_dummy_enc_policy
))
1111 errorfc(fc
, "Can't set test_dummy_encryption on remount");
1115 /* Also make sure fsopt doesn't contain a conflicting value. */
1116 if (fscrypt_is_dummy_policy_set(&fsc
->fsc_dummy_enc_policy
)) {
1117 if (fscrypt_dummy_policies_equal(&fsopt
->dummy_enc_policy
,
1118 &fsc
->fsc_dummy_enc_policy
))
1120 errorfc(fc
, "Conflicting test_dummy_encryption options");
1124 fsc
->fsc_dummy_enc_policy
= fsopt
->dummy_enc_policy
;
1125 memset(&fsopt
->dummy_enc_policy
, 0, sizeof(fsopt
->dummy_enc_policy
));
1127 warnfc(fc
, "test_dummy_encryption mode enabled");
1131 static int ceph_apply_test_dummy_encryption(struct super_block
*sb
,
1132 struct fs_context
*fc
,
1133 struct ceph_mount_options
*fsopt
)
1140 * mount: join the ceph cluster, and open root directory.
1142 static struct dentry
*ceph_real_mount(struct ceph_fs_client
*fsc
,
1143 struct fs_context
*fc
)
1145 struct ceph_client
*cl
= fsc
->client
;
1147 unsigned long started
= jiffies
; /* note the start time */
1148 struct dentry
*root
;
1150 doutc(cl
, "mount start %p\n", fsc
);
1151 mutex_lock(&fsc
->client
->mount_mutex
);
1153 if (!fsc
->sb
->s_root
) {
1154 const char *path
= fsc
->mount_options
->server_path
?
1155 fsc
->mount_options
->server_path
+ 1 : "";
1157 err
= __ceph_open_session(fsc
->client
, started
);
1162 if (fsc
->mount_options
->flags
& CEPH_MOUNT_OPT_FSCACHE
) {
1163 err
= ceph_fscache_register_fs(fsc
, fc
);
1168 err
= ceph_apply_test_dummy_encryption(fsc
->sb
, fc
,
1169 fsc
->mount_options
);
1173 doutc(cl
, "mount opening path '%s'\n", path
);
1175 ceph_fs_debugfs_init(fsc
);
1177 root
= open_root_dentry(fsc
, path
, started
);
1179 err
= PTR_ERR(root
);
1182 fsc
->sb
->s_root
= dget(root
);
1184 root
= dget(fsc
->sb
->s_root
);
1187 fsc
->mount_state
= CEPH_MOUNT_MOUNTED
;
1188 doutc(cl
, "mount success\n");
1189 mutex_unlock(&fsc
->client
->mount_mutex
);
1193 mutex_unlock(&fsc
->client
->mount_mutex
);
1194 ceph_fscrypt_free_dummy_policy(fsc
);
1195 return ERR_PTR(err
);
1198 static int ceph_set_super(struct super_block
*s
, struct fs_context
*fc
)
1200 struct ceph_fs_client
*fsc
= s
->s_fs_info
;
1201 struct ceph_client
*cl
= fsc
->client
;
1204 doutc(cl
, "%p\n", s
);
1206 s
->s_maxbytes
= MAX_LFS_FILESIZE
;
1208 s
->s_xattr
= ceph_xattr_handlers
;
1210 fsc
->max_file_size
= 1ULL << 40; /* temp value until we get mdsmap */
1212 s
->s_op
= &ceph_super_ops
;
1213 s
->s_d_op
= &ceph_dentry_ops
;
1214 s
->s_export_op
= &ceph_export_ops
;
1218 s
->s_time_max
= U32_MAX
;
1219 s
->s_flags
|= SB_NODIRATIME
| SB_NOATIME
;
1221 ceph_fscrypt_set_ops(s
);
1223 ret
= set_anon_super_fc(s
, fc
);
1230 * share superblock if same fs AND options
1232 static int ceph_compare_super(struct super_block
*sb
, struct fs_context
*fc
)
1234 struct ceph_fs_client
*new = fc
->s_fs_info
;
1235 struct ceph_mount_options
*fsopt
= new->mount_options
;
1236 struct ceph_options
*opt
= new->client
->options
;
1237 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1238 struct ceph_client
*cl
= fsc
->client
;
1240 doutc(cl
, "%p\n", sb
);
1242 if (compare_mount_options(fsopt
, opt
, fsc
)) {
1243 doutc(cl
, "monitor(s)/mount options don't match\n");
1246 if ((opt
->flags
& CEPH_OPT_FSID
) &&
1247 ceph_fsid_compare(&opt
->fsid
, &fsc
->client
->fsid
)) {
1248 doutc(cl
, "fsid doesn't match\n");
1251 if (fc
->sb_flags
!= (sb
->s_flags
& ~SB_BORN
)) {
1252 doutc(cl
, "flags differ\n");
1256 if (fsc
->blocklisted
&& !ceph_test_mount_opt(fsc
, CLEANRECOVER
)) {
1257 doutc(cl
, "client is blocklisted (and CLEANRECOVER is not set)\n");
1261 if (fsc
->mount_state
== CEPH_MOUNT_SHUTDOWN
) {
1262 doutc(cl
, "client has been forcibly unmounted\n");
1270 * construct our own bdi so we can control readahead, etc.
1272 static atomic_long_t bdi_seq
= ATOMIC_LONG_INIT(0);
1274 static int ceph_setup_bdi(struct super_block
*sb
, struct ceph_fs_client
*fsc
)
1278 err
= super_setup_bdi_name(sb
, "ceph-%ld",
1279 atomic_long_inc_return(&bdi_seq
));
1283 /* set ra_pages based on rasize mount option? */
1284 sb
->s_bdi
->ra_pages
= fsc
->mount_options
->rasize
>> PAGE_SHIFT
;
1286 /* set io_pages based on max osd read size */
1287 sb
->s_bdi
->io_pages
= fsc
->mount_options
->rsize
>> PAGE_SHIFT
;
1292 static int ceph_get_tree(struct fs_context
*fc
)
1294 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1295 struct ceph_mount_options
*fsopt
= pctx
->opts
;
1296 struct super_block
*sb
;
1297 struct ceph_fs_client
*fsc
;
1299 int (*compare_super
)(struct super_block
*, struct fs_context
*) =
1303 dout("ceph_get_tree\n");
1306 return invalfc(fc
, "No source");
1307 if (fsopt
->new_dev_syntax
&& !fsopt
->mon_addr
)
1308 return invalfc(fc
, "No monitor address");
1310 /* create client (which we may/may not use) */
1311 fsc
= create_fs_client(pctx
->opts
, pctx
->copts
);
1319 err
= ceph_mdsc_init(fsc
);
1323 if (ceph_test_opt(fsc
->client
, NOSHARE
))
1324 compare_super
= NULL
;
1326 fc
->s_fs_info
= fsc
;
1327 sb
= sget_fc(fc
, compare_super
, ceph_set_super
);
1328 fc
->s_fs_info
= NULL
;
1334 if (ceph_sb_to_fs_client(sb
) != fsc
) {
1335 destroy_fs_client(fsc
);
1336 fsc
= ceph_sb_to_fs_client(sb
);
1337 dout("get_sb got existing client %p\n", fsc
);
1339 dout("get_sb using new client %p\n", fsc
);
1340 err
= ceph_setup_bdi(sb
, fsc
);
1345 res
= ceph_real_mount(fsc
, fc
);
1351 doutc(fsc
->client
, "root %p inode %p ino %llx.%llx\n", res
,
1352 d_inode(res
), ceph_vinop(d_inode(res
)));
1353 fc
->root
= fsc
->sb
->s_root
;
1357 if (!ceph_mdsmap_is_cluster_available(fsc
->mdsc
->mdsmap
)) {
1358 pr_info("No mds server is up or the cluster is laggy\n");
1359 err
= -EHOSTUNREACH
;
1362 ceph_mdsc_close_sessions(fsc
->mdsc
);
1363 deactivate_locked_super(sb
);
1367 destroy_fs_client(fsc
);
1369 dout("ceph_get_tree fail %d\n", err
);
1373 static void ceph_free_fc(struct fs_context
*fc
)
1375 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1378 destroy_mount_options(pctx
->opts
);
1379 ceph_destroy_options(pctx
->copts
);
1384 static int ceph_reconfigure_fc(struct fs_context
*fc
)
1387 struct ceph_parse_opts_ctx
*pctx
= fc
->fs_private
;
1388 struct ceph_mount_options
*fsopt
= pctx
->opts
;
1389 struct super_block
*sb
= fc
->root
->d_sb
;
1390 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1392 err
= ceph_apply_test_dummy_encryption(sb
, fc
, fsopt
);
1396 if (fsopt
->flags
& CEPH_MOUNT_OPT_ASYNC_DIROPS
)
1397 ceph_set_mount_opt(fsc
, ASYNC_DIROPS
);
1399 ceph_clear_mount_opt(fsc
, ASYNC_DIROPS
);
1401 if (fsopt
->flags
& CEPH_MOUNT_OPT_SPARSEREAD
)
1402 ceph_set_mount_opt(fsc
, SPARSEREAD
);
1404 ceph_clear_mount_opt(fsc
, SPARSEREAD
);
1406 if (strcmp_null(fsc
->mount_options
->mon_addr
, fsopt
->mon_addr
)) {
1407 kfree(fsc
->mount_options
->mon_addr
);
1408 fsc
->mount_options
->mon_addr
= fsopt
->mon_addr
;
1409 fsopt
->mon_addr
= NULL
;
1410 pr_notice_client(fsc
->client
,
1411 "monitor addresses recorded, but not used for reconnection");
1414 sync_filesystem(sb
);
1418 static const struct fs_context_operations ceph_context_ops
= {
1419 .free
= ceph_free_fc
,
1420 .parse_param
= ceph_parse_mount_param
,
1421 .get_tree
= ceph_get_tree
,
1422 .reconfigure
= ceph_reconfigure_fc
,
1426 * Set up the filesystem mount context.
1428 static int ceph_init_fs_context(struct fs_context
*fc
)
1430 struct ceph_parse_opts_ctx
*pctx
;
1431 struct ceph_mount_options
*fsopt
;
1433 pctx
= kzalloc(sizeof(*pctx
), GFP_KERNEL
);
1437 pctx
->copts
= ceph_alloc_options();
1441 pctx
->opts
= kzalloc(sizeof(*pctx
->opts
), GFP_KERNEL
);
1446 fsopt
->flags
= CEPH_MOUNT_OPT_DEFAULT
;
1448 fsopt
->wsize
= CEPH_MAX_WRITE_SIZE
;
1449 fsopt
->rsize
= CEPH_MAX_READ_SIZE
;
1450 fsopt
->rasize
= CEPH_RASIZE_DEFAULT
;
1451 fsopt
->snapdir_name
= kstrdup(CEPH_SNAPDIRNAME_DEFAULT
, GFP_KERNEL
);
1452 if (!fsopt
->snapdir_name
)
1455 fsopt
->caps_wanted_delay_min
= CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT
;
1456 fsopt
->caps_wanted_delay_max
= CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT
;
1457 fsopt
->max_readdir
= CEPH_MAX_READDIR_DEFAULT
;
1458 fsopt
->max_readdir_bytes
= CEPH_MAX_READDIR_BYTES_DEFAULT
;
1459 fsopt
->congestion_kb
= default_congestion_kb();
1461 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1462 fc
->sb_flags
|= SB_POSIXACL
;
1465 fc
->fs_private
= pctx
;
1466 fc
->ops
= &ceph_context_ops
;
1470 destroy_mount_options(pctx
->opts
);
1471 ceph_destroy_options(pctx
->copts
);
1477 * Return true if it successfully increases the blocker counter,
1478 * or false if the mdsc is in stopping and flushed state.
1480 static bool __inc_stopping_blocker(struct ceph_mds_client
*mdsc
)
1482 spin_lock(&mdsc
->stopping_lock
);
1483 if (mdsc
->stopping
>= CEPH_MDSC_STOPPING_FLUSHING
) {
1484 spin_unlock(&mdsc
->stopping_lock
);
1487 atomic_inc(&mdsc
->stopping_blockers
);
1488 spin_unlock(&mdsc
->stopping_lock
);
1492 static void __dec_stopping_blocker(struct ceph_mds_client
*mdsc
)
1494 spin_lock(&mdsc
->stopping_lock
);
1495 if (!atomic_dec_return(&mdsc
->stopping_blockers
) &&
1496 mdsc
->stopping
>= CEPH_MDSC_STOPPING_FLUSHING
)
1497 complete_all(&mdsc
->stopping_waiter
);
1498 spin_unlock(&mdsc
->stopping_lock
);
1501 /* For metadata IO requests */
1502 bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client
*mdsc
,
1503 struct ceph_mds_session
*session
)
1505 mutex_lock(&session
->s_mutex
);
1506 inc_session_sequence(session
);
1507 mutex_unlock(&session
->s_mutex
);
1509 return __inc_stopping_blocker(mdsc
);
/* Release a blocker taken with ceph_inc_mds_stopping_blocker(). */
void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1517 /* For data IO requests */
1518 bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client
*mdsc
)
1520 return __inc_stopping_blocker(mdsc
);
/* Release a blocker taken with ceph_inc_osd_stopping_blocker(). */
void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
{
	__dec_stopping_blocker(mdsc);
}
1528 static void ceph_kill_sb(struct super_block
*s
)
1530 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(s
);
1531 struct ceph_client
*cl
= fsc
->client
;
1532 struct ceph_mds_client
*mdsc
= fsc
->mdsc
;
1535 doutc(cl
, "%p\n", s
);
1537 ceph_mdsc_pre_umount(mdsc
);
1538 flush_fs_workqueues(fsc
);
1541 * Though the kill_anon_super() will finally trigger the
1542 * sync_filesystem() anyway, we still need to do it here and
1543 * then bump the stage of shutdown. This will allow us to
1544 * drop any further message, which will increase the inodes'
1545 * i_count reference counters but makes no sense any more,
1548 * Without this when evicting the inodes it may fail in the
1549 * kill_anon_super(), which will trigger a warning when
1550 * destroying the fscrypt keyring and then possibly trigger
1551 * a further crash in ceph module when the iput() tries to
1552 * evict the inodes later.
1556 spin_lock(&mdsc
->stopping_lock
);
1557 mdsc
->stopping
= CEPH_MDSC_STOPPING_FLUSHING
;
1558 wait
= !!atomic_read(&mdsc
->stopping_blockers
);
1559 spin_unlock(&mdsc
->stopping_lock
);
1561 if (wait
&& atomic_read(&mdsc
->stopping_blockers
)) {
1562 long timeleft
= wait_for_completion_killable_timeout(
1563 &mdsc
->stopping_waiter
,
1564 fsc
->client
->options
->mount_timeout
);
1565 if (!timeleft
) /* timed out */
1566 pr_warn_client(cl
, "umount timed out, %ld\n", timeleft
);
1567 else if (timeleft
< 0) /* killed */
1568 pr_warn_client(cl
, "umount was killed, %ld\n", timeleft
);
1571 mdsc
->stopping
= CEPH_MDSC_STOPPING_FLUSHED
;
1574 fsc
->client
->extra_mon_dispatch
= NULL
;
1575 ceph_fs_debugfs_cleanup(fsc
);
1577 ceph_fscache_unregister_fs(fsc
);
1579 destroy_fs_client(fsc
);
1582 static struct file_system_type ceph_fs_type
= {
1583 .owner
= THIS_MODULE
,
1585 .init_fs_context
= ceph_init_fs_context
,
1586 .kill_sb
= ceph_kill_sb
,
1587 .fs_flags
= FS_RENAME_DOES_D_MOVE
| FS_ALLOW_IDMAP
,
1589 MODULE_ALIAS_FS("ceph");
1591 int ceph_force_reconnect(struct super_block
*sb
)
1593 struct ceph_fs_client
*fsc
= ceph_sb_to_fs_client(sb
);
1596 fsc
->mount_state
= CEPH_MOUNT_RECOVER
;
1597 __ceph_umount_begin(fsc
);
1599 /* Make sure all page caches get invalidated.
1600 * see remove_session_caps_cb() */
1601 flush_workqueue(fsc
->inode_wq
);
1603 /* In case that we were blocklisted. This also reset
1604 * all mon/osd connections */
1605 ceph_reset_client_addr(fsc
->client
);
1607 ceph_osdc_clear_abort_err(&fsc
->client
->osdc
);
1609 fsc
->blocklisted
= false;
1610 fsc
->mount_state
= CEPH_MOUNT_MOUNTED
;
1613 err
= __ceph_do_getattr(d_inode(sb
->s_root
), NULL
,
1614 CEPH_STAT_CAP_INODE
, true);
1619 static int __init
init_ceph(void)
1621 int ret
= init_caches();
1626 ret
= register_filesystem(&ceph_fs_type
);
1630 pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL
);
1640 static void __exit
exit_ceph(void)
1642 dout("exit_ceph\n");
1643 unregister_filesystem(&ceph_fs_type
);
1647 static int param_set_metrics(const char *val
, const struct kernel_param
*kp
)
1649 struct ceph_fs_client
*fsc
;
1652 ret
= param_set_bool(val
, kp
);
1654 pr_err("Failed to parse sending metrics switch value '%s'\n",
1657 } else if (!disable_send_metrics
) {
1658 // wake up all the mds clients
1659 spin_lock(&ceph_fsc_lock
);
1660 list_for_each_entry(fsc
, &ceph_fsc_list
, metric_wakeup
) {
1661 metric_schedule_delayed(&fsc
->mdsc
->metric
);
1663 spin_unlock(&ceph_fsc_lock
);
1669 static const struct kernel_param_ops param_ops_metrics
= {
1670 .set
= param_set_metrics
,
1671 .get
= param_get_bool
,
1674 bool disable_send_metrics
= false;
1675 module_param_cb(disable_send_metrics
, ¶m_ops_metrics
, &disable_send_metrics
, 0644);
1676 MODULE_PARM_DESC(disable_send_metrics
, "Enable sending perf metrics to ceph cluster (default: on)");
1678 /* for both v1 and v2 syntax */
1679 static bool mount_support
= true;
1680 static const struct kernel_param_ops param_ops_mount_syntax
= {
1681 .get
= param_get_bool
,
1683 module_param_cb(mount_syntax_v1
, ¶m_ops_mount_syntax
, &mount_support
, 0444);
1684 module_param_cb(mount_syntax_v2
, ¶m_ops_mount_syntax
, &mount_support
, 0444);
1686 bool enable_unsafe_idmap
= false;
1687 module_param(enable_unsafe_idmap
, bool, 0644);
1688 MODULE_PARM_DESC(enable_unsafe_idmap
,
1689 "Allow to use idmapped mounts with MDS without CEPHFS_FEATURE_HAS_OWNER_UIDGID");
1691 module_init(init_ceph
);
1692 module_exit(exit_ceph
);
1694 MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
1695 MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
1696 MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
1697 MODULE_DESCRIPTION("Ceph filesystem for Linux");
1698 MODULE_LICENSE("GPL");