fs/ceph/super.c

   1
   2 #include <linux/ceph/ceph_debug.h>
   3
   4 #include <linux/backing-dev.h>
   5 #include <linux/ctype.h>
   6 #include <linux/fs.h>
   7 #include <linux/inet.h>
   8 #include <linux/in6.h>
   9 #include <linux/module.h>
  10 #include <linux/mount.h>
  11 #include <linux/parser.h>
  12 #include <linux/sched.h>
  13 #include <linux/seq_file.h>
  14 #include <linux/slab.h>
  15 #include <linux/statfs.h>
  16 #include <linux/string.h>
  17
  18 #include "super.h"
  19 #include "mds_client.h"
  20 #include "cache.h"
  21
  22 #include <linux/ceph/ceph_features.h>
  23 #include <linux/ceph/decode.h>
  24 #include <linux/ceph/mon_client.h>
  25 #include <linux/ceph/auth.h>
  26 #include <linux/ceph/debugfs.h>
  27
  28 /*
  29  * Ceph superblock operations
  30  *
  31  * Handle the basics of mounting, unmounting.
  32  */
  33
  34 /*
  35  * super ops
  36  */
  37 static void ceph_put_super(struct super_block *s)
  38 {
  39         struct ceph_fs_client *fsc = ceph_sb_to_client(s);
  40
  41         dout("put_super\n");
  42         ceph_mdsc_close_sessions(fsc->mdsc);
  43 }
  44
  45 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
  46 {
  47         struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry));
  48         struct ceph_monmap *monmap = fsc->client->monc.monmap;
  49         struct ceph_statfs st;
  50         u64 fsid;
  51         int err;
  52
  53         dout("statfs\n");
  54         err = ceph_monc_do_statfs(&fsc->client->monc, &st);
  55         if (err < 0)
  56                 return err;
  57
  58         /* fill in kstatfs */
  59         buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
  60
  61         /*
  62          * express utilization in terms of large blocks to avoid
  63          * overflow on 32-bit machines.
  64          *
  65          * NOTE: for the time being, we make bsize == frsize to humor
  66          * not-yet-ancient versions of glibc that are broken.
  67          * Someday, we will probably want to report a real block
  68          * size...  whatever that may mean for a network file system!
  69          */
  70         buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
  71         buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
  72         buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
  73         buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  74         buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  75
  76         buf->f_files = le64_to_cpu(st.num_objects);
  77         buf->f_ffree = -1;
  78         buf->f_namelen = NAME_MAX;
  79
  80         /* leave fsid little-endian, regardless of host endianness */
  81         fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
  82         buf->f_fsid.val[0] = fsid & 0xffffffff;
  83         buf->f_fsid.val[1] = fsid >> 32;
  84
  85         return 0;
  86 }
  87
  88
  89 static int ceph_sync_fs(struct super_block *sb, int wait)
  90 {
  91         struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
  92
  93         if (!wait) {
  94                 dout("sync_fs (non-blocking)\n");
  95                 ceph_flush_dirty_caps(fsc->mdsc);
  96                 dout("sync_fs (non-blocking) done\n");
  97                 return 0;
  98         }
  99
 100         dout("sync_fs (blocking)\n");
 101         ceph_osdc_sync(&fsc->client->osdc);
 102         ceph_mdsc_sync(fsc->mdsc);
 103         dout("sync_fs (blocking) done\n");
 104         return 0;
 105 }
 106
 107 /*
 108  * mount options
 109  */
 110 enum {
 111         Opt_wsize,
 112         Opt_rsize,
 113         Opt_rasize,
 114         Opt_caps_wanted_delay_min,
 115         Opt_caps_wanted_delay_max,
 116         Opt_cap_release_safety,
 117         Opt_readdir_max_entries,
 118         Opt_readdir_max_bytes,
 119         Opt_congestion_kb,
 120         Opt_last_int,
 121         /* int args above */
 122         Opt_snapdirname,
 123         Opt_last_string,
 124         /* string args above */
 125         Opt_dirstat,
 126         Opt_nodirstat,
 127         Opt_rbytes,
 128         Opt_norbytes,
 129         Opt_asyncreaddir,
 130         Opt_noasyncreaddir,
 131         Opt_dcache,
 132         Opt_nodcache,
 133         Opt_ino32,
 134         Opt_noino32,
 135         Opt_fscache,
 136         Opt_nofscache,
 137         Opt_poolperm,
 138         Opt_nopoolperm,
 139 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 140         Opt_acl,
 141 #endif
 142         Opt_noacl,
 143 };
 144
 145 static match_table_t fsopt_tokens = {
 146         {Opt_wsize, "wsize=%d"},
 147         {Opt_rsize, "rsize=%d"},
 148         {Opt_rasize, "rasize=%d"},
 149         {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
 150         {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
 151         {Opt_cap_release_safety, "cap_release_safety=%d"},
 152         {Opt_readdir_max_entries, "readdir_max_entries=%d"},
 153         {Opt_readdir_max_bytes, "readdir_max_bytes=%d"},
 154         {Opt_congestion_kb, "write_congestion_kb=%d"},
 155         /* int args above */
 156         {Opt_snapdirname, "snapdirname=%s"},
 157         /* string args above */
 158         {Opt_dirstat, "dirstat"},
 159         {Opt_nodirstat, "nodirstat"},
 160         {Opt_rbytes, "rbytes"},
 161         {Opt_norbytes, "norbytes"},
 162         {Opt_asyncreaddir, "asyncreaddir"},
 163         {Opt_noasyncreaddir, "noasyncreaddir"},
 164         {Opt_dcache, "dcache"},
 165         {Opt_nodcache, "nodcache"},
 166         {Opt_ino32, "ino32"},
 167         {Opt_noino32, "noino32"},
 168         {Opt_fscache, "fsc"},
 169         {Opt_nofscache, "nofsc"},
 170         {Opt_poolperm, "poolperm"},
 171         {Opt_nopoolperm, "nopoolperm"},
 172 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 173         {Opt_acl, "acl"},
 174 #endif
 175         {Opt_noacl, "noacl"},
 176         {-1, NULL}
 177 };
 178
 179 static int parse_fsopt_token(char *c, void *private)
 180 {
 181         struct ceph_mount_options *fsopt = private;
 182         substring_t argstr[MAX_OPT_ARGS];
 183         int token, intval, ret;
 184
 185         token = match_token((char *)c, fsopt_tokens, argstr);
 186         if (token < 0)
 187                 return -EINVAL;
 188
 189         if (token < Opt_last_int) {
 190                 ret = match_int(&argstr[0], &intval);
 191                 if (ret < 0) {
 192                         pr_err("bad mount option arg (not int) "
 193                                "at '%s'\n", c);
 194                         return ret;
 195                 }
 196                 dout("got int token %d val %d\n", token, intval);
 197         } else if (token > Opt_last_int && token < Opt_last_string) {
 198                 dout("got string token %d val %s\n", token,
 199                      argstr[0].from);
 200         } else {
 201                 dout("got token %d\n", token);
 202         }
 203
 204         switch (token) {
 205         case Opt_snapdirname:
 206                 kfree(fsopt->snapdir_name);
 207                 fsopt->snapdir_name = kstrndup(argstr[0].from,
 208                                                argstr[0].to-argstr[0].from,
 209                                                GFP_KERNEL);
 210                 if (!fsopt->snapdir_name)
 211                         return -ENOMEM;
 212                 break;
 213
 214                 /* misc */
 215         case Opt_wsize:
 216                 fsopt->wsize = intval;
 217                 break;
 218         case Opt_rsize:
 219                 fsopt->rsize = intval;
 220                 break;
 221         case Opt_rasize:
 222                 fsopt->rasize = intval;
 223                 break;
 224         case Opt_caps_wanted_delay_min:
 225                 fsopt->caps_wanted_delay_min = intval;
 226                 break;
 227         case Opt_caps_wanted_delay_max:
 228                 fsopt->caps_wanted_delay_max = intval;
 229                 break;
 230         case Opt_readdir_max_entries:
 231                 fsopt->max_readdir = intval;
 232                 break;
 233         case Opt_readdir_max_bytes:
 234                 fsopt->max_readdir_bytes = intval;
 235                 break;
 236         case Opt_congestion_kb:
 237                 fsopt->congestion_kb = intval;
 238                 break;
 239         case Opt_dirstat:
 240                 fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT;
 241                 break;
 242         case Opt_nodirstat:
 243                 fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT;
 244                 break;
 245         case Opt_rbytes:
 246                 fsopt->flags |= CEPH_MOUNT_OPT_RBYTES;
 247                 break;
 248         case Opt_norbytes:
 249                 fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES;
 250                 break;
 251         case Opt_asyncreaddir:
 252                 fsopt->flags &= ~CEPH_MOUNT_OPT_NOASYNCREADDIR;
 253                 break;
 254         case Opt_noasyncreaddir:
 255                 fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR;
 256                 break;
 257         case Opt_dcache:
 258                 fsopt->flags |= CEPH_MOUNT_OPT_DCACHE;
 259                 break;
 260         case Opt_nodcache:
 261                 fsopt->flags &= ~CEPH_MOUNT_OPT_DCACHE;
 262                 break;
 263         case Opt_ino32:
 264                 fsopt->flags |= CEPH_MOUNT_OPT_INO32;
 265                 break;
 266         case Opt_noino32:
 267                 fsopt->flags &= ~CEPH_MOUNT_OPT_INO32;
 268                 break;
 269         case Opt_fscache:
 270                 fsopt->flags |= CEPH_MOUNT_OPT_FSCACHE;
 271                 break;
 272         case Opt_nofscache:
 273                 fsopt->flags &= ~CEPH_MOUNT_OPT_FSCACHE;
 274                 break;
 275         case Opt_poolperm:
 276                 fsopt->flags &= ~CEPH_MOUNT_OPT_NOPOOLPERM;
 277                 printk ("pool perm");
 278                 break;
 279         case Opt_nopoolperm:
 280                 fsopt->flags |= CEPH_MOUNT_OPT_NOPOOLPERM;
 281                 break;
 282 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 283         case Opt_acl:
 284                 fsopt->sb_flags |= MS_POSIXACL;
 285                 break;
 286 #endif
 287         case Opt_noacl:
 288                 fsopt->sb_flags &= ~MS_POSIXACL;
 289                 break;
 290         default:
 291                 BUG_ON(token);
 292         }
 293         return 0;
 294 }
 295
 296 static void destroy_mount_options(struct ceph_mount_options *args)
 297 {
 298         dout("destroy_mount_options %p\n", args);
 299         kfree(args->snapdir_name);
 300         kfree(args);
 301 }
 302
 303 static int strcmp_null(const char *s1, const char *s2)
 304 {
 305         if (!s1 && !s2)
 306                 return 0;
 307         if (s1 && !s2)
 308                 return -1;
 309         if (!s1 && s2)
 310                 return 1;
 311         return strcmp(s1, s2);
 312 }
 313
 314 static int compare_mount_options(struct ceph_mount_options *new_fsopt,
 315                                  struct ceph_options *new_opt,
 316                                  struct ceph_fs_client *fsc)
 317 {
 318         struct ceph_mount_options *fsopt1 = new_fsopt;
 319         struct ceph_mount_options *fsopt2 = fsc->mount_options;
 320         int ofs = offsetof(struct ceph_mount_options, snapdir_name);
 321         int ret;
 322
 323         ret = memcmp(fsopt1, fsopt2, ofs);
 324         if (ret)
 325                 return ret;
 326
 327         ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name);
 328         if (ret)
 329                 return ret;
 330
 331         return ceph_compare_options(new_opt, fsc->client);
 332 }
 333
 334 static int parse_mount_options(struct ceph_mount_options **pfsopt,
 335                                struct ceph_options **popt,
 336                                int flags, char *options,
 337                                const char *dev_name,
 338                                const char **path)
 339 {
 340         struct ceph_mount_options *fsopt;
 341         const char *dev_name_end;
 342         int err;
 343
 344         if (!dev_name || !*dev_name)
 345                 return -EINVAL;
 346
 347         fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL);
 348         if (!fsopt)
 349                 return -ENOMEM;
 350
 351         dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
 352
 353         fsopt->sb_flags = flags;
 354         fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
 355
 356         fsopt->rsize = CEPH_RSIZE_DEFAULT;
 357         fsopt->rasize = CEPH_RASIZE_DEFAULT;
 358         fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
 359         if (!fsopt->snapdir_name) {
 360                 err = -ENOMEM;
 361                 goto out;
 362         }
 363
 364         fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
 365         fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
 366         fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
 367         fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
 368         fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
 369         fsopt->congestion_kb = default_congestion_kb();
 370
 371         /*
 372          * Distinguish the server list from the path in "dev_name".
 373          * Internally we do not include the leading '/' in the path.
 374          *
 375          * "dev_name" will look like:
 376          *     <server_spec>[,<server_spec>...]:[<path>]
 377          * where
 378          *     <server_spec> is <ip>[:<port>]
 379          *     <path> is optional, but if present must begin with '/'
 380          */
 381         dev_name_end = strchr(dev_name, '/');
 382         if (dev_name_end) {
 383                 /* skip over leading '/' for path */
 384                 *path = dev_name_end + 1;
 385         } else {
 386                 /* path is empty */
 387                 dev_name_end = dev_name + strlen(dev_name);
 388                 *path = dev_name_end;
 389         }
 390         err = -EINVAL;
 391         dev_name_end--;         /* back up to ':' separator */
 392         if (dev_name_end < dev_name || *dev_name_end != ':') {
 393                 pr_err("device name is missing path (no : separator in %s)\n",
 394                                 dev_name);
 395                 goto out;
 396         }
 397         dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
 398         dout("server path '%s'\n", *path);
 399
 400         *popt = ceph_parse_options(options, dev_name, dev_name_end,
 401                                  parse_fsopt_token, (void *)fsopt);
 402         if (IS_ERR(*popt)) {
 403                 err = PTR_ERR(*popt);
 404                 goto out;
 405         }
 406
 407         /* success */
 408         *pfsopt = fsopt;
 409         return 0;
 410
 411 out:
 412         destroy_mount_options(fsopt);
 413         return err;
 414 }
 415
 416 /**
 417  * ceph_show_options - Show mount options in /proc/mounts
 418  * @m: seq_file to write to
 419  * @root: root of that (sub)tree
 420  */
 421 static int ceph_show_options(struct seq_file *m, struct dentry *root)
 422 {
 423         struct ceph_fs_client *fsc = ceph_sb_to_client(root->d_sb);
 424         struct ceph_mount_options *fsopt = fsc->mount_options;
 425         size_t pos;
 426         int ret;
 427
 428         /* a comma between MNT/MS and client options */
 429         seq_putc(m, ',');
 430         pos = m->count;
 431
 432         ret = ceph_print_client_options(m, fsc->client);
 433         if (ret)
 434                 return ret;
 435
 436         /* retract our comma if no client options */
 437         if (m->count == pos)
 438                 m->count--;
 439
 440         if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
 441                 seq_puts(m, ",dirstat");
 442         if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
 443                 seq_puts(m, ",norbytes");
 444         if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
 445                 seq_puts(m, ",noasyncreaddir");
 446         if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
 447                 seq_puts(m, ",nodcache");
 448         if (fsopt->flags & CEPH_MOUNT_OPT_FSCACHE)
 449                 seq_puts(m, ",fsc");
 450         if (fsopt->flags & CEPH_MOUNT_OPT_NOPOOLPERM)
 451                 seq_puts(m, ",nopoolperm");
 452
 453 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 454         if (fsopt->sb_flags & MS_POSIXACL)
 455                 seq_puts(m, ",acl");
 456         else
 457                 seq_puts(m, ",noacl");
 458 #endif
 459
 460         if (fsopt->wsize)
 461                 seq_printf(m, ",wsize=%d", fsopt->wsize);
 462         if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
 463                 seq_printf(m, ",rsize=%d", fsopt->rsize);
 464         if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
 465                 seq_printf(m, ",rasize=%d", fsopt->rasize);
 466         if (fsopt->congestion_kb != default_congestion_kb())
 467                 seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
 468         if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
 469                 seq_printf(m, ",caps_wanted_delay_min=%d",
 470                          fsopt->caps_wanted_delay_min);
 471         if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
 472                 seq_printf(m, ",caps_wanted_delay_max=%d",
 473                            fsopt->caps_wanted_delay_max);
 474         if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT)
 475                 seq_printf(m, ",cap_release_safety=%d",
 476                            fsopt->cap_release_safety);
 477         if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
 478                 seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
 479         if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
 480                 seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
 481         if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
 482                 seq_show_option(m, "snapdirname", fsopt->snapdir_name);
 483
 484         return 0;
 485 }
 486
 487 /*
 488  * handle any mon messages the standard library doesn't understand.
 489  * return error if we don't either.
 490  */
 491 static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
 492 {
 493         struct ceph_fs_client *fsc = client->private;
 494         int type = le16_to_cpu(msg->hdr.type);
 495
 496         switch (type) {
 497         case CEPH_MSG_MDS_MAP:
 498                 ceph_mdsc_handle_map(fsc->mdsc, msg);
 499                 return 0;
 500
 501         default:
 502                 return -1;
 503         }
 504 }
 505
 506 /*
 507  * create a new fs client
 508  */
 509 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 510                                         struct ceph_options *opt)
 511 {
 512         struct ceph_fs_client *fsc;
 513         const u64 supported_features =
 514                 CEPH_FEATURE_FLOCK |
 515                 CEPH_FEATURE_DIRLAYOUTHASH |
 516                 CEPH_FEATURE_MDS_INLINE_DATA;
 517         const u64 required_features = 0;
 518         int page_count;
 519         size_t size;
 520         int err = -ENOMEM;
 521
 522         fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
 523         if (!fsc)
 524                 return ERR_PTR(-ENOMEM);
 525
 526         fsc->client = ceph_create_client(opt, fsc, supported_features,
 527                                          required_features);
 528         if (IS_ERR(fsc->client)) {
 529                 err = PTR_ERR(fsc->client);
 530                 goto fail;
 531         }
 532         fsc->client->extra_mon_dispatch = extra_mon_dispatch;
 533         fsc->client->monc.want_mdsmap = 1;
 534
 535         fsc->mount_options = fsopt;
 536
 537         fsc->sb = NULL;
 538         fsc->mount_state = CEPH_MOUNT_MOUNTING;
 539
 540         atomic_long_set(&fsc->writeback_count, 0);
 541
 542         err = bdi_init(&fsc->backing_dev_info);
 543         if (err < 0)
 544                 goto fail_client;
 545
 546         err = -ENOMEM;
 547         /*
 548          * The number of concurrent works can be high but they don't need
 549          * to be processed in parallel, limit concurrency.
 550          */
 551         fsc->wb_wq = alloc_workqueue("ceph-writeback", 0, 1);
 552         if (fsc->wb_wq == NULL)
 553                 goto fail_bdi;
 554         fsc->pg_inv_wq = alloc_workqueue("ceph-pg-invalid", 0, 1);
 555         if (fsc->pg_inv_wq == NULL)
 556                 goto fail_wb_wq;
 557         fsc->trunc_wq = alloc_workqueue("ceph-trunc", 0, 1);
 558         if (fsc->trunc_wq == NULL)
 559                 goto fail_pg_inv_wq;
 560
 561         /* set up mempools */
 562         err = -ENOMEM;
 563         page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT;
 564         size = sizeof (struct page *) * (page_count ? page_count : 1);
 565         fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
 566         if (!fsc->wb_pagevec_pool)
 567                 goto fail_trunc_wq;
 568
 569         /* setup fscache */
 570         if ((fsopt->flags & CEPH_MOUNT_OPT_FSCACHE) &&
 571             (ceph_fscache_register_fs(fsc) != 0))
 572                 goto fail_fscache;
 573
 574         /* caps */
 575         fsc->min_caps = fsopt->max_readdir;
 576
 577         return fsc;
 578
 579 fail_fscache:
 580         ceph_fscache_unregister_fs(fsc);
 581 fail_trunc_wq:
 582         destroy_workqueue(fsc->trunc_wq);
 583 fail_pg_inv_wq:
 584         destroy_workqueue(fsc->pg_inv_wq);
 585 fail_wb_wq:
 586         destroy_workqueue(fsc->wb_wq);
 587 fail_bdi:
 588         bdi_destroy(&fsc->backing_dev_info);
 589 fail_client:
 590         ceph_destroy_client(fsc->client);
 591 fail:
 592         kfree(fsc);
 593         return ERR_PTR(err);
 594 }
 595
 596 static void destroy_fs_client(struct ceph_fs_client *fsc)
 597 {
 598         dout("destroy_fs_client %p\n", fsc);
 599
 600         ceph_fscache_unregister_fs(fsc);
 601
 602         destroy_workqueue(fsc->wb_wq);
 603         destroy_workqueue(fsc->pg_inv_wq);
 604         destroy_workqueue(fsc->trunc_wq);
 605
 606         bdi_destroy(&fsc->backing_dev_info);
 607
 608         mempool_destroy(fsc->wb_pagevec_pool);
 609
 610         destroy_mount_options(fsc->mount_options);
 611
 612         ceph_fs_debugfs_cleanup(fsc);
 613
 614         ceph_destroy_client(fsc->client);
 615
 616         kfree(fsc);
 617         dout("destroy_fs_client %p done\n", fsc);
 618 }
 619
 620 /*
 621  * caches
 622  */
 623 struct kmem_cache *ceph_inode_cachep;
 624 struct kmem_cache *ceph_cap_cachep;
 625 struct kmem_cache *ceph_cap_flush_cachep;
 626 struct kmem_cache *ceph_dentry_cachep;
 627 struct kmem_cache *ceph_file_cachep;
 628
 629 static void ceph_inode_init_once(void *foo)
 630 {
 631         struct ceph_inode_info *ci = foo;
 632         inode_init_once(&ci->vfs_inode);
 633 }
 634
 635 static int __init init_caches(void)
 636 {
 637         int error = -ENOMEM;
 638
 639         ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
 640                                       sizeof(struct ceph_inode_info),
 641                                       __alignof__(struct ceph_inode_info),
 642                                       (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
 643                                       ceph_inode_init_once);
 644         if (ceph_inode_cachep == NULL)
 645                 return -ENOMEM;
 646
 647         ceph_cap_cachep = KMEM_CACHE(ceph_cap,
 648                                      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 649         if (ceph_cap_cachep == NULL)
 650                 goto bad_cap;
 651         ceph_cap_flush_cachep = KMEM_CACHE(ceph_cap_flush,
 652                                            SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 653         if (ceph_cap_flush_cachep == NULL)
 654                 goto bad_cap_flush;
 655
 656         ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
 657                                         SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 658         if (ceph_dentry_cachep == NULL)
 659                 goto bad_dentry;
 660
 661         ceph_file_cachep = KMEM_CACHE(ceph_file_info,
 662                                       SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 663         if (ceph_file_cachep == NULL)
 664                 goto bad_file;
 665
 666         if ((error = ceph_fscache_register()))
 667                 goto bad_file;
 668
 669         return 0;
 670 bad_file:
 671         kmem_cache_destroy(ceph_dentry_cachep);
 672 bad_dentry:
 673         kmem_cache_destroy(ceph_cap_flush_cachep);
 674 bad_cap_flush:
 675         kmem_cache_destroy(ceph_cap_cachep);
 676 bad_cap:
 677         kmem_cache_destroy(ceph_inode_cachep);
 678         return error;
 679 }
 680
 681 static void destroy_caches(void)
 682 {
 683         /*
 684          * Make sure all delayed rcu free inodes are flushed before we
 685          * destroy cache.
 686          */
 687         rcu_barrier();
 688
 689         kmem_cache_destroy(ceph_inode_cachep);
 690         kmem_cache_destroy(ceph_cap_cachep);
 691         kmem_cache_destroy(ceph_cap_flush_cachep);
 692         kmem_cache_destroy(ceph_dentry_cachep);
 693         kmem_cache_destroy(ceph_file_cachep);
 694
 695         ceph_fscache_unregister();
 696 }
 697
 698
 699 /*
 700  * ceph_umount_begin - initiate forced umount.  Tear down down the
 701  * mount, skipping steps that may hang while waiting for server(s).
 702  */
 703 static void ceph_umount_begin(struct super_block *sb)
 704 {
 705         struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 706
 707         dout("ceph_umount_begin - starting forced umount\n");
 708         if (!fsc)
 709                 return;
 710         fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
 711         ceph_mdsc_force_umount(fsc->mdsc);
 712         return;
 713 }
 714
 715 static int ceph_remount(struct super_block *sb, int *flags, char *data)
 716 {
 717         sync_filesystem(sb);
 718         return 0;
 719 }
 720
 721 static const struct super_operations ceph_super_ops = {
 722         .alloc_inode    = ceph_alloc_inode,
 723         .destroy_inode  = ceph_destroy_inode,
 724         .write_inode    = ceph_write_inode,
 725         .drop_inode     = ceph_drop_inode,
 726         .sync_fs        = ceph_sync_fs,
 727         .put_super      = ceph_put_super,
 728         .remount_fs     = ceph_remount,
 729         .show_options   = ceph_show_options,
 730         .statfs         = ceph_statfs,
 731         .umount_begin   = ceph_umount_begin,
 732 };
 733
 734 /*
 735  * Bootstrap mount by opening the root directory.  Note the mount
 736  * @started time from caller, and time out if this takes too long.
 737  */
 738 static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 739                                        const char *path,
 740                                        unsigned long started)
 741 {
 742         struct ceph_mds_client *mdsc = fsc->mdsc;
 743         struct ceph_mds_request *req = NULL;
 744         int err;
 745         struct dentry *root;
 746
 747         /* open dir */
 748         dout("open_root_inode opening '%s'\n", path);
 749         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
 750         if (IS_ERR(req))
 751                 return ERR_CAST(req);
 752         req->r_path1 = kstrdup(path, GFP_NOFS);
 753         if (!req->r_path1) {
 754                 root = ERR_PTR(-ENOMEM);
 755                 goto out;
 756         }
 757
 758         req->r_ino1.ino = CEPH_INO_ROOT;
 759         req->r_ino1.snap = CEPH_NOSNAP;
 760         req->r_started = started;
 761         req->r_timeout = fsc->client->options->mount_timeout;
 762         req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
 763         req->r_num_caps = 2;
 764         err = ceph_mdsc_do_request(mdsc, NULL, req);
 765         if (err == 0) {
 766                 struct inode *inode = req->r_target_inode;
 767                 req->r_target_inode = NULL;
 768                 dout("open_root_inode success\n");
 769                 if (ceph_ino(inode) == CEPH_INO_ROOT &&
 770                     fsc->sb->s_root == NULL) {
 771                         root = d_make_root(inode);
 772                         if (!root) {
 773                                 root = ERR_PTR(-ENOMEM);
 774                                 goto out;
 775                         }
 776                 } else {
 777                         root = d_obtain_root(inode);
 778                 }
 779                 ceph_init_dentry(root);
 780                 dout("open_root_inode success, root dentry is %p\n", root);
 781         } else {
 782                 root = ERR_PTR(err);
 783         }
 784 out:
 785         ceph_mdsc_put_request(req);
 786         return root;
 787 }
 788
 789
 790
 791
 792 /*
 793  * mount: join the ceph cluster, and open root directory.
 794  */
 795 static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 796                       const char *path)
 797 {
 798         int err;
 799         unsigned long started = jiffies;  /* note the start time */
 800         struct dentry *root;
 801         int first = 0;   /* first vfsmount for this super_block */
 802
 803         dout("mount start\n");
 804         mutex_lock(&fsc->client->mount_mutex);
 805
 806         err = __ceph_open_session(fsc->client, started);
 807         if (err < 0)
 808                 goto out;
 809
 810         dout("mount opening root\n");
 811         root = open_root_dentry(fsc, "", started);
 812         if (IS_ERR(root)) {
 813                 err = PTR_ERR(root);
 814                 goto out;
 815         }
 816         if (fsc->sb->s_root) {
 817                 dput(root);
 818         } else {
 819                 fsc->sb->s_root = root;
 820                 first = 1;
 821
 822                 err = ceph_fs_debugfs_init(fsc);
 823                 if (err < 0)
 824                         goto fail;
 825         }
 826
 827         if (path[0] == 0) {
 828                 dget(root);
 829         } else {
 830                 dout("mount opening base mountpoint\n");
 831                 root = open_root_dentry(fsc, path, started);
 832                 if (IS_ERR(root)) {
 833                         err = PTR_ERR(root);
 834                         goto fail;
 835                 }
 836         }
 837
 838         fsc->mount_state = CEPH_MOUNT_MOUNTED;
 839         dout("mount success\n");
 840         mutex_unlock(&fsc->client->mount_mutex);
 841         return root;
 842
 843 out:
 844         mutex_unlock(&fsc->client->mount_mutex);
 845         return ERR_PTR(err);
 846
 847 fail:
 848         if (first) {
 849                 dput(fsc->sb->s_root);
 850                 fsc->sb->s_root = NULL;
 851         }
 852         goto out;
 853 }
 854
 855 static int ceph_set_super(struct super_block *s, void *data)
 856 {
 857         struct ceph_fs_client *fsc = data;
 858         int ret;
 859
 860         dout("set_super %p data %p\n", s, data);
 861
 862         s->s_flags = fsc->mount_options->sb_flags;
 863         s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
 864
 865         s->s_xattr = ceph_xattr_handlers;
 866         s->s_fs_info = fsc;
 867         fsc->sb = s;
 868
 869         s->s_op = &ceph_super_ops;
 870         s->s_export_op = &ceph_export_ops;
 871
 872         s->s_time_gran = 1000;  /* 1000 ns == 1 us */
 873
 874         ret = set_anon_super(s, NULL);  /* what is that second arg for? */
 875         if (ret != 0)
 876                 goto fail;
 877
 878         return ret;
 879
 880 fail:
 881         s->s_fs_info = NULL;
 882         fsc->sb = NULL;
 883         return ret;
 884 }
 885
 886 /*
 887  * share superblock if same fs AND options
 888  */
 889 static int ceph_compare_super(struct super_block *sb, void *data)
 890 {
 891         struct ceph_fs_client *new = data;
 892         struct ceph_mount_options *fsopt = new->mount_options;
 893         struct ceph_options *opt = new->client->options;
 894         struct ceph_fs_client *other = ceph_sb_to_client(sb);
 895
 896         dout("ceph_compare_super %p\n", sb);
 897
 898         if (compare_mount_options(fsopt, opt, other)) {
 899                 dout("monitor(s)/mount options don't match\n");
 900                 return 0;
 901         }
 902         if ((opt->flags & CEPH_OPT_FSID) &&
 903             ceph_fsid_compare(&opt->fsid, &other->client->fsid)) {
 904                 dout("fsid doesn't match\n");
 905                 return 0;
 906         }
 907         if (fsopt->sb_flags != other->mount_options->sb_flags) {
 908                 dout("flags differ\n");
 909                 return 0;
 910         }
 911         return 1;
 912 }
 913
 914 /*
 915  * construct our own bdi so we can control readahead, etc.
 916  */
 917 static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
 918
 919 static int ceph_register_bdi(struct super_block *sb,
 920                              struct ceph_fs_client *fsc)
 921 {
 922         int err;
 923
 924         /* set ra_pages based on rasize mount option? */
 925         if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
 926                 fsc->backing_dev_info.ra_pages =
 927                         (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
 928                         >> PAGE_SHIFT;
 929         else
 930                 fsc->backing_dev_info.ra_pages =
 931                         VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
 932
 933         err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
 934                            atomic_long_inc_return(&bdi_seq));
 935         if (!err)
 936                 sb->s_bdi = &fsc->backing_dev_info;
 937         return err;
 938 }
 939
 940 static struct dentry *ceph_mount(struct file_system_type *fs_type,
 941                        int flags, const char *dev_name, void *data)
 942 {
 943         struct super_block *sb;
 944         struct ceph_fs_client *fsc;
 945         struct dentry *res;
 946         int err;
 947         int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
 948         const char *path = NULL;
 949         struct ceph_mount_options *fsopt = NULL;
 950         struct ceph_options *opt = NULL;
 951
 952         dout("ceph_mount\n");
 953
 954 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 955         flags |= MS_POSIXACL;
 956 #endif
 957         err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
 958         if (err < 0) {
 959                 res = ERR_PTR(err);
 960                 goto out_final;
 961         }
 962
 963         /* create client (which we may/may not use) */
 964         fsc = create_fs_client(fsopt, opt);
 965         if (IS_ERR(fsc)) {
 966                 res = ERR_CAST(fsc);
 967                 destroy_mount_options(fsopt);
 968                 ceph_destroy_options(opt);
 969                 goto out_final;
 970         }
 971
 972         err = ceph_mdsc_init(fsc);
 973         if (err < 0) {
 974                 res = ERR_PTR(err);
 975                 goto out;
 976         }
 977
 978         if (ceph_test_opt(fsc->client, NOSHARE))
 979                 compare_super = NULL;
 980         sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc);
 981         if (IS_ERR(sb)) {
 982                 res = ERR_CAST(sb);
 983                 goto out;
 984         }
 985
 986         if (ceph_sb_to_client(sb) != fsc) {
 987                 ceph_mdsc_destroy(fsc);
 988                 destroy_fs_client(fsc);
 989                 fsc = ceph_sb_to_client(sb);
 990                 dout("get_sb got existing client %p\n", fsc);
 991         } else {
 992                 dout("get_sb using new client %p\n", fsc);
 993                 err = ceph_register_bdi(sb, fsc);
 994                 if (err < 0) {
 995                         res = ERR_PTR(err);
 996                         goto out_splat;
 997                 }
 998         }
 999
1000         res = ceph_real_mount(fsc, path);
1001         if (IS_ERR(res))
1002                 goto out_splat;
1003         dout("root %p inode %p ino %llx.%llx\n", res,
1004              d_inode(res), ceph_vinop(d_inode(res)));
1005         return res;
1006
1007 out_splat:
1008         ceph_mdsc_close_sessions(fsc->mdsc);
1009         deactivate_locked_super(sb);
1010         goto out_final;
1011
1012 out:
1013         ceph_mdsc_destroy(fsc);
1014         destroy_fs_client(fsc);
1015 out_final:
1016         dout("ceph_mount fail %ld\n", PTR_ERR(res));
1017         return res;
1018 }
1019
1020 static void ceph_kill_sb(struct super_block *s)
1021 {
1022         struct ceph_fs_client *fsc = ceph_sb_to_client(s);
1023         dev_t dev = s->s_dev;
1024
1025         dout("kill_sb %p\n", s);
1026
1027         ceph_mdsc_pre_umount(fsc->mdsc);
1028         generic_shutdown_super(s);
1029         ceph_mdsc_destroy(fsc);
1030
1031         destroy_fs_client(fsc);
1032         free_anon_bdev(dev);
1033 }
1034
1035 static struct file_system_type ceph_fs_type = {
1036         .owner          = THIS_MODULE,
1037         .name           = "ceph",
1038         .mount          = ceph_mount,
1039         .kill_sb        = ceph_kill_sb,
1040         .fs_flags       = FS_RENAME_DOES_D_MOVE,
1041 };
1042 MODULE_ALIAS_FS("ceph");
1043
1044 static int __init init_ceph(void)
1045 {
1046         int ret = init_caches();
1047         if (ret)
1048                 goto out;
1049
1050         ceph_flock_init();
1051         ceph_xattr_init();
1052         ret = ceph_snap_init();
1053         if (ret)
1054                 goto out_xattr;
1055         ret = register_filesystem(&ceph_fs_type);
1056         if (ret)
1057                 goto out_snap;
1058
1059         pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
1060
1061         return 0;
1062
1063 out_snap:
1064         ceph_snap_exit();
1065 out_xattr:
1066         ceph_xattr_exit();
1067         destroy_caches();
1068 out:
1069         return ret;
1070 }
1071
/*
 * Module exit: unregister the filesystem type first, then call the
 * snap and xattr exit hooks and destroy the caches.
 */
static void __exit exit_ceph(void)
{
        dout("exit_ceph\n");
        unregister_filesystem(&ceph_fs_type);
        ceph_snap_exit();
        ceph_xattr_exit();
        destroy_caches();
}
1080
/* module entry and exit points */
module_init(init_ceph);
module_exit(exit_ceph);

/* module metadata */
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");