fs/logfs/inode.c

   1 /*
   2  * fs/logfs/inode.c     - inode handling code
   3  *
   4  * As should be obvious for Linux kernel code, license is GPLv2
   5  *
   6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
   7  */
   8 #include "logfs.h"
   9 #include <linux/writeback.h>
  10 #include <linux/backing-dev.h>
  11
  12 /*
  13  * How soon to reuse old inode numbers?  LogFS doesn't store deleted inodes
  14  * on the medium.  It therefore also lacks a method to store the previous
  15  * generation number for deleted inodes.  Instead a single generation number
  16  * is stored which will be used for new inodes.  Being just a 32bit counter,
  17  * this can obviously wrap relatively quickly.  So we only reuse inodes if we
  18  * know that a fair number of inodes can be created before we have to increment
  19  * the generation again - effectively adding some bits to the counter.
  20  * But being too aggressive here means we keep a very large and very sparse
  21  * inode file, wasting space on indirect blocks.
  22  * So what is a good value?  Beats me.  64k seems moderately bad on both
  23  * fronts, so let's use that for now...
  24  *
  25  * NFS sucks, as everyone already knows.
  26  */
  27 #define INOS_PER_WRAP (0x10000)
  28
  29 /*
  30  * Logfs' requirement to read inodes for garbage collection makes life a bit
  31  * harder.  GC may have to read inodes that are in I_FREEING state, when they
  32  * are being written out - and waiting for GC to make progress, naturally.
  33  *
  34  * So we cannot just call iget() or some variant of it, but first have to check
  35  * whether the inode in question might be in I_FREEING state.  Therefore we
  36  * maintain our own per-sb list of "almost deleted" inodes and check against
  37  * that list first.  Normally this should be at most 1-2 entries long.
  38  *
  39  * Also, inodes have logfs-specific reference counting on top of what the vfs
  40  * does.  When .destroy_inode is called, normally the reference count will drop
  41  * to zero and the inode gets deleted.  But if GC accessed the inode, its
  42  * refcount will remain nonzero and final deletion will have to wait.
  43  *
  44  * As a result we have two sets of functions to get/put inodes:
  45  * logfs_safe_iget/logfs_safe_iput      - safe to call from GC context
  46  * logfs_iget/iput                      - normal version
  47  */
  48 static struct kmem_cache *logfs_inode_cache;
  49
  50 static DEFINE_SPINLOCK(logfs_inode_lock);
  51
  52 static void logfs_inode_setops(struct inode *inode)
  53 {
  54         switch (inode->i_mode & S_IFMT) {
  55         case S_IFDIR:
  56                 inode->i_op = &logfs_dir_iops;
  57                 inode->i_fop = &logfs_dir_fops;
  58                 inode->i_mapping->a_ops = &logfs_reg_aops;
  59                 break;
  60         case S_IFREG:
  61                 inode->i_op = &logfs_reg_iops;
  62                 inode->i_fop = &logfs_reg_fops;
  63                 inode->i_mapping->a_ops = &logfs_reg_aops;
  64                 break;
  65         case S_IFLNK:
  66                 inode->i_op = &logfs_symlink_iops;
  67                 inode->i_mapping->a_ops = &logfs_reg_aops;
  68                 break;
  69         case S_IFSOCK:  /* fall through */
  70         case S_IFBLK:   /* fall through */
  71         case S_IFCHR:   /* fall through */
  72         case S_IFIFO:
  73                 init_special_inode(inode, inode->i_mode, inode->i_rdev);
  74                 break;
  75         default:
  76                 BUG();
  77         }
  78 }
  79
  80 static struct inode *__logfs_iget(struct super_block *sb, ino_t ino)
  81 {
  82         struct inode *inode = iget_locked(sb, ino);
  83         int err;
  84
  85         if (!inode)
  86                 return ERR_PTR(-ENOMEM);
  87         if (!(inode->i_state & I_NEW))
  88                 return inode;
  89
  90         err = logfs_read_inode(inode);
  91         if (err || inode->i_nlink == 0) {
  92                 /* inode->i_nlink == 0 can be true when called from
  93                  * block validator */
  94                 /* set i_nlink to 0 to prevent caching */
  95                 inode->i_nlink = 0;
  96                 logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE;
  97                 iget_failed(inode);
  98                 if (!err)
  99                         err = -ENOENT;
 100                 return ERR_PTR(err);
 101         }
 102
 103         logfs_inode_setops(inode);
 104         unlock_new_inode(inode);
 105         return inode;
 106 }
 107
 108 struct inode *logfs_iget(struct super_block *sb, ino_t ino)
 109 {
 110         BUG_ON(ino == LOGFS_INO_MASTER);
 111         BUG_ON(ino == LOGFS_INO_SEGFILE);
 112         return __logfs_iget(sb, ino);
 113 }
 114
 115 /*
 116  * is_cached is set to 1 if we hand out a cached inode, 0 otherwise.
 117  * this allows logfs_iput to do the right thing later
 118  */
 119 struct inode *logfs_safe_iget(struct super_block *sb, ino_t ino, int *is_cached)
 120 {
 121         struct logfs_super *super = logfs_super(sb);
 122         struct logfs_inode *li;
 123
 124         if (ino == LOGFS_INO_MASTER)
 125                 return super->s_master_inode;
 126         if (ino == LOGFS_INO_SEGFILE)
 127                 return super->s_segfile_inode;
 128
 129         spin_lock(&logfs_inode_lock);
 130         list_for_each_entry(li, &super->s_freeing_list, li_freeing_list)
 131                 if (li->vfs_inode.i_ino == ino) {
 132                         li->li_refcount++;
 133                         spin_unlock(&logfs_inode_lock);
 134                         *is_cached = 1;
 135                         return &li->vfs_inode;
 136                 }
 137         spin_unlock(&logfs_inode_lock);
 138
 139         *is_cached = 0;
 140         return __logfs_iget(sb, ino);
 141 }
 142
 143 static void __logfs_destroy_inode(struct inode *inode)
 144 {
 145         struct logfs_inode *li = logfs_inode(inode);
 146
 147         BUG_ON(li->li_block);
 148         list_del(&li->li_freeing_list);
 149         kmem_cache_free(logfs_inode_cache, li);
 150 }
 151
 152 static void logfs_destroy_inode(struct inode *inode)
 153 {
 154         struct logfs_inode *li = logfs_inode(inode);
 155
 156         BUG_ON(list_empty(&li->li_freeing_list));
 157         spin_lock(&logfs_inode_lock);
 158         li->li_refcount--;
 159         if (li->li_refcount == 0)
 160                 __logfs_destroy_inode(inode);
 161         spin_unlock(&logfs_inode_lock);
 162 }
 163
 164 void logfs_safe_iput(struct inode *inode, int is_cached)
 165 {
 166         if (inode->i_ino == LOGFS_INO_MASTER)
 167                 return;
 168         if (inode->i_ino == LOGFS_INO_SEGFILE)
 169                 return;
 170
 171         if (is_cached) {
 172                 logfs_destroy_inode(inode);
 173                 return;
 174         }
 175
 176         iput(inode);
 177 }
 178
 179 static void logfs_init_inode(struct super_block *sb, struct inode *inode)
 180 {
 181         struct logfs_inode *li = logfs_inode(inode);
 182         int i;
 183
 184         li->li_flags    = 0;
 185         li->li_height   = 0;
 186         li->li_used_bytes = 0;
 187         li->li_block    = NULL;
 188         inode->i_uid    = 0;
 189         inode->i_gid    = 0;
 190         inode->i_size   = 0;
 191         inode->i_blocks = 0;
 192         inode->i_ctime  = CURRENT_TIME;
 193         inode->i_mtime  = CURRENT_TIME;
 194         inode->i_nlink  = 1;
 195         INIT_LIST_HEAD(&li->li_freeing_list);
 196
 197         for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
 198                 li->li_data[i] = 0;
 199
 200         return;
 201 }
 202
 203 static struct inode *logfs_alloc_inode(struct super_block *sb)
 204 {
 205         struct logfs_inode *li;
 206
 207         li = kmem_cache_alloc(logfs_inode_cache, GFP_NOFS);
 208         if (!li)
 209                 return NULL;
 210         logfs_init_inode(sb, &li->vfs_inode);
 211         return &li->vfs_inode;
 212 }
 213
 214 /*
 215  * In logfs inodes are written to an inode file.  The inode file, like any
 216  * other file, is managed with an inode.  The inode file's inode, aka master
 217  * inode, requires special handling in several respects.  First, it cannot be
 218  * written to the inode file, so it is stored in the journal instead.
 219  *
 220  * Secondly, this inode cannot be written back and destroyed before all other
 221  * inodes have been written.  The ordering is important.  Linux' VFS is happily
 222  * unaware of the ordering constraint and would ordinarily destroy the master
 223  * inode at umount time while other inodes are still in use and dirty.  Not
 224  * good.
 225  *
 226  * So logfs makes sure the master inode is not written until all other inodes
 227  * have been destroyed.  Sadly, this method has another side-effect.  The VFS
 228  * will notice one remaining inode and print a frightening warning message.
 229  * Worse, it is impossible to judge whether such a warning was caused by the
 230  * master inode or any other inodes have leaked as well.
 231  *
 232  * Our attempt of solving this is with logfs_new_meta_inode() below.  Its
 233  * purpose is to create a new inode that will not trigger the warning if such
 234  * an inode is still in use.  An ugly hack, no doubt.  Suggestions for
 235  * improvement are welcome.
 236  */
 237 struct inode *logfs_new_meta_inode(struct super_block *sb, u64 ino)
 238 {
 239         struct inode *inode;
 240
 241         inode = logfs_alloc_inode(sb);
 242         if (!inode)
 243                 return ERR_PTR(-ENOMEM);
 244
 245         inode->i_mode = S_IFREG;
 246         inode->i_ino = ino;
 247         inode->i_sb = sb;
 248
 249         /* This is a blatant copy of alloc_inode code.  We'd need alloc_inode
 250          * to be nonstatic, alas. */
 251         {
 252                 struct address_space * const mapping = &inode->i_data;
 253
 254                 mapping->a_ops = &logfs_reg_aops;
 255                 mapping->host = inode;
 256                 mapping->flags = 0;
 257                 mapping_set_gfp_mask(mapping, GFP_NOFS);
 258                 mapping->assoc_mapping = NULL;
 259                 mapping->backing_dev_info = &default_backing_dev_info;
 260                 inode->i_mapping = mapping;
 261                 inode->i_nlink = 1;
 262         }
 263
 264         return inode;
 265 }
 266
 267 struct inode *logfs_read_meta_inode(struct super_block *sb, u64 ino)
 268 {
 269         struct inode *inode;
 270         int err;
 271
 272         inode = logfs_new_meta_inode(sb, ino);
 273         if (IS_ERR(inode))
 274                 return inode;
 275
 276         err = logfs_read_inode(inode);
 277         if (err) {
 278                 destroy_meta_inode(inode);
 279                 return ERR_PTR(err);
 280         }
 281         logfs_inode_setops(inode);
 282         return inode;
 283 }
 284
 285 static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 286 {
 287         int ret;
 288         long flags = WF_LOCK;
 289
 290         /* Can only happen if creat() failed.  Safe to skip. */
 291         if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN)
 292                 return 0;
 293
 294         ret = __logfs_write_inode(inode, flags);
 295         LOGFS_BUG_ON(ret, inode->i_sb);
 296         return ret;
 297 }
 298
 299 void destroy_meta_inode(struct inode *inode)
 300 {
 301         if (inode) {
 302                 if (inode->i_data.nrpages)
 303                         truncate_inode_pages(&inode->i_data, 0);
 304                 logfs_clear_inode(inode);
 305                 kmem_cache_free(logfs_inode_cache, logfs_inode(inode));
 306         }
 307 }
 308
 309 /* called with inode_lock held */
 310 static void logfs_drop_inode(struct inode *inode)
 311 {
 312         struct logfs_super *super = logfs_super(inode->i_sb);
 313         struct logfs_inode *li = logfs_inode(inode);
 314
 315         spin_lock(&logfs_inode_lock);
 316         list_move(&li->li_freeing_list, &super->s_freeing_list);
 317         spin_unlock(&logfs_inode_lock);
 318         generic_drop_inode(inode);
 319 }
 320
 321 static void logfs_set_ino_generation(struct super_block *sb,
 322                 struct inode *inode)
 323 {
 324         struct logfs_super *super = logfs_super(sb);
 325         u64 ino;
 326
 327         mutex_lock(&super->s_journal_mutex);
 328         ino = logfs_seek_hole(super->s_master_inode, super->s_last_ino);
 329         super->s_last_ino = ino;
 330         super->s_inos_till_wrap--;
 331         if (super->s_inos_till_wrap < 0) {
 332                 super->s_last_ino = LOGFS_RESERVED_INOS;
 333                 super->s_generation++;
 334                 super->s_inos_till_wrap = INOS_PER_WRAP;
 335         }
 336         inode->i_ino = ino;
 337         inode->i_generation = super->s_generation;
 338         mutex_unlock(&super->s_journal_mutex);
 339 }
 340
 341 struct inode *logfs_new_inode(struct inode *dir, int mode)
 342 {
 343         struct super_block *sb = dir->i_sb;
 344         struct inode *inode;
 345
 346         inode = new_inode(sb);
 347         if (!inode)
 348                 return ERR_PTR(-ENOMEM);
 349
 350         logfs_init_inode(sb, inode);
 351
 352         /* inherit parent flags */
 353         logfs_inode(inode)->li_flags |=
 354                 logfs_inode(dir)->li_flags & LOGFS_FL_INHERITED;
 355
 356         inode->i_mode = mode;
 357         logfs_set_ino_generation(sb, inode);
 358
 359         inode->i_uid = current_fsuid();
 360         inode->i_gid = current_fsgid();
 361         if (dir->i_mode & S_ISGID) {
 362                 inode->i_gid = dir->i_gid;
 363                 if (S_ISDIR(mode))
 364                         inode->i_mode |= S_ISGID;
 365         }
 366
 367         logfs_inode_setops(inode);
 368         insert_inode_hash(inode);
 369
 370         return inode;
 371 }
 372
 373 static void logfs_init_once(void *_li)
 374 {
 375         struct logfs_inode *li = _li;
 376         int i;
 377
 378         li->li_flags = 0;
 379         li->li_used_bytes = 0;
 380         li->li_refcount = 1;
 381         for (i = 0; i < LOGFS_EMBEDDED_FIELDS; i++)
 382                 li->li_data[i] = 0;
 383         inode_init_once(&li->vfs_inode);
 384 }
 385
 386 static int logfs_sync_fs(struct super_block *sb, int wait)
 387 {
 388         /* FIXME: write anchor */
 389         logfs_super(sb)->s_devops->sync(sb);
 390         return 0;
 391 }
 392
 393 const struct super_operations logfs_super_operations = {
 394         .alloc_inode    = logfs_alloc_inode,
 395         .clear_inode    = logfs_clear_inode,
 396         .delete_inode   = logfs_delete_inode,
 397         .destroy_inode  = logfs_destroy_inode,
 398         .drop_inode     = logfs_drop_inode,
 399         .write_inode    = logfs_write_inode,
 400         .statfs         = logfs_statfs,
 401         .sync_fs        = logfs_sync_fs,
 402 };
 403
 404 int logfs_init_inode_cache(void)
 405 {
 406         logfs_inode_cache = kmem_cache_create("logfs_inode_cache",
 407                         sizeof(struct logfs_inode), 0, SLAB_RECLAIM_ACCOUNT,
 408                         logfs_init_once);
 409         if (!logfs_inode_cache)
 410                 return -ENOMEM;
 411         return 0;
 412 }
 413
 414 void logfs_destroy_inode_cache(void)
 415 {
 416         kmem_cache_destroy(logfs_inode_cache);
 417 }