fs/jffs2/gc.c

   1 /*
   2  * JFFS2 -- Journalling Flash File System, Version 2.
   3  *
   4  * Copyright © 2001-2007 Red Hat, Inc.
   5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
   6  *
   7  * Created by David Woodhouse <dwmw2@infradead.org>
   8  *
   9  * For licensing information, see the file 'LICENCE' in this directory.
  10  *
  11  */
  12
  13 #include <linux/kernel.h>
  14 #include <linux/mtd/mtd.h>
  15 #include <linux/slab.h>
  16 #include <linux/pagemap.h>
  17 #include <linux/crc32.h>
  18 #include <linux/compiler.h>
  19 #include <linux/stat.h>
  20 #include "nodelist.h"
  21 #include "compr.h"
  22
  23 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
  24                                           struct jffs2_inode_cache *ic,
  25                                           struct jffs2_raw_node_ref *raw);
  26 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  27                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
  28 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  29                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  30 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  31                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
  32 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  33                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  34                                       uint32_t start, uint32_t end);
  35 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
  36                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
  37                                        uint32_t start, uint32_t end);
  38 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
  39                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
  40
  41 /* Called with erase_completion_lock held */
  42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
  43 {
  44         struct jffs2_eraseblock *ret;
  45         struct list_head *nextlist = NULL;
  46         int n = jiffies % 128;
  47
  48         /* Pick an eraseblock to garbage collect next. This is where we'll
  49            put the clever wear-levelling algorithms. Eventually.  */
  50         /* We possibly want to favour the dirtier blocks more when the
  51            number of free blocks is low. */
  52 again:
  53         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
  54                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
  55                 nextlist = &c->bad_used_list;
  56         } else if (n < 50 && !list_empty(&c->erasable_list)) {
  57                 /* Note that most of them will have gone directly to be erased.
  58                    So don't favour the erasable_list _too_ much. */
  59                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
  60                 nextlist = &c->erasable_list;
  61         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
  62                 /* Most of the time, pick one off the very_dirty list */
  63                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
  64                 nextlist = &c->very_dirty_list;
  65         } else if (n < 126 && !list_empty(&c->dirty_list)) {
  66                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
  67                 nextlist = &c->dirty_list;
  68         } else if (!list_empty(&c->clean_list)) {
  69                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
  70                 nextlist = &c->clean_list;
  71         } else if (!list_empty(&c->dirty_list)) {
  72                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
  73
  74                 nextlist = &c->dirty_list;
  75         } else if (!list_empty(&c->very_dirty_list)) {
  76                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
  77                 nextlist = &c->very_dirty_list;
  78         } else if (!list_empty(&c->erasable_list)) {
  79                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
  80
  81                 nextlist = &c->erasable_list;
  82         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
  83                 /* There are blocks are wating for the wbuf sync */
  84                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
  85                 spin_unlock(&c->erase_completion_lock);
  86                 jffs2_flush_wbuf_pad(c);
  87                 spin_lock(&c->erase_completion_lock);
  88                 goto again;
  89         } else {
  90                 /* Eep. All were empty */
  91                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
  92                 return NULL;
  93         }
  94
  95         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
  96         list_del(&ret->list);
  97         c->gcblock = ret;
  98         ret->gc_node = ret->first_node;
  99         if (!ret->gc_node) {
 100                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
 101                 BUG();
 102         }
 103
 104         /* Have we accidentally picked a clean block with wasted space ? */
 105         if (ret->wasted_size) {
 106                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
 107                 ret->dirty_size += ret->wasted_size;
 108                 c->wasted_size -= ret->wasted_size;
 109                 c->dirty_size += ret->wasted_size;
 110                 ret->wasted_size = 0;
 111         }
 112
 113         return ret;
 114 }
 115
 116 /* jffs2_garbage_collect_pass
 117  * Make a single attempt to progress GC. Move one node, and possibly
 118  * start erasing one eraseblock.
 119  */
 120 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 121 {
 122         struct jffs2_inode_info *f;
 123         struct jffs2_inode_cache *ic;
 124         struct jffs2_eraseblock *jeb;
 125         struct jffs2_raw_node_ref *raw;
 126         uint32_t gcblock_dirty;
 127         int ret = 0, inum, nlink;
 128         int xattr = 0;
 129
 130         if (mutex_lock_interruptible(&c->alloc_sem))
 131                 return -EINTR;
 132
 133         for (;;) {
 134                 spin_lock(&c->erase_completion_lock);
 135                 if (!c->unchecked_size)
 136                         break;
 137
 138                 /* We can't start doing GC yet. We haven't finished checking
 139                    the node CRCs etc. Do it now. */
 140
 141                 /* checked_ino is protected by the alloc_sem */
 142                 if (c->checked_ino > c->highest_ino && xattr) {
 143                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
 144                                c->unchecked_size);
 145                         jffs2_dbg_dump_block_lists_nolock(c);
 146                         spin_unlock(&c->erase_completion_lock);
 147                         mutex_unlock(&c->alloc_sem);
 148                         return -ENOSPC;
 149                 }
 150
 151                 spin_unlock(&c->erase_completion_lock);
 152
 153                 if (!xattr)
 154                         xattr = jffs2_verify_xattr(c);
 155
 156                 spin_lock(&c->inocache_lock);
 157
 158                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
 159
 160                 if (!ic) {
 161                         spin_unlock(&c->inocache_lock);
 162                         continue;
 163                 }
 164
 165                 if (!ic->pino_nlink) {
 166                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
 167                                   ic->ino));
 168                         spin_unlock(&c->inocache_lock);
 169                         jffs2_xattr_delete_inode(c, ic);
 170                         continue;
 171                 }
 172                 switch(ic->state) {
 173                 case INO_STATE_CHECKEDABSENT:
 174                 case INO_STATE_PRESENT:
 175                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
 176                         spin_unlock(&c->inocache_lock);
 177                         continue;
 178
 179                 case INO_STATE_GC:
 180                 case INO_STATE_CHECKING:
 181                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
 182                         spin_unlock(&c->inocache_lock);
 183                         BUG();
 184
 185                 case INO_STATE_READING:
 186                         /* We need to wait for it to finish, lest we move on
 187                            and trigger the BUG() above while we haven't yet
 188                            finished checking all its nodes */
 189                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
 190                         /* We need to come back again for the _same_ inode. We've
 191                          made no progress in this case, but that should be OK */
 192                         c->checked_ino--;
 193
 194                         mutex_unlock(&c->alloc_sem);
 195                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 196                         return 0;
 197
 198                 default:
 199                         BUG();
 200
 201                 case INO_STATE_UNCHECKED:
 202                         ;
 203                 }
 204                 ic->state = INO_STATE_CHECKING;
 205                 spin_unlock(&c->inocache_lock);
 206
 207                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
 208
 209                 ret = jffs2_do_crccheck_inode(c, ic);
 210                 if (ret)
 211                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
 212
 213                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
 214                 mutex_unlock(&c->alloc_sem);
 215                 return ret;
 216         }
 217
 218         /* If there are any blocks which need erasing, erase them now */
 219         if (!list_empty(&c->erase_complete_list) ||
 220             !list_empty(&c->erase_pending_list)) {
 221                 spin_unlock(&c->erase_completion_lock);
 222                 mutex_unlock(&c->alloc_sem);
 223                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
 224                 if (jffs2_erase_pending_blocks(c, 1))
 225                         return 0;
 226
 227                 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
 228                 spin_lock(&c->erase_completion_lock);
 229                 mutex_lock(&c->alloc_sem);
 230         }
 231
 232         /* First, work out which block we're garbage-collecting */
 233         jeb = c->gcblock;
 234
 235         if (!jeb)
 236                 jeb = jffs2_find_gc_block(c);
 237
 238         if (!jeb) {
 239                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
 240                 if (c->nr_erasing_blocks) {
 241                         spin_unlock(&c->erase_completion_lock);
 242                         mutex_unlock(&c->alloc_sem);
 243                         return -EAGAIN;
 244                 }
 245                 D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
 246                 spin_unlock(&c->erase_completion_lock);
 247                 mutex_unlock(&c->alloc_sem);
 248                 return -EIO;
 249         }
 250
 251         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
 252         D1(if (c->nextblock)
 253            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
 254
 255         if (!jeb->used_size) {
 256                 mutex_unlock(&c->alloc_sem);
 257                 goto eraseit;
 258         }
 259
 260         raw = jeb->gc_node;
 261         gcblock_dirty = jeb->dirty_size;
 262
 263         while(ref_obsolete(raw)) {
 264                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
 265                 raw = ref_next(raw);
 266                 if (unlikely(!raw)) {
 267                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
 268                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
 269                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
 270                         jeb->gc_node = raw;
 271                         spin_unlock(&c->erase_completion_lock);
 272                         mutex_unlock(&c->alloc_sem);
 273                         BUG();
 274                 }
 275         }
 276         jeb->gc_node = raw;
 277
 278         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
 279
 280         if (!raw->next_in_ino) {
 281                 /* Inode-less node. Clean marker, snapshot or something like that */
 282                 spin_unlock(&c->erase_completion_lock);
 283                 if (ref_flags(raw) == REF_PRISTINE) {
 284                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
 285                         jffs2_garbage_collect_pristine(c, NULL, raw);
 286                 } else {
 287                         /* Just mark it obsolete */
 288                         jffs2_mark_node_obsolete(c, raw);
 289                 }
 290                 mutex_unlock(&c->alloc_sem);
 291                 goto eraseit_lock;
 292         }
 293
 294         ic = jffs2_raw_ref_to_ic(raw);
 295
 296 #ifdef CONFIG_JFFS2_FS_XATTR
 297         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
 298          * We can decide whether this node is inode or xattr by ic->class.     */
 299         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
 300             || ic->class == RAWNODE_CLASS_XATTR_REF) {
 301                 spin_unlock(&c->erase_completion_lock);
 302
 303                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
 304                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
 305                 } else {
 306                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
 307                 }
 308                 goto test_gcnode;
 309         }
 310 #endif
 311
 312         /* We need to hold the inocache. Either the erase_completion_lock or
 313            the inocache_lock are sufficient; we trade down since the inocache_lock
 314            causes less contention. */
 315         spin_lock(&c->inocache_lock);
 316
 317         spin_unlock(&c->erase_completion_lock);
 318
 319         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
 320
 321         /* Three possibilities:
 322            1. Inode is already in-core. We must iget it and do proper
 323               updating to its fragtree, etc.
 324            2. Inode is not in-core, node is REF_PRISTINE. We lock the
 325               inocache to prevent a read_inode(), copy the node intact.
 326            3. Inode is not in-core, node is not pristine. We must iget()
 327               and take the slow path.
 328         */
 329
 330         switch(ic->state) {
 331         case INO_STATE_CHECKEDABSENT:
 332                 /* It's been checked, but it's not currently in-core.
 333                    We can just copy any pristine nodes, but have
 334                    to prevent anyone else from doing read_inode() while
 335                    we're at it, so we set the state accordingly */
 336                 if (ref_flags(raw) == REF_PRISTINE)
 337                         ic->state = INO_STATE_GC;
 338                 else {
 339                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
 340                                   ic->ino));
 341                 }
 342                 break;
 343
 344         case INO_STATE_PRESENT:
 345                 /* It's in-core. GC must iget() it. */
 346                 break;
 347
 348         case INO_STATE_UNCHECKED:
 349         case INO_STATE_CHECKING:
 350         case INO_STATE_GC:
 351                 /* Should never happen. We should have finished checking
 352                    by the time we actually start doing any GC, and since
 353                    we're holding the alloc_sem, no other garbage collection
 354                    can happen.
 355                 */
 356                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
 357                        ic->ino, ic->state);
 358                 mutex_unlock(&c->alloc_sem);
 359                 spin_unlock(&c->inocache_lock);
 360                 BUG();
 361
 362         case INO_STATE_READING:
 363                 /* Someone's currently trying to read it. We must wait for
 364                    them to finish and then go through the full iget() route
 365                    to do the GC. However, sometimes read_inode() needs to get
 366                    the alloc_sem() (for marking nodes invalid) so we must
 367                    drop the alloc_sem before sleeping. */
 368
 369                 mutex_unlock(&c->alloc_sem);
 370                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
 371                           ic->ino, ic->state));
 372                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
 373                 /* And because we dropped the alloc_sem we must start again from the
 374                    beginning. Ponder chance of livelock here -- we're returning success
 375                    without actually making any progress.
 376
 377                    Q: What are the chances that the inode is back in INO_STATE_READING
 378                    again by the time we next enter this function? And that this happens
 379                    enough times to cause a real delay?
 380
 381                    A: Small enough that I don't care :)
 382                 */
 383                 return 0;
 384         }
 385
 386         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
 387            node intact, and we don't have to muck about with the fragtree etc.
 388            because we know it's not in-core. If it _was_ in-core, we go through
 389            all the iget() crap anyway */
 390
 391         if (ic->state == INO_STATE_GC) {
 392                 spin_unlock(&c->inocache_lock);
 393
 394                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
 395
 396                 spin_lock(&c->inocache_lock);
 397                 ic->state = INO_STATE_CHECKEDABSENT;
 398                 wake_up(&c->inocache_wq);
 399
 400                 if (ret != -EBADFD) {
 401                         spin_unlock(&c->inocache_lock);
 402                         goto test_gcnode;
 403                 }
 404
 405                 /* Fall through if it wanted us to, with inocache_lock held */
 406         }
 407
 408         /* Prevent the fairly unlikely race where the gcblock is
 409            entirely obsoleted by the final close of a file which had
 410            the only valid nodes in the block, followed by erasure,
 411            followed by freeing of the ic because the erased block(s)
 412            held _all_ the nodes of that inode.... never been seen but
 413            it's vaguely possible. */
 414
 415         inum = ic->ino;
 416         nlink = ic->pino_nlink;
 417         spin_unlock(&c->inocache_lock);
 418
 419         f = jffs2_gc_fetch_inode(c, inum, !nlink);
 420         if (IS_ERR(f)) {
 421                 ret = PTR_ERR(f);
 422                 goto release_sem;
 423         }
 424         if (!f) {
 425                 ret = 0;
 426                 goto release_sem;
 427         }
 428
 429         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
 430
 431         jffs2_gc_release_inode(c, f);
 432
 433  test_gcnode:
 434         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
 435                 /* Eep. This really should never happen. GC is broken */
 436                 printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
 437                 ret = -ENOSPC;
 438         }
 439  release_sem:
 440         mutex_unlock(&c->alloc_sem);
 441
 442  eraseit_lock:
 443         /* If we've finished this block, start it erasing */
 444         spin_lock(&c->erase_completion_lock);
 445
 446  eraseit:
 447         if (c->gcblock && !c->gcblock->used_size) {
 448                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
 449                 /* We're GC'ing an empty block? */
 450                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
 451                 c->gcblock = NULL;
 452                 c->nr_erasing_blocks++;
 453                 jffs2_garbage_collect_trigger(c);
 454         }
 455         spin_unlock(&c->erase_completion_lock);
 456
 457         return ret;
 458 }
 459
 460 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
 461                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
 462 {
 463         struct jffs2_node_frag *frag;
 464         struct jffs2_full_dnode *fn = NULL;
 465         struct jffs2_full_dirent *fd;
 466         uint32_t start = 0, end = 0, nrfrags = 0;
 467         int ret = 0;
 468
 469         mutex_lock(&f->sem);
 470
 471         /* Now we have the lock for this inode. Check that it's still the one at the head
 472            of the list. */
 473
 474         spin_lock(&c->erase_completion_lock);
 475
 476         if (c->gcblock != jeb) {
 477                 spin_unlock(&c->erase_completion_lock);
 478                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
 479                 goto upnout;
 480         }
 481         if (ref_obsolete(raw)) {
 482                 spin_unlock(&c->erase_completion_lock);
 483                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
 484                 /* They'll call again */
 485                 goto upnout;
 486         }
 487         spin_unlock(&c->erase_completion_lock);
 488
 489         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
 490         if (f->metadata && f->metadata->raw == raw) {
 491                 fn = f->metadata;
 492                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
 493                 goto upnout;
 494         }
 495
 496         /* FIXME. Read node and do lookup? */
 497         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
 498                 if (frag->node && frag->node->raw == raw) {
 499                         fn = frag->node;
 500                         end = frag->ofs + frag->size;
 501                         if (!nrfrags++)
 502                                 start = frag->ofs;
 503                         if (nrfrags == frag->node->frags)
 504                                 break; /* We've found them all */
 505                 }
 506         }
 507         if (fn) {
 508                 if (ref_flags(raw) == REF_PRISTINE) {
 509                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
 510                         if (!ret) {
 511                                 /* Urgh. Return it sensibly. */
 512                                 frag->node->raw = f->inocache->nodes;
 513                         }
 514                         if (ret != -EBADFD)
 515                                 goto upnout;
 516                 }
 517                 /* We found a datanode. Do the GC */
 518                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
 519                         /* It crosses a page boundary. Therefore, it must be a hole. */
 520                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
 521                 } else {
 522                         /* It could still be a hole. But we GC the page this way anyway */
 523                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
 524                 }
 525                 goto upnout;
 526         }
 527
 528         /* Wasn't a dnode. Try dirent */
 529         for (fd = f->dents; fd; fd=fd->next) {
 530                 if (fd->raw == raw)
 531                         break;
 532         }
 533
 534         if (fd && fd->ino) {
 535                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
 536         } else if (fd) {
 537                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
 538         } else {
 539                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
 540                        ref_offset(raw), f->inocache->ino);
 541                 if (ref_obsolete(raw)) {
 542                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
 543                 } else {
 544                         jffs2_dbg_dump_node(c, ref_offset(raw));
 545                         BUG();
 546                 }
 547         }
 548  upnout:
 549         mutex_unlock(&f->sem);
 550
 551         return ret;
 552 }
 553
 554 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
 555                                           struct jffs2_inode_cache *ic,
 556                                           struct jffs2_raw_node_ref *raw)
 557 {
 558         union jffs2_node_union *node;
 559         size_t retlen;
 560         int ret;
 561         uint32_t phys_ofs, alloclen;
 562         uint32_t crc, rawlen;
 563         int retried = 0;
 564
 565         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
 566
 567         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
 568
 569         /* Ask for a small amount of space (or the totlen if smaller) because we
 570            don't want to force wastage of the end of a block if splitting would
 571            work. */
 572         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
 573                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
 574
 575         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
 576         /* 'rawlen' is not the exact summary size; it is only an upper estimation */
 577
 578         if (ret)
 579                 return ret;
 580
 581         if (alloclen < rawlen) {
 582                 /* Doesn't fit untouched. We'll go the old route and split it */
 583                 return -EBADFD;
 584         }
 585
 586         node = kmalloc(rawlen, GFP_KERNEL);
 587         if (!node)
 588                 return -ENOMEM;
 589
 590         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
 591         if (!ret && retlen != rawlen)
 592                 ret = -EIO;
 593         if (ret)
 594                 goto out_node;
 595
 596         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
 597         if (je32_to_cpu(node->u.hdr_crc) != crc) {
 598                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 599                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
 600                 goto bail;
 601         }
 602
 603         switch(je16_to_cpu(node->u.nodetype)) {
 604         case JFFS2_NODETYPE_INODE:
 605                 crc = crc32(0, node, sizeof(node->i)-8);
 606                 if (je32_to_cpu(node->i.node_crc) != crc) {
 607                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 608                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
 609                         goto bail;
 610                 }
 611
 612                 if (je32_to_cpu(node->i.dsize)) {
 613                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
 614                         if (je32_to_cpu(node->i.data_crc) != crc) {
 615                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 616                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
 617                                 goto bail;
 618                         }
 619                 }
 620                 break;
 621
 622         case JFFS2_NODETYPE_DIRENT:
 623                 crc = crc32(0, node, sizeof(node->d)-8);
 624                 if (je32_to_cpu(node->d.node_crc) != crc) {
 625                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 626                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
 627                         goto bail;
 628                 }
 629
 630                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
 631                         printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
 632                         goto bail;
 633                 }
 634
 635                 if (node->d.nsize) {
 636                         crc = crc32(0, node->d.name, node->d.nsize);
 637                         if (je32_to_cpu(node->d.name_crc) != crc) {
 638                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
 639                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
 640                                 goto bail;
 641                         }
 642                 }
 643                 break;
 644         default:
 645                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
 646                 if (ic) {
 647                         printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
 648                                ref_offset(raw), je16_to_cpu(node->u.nodetype));
 649                         goto bail;
 650                 }
 651         }
 652
 653         /* OK, all the CRCs are good; this node can just be copied as-is. */
 654  retry:
 655         phys_ofs = write_ofs(c);
 656
 657         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
 658
 659         if (ret || (retlen != rawlen)) {
 660                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
 661                        rawlen, phys_ofs, ret, retlen);
 662                 if (retlen) {
 663                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
 664                 } else {
 665                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
 666                 }
 667                 if (!retried) {
 668                         /* Try to reallocate space and retry */
 669                         uint32_t dummy;
 670                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
 671
 672                         retried = 1;
 673
 674                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
 675
 676                         jffs2_dbg_acct_sanity_check(c,jeb);
 677                         jffs2_dbg_acct_paranoia_check(c, jeb);
 678
 679                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
 680                                                 /* this is not the exact summary size of it,
 681                                                         it is only an upper estimation */
 682
 683                         if (!ret) {
 684                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
 685
 686                                 jffs2_dbg_acct_sanity_check(c,jeb);
 687                                 jffs2_dbg_acct_paranoia_check(c, jeb);
 688
 689                                 goto retry;
 690                         }
 691                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
 692                 }
 693
 694                 if (!ret)
 695                         ret = -EIO;
 696                 goto out_node;
 697         }
 698         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
 699
 700         jffs2_mark_node_obsolete(c, raw);
 701         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
 702
 703  out_node:
 704         kfree(node);
 705         return ret;
 706  bail:
 707         ret = -EBADFD;
 708         goto out_node;
 709 }
 710
 711 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 712                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
 713 {
 714         struct jffs2_full_dnode *new_fn;
 715         struct jffs2_raw_inode ri;
 716         struct jffs2_node_frag *last_frag;
 717         union jffs2_device_node dev;
 718         char *mdata = NULL;
 719         int mdatalen = 0;
 720         uint32_t alloclen, ilen;
 721         int ret;
 722
 723         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
 724             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
 725                 /* For these, we don't actually need to read the old node */
 726                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
 727                 mdata = (char *)&dev;
 728                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
 729         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
 730                 mdatalen = fn->size;
 731                 mdata = kmalloc(fn->size, GFP_KERNEL);
 732                 if (!mdata) {
 733                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
 734                         return -ENOMEM;
 735                 }
 736                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
 737                 if (ret) {
 738                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
 739                         kfree(mdata);
 740                         return ret;
 741                 }
 742                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
 743
 744         }
 745
 746         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
 747                                 JFFS2_SUMMARY_INODE_SIZE);
 748         if (ret) {
 749                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
 750                        sizeof(ri)+ mdatalen, ret);
 751                 goto out;
 752         }
 753
 754         last_frag = frag_last(&f->fragtree);
 755         if (last_frag)
 756                 /* Fetch the inode length from the fragtree rather then
 757                  * from i_size since i_size may have not been updated yet */
 758                 ilen = last_frag->ofs + last_frag->size;
 759         else
 760                 ilen = JFFS2_F_I_SIZE(f);
 761
 762         memset(&ri, 0, sizeof(ri));
 763         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 764         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
 765         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
 766         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
 767
 768         ri.ino = cpu_to_je32(f->inocache->ino);
 769         ri.version = cpu_to_je32(++f->highest_version);
 770         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
 771         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
 772         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
 773         ri.isize = cpu_to_je32(ilen);
 774         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
 775         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
 776         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 777         ri.offset = cpu_to_je32(0);
 778         ri.csize = cpu_to_je32(mdatalen);
 779         ri.dsize = cpu_to_je32(mdatalen);
 780         ri.compr = JFFS2_COMPR_NONE;
 781         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
 782         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
 783
 784         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
 785
 786         if (IS_ERR(new_fn)) {
 787                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
 788                 ret = PTR_ERR(new_fn);
 789                 goto out;
 790         }
 791         jffs2_mark_node_obsolete(c, fn->raw);
 792         jffs2_free_full_dnode(fn);
 793         f->metadata = new_fn;
 794  out:
 795         if (S_ISLNK(JFFS2_F_I_MODE(f)))
 796                 kfree(mdata);
 797         return ret;
 798 }
 799
 800 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 801                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 802 {
 803         struct jffs2_full_dirent *new_fd;
 804         struct jffs2_raw_dirent rd;
 805         uint32_t alloclen;
 806         int ret;
 807
 808         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 809         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
 810         rd.nsize = strlen(fd->name);
 811         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
 812         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
 813
 814         rd.pino = cpu_to_je32(f->inocache->ino);
 815         rd.version = cpu_to_je32(++f->highest_version);
 816         rd.ino = cpu_to_je32(fd->ino);
 817         /* If the times on this inode were set by explicit utime() they can be different,
 818            so refrain from splatting them. */
 819         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
 820                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
 821         else
 822                 rd.mctime = cpu_to_je32(0);
 823         rd.type = fd->type;
 824         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
 825         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
 826
 827         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
 828                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
 829         if (ret) {
 830                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
 831                        sizeof(rd)+rd.nsize, ret);
 832                 return ret;
 833         }
 834         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
 835
 836         if (IS_ERR(new_fd)) {
 837                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
 838                 return PTR_ERR(new_fd);
 839         }
 840         jffs2_add_fd_to_list(c, new_fd, &f->dents);
 841         return 0;
 842 }
 843
 844 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 845                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
 846 {
 847         struct jffs2_full_dirent **fdp = &f->dents;
 848         int found = 0;
 849
 850         /* On a medium where we can't actually mark nodes obsolete
 851            pernamently, such as NAND flash, we need to work out
 852            whether this deletion dirent is still needed to actively
 853            delete a 'real' dirent with the same name that's still
 854            somewhere else on the flash. */
 855         if (!jffs2_can_mark_obsolete(c)) {
 856                 struct jffs2_raw_dirent *rd;
 857                 struct jffs2_raw_node_ref *raw;
 858                 int ret;
 859                 size_t retlen;
 860                 int name_len = strlen(fd->name);
 861                 uint32_t name_crc = crc32(0, fd->name, name_len);
 862                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
 863
 864                 rd = kmalloc(rawlen, GFP_KERNEL);
 865                 if (!rd)
 866                         return -ENOMEM;
 867
 868                 /* Prevent the erase code from nicking the obsolete node refs while
 869                    we're looking at them. I really don't like this extra lock but
 870                    can't see any alternative. Suggestions on a postcard to... */
 871                 mutex_lock(&c->erase_free_sem);
 872
 873                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
 874
 875                         cond_resched();
 876
 877                         /* We only care about obsolete ones */
 878                         if (!(ref_obsolete(raw)))
 879                                 continue;
 880
 881                         /* Any dirent with the same name is going to have the same length... */
 882                         if (ref_totlen(c, NULL, raw) != rawlen)
 883                                 continue;
 884
 885                         /* Doesn't matter if there's one in the same erase block. We're going to
 886                            delete it too at the same time. */
 887                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
 888                                 continue;
 889
 890                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
 891
 892                         /* This is an obsolete node belonging to the same directory, and it's of the right
 893                            length. We need to take a closer look...*/
 894                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
 895                         if (ret) {
 896                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
 897                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
 898                                 continue;
 899                         }
 900                         if (retlen != rawlen) {
 901                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
 902                                        retlen, rawlen, ref_offset(raw));
 903                                 continue;
 904                         }
 905
 906                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
 907                                 continue;
 908
 909                         /* If the name CRC doesn't match, skip */
 910                         if (je32_to_cpu(rd->name_crc) != name_crc)
 911                                 continue;
 912
 913                         /* If the name length doesn't match, or it's another deletion dirent, skip */
 914                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
 915                                 continue;
 916
 917                         /* OK, check the actual name now */
 918                         if (memcmp(rd->name, fd->name, name_len))
 919                                 continue;
 920
 921                         /* OK. The name really does match. There really is still an older node on
 922                            the flash which our deletion dirent obsoletes. So we have to write out
 923                            a new deletion dirent to replace it */
 924                         mutex_unlock(&c->erase_free_sem);
 925
 926                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
 927                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
 928                         kfree(rd);
 929
 930                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
 931                 }
 932
 933                 mutex_unlock(&c->erase_free_sem);
 934                 kfree(rd);
 935         }
 936
 937         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
 938            we should update the metadata node with those times accordingly */
 939
 940         /* No need for it any more. Just mark it obsolete and remove it from the list */
 941         while (*fdp) {
 942                 if ((*fdp) == fd) {
 943                         found = 1;
 944                         *fdp = fd->next;
 945                         break;
 946                 }
 947                 fdp = &(*fdp)->next;
 948         }
 949         if (!found) {
 950                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
 951         }
 952         jffs2_mark_node_obsolete(c, fd->raw);
 953         jffs2_free_full_dirent(fd);
 954         return 0;
 955 }
 956
 957 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
 958                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
 959                                       uint32_t start, uint32_t end)
 960 {
 961         struct jffs2_raw_inode ri;
 962         struct jffs2_node_frag *frag;
 963         struct jffs2_full_dnode *new_fn;
 964         uint32_t alloclen, ilen;
 965         int ret;
 966
 967         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
 968                   f->inocache->ino, start, end));
 969
 970         memset(&ri, 0, sizeof(ri));
 971
 972         if(fn->frags > 1) {
 973                 size_t readlen;
 974                 uint32_t crc;
 975                 /* It's partially obsoleted by a later write. So we have to
 976                    write it out again with the _same_ version as before */
 977                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
 978                 if (readlen != sizeof(ri) || ret) {
 979                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
 980                         goto fill;
 981                 }
 982                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
 983                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
 984                                ref_offset(fn->raw),
 985                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
 986                         return -EIO;
 987                 }
 988                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
 989                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
 990                                ref_offset(fn->raw),
 991                                je32_to_cpu(ri.totlen), sizeof(ri));
 992                         return -EIO;
 993                 }
 994                 crc = crc32(0, &ri, sizeof(ri)-8);
 995                 if (crc != je32_to_cpu(ri.node_crc)) {
 996                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
 997                                ref_offset(fn->raw),
 998                                je32_to_cpu(ri.node_crc), crc);
 999                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
1000                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1001                                start, end, f->inocache->ino);
1002                         goto fill;
1003                 }
1004                 if (ri.compr != JFFS2_COMPR_ZERO) {
1005                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1006                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1007                                start, end, f->inocache->ino);
1008                         goto fill;
1009                 }
1010         } else {
1011         fill:
1012                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1013                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1014                 ri.totlen = cpu_to_je32(sizeof(ri));
1015                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1016
1017                 ri.ino = cpu_to_je32(f->inocache->ino);
1018                 ri.version = cpu_to_je32(++f->highest_version);
1019                 ri.offset = cpu_to_je32(start);
1020                 ri.dsize = cpu_to_je32(end - start);
1021                 ri.csize = cpu_to_je32(0);
1022                 ri.compr = JFFS2_COMPR_ZERO;
1023         }
1024
1025         frag = frag_last(&f->fragtree);
1026         if (frag)
1027                 /* Fetch the inode length from the fragtree rather then
1028                  * from i_size since i_size may have not been updated yet */
1029                 ilen = frag->ofs + frag->size;
1030         else
1031                 ilen = JFFS2_F_I_SIZE(f);
1032
1033         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1034         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1035         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1036         ri.isize = cpu_to_je32(ilen);
1037         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1038         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1039         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1040         ri.data_crc = cpu_to_je32(0);
1041         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1042
1043         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1044                                      JFFS2_SUMMARY_INODE_SIZE);
1045         if (ret) {
1046                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1047                        sizeof(ri), ret);
1048                 return ret;
1049         }
1050         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1051
1052         if (IS_ERR(new_fn)) {
1053                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1054                 return PTR_ERR(new_fn);
1055         }
1056         if (je32_to_cpu(ri.version) == f->highest_version) {
1057                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1058                 if (f->metadata) {
1059                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1060                         jffs2_free_full_dnode(f->metadata);
1061                         f->metadata = NULL;
1062                 }
1063                 return 0;
1064         }
1065
1066         /*
1067          * We should only get here in the case where the node we are
1068          * replacing had more than one frag, so we kept the same version
1069          * number as before. (Except in case of error -- see 'goto fill;'
1070          * above.)
1071          */
1072         D1(if(unlikely(fn->frags <= 1)) {
1073                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1074                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1075                        je32_to_cpu(ri.ino));
1076         });
1077
1078         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1079         mark_ref_normal(new_fn->raw);
1080
1081         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1082              frag; frag = frag_next(frag)) {
1083                 if (frag->ofs > fn->size + fn->ofs)
1084                         break;
1085                 if (frag->node == fn) {
1086                         frag->node = new_fn;
1087                         new_fn->frags++;
1088                         fn->frags--;
1089                 }
1090         }
1091         if (fn->frags) {
1092                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1093                 BUG();
1094         }
1095         if (!new_fn->frags) {
1096                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1097                 BUG();
1098         }
1099
1100         jffs2_mark_node_obsolete(c, fn->raw);
1101         jffs2_free_full_dnode(fn);
1102
1103         return 0;
1104 }
1105
1106 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1107                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1108                                        uint32_t start, uint32_t end)
1109 {
1110         struct jffs2_full_dnode *new_fn;
1111         struct jffs2_raw_inode ri;
1112         uint32_t alloclen, offset, orig_end, orig_start;
1113         int ret = 0;
1114         unsigned char *comprbuf = NULL, *writebuf;
1115         unsigned long pg;
1116         unsigned char *pg_ptr;
1117
1118         memset(&ri, 0, sizeof(ri));
1119
1120         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1121                   f->inocache->ino, start, end));
1122
1123         orig_end = end;
1124         orig_start = start;
1125
1126         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1127                 /* Attempt to do some merging. But only expand to cover logically
1128                    adjacent frags if the block containing them is already considered
1129                    to be dirty. Otherwise we end up with GC just going round in
1130                    circles dirtying the nodes it already wrote out, especially
1131                    on NAND where we have small eraseblocks and hence a much higher
1132                    chance of nodes having to be split to cross boundaries. */
1133
1134                 struct jffs2_node_frag *frag;
1135                 uint32_t min, max;
1136
1137                 min = start & ~(PAGE_CACHE_SIZE-1);
1138                 max = min + PAGE_CACHE_SIZE;
1139
1140                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1141
1142                 /* BUG_ON(!frag) but that'll happen anyway... */
1143
1144                 BUG_ON(frag->ofs != start);
1145
1146                 /* First grow down... */
1147                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1148
1149                         /* If the previous frag doesn't even reach the beginning, there's
1150                            excessive fragmentation. Just merge. */
1151                         if (frag->ofs > min) {
1152                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1153                                           frag->ofs, frag->ofs+frag->size));
1154                                 start = frag->ofs;
1155                                 continue;
1156                         }
1157                         /* OK. This frag holds the first byte of the page. */
1158                         if (!frag->node || !frag->node->raw) {
1159                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1160                                           frag->ofs, frag->ofs+frag->size));
1161                                 break;
1162                         } else {
1163
1164                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1165                                    in a block which is still considered clean? If so, don't obsolete it.
1166                                    If not, cover it anyway. */
1167
1168                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1169                                 struct jffs2_eraseblock *jeb;
1170
1171                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1172
1173                                 if (jeb == c->gcblock) {
1174                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1175                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1176                                         start = frag->ofs;
1177                                         break;
1178                                 }
1179                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1180                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1181                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1182                                         break;
1183                                 }
1184
1185                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1186                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1187                                 start = frag->ofs;
1188                                 break;
1189                         }
1190                 }
1191
1192                 /* ... then up */
1193
1194                 /* Find last frag which is actually part of the node we're to GC. */
1195                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1196
1197                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1198
1199                         /* If the previous frag doesn't even reach the beginning, there's lots
1200                            of fragmentation. Just merge. */
1201                         if (frag->ofs+frag->size < max) {
1202                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1203                                           frag->ofs, frag->ofs+frag->size));
1204                                 end = frag->ofs + frag->size;
1205                                 continue;
1206                         }
1207
1208                         if (!frag->node || !frag->node->raw) {
1209                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1210                                           frag->ofs, frag->ofs+frag->size));
1211                                 break;
1212                         } else {
1213
1214                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1215                                    in a block which is still considered clean? If so, don't obsolete it.
1216                                    If not, cover it anyway. */
1217
1218                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1219                                 struct jffs2_eraseblock *jeb;
1220
1221                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1222
1223                                 if (jeb == c->gcblock) {
1224                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1225                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1226                                         end = frag->ofs + frag->size;
1227                                         break;
1228                                 }
1229                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1230                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1231                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1232                                         break;
1233                                 }
1234
1235                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1236                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1237                                 end = frag->ofs + frag->size;
1238                                 break;
1239                         }
1240                 }
1241                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1242                           orig_start, orig_end, start, end));
1243
1244                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1245                 BUG_ON(end < orig_end);
1246                 BUG_ON(start > orig_start);
1247         }
1248
1249         /* First, use readpage() to read the appropriate page into the page cache */
1250         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1251          *    triggered garbage collection in the first place?
1252          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1253          *    page OK. We'll actually write it out again in commit_write, which is a little
1254          *    suboptimal, but at least we're correct.
1255          */
1256         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1257
1258         if (IS_ERR(pg_ptr)) {
1259                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1260                 return PTR_ERR(pg_ptr);
1261         }
1262
1263         offset = start;
1264         while(offset < orig_end) {
1265                 uint32_t datalen;
1266                 uint32_t cdatalen;
1267                 uint16_t comprtype = JFFS2_COMPR_NONE;
1268
1269                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1270                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1271
1272                 if (ret) {
1273                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1274                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1275                         break;
1276                 }
1277                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1278                 datalen = end - offset;
1279
1280                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1281
1282                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1283
1284                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1285                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1286                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1287                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1288
1289                 ri.ino = cpu_to_je32(f->inocache->ino);
1290                 ri.version = cpu_to_je32(++f->highest_version);
1291                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1292                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1293                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1294                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1295                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1296                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1297                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1298                 ri.offset = cpu_to_je32(offset);
1299                 ri.csize = cpu_to_je32(cdatalen);
1300                 ri.dsize = cpu_to_je32(datalen);
1301                 ri.compr = comprtype & 0xff;
1302                 ri.usercompr = (comprtype >> 8) & 0xff;
1303                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1304                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1305
1306                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1307
1308                 jffs2_free_comprbuf(comprbuf, writebuf);
1309
1310                 if (IS_ERR(new_fn)) {
1311                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1312                         ret = PTR_ERR(new_fn);
1313                         break;
1314                 }
1315                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1316                 offset += datalen;
1317                 if (f->metadata) {
1318                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1319                         jffs2_free_full_dnode(f->metadata);
1320                         f->metadata = NULL;
1321                 }
1322         }
1323
1324         jffs2_gc_release_page(c, pg_ptr, &pg);
1325         return ret;
1326 }