fs/jffs2.org/wbuf.c

   1 /*
   2  * JFFS2 -- Journalling Flash File System, Version 2.
   3  *
   4  * Copyright (C) 2001-2003 Red Hat, Inc.
   5  * Copyright (C) 2004 Thomas Gleixner <tglx@linutronix.de>
   6  *
   7  * Created by David Woodhouse <dwmw2@redhat.com>
   8  * Modified debugged and enhanced by Thomas Gleixner <tglx@linutronix.de>
   9  *
  10  * For licensing information, see the file 'LICENCE' in this directory.
  11  *
  12  * $Id: wbuf.c,v 1.70 2004/07/13 08:58:25 dwmw2 Exp $
  13  *
  14  */
  15
  16 #include <linux/kernel.h>
  17 #include <linux/slab.h>
  18 #include <linux/mtd/mtd.h>
  19 #include <linux/crc32.h>
  20 #include <linux/mtd/nand.h>
  21 #include "nodelist.h"
  22
  23 /* For testing write failures */
  24 #undef BREAKME
  25 #undef BREAKMEHEADER
  26
  27 #ifdef BREAKME
  28 static unsigned char *brokenbuf;
  29 #endif
  30
  31 /* max. erase failures before we mark a block bad */
  32 #define MAX_ERASE_FAILURES      2
  33
  34 /* two seconds timeout for timed wbuf-flushing */
  35 #define WBUF_FLUSH_TIMEOUT      2 * HZ
  36
  37 struct jffs2_inodirty {
  38         uint32_t ino;
  39         struct jffs2_inodirty *next;
  40 };
  41
  42 static struct jffs2_inodirty inodirty_nomem;
  43
  44 static int jffs2_wbuf_pending_for_ino(struct jffs2_sb_info *c, uint32_t ino)
  45 {
  46         struct jffs2_inodirty *this = c->wbuf_inodes;
  47
  48         /* If a malloc failed, consider _everything_ dirty */
  49         if (this == &inodirty_nomem)
  50                 return 1;
  51
  52         /* If ino == 0, _any_ non-GC writes mean 'yes' */
  53         if (this && !ino)
  54                 return 1;
  55
  56         /* Look to see if the inode in question is pending in the wbuf */
  57         while (this) {
  58                 if (this->ino == ino)
  59                         return 1;
  60                 this = this->next;
  61         }
  62         return 0;
  63 }
  64
  65 static void jffs2_clear_wbuf_ino_list(struct jffs2_sb_info *c)
  66 {
  67         struct jffs2_inodirty *this;
  68
  69         this = c->wbuf_inodes;
  70
  71         if (this != &inodirty_nomem) {
  72                 while (this) {
  73                         struct jffs2_inodirty *next = this->next;
  74                         kfree(this);
  75                         this = next;
  76                 }
  77         }
  78         c->wbuf_inodes = NULL;
  79 }
  80
  81 static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino)
  82 {
  83         struct jffs2_inodirty *new;
  84
  85         /* Mark the superblock dirty so that kupdated will flush... */
  86         OFNI_BS_2SFFJ(c)->s_dirt = 1;
  87
  88         if (jffs2_wbuf_pending_for_ino(c, ino))
  89                 return;
  90
  91         new = kmalloc(sizeof(*new), GFP_KERNEL);
  92         if (!new) {
  93                 D1(printk(KERN_DEBUG "No memory to allocate inodirty. Fallback to all considered dirty\n"));
  94                 jffs2_clear_wbuf_ino_list(c);
  95                 c->wbuf_inodes = &inodirty_nomem;
  96                 return;
  97         }
  98         new->ino = ino;
  99         new->next = c->wbuf_inodes;
 100         c->wbuf_inodes = new;
 101         return;
 102 }
 103
 104 static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
 105 {
 106         struct list_head *this, *next;
 107         static int n;
 108
 109         if (list_empty(&c->erasable_pending_wbuf_list))
 110                 return;
 111
 112         list_for_each_safe(this, next, &c->erasable_pending_wbuf_list) {
 113                 struct jffs2_eraseblock *jeb = list_entry(this, struct jffs2_eraseblock, list);
 114
 115                 D1(printk(KERN_DEBUG "Removing eraseblock at 0x%08x from erasable_pending_wbuf_list...\n", jeb->offset));
 116                 list_del(this);
 117                 if ((jiffies + (n++)) & 127) {
 118                         /* Most of the time, we just erase it immediately. Otherwise we
 119                            spend ages scanning it on mount, etc. */
 120                         D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n"));
 121                         list_add_tail(&jeb->list, &c->erase_pending_list);
 122                         c->nr_erasing_blocks++;
 123                         jffs2_erase_pending_trigger(c);
 124                 } else {
 125                         /* Sometimes, however, we leave it elsewhere so it doesn't get
 126                            immediately reused, and we spread the load a bit. */
 127                         D1(printk(KERN_DEBUG "...and adding to erasable_list\n"));
 128                         list_add_tail(&jeb->list, &c->erasable_list);
 129                 }
 130         }
 131 }
 132
 133 /* Recover from failure to write wbuf. Recover the nodes up to the
 134  * wbuf, not the one which we were starting to try to write. */
 135
 136 static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
 137 {
 138         struct jffs2_eraseblock *jeb, *new_jeb;
 139         struct jffs2_raw_node_ref **first_raw, **raw;
 140         size_t retlen;
 141         int ret;
 142         unsigned char *buf;
 143         uint32_t start, end, ofs, len;
 144
 145         spin_lock(&c->erase_completion_lock);
 146
 147         jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
 148
 149         D1(printk("About to refile bad block at %08x\n", jeb->offset));
 150
 151         D2(jffs2_dump_block_lists(c));
 152         /* File the existing block on the bad_used_list.... */
 153         if (c->nextblock == jeb)
 154                 c->nextblock = NULL;
 155         else /* Not sure this should ever happen... need more coffee */
 156                 list_del(&jeb->list);
 157         if (jeb->first_node) {
 158                 D1(printk("Refiling block at %08x to bad_used_list\n", jeb->offset));
 159                 list_add(&jeb->list, &c->bad_used_list);
 160         } else {
 161                 BUG();
 162                 /* It has to have had some nodes or we couldn't be here */
 163                 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset));
 164                 list_add(&jeb->list, &c->erase_pending_list);
 165                 c->nr_erasing_blocks++;
 166                 jffs2_erase_pending_trigger(c);
 167         }
 168         D2(jffs2_dump_block_lists(c));
 169
 170         /* Adjust its size counts accordingly */
 171         c->wasted_size += jeb->free_size;
 172         c->free_size -= jeb->free_size;
 173         jeb->wasted_size += jeb->free_size;
 174         jeb->free_size = 0;
 175
 176         ACCT_SANITY_CHECK(c,jeb);
 177         D1(ACCT_PARANOIA_CHECK(jeb));
 178
 179         /* Find the first node to be recovered, by skipping over every
 180            node which ends before the wbuf starts, or which is obsolete. */
 181         first_raw = &jeb->first_node;
 182         while (*first_raw &&
 183                (ref_obsolete(*first_raw) ||
 184                 (ref_offset(*first_raw)+ref_totlen(c, jeb, *first_raw)) < c->wbuf_ofs)) {
 185                 D1(printk(KERN_DEBUG "Skipping node at 0x%08x(%d)-0x%08x which is either before 0x%08x or obsolete\n",
 186                           ref_offset(*first_raw), ref_flags(*first_raw),
 187                           (ref_offset(*first_raw) + ref_totlen(c, jeb, *first_raw)),
 188                           c->wbuf_ofs));
 189                 first_raw = &(*first_raw)->next_phys;
 190         }
 191
 192         if (!*first_raw) {
 193                 /* All nodes were obsolete. Nothing to recover. */
 194                 D1(printk(KERN_DEBUG "No non-obsolete nodes to be recovered. Just filing block bad\n"));
 195                 spin_unlock(&c->erase_completion_lock);
 196                 return;
 197         }
 198
 199         start = ref_offset(*first_raw);
 200         end = ref_offset(*first_raw) + ref_totlen(c, jeb, *first_raw);
 201
 202         /* Find the last node to be recovered */
 203         raw = first_raw;
 204         while ((*raw)) {
 205                 if (!ref_obsolete(*raw))
 206                         end = ref_offset(*raw) + ref_totlen(c, jeb, *raw);
 207
 208                 raw = &(*raw)->next_phys;
 209         }
 210         spin_unlock(&c->erase_completion_lock);
 211
 212         D1(printk(KERN_DEBUG "wbuf recover %08x-%08x\n", start, end));
 213
 214         buf = NULL;
 215         if (start < c->wbuf_ofs) {
 216                 /* First affected node was already partially written.
 217                  * Attempt to reread the old data into our buffer. */
 218
 219                 buf = kmalloc(end - start, GFP_KERNEL);
 220                 if (!buf) {
 221                         printk(KERN_CRIT "Malloc failure in wbuf recovery. Data loss ensues.\n");
 222
 223                         goto read_failed;
 224                 }
 225
 226                 /* Do the read... */
 227                 ret = c->mtd->read_ecc(c->mtd, start, c->wbuf_ofs - start, &retlen, buf, NULL, c->oobinfo);
 228                 if (ret == -EIO && retlen == c->wbuf_ofs - start) {
 229                         /* ECC recovered */
 230                         ret = 0;
 231                 }
 232                 if (ret || retlen != c->wbuf_ofs - start) {
 233                         printk(KERN_CRIT "Old data are already lost in wbuf recovery. Data loss ensues.\n");
 234
 235                         kfree(buf);
 236                         buf = NULL;
 237                 read_failed:
 238                         first_raw = &(*first_raw)->next_phys;
 239                         /* If this was the only node to be recovered, give up */
 240                         if (!(*first_raw))
 241                                 return;
 242
 243                         /* It wasn't. Go on and try to recover nodes complete in the wbuf */
 244                         start = ref_offset(*first_raw);
 245                 } else {
 246                         /* Read succeeded. Copy the remaining data from the wbuf */
 247                         memcpy(buf + (c->wbuf_ofs - start), c->wbuf, end - c->wbuf_ofs);
 248                 }
 249         }
 250         /* OK... we're to rewrite (end-start) bytes of data from first_raw onwards.
 251            Either 'buf' contains the data, or we find it in the wbuf */
 252
 253
 254         /* ... and get an allocation of space from a shiny new block instead */
 255         ret = jffs2_reserve_space_gc(c, end-start, &ofs, &len);
 256         if (ret) {
 257                 printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n");
 258                 if (buf)
 259                         kfree(buf);
 260                 return;
 261         }
 262         if (end-start >= c->wbuf_pagesize) {
 263                 /* Need to do another write immediately. This, btw,
 264                  means that we'll be writing from 'buf' and not from
 265                  the wbuf. Since if we're writing from the wbuf there
 266                  won't be more than a wbuf full of data, now will
 267                  there? :) */
 268
 269                 uint32_t towrite = (end-start) - ((end-start)%c->wbuf_pagesize);
 270
 271                 D1(printk(KERN_DEBUG "Write 0x%x bytes at 0x%08x in wbuf recover\n",
 272                           towrite, ofs));
 273
 274 #ifdef BREAKMEHEADER
 275                 static int breakme;
 276                 if (breakme++ == 20) {
 277                         printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs);
 278                         breakme = 0;
 279                         c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen,
 280                                           brokenbuf, NULL, c->oobinfo);
 281                         ret = -EIO;
 282                 } else
 283 #endif
 284                         ret = c->mtd->write_ecc(c->mtd, ofs, towrite, &retlen,
 285                                                 buf, NULL, c->oobinfo);
 286
 287                 if (ret || retlen != towrite) {
 288                         /* Argh. We tried. Really we did. */
 289                         printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n");
 290                         kfree(buf);
 291
 292                         if (retlen) {
 293                                 struct jffs2_raw_node_ref *raw2;
 294
 295                                 raw2 = jffs2_alloc_raw_node_ref();
 296                                 if (!raw2)
 297                                         return;
 298
 299                                 raw2->flash_offset = ofs | REF_OBSOLETE;
 300                                 raw2->__totlen = ref_totlen(c, jeb, *first_raw);
 301                                 raw2->next_phys = NULL;
 302                                 raw2->next_in_ino = NULL;
 303
 304                                 jffs2_add_physical_node_ref(c, raw2);
 305                         }
 306                         return;
 307                 }
 308                 printk(KERN_NOTICE "Recovery of wbuf succeeded to %08x\n", ofs);
 309
 310                 c->wbuf_len = (end - start) - towrite;
 311                 c->wbuf_ofs = ofs + towrite;
 312                 memcpy(c->wbuf, buf + towrite, c->wbuf_len);
 313                 /* Don't muck about with c->wbuf_inodes. False positives are harmless. */
 314
 315                 kfree(buf);
 316         } else {
 317                 /* OK, now we're left with the dregs in whichever buffer we're using */
 318                 if (buf) {
 319                         memcpy(c->wbuf, buf, end-start);
 320                         kfree(buf);
 321                 } else {
 322                         memmove(c->wbuf, c->wbuf + (start - c->wbuf_ofs), end - start);
 323                 }
 324                 c->wbuf_ofs = ofs;
 325                 c->wbuf_len = end - start;
 326         }
 327
 328         /* Now sort out the jffs2_raw_node_refs, moving them from the old to the next block */
 329         new_jeb = &c->blocks[ofs / c->sector_size];
 330
 331         spin_lock(&c->erase_completion_lock);
 332         if (new_jeb->first_node) {
 333                 /* Odd, but possible with ST flash later maybe */
 334                 new_jeb->last_node->next_phys = *first_raw;
 335         } else {
 336                 new_jeb->first_node = *first_raw;
 337         }
 338
 339         raw = first_raw;
 340         while (*raw) {
 341                 uint32_t rawlen = ref_totlen(c, jeb, *raw);
 342
 343                 D1(printk(KERN_DEBUG "Refiling block of %08x at %08x(%d) to %08x\n",
 344                           rawlen, ref_offset(*raw), ref_flags(*raw), ofs));
 345
 346                 if (ref_obsolete(*raw)) {
 347                         /* Shouldn't really happen much */
 348                         new_jeb->dirty_size += rawlen;
 349                         new_jeb->free_size -= rawlen;
 350                         c->dirty_size += rawlen;
 351                 } else {
 352                         new_jeb->used_size += rawlen;
 353                         new_jeb->free_size -= rawlen;
 354                         jeb->dirty_size += rawlen;
 355                         jeb->used_size  -= rawlen;
 356                         c->dirty_size += rawlen;
 357                 }
 358                 c->free_size -= rawlen;
 359                 (*raw)->flash_offset = ofs | ref_flags(*raw);
 360                 ofs += rawlen;
 361                 new_jeb->last_node = *raw;
 362
 363                 raw = &(*raw)->next_phys;
 364         }
 365
 366         /* Fix up the original jeb now it's on the bad_list */
 367         *first_raw = NULL;
 368         if (first_raw == &jeb->first_node) {
 369                 jeb->last_node = NULL;
 370                 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
 371                 list_del(&jeb->list);
 372                 list_add(&jeb->list, &c->erase_pending_list);
 373                 c->nr_erasing_blocks++;
 374                 jffs2_erase_pending_trigger(c);
 375         }
 376         else
 377                 jeb->last_node = container_of(first_raw, struct jffs2_raw_node_ref, next_phys);
 378
 379         ACCT_SANITY_CHECK(c,jeb);
 380         D1(ACCT_PARANOIA_CHECK(jeb));
 381
 382         ACCT_SANITY_CHECK(c,new_jeb);
 383         D1(ACCT_PARANOIA_CHECK(new_jeb));
 384
 385         spin_unlock(&c->erase_completion_lock);
 386
 387         D1(printk(KERN_DEBUG "wbuf recovery completed OK\n"));
 388 }
 389
 390 /* Meaning of pad argument:
 391    0: Do not pad. Probably pointless - we only ever use this when we can't pad anyway.
 392    1: Pad, do not adjust nextblock free_size
 393    2: Pad, adjust nextblock free_size
 394 */
 395 static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
 396 {
 397         int ret;
 398         size_t retlen;
 399
 400         /* Nothing to do if not NAND flash. In particular, we shouldn't
 401            del_timer() the timer we never initialised. */
 402         if (jffs2_can_mark_obsolete(c))
 403                 return 0;
 404
 405         if (!down_trylock(&c->alloc_sem)) {
 406                 up(&c->alloc_sem);
 407                 printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n");
 408                 BUG();
 409         }
 410
 411         if(!c->wbuf || !c->wbuf_len)
 412                 return 0;
 413
 414         /* claim remaining space on the page
 415            this happens, if we have a change to a new block,
 416            or if fsync forces us to flush the writebuffer.
 417            if we have a switch to next page, we will not have
 418            enough remaining space for this.
 419         */
 420         if (pad) {
 421                 c->wbuf_len = PAD(c->wbuf_len);
 422
 423                 if ( c->wbuf_len + sizeof(struct jffs2_unknown_node) < c->wbuf_pagesize) {
 424                         struct jffs2_unknown_node *padnode = (void *)(c->wbuf + c->wbuf_len);
 425                         padnode->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 426                         padnode->nodetype = cpu_to_je16(JFFS2_NODETYPE_PADDING);
 427                         padnode->totlen = cpu_to_je32(c->wbuf_pagesize - c->wbuf_len);
 428                         padnode->hdr_crc = cpu_to_je32(crc32(0, padnode, sizeof(*padnode)-4));
 429                 } else {
 430                         /* Pad with JFFS2_DIRTY_BITMASK */
 431                         memset(c->wbuf + c->wbuf_len, 0, c->wbuf_pagesize - c->wbuf_len);
 432                 }
 433         }
 434         /* else jffs2_flash_writev has actually filled in the rest of the
 435            buffer for us, and will deal with the node refs etc. later. */
 436
 437 #ifdef BREAKME
 438         static int breakme;
 439         if (breakme++ == 20) {
 440                 printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs);
 441                 breakme = 0;
 442                 c->mtd->write_ecc(c->mtd, c->wbuf_ofs, c->wbuf_pagesize,
 443                                         &retlen, brokenbuf, NULL, c->oobinfo);
 444                 ret = -EIO;
 445         } else
 446 #endif
 447         ret = c->mtd->write_ecc(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf, NULL, c->oobinfo);
 448
 449
 450         if (ret || retlen != c->wbuf_pagesize) {
 451                 if (ret)
 452                         printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n",ret);
 453                 else {
 454                         printk(KERN_WARNING "jffs2_flush_wbuf(): Write was short: %zd instead of %d\n",
 455                                 retlen, c->wbuf_pagesize);
 456                         ret = -EIO;
 457                 }
 458
 459                 jffs2_wbuf_recover(c);
 460
 461                 return ret;
 462         }
 463
 464         spin_lock(&c->erase_completion_lock);
 465
 466         /* Adjust free size of the block if we padded. */
 467         if (pad) {
 468                 struct jffs2_eraseblock *jeb;
 469
 470                 jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
 471
 472                 D1(printk(KERN_DEBUG "jffs2_flush_wbuf() adjusting free_size of %sblock at %08x\n",
 473                           (jeb==c->nextblock)?"next":"", jeb->offset));
 474
 475                 /* wbuf_pagesize - wbuf_len is the amount of space that's to be
 476                    padded. If there is less free space in the block than that,
 477                    something screwed up */
 478                 if (jeb->free_size < (c->wbuf_pagesize - c->wbuf_len)) {
 479                         printk(KERN_CRIT "jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n",
 480                                c->wbuf_ofs, c->wbuf_len, c->wbuf_pagesize-c->wbuf_len);
 481                         printk(KERN_CRIT "jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n",
 482                                jeb->offset, jeb->free_size);
 483                         BUG();
 484                 }
 485                 jeb->free_size -= (c->wbuf_pagesize - c->wbuf_len);
 486                 c->free_size -= (c->wbuf_pagesize - c->wbuf_len);
 487                 jeb->wasted_size += (c->wbuf_pagesize - c->wbuf_len);
 488                 c->wasted_size += (c->wbuf_pagesize - c->wbuf_len);
 489         }
 490
 491         /* Stick any now-obsoleted blocks on the erase_pending_list */
 492         jffs2_refile_wbuf_blocks(c);
 493         jffs2_clear_wbuf_ino_list(c);
 494         spin_unlock(&c->erase_completion_lock);
 495
 496         memset(c->wbuf,0xff,c->wbuf_pagesize);
 497         /* adjust write buffer offset, else we get a non contiguous write bug */
 498         c->wbuf_ofs += c->wbuf_pagesize;
 499         c->wbuf_len = 0;
 500         return 0;
 501 }
 502
 503 /* Trigger garbage collection to flush the write-buffer.
 504    If ino arg is zero, do it if _any_ real (i.e. not GC) writes are
 505    outstanding. If ino arg non-zero, do it only if a write for the
 506    given inode is outstanding. */
 507 int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
 508 {
 509         uint32_t old_wbuf_ofs;
 510         uint32_t old_wbuf_len;
 511         int ret = 0;
 512
 513         D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() called for ino #%u...\n", ino));
 514
 515         down(&c->alloc_sem);
 516         if (!jffs2_wbuf_pending_for_ino(c, ino)) {
 517                 D1(printk(KERN_DEBUG "Ino #%d not pending in wbuf. Returning\n", ino));
 518                 up(&c->alloc_sem);
 519                 return 0;
 520         }
 521
 522         old_wbuf_ofs = c->wbuf_ofs;
 523         old_wbuf_len = c->wbuf_len;
 524
 525         if (c->unchecked_size) {
 526                 /* GC won't make any progress for a while */
 527                 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() padding. Not finished checking\n"));
 528                 ret = __jffs2_flush_wbuf(c, 2);
 529         } else while (old_wbuf_len &&
 530                       old_wbuf_ofs == c->wbuf_ofs) {
 531
 532                 up(&c->alloc_sem);
 533
 534                 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() calls gc pass\n"));
 535
 536                 ret = jffs2_garbage_collect_pass(c);
 537                 if (ret) {
 538                         /* GC failed. Flush it with padding instead */
 539                         down(&c->alloc_sem);
 540                         ret = __jffs2_flush_wbuf(c, 2);
 541                         break;
 542                 }
 543                 down(&c->alloc_sem);
 544         }
 545
 546         D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() ends...\n"));
 547
 548         up(&c->alloc_sem);
 549         return ret;
 550 }
 551
 552 /* Pad write-buffer to end and write it, wasting space. */
 553 int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c)
 554 {
 555         return __jffs2_flush_wbuf(c, 1);
 556 }
 557
 558
 559 #define PAGE_DIV(x) ( (x) & (~(c->wbuf_pagesize - 1)) )
 560 #define PAGE_MOD(x) ( (x) & (c->wbuf_pagesize - 1) )
 561 int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs, unsigned long count, loff_t to, size_t *retlen, uint32_t ino)
 562 {
 563         struct kvec outvecs[3];
 564         uint32_t totlen = 0;
 565         uint32_t split_ofs = 0;
 566         uint32_t old_totlen;
 567         int ret, splitvec = -1;
 568         int invec, outvec;
 569         size_t wbuf_retlen;
 570         unsigned char *wbuf_ptr;
 571         size_t donelen = 0;
 572         uint32_t outvec_to = to;
 573
 574         /* If not NAND flash, don't bother */
 575         if (!c->wbuf)
 576                 return jffs2_flash_direct_writev(c, invecs, count, to, retlen);
 577
 578         /* If wbuf_ofs is not initialized, set it to target address */
 579         if (c->wbuf_ofs == 0xFFFFFFFF) {
 580                 c->wbuf_ofs = PAGE_DIV(to);
 581                 c->wbuf_len = PAGE_MOD(to);
 582                 memset(c->wbuf,0xff,c->wbuf_pagesize);
 583         }
 584
 585         /* Sanity checks on target address.
 586            It's permitted to write at PAD(c->wbuf_len+c->wbuf_ofs),
 587            and it's permitted to write at the beginning of a new
 588            erase block. Anything else, and you die.
 589            New block starts at xxx000c (0-b = block header)
 590         */
 591         if ( (to & ~(c->sector_size-1)) != (c->wbuf_ofs & ~(c->sector_size-1)) ) {
 592                 /* It's a write to a new block */
 593                 if (c->wbuf_len) {
 594                         D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx causes flush of wbuf at 0x%08x\n", (unsigned long)to, c->wbuf_ofs));
 595                         ret = jffs2_flush_wbuf_pad(c);
 596                         if (ret) {
 597                                 /* the underlying layer has to check wbuf_len to do the cleanup */
 598                                 D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret));
 599                                 *retlen = 0;
 600                                 return ret;
 601                         }
 602                 }
 603                 /* set pointer to new block */
 604                 c->wbuf_ofs = PAGE_DIV(to);
 605                 c->wbuf_len = PAGE_MOD(to);
 606         }
 607
 608         if (to != PAD(c->wbuf_ofs + c->wbuf_len)) {
 609                 /* We're not writing immediately after the writebuffer. Bad. */
 610                 printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write to %08lx\n", (unsigned long)to);
 611                 if (c->wbuf_len)
 612                         printk(KERN_CRIT "wbuf was previously %08x-%08x\n",
 613                                           c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len);
 614                 BUG();
 615         }
 616
 617         /* Note outvecs[3] above. We know count is never greater than 2 */
 618         if (count > 2) {
 619                 printk(KERN_CRIT "jffs2_flash_writev(): count is %ld\n", count);
 620                 BUG();
 621         }
 622
 623         invec = 0;
 624         outvec = 0;
 625
 626
 627         /* Fill writebuffer first, if already in use */
 628         if (c->wbuf_len) {
 629                 uint32_t invec_ofs = 0;
 630
 631                 /* adjust alignment offset */
 632                 if (c->wbuf_len != PAGE_MOD(to)) {
 633                         c->wbuf_len = PAGE_MOD(to);
 634                         /* take care of alignment to next page */
 635                         if (!c->wbuf_len)
 636                                 c->wbuf_len = c->wbuf_pagesize;
 637                 }
 638
 639                 while(c->wbuf_len < c->wbuf_pagesize) {
 640                         uint32_t thislen;
 641
 642                         if (invec == count)
 643                                 goto alldone;
 644
 645                         thislen = c->wbuf_pagesize - c->wbuf_len;
 646
 647                         if (thislen >= invecs[invec].iov_len)
 648                                 thislen = invecs[invec].iov_len;
 649
 650                         invec_ofs = thislen;
 651
 652                         memcpy(c->wbuf + c->wbuf_len, invecs[invec].iov_base, thislen);
 653                         c->wbuf_len += thislen;
 654                         donelen += thislen;
 655                         /* Get next invec, if actual did not fill the buffer */
 656                         if (c->wbuf_len < c->wbuf_pagesize)
 657                                 invec++;
 658                 }
 659
 660                 /* write buffer is full, flush buffer */
 661                 ret = __jffs2_flush_wbuf(c, 0);
 662                 if (ret) {
 663                         /* the underlying layer has to check wbuf_len to do the cleanup */
 664                         D1(printk(KERN_WARNING "jffs2_flush_wbuf() called from jffs2_flash_writev() failed %d\n", ret));
 665                         /* Retlen zero to make sure our caller doesn't mark the space dirty.
 666                            We've already done everything that's necessary */
 667                         *retlen = 0;
 668                         return ret;
 669                 }
 670                 outvec_to += donelen;
 671                 c->wbuf_ofs = outvec_to;
 672
 673                 /* All invecs done ? */
 674                 if (invec == count)
 675                         goto alldone;
 676
 677                 /* Set up the first outvec, containing the remainder of the
 678                    invec we partially used */
 679                 if (invecs[invec].iov_len > invec_ofs) {
 680                         outvecs[0].iov_base = invecs[invec].iov_base+invec_ofs;
 681                         totlen = outvecs[0].iov_len = invecs[invec].iov_len-invec_ofs;
 682                         if (totlen > c->wbuf_pagesize) {
 683                                 splitvec = outvec;
 684                                 split_ofs = outvecs[0].iov_len - PAGE_MOD(totlen);
 685                         }
 686                         outvec++;
 687                 }
 688                 invec++;
 689         }
 690
 691         /* OK, now we've flushed the wbuf and the start of the bits
 692            we have been asked to write, now to write the rest.... */
 693
 694         /* totlen holds the amount of data still to be written */
 695         old_totlen = totlen;
 696         for ( ; invec < count; invec++,outvec++ ) {
 697                 outvecs[outvec].iov_base = invecs[invec].iov_base;
 698                 totlen += outvecs[outvec].iov_len = invecs[invec].iov_len;
 699                 if (PAGE_DIV(totlen) != PAGE_DIV(old_totlen)) {
 700                         splitvec = outvec;
 701                         split_ofs = outvecs[outvec].iov_len - PAGE_MOD(totlen);
 702                         old_totlen = totlen;
 703                 }
 704         }
 705
 706         /* Now the outvecs array holds all the remaining data to write */
 707         /* Up to splitvec,split_ofs is to be written immediately. The rest
 708            goes into the (now-empty) wbuf */
 709
 710         if (splitvec != -1) {
 711                 uint32_t remainder;
 712                 int ret;
 713
 714                 remainder = outvecs[splitvec].iov_len - split_ofs;
 715                 outvecs[splitvec].iov_len = split_ofs;
 716
 717                 /* We did cross a page boundary, so we write some now */
 718                 ret = c->mtd->writev_ecc(c->mtd, outvecs, splitvec+1, outvec_to, &wbuf_retlen, NULL, c->oobinfo);
 719                 if (ret < 0 || wbuf_retlen != PAGE_DIV(totlen)) {
 720                         /* At this point we have no problem,
 721                            c->wbuf is empty.
 722                         */
 723                         *retlen = donelen;
 724                         return ret;
 725                 }
 726
 727                 donelen += wbuf_retlen;
 728                 c->wbuf_ofs = PAGE_DIV(outvec_to) + PAGE_DIV(totlen);
 729
 730                 if (remainder) {
 731                         outvecs[splitvec].iov_base += split_ofs;
 732                         outvecs[splitvec].iov_len = remainder;
 733                 } else {
 734                         splitvec++;
 735                 }
 736
 737         } else {
 738                 splitvec = 0;
 739         }
 740
 741         /* Now splitvec points to the start of the bits we have to copy
 742            into the wbuf */
 743         wbuf_ptr = c->wbuf;
 744
 745         for ( ; splitvec < outvec; splitvec++) {
 746                 /* Don't copy the wbuf into itself */
 747                 if (outvecs[splitvec].iov_base == c->wbuf)
 748                         continue;
 749                 memcpy(wbuf_ptr, outvecs[splitvec].iov_base, outvecs[splitvec].iov_len);
 750                 wbuf_ptr += outvecs[splitvec].iov_len;
 751                 donelen += outvecs[splitvec].iov_len;
 752         }
 753         c->wbuf_len = wbuf_ptr - c->wbuf;
 754
 755         /* If there's a remainder in the wbuf and it's a non-GC write,
 756            remember that the wbuf affects this ino */
 757 alldone:
 758         *retlen = donelen;
 759
 760         if (c->wbuf_len && ino)
 761                 jffs2_wbuf_dirties_inode(c, ino);
 762
 763         return 0;
 764 }
 765
 766 /*
 767  *      This is the entry for flash write.
 768  *      Check, if we work on NAND FLASH, if so build an kvec and write it via vritev
 769 */
 770 int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, const u_char *buf)
 771 {
 772         struct kvec vecs[1];
 773
 774         if (jffs2_can_mark_obsolete(c))
 775                 return c->mtd->write(c->mtd, ofs, len, retlen, buf);
 776
 777         vecs[0].iov_base = (unsigned char *) buf;
 778         vecs[0].iov_len = len;
 779         return jffs2_flash_writev(c, vecs, 1, ofs, retlen, 0);
 780 }
 781
 782 /*
 783         Handle readback from writebuffer and ECC failure return
 784 */
 785 int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, u_char *buf)
 786 {
 787         loff_t  orbf = 0, owbf = 0, lwbf = 0;
 788         int     ret;
 789
 790         /* Read flash */
 791         if (!jffs2_can_mark_obsolete(c)) {
 792                 ret = c->mtd->read_ecc(c->mtd, ofs, len, retlen, buf, NULL, c->oobinfo);
 793
 794                 if ( (ret == -EIO) && (*retlen == len) ) {
 795                         printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx) returned ECC error\n",
 796                                len, ofs);
 797                         /*
 798                          * We have the raw data without ECC correction in the buffer, maybe
 799                          * we are lucky and all data or parts are correct. We check the node.
 800                          * If data are corrupted node check will sort it out.
 801                          * We keep this block, it will fail on write or erase and the we
 802                          * mark it bad. Or should we do that now? But we should give him a chance.
 803                          * Maybe we had a system crash or power loss before the ecc write or
 804                          * a erase was completed.
 805                          * So we return success. :)
 806                          */
 807                         ret = 0;
 808                  }
 809         } else
 810                 return c->mtd->read(c->mtd, ofs, len, retlen, buf);
 811
 812         /* if no writebuffer available or write buffer empty, return */
 813         if (!c->wbuf_pagesize || !c->wbuf_len)
 814                 return ret;
 815
 816         /* if we read in a different block, return */
 817         if ( (ofs & ~(c->sector_size-1)) != (c->wbuf_ofs & ~(c->sector_size-1)) )
 818                 return ret;
 819
 820         if (ofs >= c->wbuf_ofs) {
 821                 owbf = (ofs - c->wbuf_ofs);     /* offset in write buffer */
 822                 if (owbf > c->wbuf_len)         /* is read beyond write buffer ? */
 823                         return ret;
 824                 lwbf = c->wbuf_len - owbf;      /* number of bytes to copy */
 825                 if (lwbf > len)
 826                         lwbf = len;
 827         } else {
 828                 orbf = (c->wbuf_ofs - ofs);     /* offset in read buffer */
 829                 if (orbf > len)                 /* is write beyond write buffer ? */
 830                         return ret;
 831                 lwbf = len - orbf;              /* number of bytes to copy */
 832                 if (lwbf > c->wbuf_len)
 833                         lwbf = c->wbuf_len;
 834         }
 835         if (lwbf > 0)
 836                 memcpy(buf+orbf,c->wbuf+owbf,lwbf);
 837
 838         return ret;
 839 }
 840
 841 /*
 842  *      Check, if the out of band area is empty
 843  */
 844 int jffs2_check_oob_empty( struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int mode)
 845 {
 846         unsigned char *buf;
 847         int     ret = 0;
 848         int     i,len,page;
 849         size_t  retlen;
 850         int     oob_size;
 851
 852         /* allocate a buffer for all oob data in this sector */
 853         oob_size = c->mtd->oobsize;
 854         len = 4 * oob_size;
 855         buf = kmalloc(len, GFP_KERNEL);
 856         if (!buf) {
 857                 printk(KERN_NOTICE "jffs2_check_oob_empty(): allocation of temporary data buffer for oob check failed\n");
 858                 return -ENOMEM;
 859         }
 860         /*
 861          * if mode = 0, we scan for a total empty oob area, else we have
 862          * to take care of the cleanmarker in the first page of the block
 863         */
 864         ret = jffs2_flash_read_oob(c, jeb->offset, len , &retlen, buf);
 865         if (ret) {
 866                 D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB failed %d for block at %08x\n", ret, jeb->offset));
 867                 goto out;
 868         }
 869
 870         if (retlen < len) {
 871                 D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB return short read "
 872                           "(%zd bytes not %d) for block at %08x\n", retlen, len, jeb->offset));
 873                 ret = -EIO;
 874                 goto out;
 875         }
 876
 877         /* Special check for first page */
 878         for(i = 0; i < oob_size ; i++) {
 879                 /* Yeah, we know about the cleanmarker. */
 880                 if (mode && i >= c->fsdata_pos &&
 881                     i < c->fsdata_pos + c->fsdata_len)
 882                         continue;
 883
 884                 if (buf[i] != 0xFF) {
 885                         D2(printk(KERN_DEBUG "Found %02x at %x in OOB for %08x\n",
 886                                   buf[page+i], page+i, jeb->offset));
 887                         ret = 1;
 888                         goto out;
 889                 }
 890         }
 891
 892         /* we know, we are aligned :) */
 893         for (page = oob_size; page < len; page += sizeof(long)) {
 894                 unsigned long dat = *(unsigned long *)(&buf[page]);
 895                 if(dat != -1) {
 896                         ret = 1;
 897                         goto out;
 898                 }
 899         }
 900
 901 out:
 902         kfree(buf);
 903
 904         return ret;
 905 }
 906
 907 /*
 908 *       Scan for a valid cleanmarker and for bad blocks
 909 *       For virtual blocks (concatenated physical blocks) check the cleanmarker
 910 *       only in the first page of the first physical block, but scan for bad blocks in all
 911 *       physical blocks
 912 */
 913 int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
 914 {
 915         struct jffs2_unknown_node n;
 916         unsigned char buf[2 * NAND_MAX_OOBSIZE];
 917         unsigned char *p;
 918         int ret, i, cnt, retval = 0;
 919         size_t retlen, offset;
 920         int oob_size;
 921
 922         offset = jeb->offset;
 923         oob_size = c->mtd->oobsize;
 924
 925         /* Loop through the physical blocks */
 926         for (cnt = 0; cnt < (c->sector_size / c->mtd->erasesize); cnt++) {
 927                 /* Check first if the block is bad. */
 928                 if (c->mtd->block_isbad (c->mtd, offset)) {
 929                         D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Bad block at %08x\n", jeb->offset));
 930                         return 2;
 931                 }
 932                 /*
 933                    *    We read oob data from page 0 and 1 of the block.
 934                    *    page 0 contains cleanmarker and badblock info
 935                    *    page 1 contains failure count of this block
 936                  */
 937                 ret = c->mtd->read_oob (c->mtd, offset, oob_size << 1, &retlen, buf);
 938
 939                 if (ret) {
 940                         D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB failed %d for block at %08x\n", ret, jeb->offset));
 941                         return ret;
 942                 }
 943                 if (retlen < (oob_size << 1)) {
 944                         D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): Read OOB return short read (%zd bytes not %d) for block at %08x\n", retlen, oob_size << 1, jeb->offset));
 945                         return -EIO;
 946                 }
 947
 948                 /* Check cleanmarker only on the first physical block */
 949                 if (!cnt) {
 950                         n.magic = cpu_to_je16 (JFFS2_MAGIC_BITMASK);
 951                         n.nodetype = cpu_to_je16 (JFFS2_NODETYPE_CLEANMARKER);
 952                         n.totlen = cpu_to_je32 (8);
 953                         p = (unsigned char *) &n;
 954
 955                         for (i = 0; i < c->fsdata_len; i++) {
 956                                 if (buf[c->fsdata_pos + i] != p[i]) {
 957                                         retval = 1;
 958                                 }
 959                         }
 960                         D1(if (retval == 1) {
 961                                 printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): Cleanmarker node not detected in block at %08x\n", jeb->offset);
 962                                 printk(KERN_WARNING "OOB at %08x was ", offset);
 963                                 for (i=0; i < oob_size; i++) {
 964                                         printk("%02x ", buf[i]);
 965                                 }
 966                                 printk("\n");
 967                         })
 968                 }
 969                 offset += c->mtd->erasesize;
 970         }
 971         return retval;
 972 }
 973
 974 int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb)
 975 {
 976         struct  jffs2_unknown_node n;
 977         int     ret;
 978         size_t  retlen;
 979
 980         n.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
 981         n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER);
 982         n.totlen = cpu_to_je32(8);
 983
 984         ret = jffs2_flash_write_oob(c, jeb->offset + c->fsdata_pos, c->fsdata_len, &retlen, (unsigned char *)&n);
 985
 986         if (ret) {
 987                 D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
 988                 return ret;
 989         }
 990         if (retlen != c->fsdata_len) {
 991                 D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): Short write for block at %08x: %zd not %d\n", jeb->offset, retlen, c->fsdata_len));
 992                 return ret;
 993         }
 994         return 0;
 995 }
 996
 997 /*
 998  * On NAND we try to mark this block bad. If the block was erased more
 999  * than MAX_ERASE_FAILURES we mark it finaly bad.
1000  * Don't care about failures. This block remains on the erase-pending
1001  * or badblock list as long as nobody manipulates the flash with
1002  * a bootloader or something like that.
1003  */
1004
1005 int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset)
1006 {
1007         int     ret;
1008
1009         /* if the count is < max, we try to write the counter to the 2nd page oob area */
1010         if( ++jeb->bad_count < MAX_ERASE_FAILURES)
1011                 return 0;
1012
1013         if (!c->mtd->block_markbad)
1014                 return 1; // What else can we do?
1015
1016         D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Marking bad block at %08x\n", bad_offset));
1017         ret = c->mtd->block_markbad(c->mtd, bad_offset);
1018
1019         if (ret) {
1020                 D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
1021                 return ret;
1022         }
1023         return 1;
1024 }
1025
1026 #define NAND_JFFS2_OOB16_FSDALEN        8
1027
1028 static struct nand_oobinfo jffs2_oobinfo_docecc = {
1029         .useecc = MTD_NANDECC_PLACE,
1030         .eccbytes = 6,
1031         .eccpos = {0,1,2,3,4,5}
1032 };
1033
1034
1035 int jffs2_nand_set_oobinfo(struct jffs2_sb_info *c)
1036 {
1037         struct nand_oobinfo *oinfo = &c->mtd->oobinfo;
1038
1039         /* Do this only, if we have an oob buffer */
1040         if (!c->mtd->oobsize)
1041                 return 0;
1042
1043         /* Cleanmarker is out-of-band, so inline size zero */
1044         c->cleanmarker_size = 0;
1045
1046         /* Should we use autoplacement ? */
1047         if (oinfo && oinfo->useecc == MTD_NANDECC_AUTOPLACE) {
1048                 D1(printk(KERN_DEBUG "JFFS2 using autoplace on NAND\n"));
1049                 /* Get the position of the free bytes */
1050                 if (!oinfo->oobfree[0][0]) {
1051                         printk (KERN_WARNING "jffs2_nand_set_oobinfo(): Eeep. Autoplacement selected and no empty space in oob\n");
1052                         return -ENOSPC;
1053                 }
1054                 c->fsdata_pos = oinfo->oobfree[0][0];
1055                 c->fsdata_len = oinfo->oobfree[0][1];
1056                 if (c->fsdata_len > 8)
1057                         c->fsdata_len = 8;
1058         } else {
1059                 /* This is just a legacy fallback and should go away soon */
1060                 switch(c->mtd->ecctype) {
1061                 case MTD_ECC_RS_DiskOnChip:
1062                         printk(KERN_WARNING "JFFS2 using DiskOnChip hardware ECC without autoplacement. Fix it!\n");
1063                         c->oobinfo = &jffs2_oobinfo_docecc;
1064                         c->fsdata_pos = 6;
1065                         c->fsdata_len = NAND_JFFS2_OOB16_FSDALEN;
1066                         c->badblock_pos = 15;
1067                         break;
1068
1069                 default:
1070                         D1(printk(KERN_DEBUG "JFFS2 on NAND. No autoplacment info found\n"));
1071                         return -EINVAL;
1072                 }
1073         }
1074         return 0;
1075 }
1076
1077 int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
1078 {
1079         int res;
1080
1081         /* Initialise write buffer */
1082         c->wbuf_pagesize = c->mtd->oobblock;
1083         c->wbuf_ofs = 0xFFFFFFFF;
1084
1085
1086         c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1087         if (!c->wbuf)
1088                 return -ENOMEM;
1089
1090         res = jffs2_nand_set_oobinfo(c);
1091
1092 #ifdef BREAKME
1093         if (!brokenbuf)
1094                 brokenbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1095         if (!brokenbuf) {
1096                 kfree(c->wbuf);
1097                 return -ENOMEM;
1098         }
1099         memset(brokenbuf, 0xdb, c->wbuf_pagesize);
1100 #endif
1101         return res;
1102 }
1103
1104 void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c)
1105 {
1106         kfree(c->wbuf);
1107 }