[TCP]: TCP_CONG_YEAH requires TCP_CONG_VEGAS
[linux-2.6/verdex.git] / fs / jffs2 / wbuf.c
blob91d1d0f1c66c72c1d0c925d383f8e2a02973e2c4
1 /*
2 * JFFS2 -- Journalling Flash File System, Version 2.
4 * Copyright © 2001-2007 Red Hat, Inc.
5 * Copyright © 2004 Thomas Gleixner <tglx@linutronix.de>
7 * Created by David Woodhouse <dwmw2@infradead.org>
8 * Modified debugged and enhanced by Thomas Gleixner <tglx@linutronix.de>
10 * For licensing information, see the file 'LICENCE' in this directory.
14 #include <linux/kernel.h>
15 #include <linux/slab.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/crc32.h>
18 #include <linux/mtd/nand.h>
19 #include <linux/jiffies.h>
20 #include <linux/sched.h>
22 #include "nodelist.h"
24 /* For testing write failures */
25 #undef BREAKME
26 #undef BREAKMEHEADER
28 #ifdef BREAKME
29 static unsigned char *brokenbuf;
30 #endif
/* PAGE_DIV(x) rounds x down to a multiple of c->wbuf_pagesize; PAGE_MOD(x) is
 * the remainder of x within such a page. Both expand to an expression that
 * refers to a variable named 'c', which must exist at the point of use. */
32 #define PAGE_DIV(x) ( ((unsigned long)(x) / (unsigned long)(c->wbuf_pagesize)) * (unsigned long)(c->wbuf_pagesize) )
33 #define PAGE_MOD(x) ( (unsigned long)(x) % (unsigned long)(c->wbuf_pagesize) )
35 /* max. erase failures before we mark a block bad */
36 #define MAX_ERASE_FAILURES 2
/* One entry of the singly linked list headed by c->wbuf_inodes: the number of
 * an inode that has data pending in the write buffer. */
38 struct jffs2_inodirty {
39 uint32_t ino;
40 struct jffs2_inodirty *next;
/* Sentinel object. c->wbuf_inodes is pointed here when allocating a list entry
 * fails; lookups then report every inode as pending. It is never kfree'd. */
43 static struct jffs2_inodirty inodirty_nomem;
45 static int jffs2_wbuf_pending_for_ino(struct jffs2_sb_info *c, uint32_t ino)
47 struct jffs2_inodirty *this = c->wbuf_inodes;
49 /* If a malloc failed, consider _everything_ dirty */
50 if (this == &inodirty_nomem)
51 return 1;
53 /* If ino == 0, _any_ non-GC writes mean 'yes' */
54 if (this && !ino)
55 return 1;
57 /* Look to see if the inode in question is pending in the wbuf */
58 while (this) {
59 if (this->ino == ino)
60 return 1;
61 this = this->next;
63 return 0;
66 static void jffs2_clear_wbuf_ino_list(struct jffs2_sb_info *c)
68 struct jffs2_inodirty *this;
70 this = c->wbuf_inodes;
72 if (this != &inodirty_nomem) {
73 while (this) {
74 struct jffs2_inodirty *next = this->next;
75 kfree(this);
76 this = next;
79 c->wbuf_inodes = NULL;
82 static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino)
84 struct jffs2_inodirty *new;
86 /* Mark the superblock dirty so that kupdated will flush... */
87 jffs2_erase_pending_trigger(c);
89 if (jffs2_wbuf_pending_for_ino(c, ino))
90 return;
92 new = kmalloc(sizeof(*new), GFP_KERNEL);
93 if (!new) {
94 D1(printk(KERN_DEBUG "No memory to allocate inodirty. Fallback to all considered dirty\n"));
95 jffs2_clear_wbuf_ino_list(c);
96 c->wbuf_inodes = &inodirty_nomem;
97 return;
99 new->ino = ino;
100 new->next = c->wbuf_inodes;
101 c->wbuf_inodes = new;
102 return;
105 static inline void jffs2_refile_wbuf_blocks(struct jffs2_sb_info *c)
107 struct list_head *this, *next;
108 static int n;
110 if (list_empty(&c->erasable_pending_wbuf_list))
111 return;
113 list_for_each_safe(this, next, &c->erasable_pending_wbuf_list) {
114 struct jffs2_eraseblock *jeb = list_entry(this, struct jffs2_eraseblock, list);
116 D1(printk(KERN_DEBUG "Removing eraseblock at 0x%08x from erasable_pending_wbuf_list...\n", jeb->offset));
117 list_del(this);
118 if ((jiffies + (n++)) & 127) {
119 /* Most of the time, we just erase it immediately. Otherwise we
120 spend ages scanning it on mount, etc. */
121 D1(printk(KERN_DEBUG "...and adding to erase_pending_list\n"));
122 list_add_tail(&jeb->list, &c->erase_pending_list);
123 c->nr_erasing_blocks++;
124 jffs2_erase_pending_trigger(c);
125 } else {
126 /* Sometimes, however, we leave it elsewhere so it doesn't get
127 immediately reused, and we spread the load a bit. */
128 D1(printk(KERN_DEBUG "...and adding to erasable_list\n"));
129 list_add_tail(&jeb->list, &c->erasable_list);
134 #define REFILE_NOTEMPTY 0
135 #define REFILE_ANYWAY 1
137 static void jffs2_block_refile(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, int allow_empty)
139 D1(printk("About to refile bad block at %08x\n", jeb->offset));
141 /* File the existing block on the bad_used_list.... */
142 if (c->nextblock == jeb)
143 c->nextblock = NULL;
144 else /* Not sure this should ever happen... need more coffee */
145 list_del(&jeb->list);
146 if (jeb->first_node) {
147 D1(printk("Refiling block at %08x to bad_used_list\n", jeb->offset));
148 list_add(&jeb->list, &c->bad_used_list);
149 } else {
150 BUG_ON(allow_empty == REFILE_NOTEMPTY);
151 /* It has to have had some nodes or we couldn't be here */
152 D1(printk("Refiling block at %08x to erase_pending_list\n", jeb->offset));
153 list_add(&jeb->list, &c->erase_pending_list);
154 c->nr_erasing_blocks++;
155 jffs2_erase_pending_trigger(c);
158 if (!jffs2_prealloc_raw_node_refs(c, jeb, 1)) {
159 uint32_t oldfree = jeb->free_size;
161 jffs2_link_node_ref(c, jeb,
162 (jeb->offset+c->sector_size-oldfree) | REF_OBSOLETE,
163 oldfree, NULL);
164 /* convert to wasted */
165 c->wasted_size += oldfree;
166 jeb->wasted_size += oldfree;
167 c->dirty_size -= oldfree;
168 jeb->dirty_size -= oldfree;
171 jffs2_dbg_dump_block_lists_nolock(c);
172 jffs2_dbg_acct_sanity_check_nolock(c,jeb);
173 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
176 static struct jffs2_raw_node_ref **jffs2_incore_replace_raw(struct jffs2_sb_info *c,
177 struct jffs2_inode_info *f,
178 struct jffs2_raw_node_ref *raw,
179 union jffs2_node_union *node)
181 struct jffs2_node_frag *frag;
182 struct jffs2_full_dirent *fd;
184 dbg_noderef("incore_replace_raw: node at %p is {%04x,%04x}\n",
185 node, je16_to_cpu(node->u.magic), je16_to_cpu(node->u.nodetype));
187 BUG_ON(je16_to_cpu(node->u.magic) != 0x1985 &&
188 je16_to_cpu(node->u.magic) != 0);
190 switch (je16_to_cpu(node->u.nodetype)) {
191 case JFFS2_NODETYPE_INODE:
192 if (f->metadata && f->metadata->raw == raw) {
193 dbg_noderef("Will replace ->raw in f->metadata at %p\n", f->metadata);
194 return &f->metadata->raw;
196 frag = jffs2_lookup_node_frag(&f->fragtree, je32_to_cpu(node->i.offset));
197 BUG_ON(!frag);
198 /* Find a frag which refers to the full_dnode we want to modify */
199 while (!frag->node || frag->node->raw != raw) {
200 frag = frag_next(frag);
201 BUG_ON(!frag);
203 dbg_noderef("Will replace ->raw in full_dnode at %p\n", frag->node);
204 return &frag->node->raw;
206 case JFFS2_NODETYPE_DIRENT:
207 for (fd = f->dents; fd; fd = fd->next) {
208 if (fd->raw == raw) {
209 dbg_noderef("Will replace ->raw in full_dirent at %p\n", fd);
210 return &fd->raw;
213 BUG();
215 default:
216 dbg_noderef("Don't care about replacing raw for nodetype %x\n",
217 je16_to_cpu(node->u.nodetype));
218 break;
220 return NULL;
223 /* Recover from failure to write wbuf. Recover the nodes up to the
224 * wbuf, not the one which we were starting to try to write. */
/*
 * Outline of what the code below does:
 *  1. Refile the eraseblock containing c->wbuf_ofs (under
 *     erase_completion_lock) via jffs2_block_refile().
 *  2. Find the first non-obsolete node that reaches into the write buffer.
 *     If there is none, set c->wbuf_len to 0 and return.
 *  3. If that node starts before c->wbuf_ofs, allocate a buffer and read the
 *     already-written part back from flash; if the allocation or the read
 *     fails, that node is skipped and recovery continues with the next
 *     non-obsolete one (or gives up when there is none).
 *  4. Reserve space with jffs2_reserve_space_gc(), disable summary
 *     collection, preallocate node refs in c->nextblock.
 *  5. Write whole pages of the recovered data directly to the new location
 *     and leave the remainder in c->wbuf (c->wbuf_ofs / c->wbuf_len updated).
 *  6. Under erase_completion_lock, link a new ref in the new block for every
 *     recovered node, redirect in-core pointers to it, mark the old refs
 *     obsolete and move their size from used to dirty.
 * The function returns nothing; the failure paths log a message and return.
 */
226 static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
228 struct jffs2_eraseblock *jeb, *new_jeb;
229 struct jffs2_raw_node_ref *raw, *next, *first_raw = NULL;
230 size_t retlen;
231 int ret;
232 int nr_refile = 0;
233 unsigned char *buf;
234 uint32_t start, end, ofs, len;
236 jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
238 spin_lock(&c->erase_completion_lock);
239 if (c->wbuf_ofs % c->mtd->erasesize)
240 jffs2_block_refile(c, jeb, REFILE_NOTEMPTY);
241 else
242 jffs2_block_refile(c, jeb, REFILE_ANYWAY);
243 spin_unlock(&c->erase_completion_lock);
245 BUG_ON(!ref_obsolete(jeb->last_node));
247 /* Find the first node to be recovered, by skipping over every
248 node which ends before the wbuf starts, or which is obsolete. */
249 for (next = raw = jeb->first_node; next; raw = next) {
250 next = ref_next(raw);
252 if (ref_obsolete(raw) ||
253 (next && ref_offset(next) <= c->wbuf_ofs)) {
254 dbg_noderef("Skipping node at 0x%08x(%d)-0x%08x which is either before 0x%08x or obsolete\n",
255 ref_offset(raw), ref_flags(raw),
256 (ref_offset(raw) + ref_totlen(c, jeb, raw)),
257 c->wbuf_ofs);
258 continue;
260 dbg_noderef("First node to be recovered is at 0x%08x(%d)-0x%08x\n",
261 ref_offset(raw), ref_flags(raw),
262 (ref_offset(raw) + ref_totlen(c, jeb, raw)));
264 first_raw = raw;
265 break;
268 if (!first_raw) {
269 /* All nodes were obsolete. Nothing to recover. */
270 D1(printk(KERN_DEBUG "No non-obsolete nodes to be recovered. Just filing block bad\n"));
271 c->wbuf_len = 0;
272 return;
275 start = ref_offset(first_raw);
276 end = ref_offset(jeb->last_node);
277 nr_refile = 1;
279 /* Count the number of refs which need to be copied */
280 while ((raw = ref_next(raw)) != jeb->last_node)
281 nr_refile++;
283 dbg_noderef("wbuf recover %08x-%08x (%d bytes in %d nodes)\n",
284 start, end, end - start, nr_refile);
286 buf = NULL;
287 if (start < c->wbuf_ofs) {
288 /* First affected node was already partially written.
289 * Attempt to reread the old data into our buffer. */
291 buf = kmalloc(end - start, GFP_KERNEL);
292 if (!buf) {
293 printk(KERN_CRIT "Malloc failure in wbuf recovery. Data loss ensues.\n");
295 goto read_failed;
298 /* Do the read... */
299 ret = c->mtd->read(c->mtd, start, c->wbuf_ofs - start, &retlen, buf);
301 /* ECC recovered ? */
302 if ((ret == -EUCLEAN || ret == -EBADMSG) &&
303 (retlen == c->wbuf_ofs - start))
304 ret = 0;
306 if (ret || retlen != c->wbuf_ofs - start) {
307 printk(KERN_CRIT "Old data are already lost in wbuf recovery. Data loss ensues.\n");
309 kfree(buf);
310 buf = NULL;
311 read_failed:
312 first_raw = ref_next(first_raw);
313 nr_refile--;
314 while (first_raw && ref_obsolete(first_raw)) {
315 first_raw = ref_next(first_raw);
316 nr_refile--;
319 /* If this was the only node to be recovered, give up */
320 if (!first_raw) {
321 c->wbuf_len = 0;
322 return;
325 /* It wasn't. Go on and try to recover nodes complete in the wbuf */
326 start = ref_offset(first_raw);
327 dbg_noderef("wbuf now recover %08x-%08x (%d bytes in %d nodes)\n",
328 start, end, end - start, nr_refile);
330 } else {
331 /* Read succeeded. Copy the remaining data from the wbuf */
332 memcpy(buf + (c->wbuf_ofs - start), c->wbuf, end - c->wbuf_ofs);
335 /* OK... we're to rewrite (end-start) bytes of data from first_raw onwards.
336 Either 'buf' contains the data, or we find it in the wbuf */
338 /* ... and get an allocation of space from a shiny new block instead */
339 ret = jffs2_reserve_space_gc(c, end-start, &len, JFFS2_SUMMARY_NOSUM_SIZE);
340 if (ret) {
341 printk(KERN_WARNING "Failed to allocate space for wbuf recovery. Data loss ensues.\n");
342 kfree(buf);
343 return;
346 /* The summary is not recovered, so it must be disabled for this erase block */
347 jffs2_sum_disable_collecting(c->summary);
349 ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, nr_refile);
350 if (ret) {
351 printk(KERN_WARNING "Failed to allocate node refs for wbuf recovery. Data loss ensues.\n");
352 kfree(buf);
353 return;
356 ofs = write_ofs(c);
358 if (end-start >= c->wbuf_pagesize) {
359 /* Need to do another write immediately, but it's possible
360 that this is just because the wbuf itself is completely
361 full, and there's nothing earlier read back from the
362 flash. Hence 'buf' isn't necessarily what we're writing
363 from. */
364 unsigned char *rewrite_buf = buf?:c->wbuf;
365 uint32_t towrite = (end-start) - ((end-start)%c->wbuf_pagesize);
367 D1(printk(KERN_DEBUG "Write 0x%x bytes at 0x%08x in wbuf recover\n",
368 towrite, ofs));
370 #ifdef BREAKMEHEADER
371 static int breakme;
372 if (breakme++ == 20) {
373 printk(KERN_NOTICE "Faking write error at 0x%08x\n", ofs);
374 breakme = 0;
375 c->mtd->write(c->mtd, ofs, towrite, &retlen,
376 brokenbuf);
377 ret = -EIO;
378 } else
379 #endif
380 ret = c->mtd->write(c->mtd, ofs, towrite, &retlen,
381 rewrite_buf);
383 if (ret || retlen != towrite) {
384 /* Argh. We tried. Really we did. */
385 printk(KERN_CRIT "Recovery of wbuf failed due to a second write error\n");
386 kfree(buf);
388 if (retlen)
389 jffs2_add_physical_node_ref(c, ofs | REF_OBSOLETE, ref_totlen(c, jeb, first_raw), NULL);
391 return;
393 printk(KERN_NOTICE "Recovery of wbuf succeeded to %08x\n", ofs);
395 c->wbuf_len = (end - start) - towrite;
396 c->wbuf_ofs = ofs + towrite;
397 memmove(c->wbuf, rewrite_buf + towrite, c->wbuf_len);
398 /* Don't muck about with c->wbuf_inodes. False positives are harmless. */
399 } else {
400 /* OK, now we're left with the dregs in whichever buffer we're using */
401 if (buf) {
402 memcpy(c->wbuf, buf, end-start);
403 } else {
404 memmove(c->wbuf, c->wbuf + (start - c->wbuf_ofs), end - start);
406 c->wbuf_ofs = ofs;
407 c->wbuf_len = end - start;
410 /* Now sort out the jffs2_raw_node_refs, moving them from the old to the next block */
411 new_jeb = &c->blocks[ofs / c->sector_size];
413 spin_lock(&c->erase_completion_lock);
414 for (raw = first_raw; raw != jeb->last_node; raw = ref_next(raw)) {
415 uint32_t rawlen = ref_totlen(c, jeb, raw);
416 struct jffs2_inode_cache *ic;
417 struct jffs2_raw_node_ref *new_ref;
418 struct jffs2_raw_node_ref **adjust_ref = NULL;
419 struct jffs2_inode_info *f = NULL;
421 D1(printk(KERN_DEBUG "Refiling block of %08x at %08x(%d) to %08x\n",
422 rawlen, ref_offset(raw), ref_flags(raw), ofs));
424 ic = jffs2_raw_ref_to_ic(raw);
426 /* Ick. This XATTR mess should be fixed shortly... */
427 if (ic && ic->class == RAWNODE_CLASS_XATTR_DATUM) {
428 struct jffs2_xattr_datum *xd = (void *)ic;
429 BUG_ON(xd->node != raw);
430 adjust_ref = &xd->node;
431 raw->next_in_ino = NULL;
432 ic = NULL;
433 } else if (ic && ic->class == RAWNODE_CLASS_XATTR_REF) {
/* NOTE(review): this XATTR_REF branch casts to struct jffs2_xattr_datum, the
 * same type as the XATTR_DATUM branch above - confirm that is intended. */
434 struct jffs2_xattr_datum *xr = (void *)ic;
435 BUG_ON(xr->node != raw);
436 adjust_ref = &xr->node;
437 raw->next_in_ino = NULL;
438 ic = NULL;
439 } else if (ic && ic->class == RAWNODE_CLASS_INODE_CACHE) {
440 struct jffs2_raw_node_ref **p = &ic->nodes;
442 /* Remove the old node from the per-inode list */
443 while (*p && *p != (void *)ic) {
444 if (*p == raw) {
445 (*p) = (raw->next_in_ino);
446 raw->next_in_ino = NULL;
447 break;
449 p = &((*p)->next_in_ino);
452 if (ic->state == INO_STATE_PRESENT && !ref_obsolete(raw)) {
453 /* If it's an in-core inode, then we have to adjust any
454 full_dirent or full_dnode structure to point to the
455 new version instead of the old */
456 f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink);
457 if (IS_ERR(f)) {
458 /* Should never happen; it _must_ be present */
459 JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n",
460 ic->ino, PTR_ERR(f));
461 BUG();
463 /* We don't lock f->sem. There's a number of ways we could
464 end up in here with it already being locked, and nobody's
465 going to modify it on us anyway because we hold the
466 alloc_sem. We're only changing one ->raw pointer too,
467 which we can get away with without upsetting readers. */
468 adjust_ref = jffs2_incore_replace_raw(c, f, raw,
469 (void *)(buf?:c->wbuf) + (ref_offset(raw) - start));
470 } else if (unlikely(ic->state != INO_STATE_PRESENT &&
471 ic->state != INO_STATE_CHECKEDABSENT &&
472 ic->state != INO_STATE_GC)) {
473 JFFS2_ERROR("Inode #%u is in strange state %d!\n", ic->ino, ic->state);
474 BUG();
478 new_ref = jffs2_link_node_ref(c, new_jeb, ofs | ref_flags(raw), rawlen, ic);
480 if (adjust_ref) {
481 BUG_ON(*adjust_ref != raw);
482 *adjust_ref = new_ref;
484 if (f)
485 jffs2_gc_release_inode(c, f);
487 if (!ref_obsolete(raw)) {
488 jeb->dirty_size += rawlen;
489 jeb->used_size -= rawlen;
490 c->dirty_size += rawlen;
491 c->used_size -= rawlen;
492 raw->flash_offset = ref_offset(raw) | REF_OBSOLETE;
493 BUG_ON(raw->next_in_ino);
495 ofs += rawlen;
498 kfree(buf);
500 /* Fix up the original jeb now it's on the bad_list */
501 if (first_raw == jeb->first_node) {
502 D1(printk(KERN_DEBUG "Failing block at %08x is now empty. Moving to erase_pending_list\n", jeb->offset));
503 list_move(&jeb->list, &c->erase_pending_list);
504 c->nr_erasing_blocks++;
505 jffs2_erase_pending_trigger(c);
508 jffs2_dbg_acct_sanity_check_nolock(c, jeb);
509 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
511 jffs2_dbg_acct_sanity_check_nolock(c, new_jeb);
512 jffs2_dbg_acct_paranoia_check_nolock(c, new_jeb);
514 spin_unlock(&c->erase_completion_lock);
516 D1(printk(KERN_DEBUG "wbuf recovery completed OK. wbuf_ofs 0x%08x, len 0x%x\n", c->wbuf_ofs, c->wbuf_len));
520 /* Meaning of pad argument:
521 0: Do not pad. Probably pointless - we only ever use this when we can't pad anyway.
522 1: Pad, do not adjust nextblock free_size
523 2: Pad, adjust nextblock free_size
525 #define NOPAD 0
526 #define PAD_NOACCOUNT 1
527 #define PAD_ACCOUNTING 2
/*
 * Write the current write-buffer page to flash.
 *
 * Returns 0 when the flash is not write-buffered, when the buffer is empty,
 * or when the page was written; -ENOMEM when node refs cannot be
 * preallocated; otherwise the error from the MTD write (-EIO for a short
 * write), after jffs2_wbuf_recover() has been run.
 *
 * The caller must hold c->alloc_sem: being able to take it here is a BUG().
 * The callers visible in this file also hold c->wbuf_sem for writing.
 *
 * With a non-zero @pad the unused tail of the page is zero-filled and, if a
 * node header fits, described by a padding node; after a successful write
 * that tail is linked as an obsolete ref and accounted as wasted space.
 * NOTE(review): this function only tests "pad != 0", so PAD_NOACCOUNT and
 * PAD_ACCOUNTING behave identically here - confirm against the description
 * of the pad argument.
 * NOTE(review): the padding comment in the body mentions JFFS2_DIRTY_BITMASK
 * but the memset fills with 0 - confirm which is meant.
 *
 * On success the block lists are refiled, the pending-inode list is cleared,
 * the buffer is reset to 0xff, c->wbuf_len becomes 0 and c->wbuf_ofs moves to
 * the next page, or to 0xffffffff when that page lies in another sector.
 */
529 static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
531 struct jffs2_eraseblock *wbuf_jeb;
532 int ret;
533 size_t retlen;
535 /* Nothing to do if not write-buffering the flash. In particular, we shouldn't
536 del_timer() the timer we never initialised. */
537 if (!jffs2_is_writebuffered(c))
538 return 0;
540 if (!down_trylock(&c->alloc_sem)) {
541 up(&c->alloc_sem);
542 printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n");
543 BUG();
546 if (!c->wbuf_len) /* already checked c->wbuf above */
547 return 0;
549 wbuf_jeb = &c->blocks[c->wbuf_ofs / c->sector_size];
550 if (jffs2_prealloc_raw_node_refs(c, wbuf_jeb, c->nextblock->allocated_refs + 1))
551 return -ENOMEM;
553 /* claim remaining space on the page
554 this happens, if we have a change to a new block,
555 or if fsync forces us to flush the writebuffer.
556 if we have a switch to next page, we will not have
557 enough remaining space for this.
559 if (pad ) {
560 c->wbuf_len = PAD(c->wbuf_len);
562 /* Pad with JFFS2_DIRTY_BITMASK initially. this helps out ECC'd NOR
563 with 8 byte page size */
564 memset(c->wbuf + c->wbuf_len, 0, c->wbuf_pagesize - c->wbuf_len);
566 if ( c->wbuf_len + sizeof(struct jffs2_unknown_node) < c->wbuf_pagesize) {
567 struct jffs2_unknown_node *padnode = (void *)(c->wbuf + c->wbuf_len);
568 padnode->magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
569 padnode->nodetype = cpu_to_je16(JFFS2_NODETYPE_PADDING);
570 padnode->totlen = cpu_to_je32(c->wbuf_pagesize - c->wbuf_len);
571 padnode->hdr_crc = cpu_to_je32(crc32(0, padnode, sizeof(*padnode)-4));
574 /* else jffs2_flash_writev has actually filled in the rest of the
575 buffer for us, and will deal with the node refs etc. later. */
577 #ifdef BREAKME
578 static int breakme;
579 if (breakme++ == 20) {
580 printk(KERN_NOTICE "Faking write error at 0x%08x\n", c->wbuf_ofs);
581 breakme = 0;
582 c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen,
583 brokenbuf);
584 ret = -EIO;
585 } else
586 #endif
588 ret = c->mtd->write(c->mtd, c->wbuf_ofs, c->wbuf_pagesize, &retlen, c->wbuf);
590 if (ret || retlen != c->wbuf_pagesize) {
591 if (ret)
592 printk(KERN_WARNING "jffs2_flush_wbuf(): Write failed with %d\n",ret);
593 else {
594 printk(KERN_WARNING "jffs2_flush_wbuf(): Write was short: %zd instead of %d\n",
595 retlen, c->wbuf_pagesize);
596 ret = -EIO;
599 jffs2_wbuf_recover(c);
601 return ret;
604 /* Adjust free size of the block if we padded. */
605 if (pad) {
606 uint32_t waste = c->wbuf_pagesize - c->wbuf_len;
608 D1(printk(KERN_DEBUG "jffs2_flush_wbuf() adjusting free_size of %sblock at %08x\n",
609 (wbuf_jeb==c->nextblock)?"next":"", wbuf_jeb->offset));
611 /* wbuf_pagesize - wbuf_len is the amount of space that's to be
612 padded. If there is less free space in the block than that,
613 something screwed up */
614 if (wbuf_jeb->free_size < waste) {
615 printk(KERN_CRIT "jffs2_flush_wbuf(): Accounting error. wbuf at 0x%08x has 0x%03x bytes, 0x%03x left.\n",
616 c->wbuf_ofs, c->wbuf_len, waste);
617 printk(KERN_CRIT "jffs2_flush_wbuf(): But free_size for block at 0x%08x is only 0x%08x\n",
618 wbuf_jeb->offset, wbuf_jeb->free_size);
619 BUG();
622 spin_lock(&c->erase_completion_lock);
624 jffs2_link_node_ref(c, wbuf_jeb, (c->wbuf_ofs + c->wbuf_len) | REF_OBSOLETE, waste, NULL);
625 /* FIXME: that made it count as dirty. Convert to wasted */
626 wbuf_jeb->dirty_size -= waste;
627 c->dirty_size -= waste;
628 wbuf_jeb->wasted_size += waste;
629 c->wasted_size += waste;
630 } else
631 spin_lock(&c->erase_completion_lock);
633 /* Stick any now-obsoleted blocks on the erase_pending_list */
634 jffs2_refile_wbuf_blocks(c);
635 jffs2_clear_wbuf_ino_list(c);
636 spin_unlock(&c->erase_completion_lock);
638 memset(c->wbuf,0xff,c->wbuf_pagesize);
639 /* adjust write buffer offset, else we get a non contiguous write bug */
640 if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize))
641 c->wbuf_ofs += c->wbuf_pagesize;
642 else
643 c->wbuf_ofs = 0xffffffff;
644 c->wbuf_len = 0;
645 return 0;
648 /* Trigger garbage collection to flush the write-buffer.
649 If ino arg is zero, do it if _any_ real (i.e. not GC) writes are
650 outstanding. If ino arg non-zero, do it only if a write for the
651 given inode is outstanding. */
652 int jffs2_flush_wbuf_gc(struct jffs2_sb_info *c, uint32_t ino)
654 uint32_t old_wbuf_ofs;
655 uint32_t old_wbuf_len;
656 int ret = 0;
658 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() called for ino #%u...\n", ino));
660 if (!c->wbuf)
661 return 0;
663 down(&c->alloc_sem);
664 if (!jffs2_wbuf_pending_for_ino(c, ino)) {
665 D1(printk(KERN_DEBUG "Ino #%d not pending in wbuf. Returning\n", ino));
666 up(&c->alloc_sem);
667 return 0;
670 old_wbuf_ofs = c->wbuf_ofs;
671 old_wbuf_len = c->wbuf_len;
673 if (c->unchecked_size) {
674 /* GC won't make any progress for a while */
675 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() padding. Not finished checking\n"));
676 down_write(&c->wbuf_sem);
677 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
678 /* retry flushing wbuf in case jffs2_wbuf_recover
679 left some data in the wbuf */
680 if (ret)
681 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
682 up_write(&c->wbuf_sem);
683 } else while (old_wbuf_len &&
684 old_wbuf_ofs == c->wbuf_ofs) {
686 up(&c->alloc_sem);
688 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() calls gc pass\n"));
690 ret = jffs2_garbage_collect_pass(c);
691 if (ret) {
692 /* GC failed. Flush it with padding instead */
693 down(&c->alloc_sem);
694 down_write(&c->wbuf_sem);
695 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
696 /* retry flushing wbuf in case jffs2_wbuf_recover
697 left some data in the wbuf */
698 if (ret)
699 ret = __jffs2_flush_wbuf(c, PAD_ACCOUNTING);
700 up_write(&c->wbuf_sem);
701 break;
703 down(&c->alloc_sem);
706 D1(printk(KERN_DEBUG "jffs2_flush_wbuf_gc() ends...\n"));
708 up(&c->alloc_sem);
709 return ret;
712 /* Pad write-buffer to end and write it, wasting space. */
713 int jffs2_flush_wbuf_pad(struct jffs2_sb_info *c)
715 int ret;
717 if (!c->wbuf)
718 return 0;
720 down_write(&c->wbuf_sem);
721 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT);
722 /* retry - maybe wbuf recover left some data in wbuf. */
723 if (ret)
724 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT);
725 up_write(&c->wbuf_sem);
727 return ret;
730 static size_t jffs2_fill_wbuf(struct jffs2_sb_info *c, const uint8_t *buf,
731 size_t len)
733 if (len && !c->wbuf_len && (len >= c->wbuf_pagesize))
734 return 0;
736 if (len > (c->wbuf_pagesize - c->wbuf_len))
737 len = c->wbuf_pagesize - c->wbuf_len;
738 memcpy(c->wbuf + c->wbuf_len, buf, len);
739 c->wbuf_len += (uint32_t) len;
740 return len;
743 int jffs2_flash_writev(struct jffs2_sb_info *c, const struct kvec *invecs,
744 unsigned long count, loff_t to, size_t *retlen,
745 uint32_t ino)
747 struct jffs2_eraseblock *jeb;
748 size_t wbuf_retlen, donelen = 0;
749 uint32_t outvec_to = to;
750 int ret, invec;
752 /* If not writebuffered flash, don't bother */
753 if (!jffs2_is_writebuffered(c))
754 return jffs2_flash_direct_writev(c, invecs, count, to, retlen);
756 down_write(&c->wbuf_sem);
758 /* If wbuf_ofs is not initialized, set it to target address */
759 if (c->wbuf_ofs == 0xFFFFFFFF) {
760 c->wbuf_ofs = PAGE_DIV(to);
761 c->wbuf_len = PAGE_MOD(to);
762 memset(c->wbuf,0xff,c->wbuf_pagesize);
766 * Sanity checks on target address. It's permitted to write
767 * at PAD(c->wbuf_len+c->wbuf_ofs), and it's permitted to
768 * write at the beginning of a new erase block. Anything else,
769 * and you die. New block starts at xxx000c (0-b = block
770 * header)
772 if (SECTOR_ADDR(to) != SECTOR_ADDR(c->wbuf_ofs)) {
773 /* It's a write to a new block */
774 if (c->wbuf_len) {
775 D1(printk(KERN_DEBUG "jffs2_flash_writev() to 0x%lx "
776 "causes flush of wbuf at 0x%08x\n",
777 (unsigned long)to, c->wbuf_ofs));
778 ret = __jffs2_flush_wbuf(c, PAD_NOACCOUNT);
779 if (ret)
780 goto outerr;
782 /* set pointer to new block */
783 c->wbuf_ofs = PAGE_DIV(to);
784 c->wbuf_len = PAGE_MOD(to);
787 if (to != PAD(c->wbuf_ofs + c->wbuf_len)) {
788 /* We're not writing immediately after the writebuffer. Bad. */
789 printk(KERN_CRIT "jffs2_flash_writev(): Non-contiguous write "
790 "to %08lx\n", (unsigned long)to);
791 if (c->wbuf_len)
792 printk(KERN_CRIT "wbuf was previously %08x-%08x\n",
793 c->wbuf_ofs, c->wbuf_ofs+c->wbuf_len);
794 BUG();
797 /* adjust alignment offset */
798 if (c->wbuf_len != PAGE_MOD(to)) {
799 c->wbuf_len = PAGE_MOD(to);
800 /* take care of alignment to next page */
801 if (!c->wbuf_len) {
802 c->wbuf_len = c->wbuf_pagesize;
803 ret = __jffs2_flush_wbuf(c, NOPAD);
804 if (ret)
805 goto outerr;
809 for (invec = 0; invec < count; invec++) {
810 int vlen = invecs[invec].iov_len;
811 uint8_t *v = invecs[invec].iov_base;
813 wbuf_retlen = jffs2_fill_wbuf(c, v, vlen);
815 if (c->wbuf_len == c->wbuf_pagesize) {
816 ret = __jffs2_flush_wbuf(c, NOPAD);
817 if (ret)
818 goto outerr;
820 vlen -= wbuf_retlen;
821 outvec_to += wbuf_retlen;
822 donelen += wbuf_retlen;
823 v += wbuf_retlen;
825 if (vlen >= c->wbuf_pagesize) {
826 ret = c->mtd->write(c->mtd, outvec_to, PAGE_DIV(vlen),
827 &wbuf_retlen, v);
828 if (ret < 0 || wbuf_retlen != PAGE_DIV(vlen))
829 goto outfile;
831 vlen -= wbuf_retlen;
832 outvec_to += wbuf_retlen;
833 c->wbuf_ofs = outvec_to;
834 donelen += wbuf_retlen;
835 v += wbuf_retlen;
838 wbuf_retlen = jffs2_fill_wbuf(c, v, vlen);
839 if (c->wbuf_len == c->wbuf_pagesize) {
840 ret = __jffs2_flush_wbuf(c, NOPAD);
841 if (ret)
842 goto outerr;
845 outvec_to += wbuf_retlen;
846 donelen += wbuf_retlen;
850 * If there's a remainder in the wbuf and it's a non-GC write,
851 * remember that the wbuf affects this ino
853 *retlen = donelen;
855 if (jffs2_sum_active()) {
856 int res = jffs2_sum_add_kvec(c, invecs, count, (uint32_t) to);
857 if (res)
858 return res;
861 if (c->wbuf_len && ino)
862 jffs2_wbuf_dirties_inode(c, ino);
864 ret = 0;
865 up_write(&c->wbuf_sem);
866 return ret;
868 outfile:
870 * At this point we have no problem, c->wbuf is empty. However
871 * refile nextblock to avoid writing again to same address.
874 spin_lock(&c->erase_completion_lock);
876 jeb = &c->blocks[outvec_to / c->sector_size];
877 jffs2_block_refile(c, jeb, REFILE_ANYWAY);
879 spin_unlock(&c->erase_completion_lock);
881 outerr:
882 *retlen = 0;
883 up_write(&c->wbuf_sem);
884 return ret;
888 * This is the entry for flash write.
889 * Check, if we work on NAND FLASH, if so build an kvec and write it via vritev
891 int jffs2_flash_write(struct jffs2_sb_info *c, loff_t ofs, size_t len,
892 size_t *retlen, const u_char *buf)
894 struct kvec vecs[1];
896 if (!jffs2_is_writebuffered(c))
897 return jffs2_flash_direct_write(c, ofs, len, retlen, buf);
899 vecs[0].iov_base = (unsigned char *) buf;
900 vecs[0].iov_len = len;
901 return jffs2_flash_writev(c, vecs, 1, ofs, retlen, 0);
905 Handle readback from writebuffer and ECC failure return
907 int jffs2_flash_read(struct jffs2_sb_info *c, loff_t ofs, size_t len, size_t *retlen, u_char *buf)
909 loff_t orbf = 0, owbf = 0, lwbf = 0;
910 int ret;
912 if (!jffs2_is_writebuffered(c))
913 return c->mtd->read(c->mtd, ofs, len, retlen, buf);
915 /* Read flash */
916 down_read(&c->wbuf_sem);
917 ret = c->mtd->read(c->mtd, ofs, len, retlen, buf);
919 if ( (ret == -EBADMSG || ret == -EUCLEAN) && (*retlen == len) ) {
920 if (ret == -EBADMSG)
921 printk(KERN_WARNING "mtd->read(0x%zx bytes from 0x%llx)"
922 " returned ECC error\n", len, ofs);
924 * We have the raw data without ECC correction in the buffer,
925 * maybe we are lucky and all data or parts are correct. We
926 * check the node. If data are corrupted node check will sort
927 * it out. We keep this block, it will fail on write or erase
928 * and the we mark it bad. Or should we do that now? But we
929 * should give him a chance. Maybe we had a system crash or
930 * power loss before the ecc write or a erase was completed.
931 * So we return success. :)
933 ret = 0;
936 /* if no writebuffer available or write buffer empty, return */
937 if (!c->wbuf_pagesize || !c->wbuf_len)
938 goto exit;
940 /* if we read in a different block, return */
941 if (SECTOR_ADDR(ofs) != SECTOR_ADDR(c->wbuf_ofs))
942 goto exit;
944 if (ofs >= c->wbuf_ofs) {
945 owbf = (ofs - c->wbuf_ofs); /* offset in write buffer */
946 if (owbf > c->wbuf_len) /* is read beyond write buffer ? */
947 goto exit;
948 lwbf = c->wbuf_len - owbf; /* number of bytes to copy */
949 if (lwbf > len)
950 lwbf = len;
951 } else {
952 orbf = (c->wbuf_ofs - ofs); /* offset in read buffer */
953 if (orbf > len) /* is write beyond write buffer ? */
954 goto exit;
955 lwbf = len - orbf; /* number of bytes to copy */
956 if (lwbf > c->wbuf_len)
957 lwbf = c->wbuf_len;
959 if (lwbf > 0)
960 memcpy(buf+orbf,c->wbuf+owbf,lwbf);
962 exit:
963 up_read(&c->wbuf_sem);
964 return ret;
/* Multiplier applied to c->oobavail to size c->oobbuf and the OOB read done
 * by jffs2_check_oob_empty(). */
967 #define NR_OOB_SCAN_PAGES 4
969 /* For historical reasons we use only 12 bytes for OOB clean marker */
970 #define OOB_CM_SIZE 12
/* Cleanmarker image for the out-of-band area: compared against the OOB
 * contents by jffs2_check_nand_cleanmarker() and written out by
 * jffs2_write_nand_cleanmarker(). */
972 static const struct jffs2_unknown_node oob_cleanmarker =
974 .magic = constant_cpu_to_je16(JFFS2_MAGIC_BITMASK),
975 .nodetype = constant_cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER),
976 .totlen = constant_cpu_to_je32(8)
980 * Check, if the out of band area is empty. This function knows about the clean
981 * marker and if it is present in OOB, treats the OOB as empty anyway.
983 int jffs2_check_oob_empty(struct jffs2_sb_info *c,
984 struct jffs2_eraseblock *jeb, int mode)
986 int i, ret;
987 int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
988 struct mtd_oob_ops ops;
990 ops.mode = MTD_OOB_AUTO;
991 ops.ooblen = NR_OOB_SCAN_PAGES * c->oobavail;
992 ops.oobbuf = c->oobbuf;
993 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
994 ops.datbuf = NULL;
996 ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
997 if (ret || ops.oobretlen != ops.ooblen) {
998 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
999 " bytes, read %zd bytes, error %d\n",
1000 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1001 if (!ret)
1002 ret = -EIO;
1003 return ret;
1006 for(i = 0; i < ops.ooblen; i++) {
1007 if (mode && i < cmlen)
1008 /* Yeah, we know about the cleanmarker */
1009 continue;
1011 if (ops.oobbuf[i] != 0xFF) {
1012 D2(printk(KERN_DEBUG "Found %02x at %x in OOB for "
1013 "%08x\n", ops.oobbuf[i], i, jeb->offset));
1014 return 1;
1018 return 0;
1022 * Check for a valid cleanmarker.
1023 * Returns: 0 if a valid cleanmarker was found
1024 * 1 if no cleanmarker was found
1025 * negative error code if an error occurred
1027 int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
1028 struct jffs2_eraseblock *jeb)
1030 struct mtd_oob_ops ops;
1031 int ret, cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
1033 ops.mode = MTD_OOB_AUTO;
1034 ops.ooblen = cmlen;
1035 ops.oobbuf = c->oobbuf;
1036 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
1037 ops.datbuf = NULL;
1039 ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
1040 if (ret || ops.oobretlen != ops.ooblen) {
1041 printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
1042 " bytes, read %zd bytes, error %d\n",
1043 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1044 if (!ret)
1045 ret = -EIO;
1046 return ret;
1049 return !!memcmp(&oob_cleanmarker, c->oobbuf, cmlen);
1052 int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
1053 struct jffs2_eraseblock *jeb)
1055 int ret;
1056 struct mtd_oob_ops ops;
1057 int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
1059 ops.mode = MTD_OOB_AUTO;
1060 ops.ooblen = cmlen;
1061 ops.oobbuf = (uint8_t *)&oob_cleanmarker;
1062 ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
1063 ops.datbuf = NULL;
1065 ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops);
1066 if (ret || ops.oobretlen != ops.ooblen) {
1067 printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd"
1068 " bytes, read %zd bytes, error %d\n",
1069 jeb->offset, ops.ooblen, ops.oobretlen, ret);
1070 if (!ret)
1071 ret = -EIO;
1072 return ret;
1075 return 0;
1079 * On NAND we try to mark this block bad. If the block was erased more
1080 * than MAX_ERASE_FAILURES we mark it finaly bad.
1081 * Don't care about failures. This block remains on the erase-pending
1082 * or badblock list as long as nobody manipulates the flash with
1083 * a bootloader or something like that.
1086 int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, uint32_t bad_offset)
1088 int ret;
1090 /* if the count is < max, we try to write the counter to the 2nd page oob area */
1091 if( ++jeb->bad_count < MAX_ERASE_FAILURES)
1092 return 0;
1094 if (!c->mtd->block_markbad)
1095 return 1; // What else can we do?
1097 printk(KERN_WARNING "JFFS2: marking eraseblock at %08x\n as bad", bad_offset);
1098 ret = c->mtd->block_markbad(c->mtd, bad_offset);
1100 if (ret) {
1101 D1(printk(KERN_WARNING "jffs2_write_nand_badblock(): Write failed for block at %08x: error %d\n", jeb->offset, ret));
1102 return ret;
1104 return 1;
1107 int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
1109 struct nand_ecclayout *oinfo = c->mtd->ecclayout;
1111 if (!c->mtd->oobsize)
1112 return 0;
1114 /* Cleanmarker is out-of-band, so inline size zero */
1115 c->cleanmarker_size = 0;
1117 if (!oinfo || oinfo->oobavail == 0) {
1118 printk(KERN_ERR "inconsistent device description\n");
1119 return -EINVAL;
1122 D1(printk(KERN_DEBUG "JFFS2 using OOB on NAND\n"));
1124 c->oobavail = oinfo->oobavail;
1126 /* Initialise write buffer */
1127 init_rwsem(&c->wbuf_sem);
1128 c->wbuf_pagesize = c->mtd->writesize;
1129 c->wbuf_ofs = 0xFFFFFFFF;
1131 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1132 if (!c->wbuf)
1133 return -ENOMEM;
1135 c->oobbuf = kmalloc(NR_OOB_SCAN_PAGES * c->oobavail, GFP_KERNEL);
1136 if (!c->oobbuf) {
1137 kfree(c->wbuf);
1138 return -ENOMEM;
1141 return 0;
1144 void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c)
1146 kfree(c->wbuf);
1147 kfree(c->oobbuf);
1150 int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
1151 c->cleanmarker_size = 0; /* No cleanmarkers needed */
1153 /* Initialize write buffer */
1154 init_rwsem(&c->wbuf_sem);
1157 c->wbuf_pagesize = c->mtd->erasesize;
1159 /* Find a suitable c->sector_size
1160 * - Not too much sectors
1161 * - Sectors have to be at least 4 K + some bytes
1162 * - All known dataflashes have erase sizes of 528 or 1056
1163 * - we take at least 8 eraseblocks and want to have at least 8K size
1164 * - The concatenation should be a power of 2
1167 c->sector_size = 8 * c->mtd->erasesize;
1169 while (c->sector_size < 8192) {
1170 c->sector_size *= 2;
1173 /* It may be necessary to adjust the flash size */
1174 c->flash_size = c->mtd->size;
1176 if ((c->flash_size % c->sector_size) != 0) {
1177 c->flash_size = (c->flash_size / c->sector_size) * c->sector_size;
1178 printk(KERN_WARNING "JFFS2 flash size adjusted to %dKiB\n", c->flash_size);
1181 c->wbuf_ofs = 0xFFFFFFFF;
1182 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1183 if (!c->wbuf)
1184 return -ENOMEM;
1186 printk(KERN_INFO "JFFS2 write-buffering enabled buffer (%d) erasesize (%d)\n", c->wbuf_pagesize, c->sector_size);
1188 return 0;
1191 void jffs2_dataflash_cleanup(struct jffs2_sb_info *c) {
1192 kfree(c->wbuf);
1195 int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
1196 /* Cleanmarker currently occupies whole programming regions,
1197 * either one or 2 for 8Byte STMicro flashes. */
1198 c->cleanmarker_size = max(16u, c->mtd->writesize);
1200 /* Initialize write buffer */
1201 init_rwsem(&c->wbuf_sem);
1202 c->wbuf_pagesize = c->mtd->writesize;
1203 c->wbuf_ofs = 0xFFFFFFFF;
1205 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1206 if (!c->wbuf)
1207 return -ENOMEM;
1209 return 0;
1212 void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c) {
1213 kfree(c->wbuf);
1216 int jffs2_ubivol_setup(struct jffs2_sb_info *c) {
1217 c->cleanmarker_size = 0;
1219 if (c->mtd->writesize == 1)
1220 /* We do not need write-buffer */
1221 return 0;
1223 init_rwsem(&c->wbuf_sem);
1225 c->wbuf_pagesize = c->mtd->writesize;
1226 c->wbuf_ofs = 0xFFFFFFFF;
1227 c->wbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
1228 if (!c->wbuf)
1229 return -ENOMEM;
1231 printk(KERN_INFO "JFFS2 write-buffering enabled buffer (%d) erasesize (%d)\n", c->wbuf_pagesize, c->sector_size);
1233 return 0;
1236 void jffs2_ubivol_cleanup(struct jffs2_sb_info *c) {
1237 kfree(c->wbuf);