etc/services - sync with NetBSD-8
[minix.git] / sys / ufs / chfs / chfs_gc.c
blobee62d72093a6dde1955a167638423482b00210d9
1 /* $NetBSD: chfs_gc.c,v 1.8 2015/01/11 17:28:22 hannken Exp $ */
3 /*-
4 * Copyright (c) 2010 Department of Software Engineering,
5 * University of Szeged, Hungary
6 * Copyright (c) 2010 Tamas Toth <ttoth@inf.u-szeged.hu>
7 * Copyright (c) 2010 Adam Hoka <ahoka@NetBSD.org>
8 * All rights reserved.
10 * This code is derived from software contributed to The NetBSD Foundation
11 * by the Department of Software Engineering, University of Szeged, Hungary
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
24 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
25 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
26 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
35 #include <sys/cprng.h>
36 #include "chfs.h"
38 void chfs_gc_release_inode(struct chfs_mount *,
39 struct chfs_inode *);
40 struct chfs_inode *chfs_gc_fetch_inode(struct chfs_mount *,
41 ino_t, uint32_t);
42 int chfs_check(struct chfs_mount *, struct chfs_vnode_cache *);
43 void chfs_clear_inode(struct chfs_mount *, struct chfs_inode *);
46 struct chfs_eraseblock *find_gc_block(struct chfs_mount *);
47 int chfs_gcollect_pristine(struct chfs_mount *,
48 struct chfs_eraseblock *,
49 struct chfs_vnode_cache *, struct chfs_node_ref *);
50 int chfs_gcollect_live(struct chfs_mount *,
51 struct chfs_eraseblock *, struct chfs_node_ref *,
52 struct chfs_inode *);
53 int chfs_gcollect_vnode(struct chfs_mount *, struct chfs_inode *);
54 int chfs_gcollect_dirent(struct chfs_mount *,
55 struct chfs_eraseblock *, struct chfs_inode *,
56 struct chfs_dirent *);
57 int chfs_gcollect_deletion_dirent(struct chfs_mount *,
58 struct chfs_eraseblock *, struct chfs_inode *,
59 struct chfs_dirent *);
60 int chfs_gcollect_dnode(struct chfs_mount *,
61 struct chfs_eraseblock *, struct chfs_inode *,
62 struct chfs_full_dnode *, uint32_t, uint32_t);
64 /*
65 * chfs_gc_trigger - wakes up GC thread, if it should run
66 * Must be called with chm_lock_mountfields held.
68 void
69 chfs_gc_trigger(struct chfs_mount *chmp)
71 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
73 if (gc->gcth_running &&
74 chfs_gc_thread_should_wake(chmp)) {
75 cv_signal(&gc->gcth_wakeup);
80 /* chfs_gc_thread - garbage collector's thread */
81 void
82 chfs_gc_thread(void *data)
84 struct chfs_mount *chmp = data;
85 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
87 dbg_gc("[GC THREAD] thread started\n");
89 mutex_enter(&chmp->chm_lock_mountfields);
90 while (gc->gcth_running) {
91 /* we must call chfs_gc_thread_should_wake with chm_lock_mountfields
92 * held, which is a bit awkwardly done here, but we cant relly
93 * do it otherway with the current design...
95 if (chfs_gc_thread_should_wake(chmp)) {
96 if (chfs_gcollect_pass(chmp) == ENOSPC) {
97 mutex_exit(&chmp->chm_lock_mountfields);
98 panic("No space for garbage collection\n");
99 /* XXX why break here? i have added a panic
100 * here to see if it gets triggered -ahoka
102 break;
104 /* XXX gcollect_pass drops the mutex */
107 cv_timedwait_sig(&gc->gcth_wakeup,
108 &chmp->chm_lock_mountfields, mstohz(100));
110 mutex_exit(&chmp->chm_lock_mountfields);
112 dbg_gc("[GC THREAD] thread stopped\n");
113 kthread_exit(0);
116 /* chfs_gc_thread_start - starts GC */
117 void
118 chfs_gc_thread_start(struct chfs_mount *chmp)
120 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
122 cv_init(&gc->gcth_wakeup, "chfsgccv");
124 gc->gcth_running = true;
125 kthread_create(PRI_NONE, /*KTHREAD_MPSAFE |*/ KTHREAD_MUSTJOIN,
126 NULL, chfs_gc_thread, chmp, &gc->gcth_thread,
127 "chfsgcth");
130 /* chfs_gc_thread_start - stops GC */
131 void
132 chfs_gc_thread_stop(struct chfs_mount *chmp)
134 struct garbage_collector_thread *gc = &chmp->chm_gc_thread;
136 /* check if it is actually running */
137 if (gc->gcth_running) {
138 gc->gcth_running = false;
139 } else {
140 return;
142 cv_signal(&gc->gcth_wakeup);
143 dbg_gc("[GC THREAD] stop signal sent\n");
145 kthread_join(gc->gcth_thread);
146 #ifdef BROKEN_KTH_JOIN
147 kpause("chfsthjoin", false, mstohz(1000), NULL);
148 #endif
150 cv_destroy(&gc->gcth_wakeup);
154 * chfs_gc_thread_should_wake - checks if GC thread should wake up
155 * Must be called with chm_lock_mountfields held.
156 * Returns 1, if GC should wake up and 0 else.
159 chfs_gc_thread_should_wake(struct chfs_mount *chmp)
161 int nr_very_dirty = 0;
162 struct chfs_eraseblock *cheb;
163 uint32_t dirty;
165 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
167 /* Erase pending queue is not empty. */
168 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
169 dbg_gc("erase_pending\n");
170 return 1;
173 /* There is something unchecked in the filesystem. */
174 if (chmp->chm_unchecked_size) {
175 dbg_gc("unchecked\n");
176 return 1;
179 dirty = chmp->chm_dirty_size - chmp->chm_nr_erasable_blocks *
180 chmp->chm_ebh->eb_size;
182 /* Number of free and erasable blocks are critical. */
183 if (chmp->chm_nr_free_blocks + chmp->chm_nr_erasable_blocks <
184 chmp->chm_resv_blocks_gctrigger && (dirty > chmp->chm_nospc_dirty)) {
185 dbg_gc("free: %d + erasable: %d < resv: %d\n",
186 chmp->chm_nr_free_blocks, chmp->chm_nr_erasable_blocks,
187 chmp->chm_resv_blocks_gctrigger);
188 dbg_gc("dirty: %d > nospc_dirty: %d\n",
189 dirty, chmp->chm_nospc_dirty);
191 return 1;
194 /* There is too much very dirty blocks. */
195 TAILQ_FOREACH(cheb, &chmp->chm_very_dirty_queue, queue) {
196 nr_very_dirty++;
197 if (nr_very_dirty == chmp->chm_vdirty_blocks_gctrigger) {
198 dbg_gc("nr_very_dirty\n");
199 return 1;
203 /* Everythin OK, GC shouldn't run. */
204 return 0;
/*
 * chfs_gc_release_inode - counterpart of chfs_gc_fetch_inode().
 *
 * Currently a placeholder: it only emits a debug message and does not
 * touch `chmp` or `ip`.
 */
void
chfs_gc_release_inode(struct chfs_mount *chmp,
    struct chfs_inode *ip)
{
	dbg_gc("release inode\n");
}
215 /* chfs_gc_fetch_inode - assign the given inode to the GC */
216 struct chfs_inode *
217 chfs_gc_fetch_inode(struct chfs_mount *chmp, ino_t vno,
218 uint32_t unlinked)
220 struct vnode *vp = NULL;
221 struct chfs_vnode_cache *vc;
222 struct chfs_inode *ip;
223 dbg_gc("fetch inode %llu\n", (unsigned long long)vno);
225 if (unlinked) {
226 dbg_gc("unlinked\n");
227 vp = chfs_vnode_lookup(chmp, vno);
228 if (!vp) {
229 mutex_enter(&chmp->chm_lock_vnocache);
230 vc = chfs_vnode_cache_get(chmp, vno);
231 if (!vc) {
232 mutex_exit(&chmp->chm_lock_vnocache);
233 return NULL;
235 if (vc->state != VNO_STATE_CHECKEDABSENT) {
236 mutex_exit(&chmp->chm_lock_vnocache);
237 /* XXX why do we need the delay here?! */
238 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
239 cv_timedwait_sig(
240 &chmp->chm_gc_thread.gcth_wakeup,
241 &chmp->chm_lock_mountfields, mstohz(50));
242 } else {
243 mutex_exit(&chmp->chm_lock_vnocache);
245 return NULL;
247 } else {
248 dbg_gc("vnode lookup\n");
249 vp = chfs_vnode_lookup(chmp, vno);
251 dbg_gc("vp to ip\n");
252 ip = VTOI(vp);
253 KASSERT(ip);
254 vrele(vp);
256 return ip;
259 extern rb_tree_ops_t frag_rbtree_ops;
261 /* chfs_check - checks an inode with minimal initialization */
263 chfs_check(struct chfs_mount *chmp, struct chfs_vnode_cache *chvc)
265 KASSERT(mutex_owned(&chmp->chm_lock_vnocache));
267 struct chfs_inode *ip;
268 struct vnode *vp;
269 int ret;
271 /* Get a new inode. */
272 ip = pool_get(&chfs_inode_pool, PR_WAITOK);
273 if (!ip) {
274 return ENOMEM;
277 vp = kmem_zalloc(sizeof(struct vnode), KM_SLEEP);
279 /* Minimal initialization. */
280 ip->chvc = chvc;
281 ip->vp = vp;
283 vp->v_data = ip;
285 rb_tree_init(&ip->fragtree, &frag_rbtree_ops);
286 TAILQ_INIT(&ip->dents);
288 /* Build the node. */
289 mutex_exit(&chmp->chm_lock_vnocache);
290 ret = chfs_read_inode_internal(chmp, ip);
291 mutex_enter(&chmp->chm_lock_vnocache);
292 if (!ret) {
293 chfs_clear_inode(chmp, ip);
296 /* Release inode. */
297 pool_put(&chfs_inode_pool, ip);
299 return ret;
302 /* chfs_clear_inode - kills a minimal inode */
303 void
304 chfs_clear_inode(struct chfs_mount *chmp, struct chfs_inode *ip)
306 KASSERT(mutex_owned(&chmp->chm_lock_vnocache));
308 struct chfs_dirent *fd, *tmpfd;
309 struct chfs_vnode_cache *chvc;
310 struct chfs_node_ref *nref;
312 chvc = ip->chvc;
313 /* shouldnt this be: */
314 //bool deleted = (chvc && !(chvc->pvno || chvc->nlink));
315 int deleted = (chvc && !(chvc->pvno | chvc->nlink));
317 /* Set actual state. */
318 if (chvc && chvc->state != VNO_STATE_CHECKING) {
319 chvc->state = VNO_STATE_CLEARING;
322 /* Remove vnode information. */
323 while (deleted && chvc->v != (struct chfs_node_ref *)chvc) {
324 nref = chvc->v;
325 chfs_remove_and_obsolete(chmp, chvc, nref, &chvc->v);
328 /* Destroy data. */
329 chfs_kill_fragtree(chmp, &ip->fragtree);
331 /* Clear dirents. */
332 TAILQ_FOREACH_SAFE(fd, &ip->dents, fds, tmpfd) {
333 chfs_free_dirent(fd);
336 /* Remove node from vnode cache. */
337 if (chvc && chvc->state == VNO_STATE_CHECKING) {
338 chvc->state = VNO_STATE_CHECKEDABSENT;
339 if ((struct chfs_vnode_cache *)chvc->v == chvc &&
340 (struct chfs_vnode_cache *)chvc->dirents == chvc &&
341 (struct chfs_vnode_cache *)chvc->dnode == chvc)
342 chfs_vnode_cache_remove(chmp, chvc);
346 /* find_gc_block - finds the next block for GC */
347 struct chfs_eraseblock *
348 find_gc_block(struct chfs_mount *chmp)
350 struct chfs_eraseblock *ret;
351 struct chfs_eraseblock_queue *nextqueue;
353 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
355 /* Get a random number. */
356 uint32_t n = cprng_fast32() % 128;
358 again:
359 /* Find an eraseblock queue. */
360 if (n<50 && !TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
361 dbg_gc("Picking block from erase_pending_queue to GC next\n");
362 nextqueue = &chmp->chm_erase_pending_queue;
363 } else if (n<110 && !TAILQ_EMPTY(&chmp->chm_very_dirty_queue) ) {
364 dbg_gc("Picking block from very_dirty_queue to GC next\n");
365 nextqueue = &chmp->chm_very_dirty_queue;
366 } else if (n<126 && !TAILQ_EMPTY(&chmp->chm_dirty_queue) ) {
367 dbg_gc("Picking block from dirty_queue to GC next\n");
368 nextqueue = &chmp->chm_dirty_queue;
369 } else if (!TAILQ_EMPTY(&chmp->chm_clean_queue)) {
370 dbg_gc("Picking block from clean_queue to GC next\n");
371 nextqueue = &chmp->chm_clean_queue;
372 } else if (!TAILQ_EMPTY(&chmp->chm_dirty_queue)) {
373 dbg_gc("Picking block from dirty_queue to GC next"
374 " (clean_queue was empty)\n");
375 nextqueue = &chmp->chm_dirty_queue;
376 } else if (!TAILQ_EMPTY(&chmp->chm_very_dirty_queue)) {
377 dbg_gc("Picking block from very_dirty_queue to GC next"
378 " (clean_queue and dirty_queue were empty)\n");
379 nextqueue = &chmp->chm_very_dirty_queue;
380 } else if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
381 dbg_gc("Picking block from erase_pending_queue to GC next"
382 " (clean_queue and {very_,}dirty_queue were empty)\n");
383 nextqueue = &chmp->chm_erase_pending_queue;
384 } else if (!TAILQ_EMPTY(&chmp->chm_erasable_pending_wbuf_queue)) {
385 dbg_gc("Synching wbuf in order to reuse "
386 "erasable_pendig_wbuf_queue blocks\n");
387 rw_enter(&chmp->chm_lock_wbuf, RW_WRITER);
388 chfs_flush_pending_wbuf(chmp);
389 rw_exit(&chmp->chm_lock_wbuf);
390 goto again;
391 } else {
392 dbg_gc("CHFS: no clean, dirty _or_ erasable"
393 " blocks to GC from! Where are they all?\n");
394 return NULL;
397 /* Get the first block of the queue. */
398 ret = TAILQ_FIRST(nextqueue);
399 if (chmp->chm_nextblock) {
400 dbg_gc("nextblock num: %u - gcblock num: %u\n",
401 chmp->chm_nextblock->lnr, ret->lnr);
402 if (ret == chmp->chm_nextblock)
403 goto again;
405 TAILQ_REMOVE(nextqueue, ret, queue);
407 /* Set GC block. */
408 chmp->chm_gcblock = ret;
409 /* Set GC node. */
410 ret->gc_node = ret->first_node;
412 if (!ret->gc_node) {
413 dbg_gc("Oops! ret->gc_node at LEB: %u is NULL\n", ret->lnr);
414 panic("CHFS BUG - one LEB's gc_node is NULL\n");
417 /* TODO wasted size? */
418 return ret;
421 /* chfs_gcollect_pass - this is the main function of GC */
423 chfs_gcollect_pass(struct chfs_mount *chmp)
425 struct chfs_vnode_cache *vc;
426 struct chfs_eraseblock *eb;
427 struct chfs_node_ref *nref;
428 uint32_t gcblock_dirty;
429 struct chfs_inode *ip;
430 ino_t vno, pvno;
431 uint32_t nlink;
432 int ret = 0;
434 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
436 /* Check all vnodes. */
437 for (;;) {
438 mutex_enter(&chmp->chm_lock_sizes);
440 /* Check unchecked size. */
441 dbg_gc("unchecked size == %u\n", chmp->chm_unchecked_size);
442 if (!chmp->chm_unchecked_size)
443 break;
445 /* Compare vnode number to the maximum. */
446 if (chmp->chm_checked_vno > chmp->chm_max_vno) {
447 mutex_exit(&chmp->chm_lock_sizes);
448 dbg_gc("checked_vno (#%llu) > max_vno (#%llu)\n",
449 (unsigned long long)chmp->chm_checked_vno,
450 (unsigned long long)chmp->chm_max_vno);
451 return ENOSPC;
454 mutex_exit(&chmp->chm_lock_sizes);
456 mutex_enter(&chmp->chm_lock_vnocache);
457 dbg_gc("checking vno #%llu\n",
458 (unsigned long long)chmp->chm_checked_vno);
459 dbg_gc("get vnode cache\n");
461 /* OK, Get and check the vnode cache. */
462 vc = chfs_vnode_cache_get(chmp, chmp->chm_checked_vno++);
464 if (!vc) {
465 dbg_gc("!vc\n");
466 mutex_exit(&chmp->chm_lock_vnocache);
467 continue;
470 if ((vc->pvno | vc->nlink) == 0) {
471 dbg_gc("(pvno | nlink) == 0\n");
472 mutex_exit(&chmp->chm_lock_vnocache);
473 continue;
476 /* Find out the state of the vnode. */
477 dbg_gc("switch\n");
478 switch (vc->state) {
479 case VNO_STATE_CHECKEDABSENT:
480 /* FALLTHROUGH */
481 case VNO_STATE_PRESENT:
482 mutex_exit(&chmp->chm_lock_vnocache);
483 continue;
485 case VNO_STATE_GC:
486 /* FALLTHROUGH */
487 case VNO_STATE_CHECKING:
488 mutex_exit(&chmp->chm_lock_vnocache);
489 dbg_gc("VNO_STATE GC or CHECKING\n");
490 panic("CHFS BUG - vc state gc or checking\n");
492 case VNO_STATE_READING:
493 chmp->chm_checked_vno--;
494 mutex_exit(&chmp->chm_lock_vnocache);
495 /* XXX why do we need the delay here?! */
496 kpause("chvncrea", true, mstohz(50), NULL);
498 return 0;
500 default:
501 mutex_exit(&chmp->chm_lock_vnocache);
502 dbg_gc("default\n");
503 panic("CHFS BUG - vc state is other what we"
504 " checked\n");
506 case VNO_STATE_UNCHECKED:
510 /* We found an unchecked vnode. */
512 vc->state = VNO_STATE_CHECKING;
514 /* XXX check if this is too heavy to call under
515 * chm_lock_vnocache
517 ret = chfs_check(chmp, vc);
518 vc->state = VNO_STATE_CHECKEDABSENT;
520 mutex_exit(&chmp->chm_lock_vnocache);
521 return ret;
524 /* Get GC block. */
525 eb = chmp->chm_gcblock;
527 if (!eb) {
528 eb = find_gc_block(chmp);
531 if (!eb) {
532 dbg_gc("!eb\n");
533 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
534 mutex_exit(&chmp->chm_lock_sizes);
535 return EAGAIN;
537 mutex_exit(&chmp->chm_lock_sizes);
538 return EIO;
541 if (!eb->used_size) {
542 dbg_gc("!eb->used_size\n");
543 goto eraseit;
546 /* Get GC node. */
547 nref = eb->gc_node;
548 gcblock_dirty = eb->dirty_size;
550 /* Find a node which wasn't obsoleted yet.
551 * Obsoleted nodes will be simply deleted after the whole block has checked. */
552 while(CHFS_REF_OBSOLETE(nref)) {
553 #ifdef DBG_MSG_GC
554 if (nref == chmp->chm_blocks[nref->nref_lnr].last_node) {
555 dbg_gc("THIS NODE IS THE LAST NODE OF ITS EB\n");
557 #endif
558 nref = node_next(nref);
559 if (!nref) {
560 eb->gc_node = nref;
561 mutex_exit(&chmp->chm_lock_sizes);
562 panic("CHFS BUG - nref is NULL)\n");
566 /* We found a "not obsoleted" node. */
567 eb->gc_node = nref;
568 KASSERT(nref->nref_lnr == chmp->chm_gcblock->lnr);
570 /* Check if node is in any chain. */
571 if (!nref->nref_next) {
572 /* This node is not in any chain. Simply collect it, or obsolete. */
573 mutex_exit(&chmp->chm_lock_sizes);
574 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
575 chfs_gcollect_pristine(chmp, eb, NULL, nref);
576 } else {
577 chfs_mark_node_obsolete(chmp, nref);
579 goto lock_size;
582 mutex_exit(&chmp->chm_lock_sizes);
584 mutex_enter(&chmp->chm_lock_vnocache);
586 dbg_gc("nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);
587 vc = chfs_nref_to_vc(nref);
589 /* Check the state of the node. */
590 dbg_gc("switch\n");
591 switch(vc->state) {
592 case VNO_STATE_CHECKEDABSENT:
593 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
594 vc->state = VNO_STATE_GC;
596 break;
598 case VNO_STATE_PRESENT:
599 break;
601 case VNO_STATE_UNCHECKED:
602 /* FALLTHROUGH */
603 case VNO_STATE_CHECKING:
604 /* FALLTHROUGH */
605 case VNO_STATE_GC:
606 mutex_exit(&chmp->chm_lock_vnocache);
607 panic("CHFS BUG - vc state unchecked,"
608 " checking or gc (vno #%llu, num #%d)\n",
609 (unsigned long long)vc->vno, vc->state);
611 case VNO_STATE_READING:
612 /* Node is in use at this time. */
613 mutex_exit(&chmp->chm_lock_vnocache);
614 kpause("chvncrea", true, mstohz(50), NULL);
615 return 0;
618 if (vc->state == VNO_STATE_GC) {
619 dbg_gc("vc->state == VNO_STATE_GC\n");
620 vc->state = VNO_STATE_CHECKEDABSENT;
621 mutex_exit(&chmp->chm_lock_vnocache);
622 ret = chfs_gcollect_pristine(chmp, eb, NULL, nref);
624 //TODO wake_up(&chmp->chm_vnocache_wq);
625 if (ret != EBADF)
626 goto test_gcnode;
627 mutex_enter(&chmp->chm_lock_vnocache);
630 /* Collect living node. */
631 vno = vc->vno;
632 pvno = vc->pvno;
633 nlink = vc->nlink;
634 mutex_exit(&chmp->chm_lock_vnocache);
636 ip = chfs_gc_fetch_inode(chmp, vno, !(pvno | nlink));
638 if (!ip) {
639 dbg_gc("!ip\n");
640 ret = 0;
641 goto lock_size;
644 chfs_gcollect_live(chmp, eb, nref, ip);
646 chfs_gc_release_inode(chmp, ip);
648 test_gcnode:
649 if (eb->dirty_size == gcblock_dirty &&
650 !CHFS_REF_OBSOLETE(eb->gc_node)) {
651 dbg_gc("ERROR collecting node at %u failed.\n",
652 CHFS_GET_OFS(eb->gc_node->nref_offset));
654 ret = ENOSPC;
657 lock_size:
658 KASSERT(mutex_owned(&chmp->chm_lock_mountfields));
659 mutex_enter(&chmp->chm_lock_sizes);
660 eraseit:
661 dbg_gc("eraseit\n");
663 if (chmp->chm_gcblock) {
664 /* This is only for debugging. */
665 dbg_gc("eb used size = %u\n", chmp->chm_gcblock->used_size);
666 dbg_gc("eb free size = %u\n", chmp->chm_gcblock->free_size);
667 dbg_gc("eb dirty size = %u\n", chmp->chm_gcblock->dirty_size);
668 dbg_gc("eb unchecked size = %u\n",
669 chmp->chm_gcblock->unchecked_size);
670 dbg_gc("eb wasted size = %u\n", chmp->chm_gcblock->wasted_size);
672 KASSERT(chmp->chm_gcblock->used_size + chmp->chm_gcblock->free_size +
673 chmp->chm_gcblock->dirty_size +
674 chmp->chm_gcblock->unchecked_size +
675 chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size);
679 /* Check the state of GC block. */
680 if (chmp->chm_gcblock && chmp->chm_gcblock->dirty_size +
681 chmp->chm_gcblock->wasted_size == chmp->chm_ebh->eb_size) {
682 dbg_gc("Block at leb #%u completely obsoleted by GC, "
683 "Moving to erase_pending_queue\n", chmp->chm_gcblock->lnr);
684 TAILQ_INSERT_TAIL(&chmp->chm_erase_pending_queue,
685 chmp->chm_gcblock, queue);
686 chmp->chm_gcblock = NULL;
687 chmp->chm_nr_erasable_blocks++;
688 if (!TAILQ_EMPTY(&chmp->chm_erase_pending_queue)) {
689 ret = chfs_remap_leb(chmp);
693 mutex_exit(&chmp->chm_lock_sizes);
694 dbg_gc("return\n");
695 return ret;
699 /* chfs_gcollect_pristine - collects a pristine node */
701 chfs_gcollect_pristine(struct chfs_mount *chmp, struct chfs_eraseblock *cheb,
702 struct chfs_vnode_cache *chvc, struct chfs_node_ref *nref)
704 struct chfs_node_ref *newnref;
705 struct chfs_flash_node_hdr *nhdr;
706 struct chfs_flash_vnode *fvnode;
707 struct chfs_flash_dirent_node *fdirent;
708 struct chfs_flash_data_node *fdata;
709 int ret, retries = 0;
710 uint32_t ofs, crc;
711 size_t totlen = chfs_nref_len(chmp, cheb, nref);
712 char *data;
713 struct iovec vec;
714 size_t retlen;
716 dbg_gc("gcollect_pristine\n");
718 data = kmem_alloc(totlen, KM_SLEEP);
719 if (!data)
720 return ENOMEM;
722 ofs = CHFS_GET_OFS(nref->nref_offset);
724 /* Read header. */
725 ret = chfs_read_leb(chmp, nref->nref_lnr, data, ofs, totlen, &retlen);
726 if (ret) {
727 dbg_gc("reading error\n");
728 goto err_out;
730 if (retlen != totlen) {
731 dbg_gc("read size error\n");
732 ret = EIO;
733 goto err_out;
735 nhdr = (struct chfs_flash_node_hdr *)data;
737 /* Check the header. */
738 if (le16toh(nhdr->magic) != CHFS_FS_MAGIC_BITMASK) {
739 dbg_gc("node header magic number error\n");
740 ret = EBADF;
741 goto err_out;
743 crc = crc32(0, (uint8_t *)nhdr, CHFS_NODE_HDR_SIZE - 4);
744 if (crc != le32toh(nhdr->hdr_crc)) {
745 dbg_gc("node header crc error\n");
746 ret = EBADF;
747 goto err_out;
750 /* Read the remaining parts. */
751 switch(le16toh(nhdr->type)) {
752 case CHFS_NODETYPE_VNODE:
753 /* vnode information node */
754 fvnode = (struct chfs_flash_vnode *)data;
755 crc = crc32(0, (uint8_t *)fvnode, sizeof(struct chfs_flash_vnode) - 4);
756 if (crc != le32toh(fvnode->node_crc)) {
757 dbg_gc("vnode crc error\n");
758 ret = EBADF;
759 goto err_out;
761 break;
762 case CHFS_NODETYPE_DIRENT:
763 /* dirent node */
764 fdirent = (struct chfs_flash_dirent_node *)data;
765 crc = crc32(0, (uint8_t *)fdirent, sizeof(struct chfs_flash_dirent_node) - 4);
766 if (crc != le32toh(fdirent->node_crc)) {
767 dbg_gc("dirent crc error\n");
768 ret = EBADF;
769 goto err_out;
771 crc = crc32(0, fdirent->name, fdirent->nsize);
772 if (crc != le32toh(fdirent->name_crc)) {
773 dbg_gc("dirent name crc error\n");
774 ret = EBADF;
775 goto err_out;
777 break;
778 case CHFS_NODETYPE_DATA:
779 /* data node */
780 fdata = (struct chfs_flash_data_node *)data;
781 crc = crc32(0, (uint8_t *)fdata, sizeof(struct chfs_flash_data_node) - 4);
782 if (crc != le32toh(fdata->node_crc)) {
783 dbg_gc("data node crc error\n");
784 ret = EBADF;
785 goto err_out;
787 break;
788 default:
789 /* unknown node */
790 if (chvc) {
791 dbg_gc("unknown node have vnode cache\n");
792 ret = EBADF;
793 goto err_out;
796 /* CRC's OK, write node to its new place */
797 retry:
798 ret = chfs_reserve_space_gc(chmp, totlen);
799 if (ret)
800 goto err_out;
802 newnref = chfs_alloc_node_ref(chmp->chm_nextblock);
803 if (!newnref) {
804 ret = ENOMEM;
805 goto err_out;
808 ofs = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
809 newnref->nref_offset = ofs;
811 /* write out the whole node */
812 vec.iov_base = (void *)data;
813 vec.iov_len = totlen;
814 mutex_enter(&chmp->chm_lock_sizes);
815 ret = chfs_write_wbuf(chmp, &vec, 1, ofs, &retlen);
817 if (ret || retlen != totlen) {
818 /* error while writing */
819 chfs_err("error while writing out to the media\n");
820 chfs_err("err: %d | size: %zu | retlen : %zu\n",
821 ret, totlen, retlen);
823 chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
824 if (retries) {
825 mutex_exit(&chmp->chm_lock_sizes);
826 ret = EIO;
827 goto err_out;
830 /* try again */
831 retries++;
832 mutex_exit(&chmp->chm_lock_sizes);
833 goto retry;
836 /* update vnode information */
837 mutex_exit(&chmp->chm_lock_sizes);
838 //TODO should we set free_size?
839 mutex_enter(&chmp->chm_lock_vnocache);
840 chfs_add_vnode_ref_to_vc(chmp, chvc, newnref);
841 mutex_exit(&chmp->chm_lock_vnocache);
842 ret = 0;
843 /* FALLTHROUGH */
844 err_out:
845 kmem_free(data, totlen);
846 return ret;
850 /* chfs_gcollect_live - collects a living node */
852 chfs_gcollect_live(struct chfs_mount *chmp,
853 struct chfs_eraseblock *cheb, struct chfs_node_ref *nref,
854 struct chfs_inode *ip)
856 struct chfs_node_frag *frag;
857 struct chfs_full_dnode *fn = NULL;
858 int start = 0, end = 0, nrfrags = 0;
859 struct chfs_dirent *fd = NULL;
860 int ret = 0;
861 bool is_dirent;
863 dbg_gc("gcollect_live\n");
865 if (chmp->chm_gcblock != cheb) {
866 dbg_gc("GC block is no longer gcblock. Restart.\n");
867 goto upnout;
870 if (CHFS_REF_OBSOLETE(nref)) {
871 dbg_gc("node to be GC'd was obsoleted in the meantime.\n");
872 goto upnout;
875 /* It's a vnode? */
876 if (ip->chvc->v == nref) {
877 chfs_gcollect_vnode(chmp, ip);
878 goto upnout;
881 /* Find data node. */
882 dbg_gc("find full dnode\n");
883 for(frag = frag_first(&ip->fragtree);
884 frag; frag = frag_next(&ip->fragtree, frag)) {
885 if (frag->node && frag->node->nref == nref) {
886 fn = frag->node;
887 end = frag->ofs + frag->size;
888 if (!nrfrags++)
889 start = frag->ofs;
890 if (nrfrags == frag->node->frags)
891 break;
895 /* It's a pristine node, or dnode (or hole? XXX have we hole nodes?) */
896 if (fn) {
897 if (CHFS_REF_FLAGS(nref) == CHFS_PRISTINE_NODE_MASK) {
898 ret = chfs_gcollect_pristine(chmp,
899 cheb, ip->chvc, nref);
900 if (!ret) {
901 frag->node->nref = ip->chvc->v;
903 if (ret != EBADF)
904 goto upnout;
906 ret = chfs_gcollect_dnode(chmp, cheb, ip, fn, start, end);
907 goto upnout;
910 /* Is it a dirent? */
911 dbg_gc("find full dirent\n");
912 is_dirent = false;
913 TAILQ_FOREACH(fd, &ip->dents, fds) {
914 if (fd->nref == nref) {
915 is_dirent = true;
916 break;
920 if (is_dirent && fd->vno) {
921 /* Living dirent. */
922 ret = chfs_gcollect_dirent(chmp, cheb, ip, fd);
923 } else if (is_dirent) {
924 /* Already deleted dirent. */
925 ret = chfs_gcollect_deletion_dirent(chmp, cheb, ip, fd);
926 } else {
927 dbg_gc("Nref at leb #%u offset 0x%08x wasn't in node list"
928 " for ino #%llu\n",
929 nref->nref_lnr, CHFS_GET_OFS(nref->nref_offset),
930 (unsigned long long)ip->ino);
931 if (CHFS_REF_OBSOLETE(nref)) {
932 dbg_gc("But it's obsolete so we don't mind"
933 " too much.\n");
937 upnout:
938 return ret;
941 /* chfs_gcollect_vnode - collects a vnode information node */
943 chfs_gcollect_vnode(struct chfs_mount *chmp, struct chfs_inode *ip)
945 int ret;
946 dbg_gc("gcollect_vnode\n");
948 /* Simply write the new vnode information to the flash
949 * with GC's space allocation */
950 ret = chfs_write_flash_vnode(chmp, ip, ALLOC_GC);
952 return ret;
955 /* chfs_gcollect_dirent - collects a dirent */
957 chfs_gcollect_dirent(struct chfs_mount *chmp,
958 struct chfs_eraseblock *cheb, struct chfs_inode *parent,
959 struct chfs_dirent *fd)
961 struct vnode *vnode = NULL;
962 struct chfs_inode *ip;
963 dbg_gc("gcollect_dirent\n");
965 /* Find vnode. */
966 vnode = chfs_vnode_lookup(chmp, fd->vno);
968 /* XXX maybe KASSERT or panic on this? */
969 if (vnode == NULL) {
970 return ENOENT;
973 ip = VTOI(vnode);
974 vrele(vnode);
976 /* Remove and obsolete the previous version. */
977 mutex_enter(&chmp->chm_lock_vnocache);
978 chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
979 &parent->chvc->dirents);
980 mutex_exit(&chmp->chm_lock_vnocache);
982 /* Write the new dirent to the flash. */
983 return chfs_write_flash_dirent(chmp,
984 parent, ip, fd, fd->vno, ALLOC_GC);
988 * chfs_gcollect_deletion_dirent -
989 * collects a dirent what was marked as deleted
992 chfs_gcollect_deletion_dirent(struct chfs_mount *chmp,
993 struct chfs_eraseblock *cheb, struct chfs_inode *parent,
994 struct chfs_dirent *fd)
996 struct chfs_flash_dirent_node chfdn;
997 struct chfs_node_ref *nref;
998 size_t retlen, name_len, nref_len;
999 uint32_t name_crc;
1001 int ret;
1003 dbg_gc("gcollect_deletion_dirent\n");
1005 /* Check node. */
1006 name_len = strlen(fd->name);
1007 name_crc = crc32(0, fd->name, name_len);
1009 nref_len = chfs_nref_len(chmp, cheb, fd->nref);
1011 /* XXX This was a noop (void)chfs_vnode_lookup(chmp, fd->vno); */
1013 /* Find it in parent dirents. */
1014 for (nref = parent->chvc->dirents;
1015 nref != (void*)parent->chvc;
1016 nref = nref->nref_next) {
1018 if (!CHFS_REF_OBSOLETE(nref))
1019 continue;
1021 /* if node refs have different length, skip */
1022 if (chfs_nref_len(chmp, NULL, nref) != nref_len)
1023 continue;
1025 if (CHFS_GET_OFS(nref->nref_offset) ==
1026 CHFS_GET_OFS(fd->nref->nref_offset)) {
1027 continue;
1030 /* read it from flash */
1031 ret = chfs_read_leb(chmp,
1032 nref->nref_lnr, (void*)&chfdn, CHFS_GET_OFS(nref->nref_offset),
1033 nref_len, &retlen);
1035 if (ret) {
1036 dbg_gc("Read error: %d\n", ret);
1037 continue;
1040 if (retlen != nref_len) {
1041 dbg_gc("Error reading node:"
1042 " read: %zu insted of: %zu\n", retlen, nref_len);
1043 continue;
1046 /* if node type doesn't match, skip */
1047 if (le16toh(chfdn.type) != CHFS_NODETYPE_DIRENT)
1048 continue;
1050 /* if crc doesn't match, skip */
1051 if (le32toh(chfdn.name_crc) != name_crc)
1052 continue;
1054 /* if length of name different, or this is an another deletion
1055 * dirent, skip
1057 if (chfdn.nsize != name_len || !le64toh(chfdn.vno))
1058 continue;
1060 /* check actual name */
1061 if (memcmp(chfdn.name, fd->name, name_len))
1062 continue;
1064 mutex_enter(&chmp->chm_lock_vnocache);
1065 chfs_remove_and_obsolete(chmp, parent->chvc, fd->nref,
1066 &parent->chvc->dirents);
1067 mutex_exit(&chmp->chm_lock_vnocache);
1068 return chfs_write_flash_dirent(chmp,
1069 parent, NULL, fd, fd->vno, ALLOC_GC);
1072 /* Simply remove it from the parent dirents. */
1073 TAILQ_REMOVE(&parent->dents, fd, fds);
1074 chfs_free_dirent(fd);
1075 return 0;
1078 /* chfs_gcollect_dnode - */
1080 chfs_gcollect_dnode(struct chfs_mount *chmp,
1081 struct chfs_eraseblock *orig_cheb, struct chfs_inode *ip,
1082 struct chfs_full_dnode *fn, uint32_t orig_start, uint32_t orig_end)
1084 struct chfs_node_ref *nref;
1085 struct chfs_full_dnode *newfn;
1086 struct chfs_flash_data_node *fdnode;
1087 int ret = 0, retries = 0;
1088 uint32_t totlen;
1089 char *data = NULL;
1090 struct iovec vec;
1091 size_t retlen;
1092 dbg_gc("gcollect_dnode\n");
1094 //TODO merge frags
1096 KASSERT(orig_cheb->lnr == fn->nref->nref_lnr);
1097 totlen = chfs_nref_len(chmp, orig_cheb, fn->nref);
1098 data = kmem_alloc(totlen, KM_SLEEP);
1100 /* Read the node from the flash. */
1101 ret = chfs_read_leb(chmp, fn->nref->nref_lnr, data, fn->nref->nref_offset,
1102 totlen, &retlen);
1104 fdnode = (struct chfs_flash_data_node *)data;
1105 fdnode->version = htole64(++ip->chvc->highest_version);
1106 fdnode->node_crc = htole32(crc32(0, (uint8_t *)fdnode,
1107 sizeof(*fdnode) - 4));
1109 vec.iov_base = (void *)data;
1110 vec.iov_len = totlen;
1112 retry:
1113 /* Set the next block where we can write. */
1114 ret = chfs_reserve_space_gc(chmp, totlen);
1115 if (ret)
1116 goto out;
1118 nref = chfs_alloc_node_ref(chmp->chm_nextblock);
1119 if (!nref) {
1120 ret = ENOMEM;
1121 goto out;
1124 mutex_enter(&chmp->chm_lock_sizes);
1126 nref->nref_offset = chmp->chm_ebh->eb_size - chmp->chm_nextblock->free_size;
1127 KASSERT(nref->nref_offset % 4 == 0);
1128 chfs_change_size_free(chmp, chmp->chm_nextblock, -totlen);
1130 /* Write it to the writebuffer. */
1131 ret = chfs_write_wbuf(chmp, &vec, 1, nref->nref_offset, &retlen);
1132 if (ret || retlen != totlen) {
1133 /* error during writing */
1134 chfs_err("error while writing out to the media\n");
1135 chfs_err("err: %d | size: %d | retlen : %zu\n",
1136 ret, totlen, retlen);
1137 chfs_change_size_dirty(chmp, chmp->chm_nextblock, totlen);
1138 if (retries) {
1139 ret = EIO;
1140 mutex_exit(&chmp->chm_lock_sizes);
1141 goto out;
1144 /* try again */
1145 retries++;
1146 mutex_exit(&chmp->chm_lock_sizes);
1147 goto retry;
1150 dbg_gc("new nref lnr: %u - offset: %u\n", nref->nref_lnr, nref->nref_offset);
1152 chfs_change_size_used(chmp, &chmp->chm_blocks[nref->nref_lnr], totlen);
1153 mutex_exit(&chmp->chm_lock_sizes);
1154 KASSERT(chmp->chm_blocks[nref->nref_lnr].used_size <= chmp->chm_ebh->eb_size);
1156 /* Set fields of the new node. */
1157 newfn = chfs_alloc_full_dnode();
1158 newfn->nref = nref;
1159 newfn->ofs = fn->ofs;
1160 newfn->size = fn->size;
1161 newfn->frags = 0;
1163 mutex_enter(&chmp->chm_lock_vnocache);
1164 /* Remove every part of the old node. */
1165 chfs_remove_frags_of_node(chmp, &ip->fragtree, fn->nref);
1166 chfs_remove_and_obsolete(chmp, ip->chvc, fn->nref, &ip->chvc->dnode);
1168 /* Add the new nref to inode. */
1169 chfs_add_full_dnode_to_inode(chmp, ip, newfn);
1170 chfs_add_node_to_list(chmp,
1171 ip->chvc, newfn->nref, &ip->chvc->dnode);
1172 mutex_exit(&chmp->chm_lock_vnocache);
1174 out:
1175 kmem_free(data, totlen);
1176 return ret;