/*
 * fs/logfs/segment.c   - Handling the Object Store
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Object store or ostore makes up the complete device with exception of
 * the superblock and journal areas.  Apart from its own metadata it stores
 * three kinds of objects: inodes, dentries and blocks, both data and indirect.
 */
#include "logfs.h"
#include <linux/slab.h>
static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
{
        struct logfs_super *super = logfs_super(sb);
        struct btree_head32 *head = &super->s_reserved_segments;
        int err;

        err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
        if (err)
                return err;
        logfs_super(sb)->s_bad_segments++;
        /* FIXME: write to journal */
        return 0;
}

int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
{
        struct logfs_super *super = logfs_super(sb);

        super->s_gec++;

        return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
                        super->s_segsize, ensure_erase);
}
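
/*
 * Reserve @bytes in the area's currently open segment and return the
 * device offset of the reservation.  logfs_open_area() (below) first
 * opens a freshly erased segment if the current one cannot fit the
 * request.
 */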
static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
{
        s32 ofs;

        logfs_open_area(area, bytes);

        ofs = area->a_used_bytes;
        area->a_used_bytes += bytes;
        BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);

        return dev_ofs(area->a_sb, area->a_segno, ofs);
}
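
/*
 * Look up a page of the mapping inode, which mirrors the device.  With
 * use_filler the page content is read from the device through
 * s_devops->readpage; otherwise an empty page is created for the caller
 * to overwrite.
 */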
static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
                int use_filler)
{
        struct logfs_super *super = logfs_super(sb);
        struct address_space *mapping = super->s_mapping_inode->i_mapping;
        filler_t *filler = super->s_devops->readpage;
        struct page *page;

        BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
        if (use_filler)
                page = read_cache_page(mapping, index, filler, sb);
        else {
                page = find_or_create_page(mapping, index, GFP_NOFS);
                unlock_page(page);
        }
        return page;
}
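
/*
 * Copy a buffer destined for device offset @ofs into the mapping inode's
 * page cache, splitting the copy at page boundaries.  Touched pages are
 * marked private and keep an extra reference until freeseg() drops it
 * again.
 */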
int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
                int use_filler)
{
        pgoff_t index = ofs >> PAGE_SHIFT;
        struct page *page;
        long offset = ofs & (PAGE_SIZE-1);
        long copylen;

        /* Only logfs_wbuf_recover may use len==0 */
        BUG_ON(!len && !use_filler);
        do {
                copylen = min((ulong)len, PAGE_SIZE - offset);

                page = get_mapping_page(area->a_sb, index, use_filler);
                if (IS_ERR(page))
                        return PTR_ERR(page);
                BUG_ON(!page); /* FIXME: reserve a pool */
                SetPageUptodate(page);
                memcpy(page_address(page) + offset, buf, copylen);

                if (!PagePrivate(page)) {
                        SetPagePrivate(page);
                        page_cache_get(page);
                }
                page_cache_release(page);

                buf += copylen;
                len -= copylen;
                offset = 0;
                index++;
        } while (len);
        return 0;
}
static void pad_partial_page(struct logfs_area *area)
{
        struct super_block *sb = area->a_sb;
        struct page *page;
        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
        pgoff_t index = ofs >> PAGE_SHIFT;
        long offset = ofs & (PAGE_SIZE-1);
        u32 len = PAGE_SIZE - offset;

        if (len % PAGE_SIZE) {
                page = get_mapping_page(sb, index, 0);
                BUG_ON(!page); /* FIXME: reserve a pool */
                memset(page_address(page) + offset, 0xff, len);
                if (!PagePrivate(page)) {
                        SetPagePrivate(page);
                        page_cache_get(page);
                }
                page_cache_release(page);
        }
}

static void pad_full_pages(struct logfs_area *area)
{
        struct super_block *sb = area->a_sb;
        struct logfs_super *super = logfs_super(sb);
        u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
        u32 len = super->s_segsize - area->a_used_bytes;
        pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
        pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
        struct page *page;

        while (no_indizes) {
                page = get_mapping_page(sb, index, 0);
                BUG_ON(!page); /* FIXME: reserve a pool */
                SetPageUptodate(page);
                memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
                if (!PagePrivate(page)) {
                        SetPagePrivate(page);
                        page_cache_get(page);
                }
                page_cache_release(page);
                index++;
                no_indizes--;
        }
}

/*
 * bdev_writeseg will write full pages.  Memset the tail to prevent data leaks.
 * Also make sure we allocate (and memset) all pages for final writeout.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
        pad_partial_page(area);
        if (final)
                pad_full_pages(area);
}
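
/*
 * Illustrative example (assuming 4KiB pages): with a_used_bytes == 5000,
 * the write buffer ends at byte 904 of the segment's second page.
 * pad_partial_page() fills bytes 904-4095 of that page with 0xff so that
 * bdev_writeseg cannot leak stale memory; with final == 1,
 * pad_full_pages() additionally allocates and 0xff-fills every remaining
 * page up to the segment end before the last writeout.
 */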
/*
 * We have to be careful with the alias tree.  Since lookup is done by bix,
 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
 * indirect blocks.  So always use it through accessor functions.
 */
static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
                level_t level)
{
        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
        pgoff_t index = logfs_pack_index(bix, level);

        return btree_lookup128(head, ino, index);
}

static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
                level_t level, void *val)
{
        struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
        pgoff_t index = logfs_pack_index(bix, level);

        return btree_insert128(head, ino, index, val, GFP_NOFS);
}
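
/*
 * Illustrative example, assuming 512 pointers per block: at level 1,
 * logfs_pack_index() discards the low nine bits of bix, so the data block
 * indices 14, 15 and 16 all map to the same key and both accessors above
 * resolve them to the same level-1 indirect block.
 */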
static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
                write_alias_t *write_one_alias)
{
        struct object_alias_item *item;
        int err;

        list_for_each_entry(item, &block->item_list, list) {
                err = write_alias_journal(sb, block->ino, block->bix,
                                block->level, item->child_no, item->val);
                if (err)
                        return err;
        }
        return 0;
}

static struct logfs_block_ops btree_block_ops = {
        .write_block    = btree_write_block,
        .free_block     = __free_block,
        .write_alias    = btree_write_alias,
};
int logfs_load_object_aliases(struct super_block *sb,
                struct logfs_obj_alias *oa, int count)
{
        struct logfs_super *super = logfs_super(sb);
        struct logfs_block *block;
        struct object_alias_item *item;
        u64 ino, bix;
        level_t level;
        int i, err;

        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
        count /= sizeof(*oa);
        for (i = 0; i < count; i++) {
                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
                if (!item)
                        return -ENOMEM;
                memset(item, 0, sizeof(*item));

                super->s_no_object_aliases++;
                item->val = oa[i].val;
                item->child_no = be16_to_cpu(oa[i].child_no);

                ino = be64_to_cpu(oa[i].ino);
                bix = be64_to_cpu(oa[i].bix);
                level = LEVEL(oa[i].level);

                log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
                                ino, bix, level, item->child_no,
                                be64_to_cpu(item->val));
                block = alias_tree_lookup(sb, ino, bix, level);
                if (!block) {
                        block = __alloc_block(sb, ino, bix, level);
                        block->ops = &btree_block_ops;
                        err = alias_tree_insert(sb, ino, bix, level, block);
                        BUG_ON(err); /* mempool empty */
                }
                if (test_and_set_bit(item->child_no, block->alias_map)) {
                        printk(KERN_ERR"LogFS: Alias collision detected\n");
                        return -EIO;
                }
                list_move_tail(&block->alias_list, &super->s_object_alias);
                list_add(&item->list, &block->item_list);
        }
        return 0;
}
static void kill_alias(void *_block, unsigned long ignore0,
                u64 ignore1, u64 ignore2, size_t ignore3)
{
        struct logfs_block *block = _block;
        struct super_block *sb = block->sb;
        struct logfs_super *super = logfs_super(sb);
        struct object_alias_item *item;

        while (!list_empty(&block->item_list)) {
                item = list_entry(block->item_list.next, typeof(*item), list);
                list_del(&item->list);
                mempool_free(item, super->s_alias_pool);
        }
        block->ops->free_block(sb, block);
}
static int obj_type(struct inode *inode, level_t level)
{
        if (level == 0) {
                if (S_ISDIR(inode->i_mode))
                        return OBJ_DENTRY;
                if (inode->i_ino == LOGFS_INO_MASTER)
                        return OBJ_INODE;
        }
        return OBJ_BLOCK;
}

static int obj_len(struct super_block *sb, int obj_type)
{
        switch (obj_type) {
        case OBJ_DENTRY:
                return sizeof(struct logfs_disk_dentry);
        case OBJ_INODE:
                return sizeof(struct logfs_disk_inode);
        case OBJ_BLOCK:
                return sb->s_blocksize;
        default:
                BUG();
        }
}
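
/*
 * Objects go to the device as a struct logfs_object_header followed by
 * the (possibly compressed) payload; h.crc protects the header and
 * h.data_crc the payload.  The read path below (read_obj_header() and
 * __logfs_segment_read()) verifies both.
 */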
static int __logfs_segment_write(struct inode *inode, void *buf,
                struct logfs_shadow *shadow, int type, int len, int compr)
{
        struct logfs_area *area;
        struct super_block *sb = inode->i_sb;
        s64 ofs;
        struct logfs_object_header h;
        int acc_len;

        if (shadow->gc_level == 0)
                acc_len = len;
        else
                acc_len = obj_len(sb, type);

        area = get_area(sb, shadow->gc_level);
        ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
        LOGFS_BUG_ON(ofs <= 0, sb);
        /*
         * Order is important.  logfs_get_free_bytes(), by modifying the
         * segment file, may modify the content of the very page we're about
         * to write now.  Which is fine, as long as the calculated crc and
         * written data still match.  So do the modifications _before_
         * calculating the crc.
         */

        h.len = cpu_to_be16(len);
        h.type = type;
        h.compr = compr;
        h.ino = cpu_to_be64(inode->i_ino);
        h.bix = cpu_to_be64(shadow->bix);
        h.crc = logfs_crc32(&h, sizeof(h) - 4, 4);
        h.data_crc = logfs_crc32(buf, len, 0);

        logfs_buf_write(area, ofs, &h, sizeof(h));
        logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);

        shadow->new_ofs = ofs;
        shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;

        return 0;
}
static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
                struct logfs_shadow *shadow, int type, int len)
{
        struct super_block *sb = inode->i_sb;
        void *compressor_buf = logfs_super(sb)->s_compressed_je;
        ssize_t compr_len;
        int ret;

        mutex_lock(&logfs_super(sb)->s_journal_mutex);
        compr_len = logfs_compress(buf, compressor_buf, len, len);

        if (compr_len >= 0) {
                ret = __logfs_segment_write(inode, compressor_buf, shadow,
                                type, compr_len, COMPR_ZLIB);
        } else {
                ret = __logfs_segment_write(inode, buf, shadow, type, len,
                                COMPR_NONE);
        }
        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
        return ret;
}
/**
 * logfs_segment_write - write data block to object store
 * @inode:              inode containing data
 * @page:               page containing the data
 * @shadow:             shadow entry recording old and new object location
 *
 * Returns an errno or zero.
 */
int logfs_segment_write(struct inode *inode, struct page *page,
                struct logfs_shadow *shadow)
{
        struct super_block *sb = inode->i_sb;
        struct logfs_super *super = logfs_super(sb);
        int do_compress, type, len;
        int ret;
        void *buf;

        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
        do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
        if (shadow->gc_level != 0) {
                /* temporarily disable compression for indirect blocks */
                do_compress = 0;
        }

        type = obj_type(inode, shrink_level(shadow->gc_level));
        len = obj_len(sb, type);
        buf = kmap(page);
        if (do_compress)
                ret = logfs_segment_write_compress(inode, buf, shadow, type,
                                len);
        else
                ret = __logfs_segment_write(inode, buf, shadow, type, len,
                                COMPR_NONE);
        kunmap(page);

        log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
                        shadow->ino, shadow->bix, shadow->gc_level,
                        shadow->old_ofs, shadow->new_ofs,
                        shadow->old_len, shadow->new_len);
        /* this BUG_ON did catch a locking bug. useful */
        BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
        return ret;
}
int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
{
        pgoff_t index = ofs >> PAGE_SHIFT;
        struct page *page;
        long offset = ofs & (PAGE_SIZE-1);
        long copylen;

        while (len) {
                copylen = min((ulong)len, PAGE_SIZE - offset);

                page = get_mapping_page(sb, index, 1);
                if (IS_ERR(page))
                        return PTR_ERR(page);
                memcpy(buf, page_address(page) + offset, copylen);
                page_cache_release(page);

                buf += copylen;
                len -= copylen;
                offset = 0;
                index++;
        }
        return 0;
}
/*
 * The "position" of indirect blocks is ambiguous.  It can be the position
 * of any data block somewhere behind this indirect block.  So we need to
 * normalize the positions through logfs_block_mask() before comparing.
 */
static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
{
        return (pos1 & logfs_block_mask(sb, level)) !=
                (pos2 & logfs_block_mask(sb, level));
}
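
/*
 * Illustrative example, assuming 512 pointers per block: at level 1,
 * logfs_block_mask() clears the low nine bits, so positions 14 and 300
 * compare as equal (check_pos() returns 0) because both lie behind the
 * same level-1 indirect block, while 14 and 600 do not.
 */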
#if 0
static int read_seg_header(struct super_block *sb, u64 ofs,
                struct logfs_segment_header *sh)
{
        __be32 crc;
        int err;

        err = wbuf_read(sb, ofs, sizeof(*sh), sh);
        if (err)
                return err;
        crc = logfs_crc32(sh, sizeof(*sh), 4);
        if (crc != sh->crc) {
                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
                                "got %x\n", ofs, be32_to_cpu(sh->crc),
                                be32_to_cpu(crc));
                return -EIO;
        }
        return 0;
}
#endif
static int read_obj_header(struct super_block *sb, u64 ofs,
                struct logfs_object_header *oh)
{
        __be32 crc;
        int err;

        err = wbuf_read(sb, ofs, sizeof(*oh), oh);
        if (err)
                return err;
        crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
        if (crc != oh->crc) {
                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
                                "got %x\n", ofs, be32_to_cpu(oh->crc),
                                be32_to_cpu(crc));
                return -EIO;
        }
        return 0;
}
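
/*
 * Object aliases exist in one of two forms: while a block's page lives in
 * the page cache, the alias values sit in the page itself and the block
 * uses indirect_block_ops; once the page goes away, they are kept as
 * object_alias_item entries in the alias btree under btree_block_ops.
 * The two functions below convert between these representations.
 */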
static void move_btree_to_page(struct inode *inode, struct page *page,
                __be64 *data)
{
        struct super_block *sb = inode->i_sb;
        struct logfs_super *super = logfs_super(sb);
        struct btree_head128 *head = &super->s_object_alias_tree;
        struct logfs_block *block;
        struct object_alias_item *item, *next;

        if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
                return;

        block = btree_remove128(head, inode->i_ino, page->index);
        if (!block)
                return;

        log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
                        block->ino, block->bix, block->level);
        list_for_each_entry_safe(item, next, &block->item_list, list) {
                data[item->child_no] = item->val;
                list_del(&item->list);
                mempool_free(item, super->s_alias_pool);
        }
        block->page = page;

        if (!PagePrivate(page)) {
                SetPagePrivate(page);
                page_cache_get(page);
                set_page_private(page, (unsigned long) block);
        }
        block->ops = &indirect_block_ops;
        initialize_block_counters(page, block, data, 0);
}
/*
 * This silences a false, yet annoying gcc warning.  I hate it when my editor
 * jumps into bitops.h each time I recompile this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
                unsigned long size, unsigned long offset)
{
        return find_next_bit(addr, size, offset);
}
void move_page_to_btree(struct page *page)
{
        struct logfs_block *block = logfs_block(page);
        struct super_block *sb = block->sb;
        struct logfs_super *super = logfs_super(sb);
        struct object_alias_item *item;
        unsigned long pos;
        __be64 *child;
        int err;

        if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
                block->ops->free_block(sb, block);
                return;
        }
        log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
                        block->ino, block->bix, block->level);
        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;

        for (pos = 0; ; pos++) {
                pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
                if (pos >= LOGFS_BLOCK_FACTOR)
                        break;

                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
                BUG_ON(!item); /* mempool empty */
                memset(item, 0, sizeof(*item));

                child = kmap_atomic(page, KM_USER0);
                item->val = child[pos];
                kunmap_atomic(child, KM_USER0);
                item->child_no = pos;
                list_add(&item->list, &block->item_list);
        }
        block->page = NULL;

        if (PagePrivate(page)) {
                ClearPagePrivate(page);
                page_cache_release(page);
                set_page_private(page, 0);
        }
        block->ops = &btree_block_ops;
        err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
                        block);
        BUG_ON(err); /* mempool empty */
        ClearPageUptodate(page);
}
static int __logfs_segment_read(struct inode *inode, void *buf,
                u64 ofs, u64 bix, level_t level)
{
        struct super_block *sb = inode->i_sb;
        void *compressor_buf = logfs_super(sb)->s_compressed_je;
        struct logfs_object_header oh;
        __be32 crc;
        u16 len;
        int err, block_len;

        block_len = obj_len(sb, obj_type(inode, level));
        err = read_obj_header(sb, ofs, &oh);
        if (err)
                goto out_err;

        err = -EIO;
        if (be64_to_cpu(oh.ino) != inode->i_ino
                        || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
                printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
                                "expected (%lx, %llx), got (%llx, %llx)\n",
                                ofs, inode->i_ino, bix,
                                be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
                goto out_err;
        }

        len = be16_to_cpu(oh.len);

        switch (oh.compr) {
        case COMPR_NONE:
                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
                if (err)
                        goto out_err;
                crc = logfs_crc32(buf, len, 0);
                if (crc != oh.data_crc) {
                        printk(KERN_ERR"LOGFS: uncompressed data crc error at "
                                        "%llx: expected %x, got %x\n", ofs,
                                        be32_to_cpu(oh.data_crc),
                                        be32_to_cpu(crc));
                        goto out_err;
                }
                break;
        case COMPR_ZLIB:
                mutex_lock(&logfs_super(sb)->s_journal_mutex);
                err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
                                compressor_buf);
                if (err) {
                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
                        goto out_err;
                }
                crc = logfs_crc32(compressor_buf, len, 0);
                if (crc != oh.data_crc) {
                        printk(KERN_ERR"LOGFS: compressed data crc error at "
                                        "%llx: expected %x, got %x\n", ofs,
                                        be32_to_cpu(oh.data_crc),
                                        be32_to_cpu(crc));
                        mutex_unlock(&logfs_super(sb)->s_journal_mutex);
                        goto out_err;
                }
                err = logfs_uncompress(compressor_buf, buf, len, block_len);
                mutex_unlock(&logfs_super(sb)->s_journal_mutex);
                if (err) {
                        printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
                        goto out_err;
                }
                break;
        default:
                LOGFS_BUG(sb);
                err = -EIO;
                goto out_err;
        }
        return 0;

out_err:
        logfs_set_ro(sb);
        printk(KERN_ERR"LOGFS: device is read-only now\n");
        LOGFS_BUG(sb);
        return err;
}
/**
 * logfs_segment_read - read data block from object store
 * @inode:              inode containing data
 * @page:               page to read the data into
 * @ofs:                physical data offset
 * @bix:                block index
 * @level:              block level
 *
 * Returns 0 on success or a negative errno.
 */
int logfs_segment_read(struct inode *inode, struct page *page,
                u64 ofs, u64 bix, level_t level)
{
        int err;
        void *buf;

        if (PageUptodate(page))
                return 0;

        ofs &= ~LOGFS_FULLY_POPULATED;

        buf = kmap(page);
        err = __logfs_segment_read(inode, buf, ofs, bix, level);
        if (!err) {
                move_btree_to_page(inode, page, buf);
                SetPageUptodate(page);
        }
        kunmap(page);
        log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
                        inode->i_ino, bix, level, ofs, err);
        return err;
}
int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
{
        struct super_block *sb = inode->i_sb;
        struct logfs_super *super = logfs_super(sb);
        struct logfs_object_header h;
        u16 len;
        int err;

        super->s_flags |= LOGFS_SB_FLAG_DIRTY;
        BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
        BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
        if (!shadow->old_ofs)
                return 0;

        log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
                        shadow->ino, shadow->bix, shadow->gc_level,
                        shadow->old_ofs, shadow->new_ofs,
                        shadow->old_len, shadow->new_len);
        err = read_obj_header(sb, shadow->old_ofs, &h);
        LOGFS_BUG_ON(err, sb);
        LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
        LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
                        shrink_level(shadow->gc_level)), sb);

        if (shadow->gc_level == 0)
                len = be16_to_cpu(h.len);
        else
                len = obj_len(sb, h.type);
        shadow->old_len = len + sizeof(h);
        return 0;
}
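
/*
 * Drop all cached pages of segment @segno from the mapping inode,
 * releasing the extra reference that __logfs_buf_write() took for
 * private pages.
 */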
void freeseg(struct super_block *sb, u32 segno)
{
        struct logfs_super *super = logfs_super(sb);
        struct address_space *mapping = super->s_mapping_inode->i_mapping;
        struct page *page;
        u64 ofs, start, end;

        start = dev_ofs(sb, segno, 0);
        end = dev_ofs(sb, segno + 1, 0);
        for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
                page = find_get_page(mapping, ofs >> PAGE_SHIFT);
                if (!page)
                        continue;
                if (PagePrivate(page)) {
                        ClearPagePrivate(page);
                        page_cache_release(page);
                }
                page_cache_release(page);
        }
}
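
/*
 * Make sure the area has an open segment with at least @bytes free.  If
 * the current segment is too full, it is padded, written out and freed,
 * and a freshly erased segment is taken from the free list instead.
 * Returns 1 if a previously open segment was closed, 0 otherwise.
 */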
int logfs_open_area(struct logfs_area *area, size_t bytes)
{
        struct super_block *sb = area->a_sb;
        struct logfs_super *super = logfs_super(sb);
        int err, closed = 0;

        if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
                return 0;

        if (area->a_is_open) {
                u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
                u32 len = super->s_segsize - area->a_written_bytes;

                log_gc("logfs_close_area(%x)\n", area->a_segno);
                pad_wbuf(area, 1);
                super->s_devops->writeseg(area->a_sb, ofs, len);
                freeseg(sb, area->a_segno);
                closed = 1;
        }

        area->a_used_bytes = 0;
        area->a_written_bytes = 0;
again:
        area->a_ops->get_free_segment(area);
        area->a_ops->get_erase_count(area);

        log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
        err = area->a_ops->erase_segment(area);
        if (err) {
                printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
                                area->a_segno);
                logfs_mark_segment_bad(sb, area->a_segno);
                goto again;
        }
        area->a_is_open = 1;
        return closed;
}
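
/*
 * Write out everything buffered between a_written_bytes and a_used_bytes,
 * rounded down to the device write size where one is set.
 */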
void logfs_sync_area(struct logfs_area *area)
{
        struct super_block *sb = area->a_sb;
        struct logfs_super *super = logfs_super(sb);
        u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
        u32 len = (area->a_used_bytes - area->a_written_bytes);

        if (super->s_writesize)
                len &= ~(super->s_writesize - 1);
        if (len == 0)
                return;
        pad_wbuf(area, 0);
        super->s_devops->writeseg(sb, ofs, len);
        area->a_written_bytes += len;
}
void logfs_sync_segments(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        int i;

        for_each_area(i)
                logfs_sync_area(super->s_area[i]);
}
/*
 * Pick a free segment to be used for this area.  Effectively takes a
 * candidate from the free list (not really a candidate anymore).
 */
static void ostore_get_free_segment(struct logfs_area *area)
{
        struct super_block *sb = area->a_sb;
        struct logfs_super *super = logfs_super(sb);

        if (super->s_free_list.count == 0) {
                printk(KERN_ERR"LOGFS: ran out of free segments\n");
                LOGFS_BUG(sb);
        }

        area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
}
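
/*
 * se.ec_level packs the erase count and the level into one 32bit value,
 * with the level in the lowest four bits, hence the shift below.  The +1
 * accounts for the erase this segment is about to receive.
 */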
static void ostore_get_erase_count(struct logfs_area *area)
{
        struct logfs_segment_entry se;
        u32 ec_level;

        logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
        BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
                        se.valid == cpu_to_be32(RESERVED));

        ec_level = be32_to_cpu(se.ec_level);
        area->a_erase_count = (ec_level >> 4) + 1;
}
static int ostore_erase_segment(struct logfs_area *area)
{
        struct super_block *sb = area->a_sb;
        struct logfs_segment_header sh;
        u64 ofs;
        int err;

        err = logfs_erase_segment(sb, area->a_segno, 0);
        if (err)
                return err;

        sh.pad = 0;
        sh.type = SEG_OSTORE;
        sh.level = (__force u8)area->a_level;
        sh.segno = cpu_to_be32(area->a_segno);
        sh.ec = cpu_to_be32(area->a_erase_count);
        sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
        sh.crc = logfs_crc32(&sh, sizeof(sh), 4);

        logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
                        area->a_level);

        ofs = dev_ofs(sb, area->a_segno, 0);
        area->a_used_bytes = sizeof(sh);
        logfs_buf_write(area, ofs, &sh, sizeof(sh));
        return 0;
}

static const struct logfs_area_ops ostore_area_ops = {
        .get_free_segment       = ostore_get_free_segment,
        .get_erase_count        = ostore_get_erase_count,
        .erase_segment          = ostore_erase_segment,
};
static void free_area(struct logfs_area *area)
{
        if (area)
                freeseg(area->a_sb, area->a_segno);
        kfree(area);
}

void free_areas(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        int i;

        for_each_area(i)
                free_area(super->s_area[i]);
        free_area(super->s_journal_area);
}
static struct logfs_area *alloc_area(struct super_block *sb)
{
        struct logfs_area *area;

        area = kzalloc(sizeof(*area), GFP_KERNEL);
        if (!area)
                return NULL;

        area->a_sb = sb;
        return area;
}

static void map_invalidatepage(struct page *page, unsigned long l)
{
        BUG();
}

static int map_releasepage(struct page *page, gfp_t g)
{
        /* Don't release these pages */
        return 0;
}

static const struct address_space_operations mapping_aops = {
        .invalidatepage = map_invalidatepage,
        .releasepage    = map_releasepage,
        .set_page_dirty = __set_page_dirty_nobuffers,
};
int logfs_init_mapping(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        struct address_space *mapping;
        struct inode *inode;

        inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
        if (IS_ERR(inode))
                return PTR_ERR(inode);
        super->s_mapping_inode = inode;
        mapping = inode->i_mapping;
        mapping->a_ops = &mapping_aops;
        /* Would it be possible to use __GFP_HIGHMEM as well? */
        mapping_set_gfp_mask(mapping, GFP_NOFS);
        return 0;
}
int logfs_init_areas(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        int i = -1;

        super->s_alias_pool = mempool_create_kmalloc_pool(600,
                        sizeof(struct object_alias_item));
        if (!super->s_alias_pool)
                return -ENOMEM;

        super->s_journal_area = alloc_area(sb);
        if (!super->s_journal_area)
                goto err;

        for_each_area(i) {
                super->s_area[i] = alloc_area(sb);
                if (!super->s_area[i])
                        goto err;
                super->s_area[i]->a_level = GC_LEVEL(i);
                super->s_area[i]->a_ops = &ostore_area_ops;
        }
        btree_init_mempool128(&super->s_object_alias_tree,
                        super->s_btree_pool);
        return 0;

err:
        for (i--; i >= 0; i--)
                free_area(super->s_area[i]);
        free_area(super->s_journal_area);
        logfs_mempool_destroy(super->s_alias_pool);
        return -ENOMEM;
}
void logfs_cleanup_areas(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);

        btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
}