Use dentry_path() to create full path to inode object
[pohmelfs.git] / fs / pohmelfs / inode.c
blob3f7c623a6015fea4bf74c7044ec83d027018f9aa
1 /*
2 * Copyright (C) 2011+ Evgeniy Polyakov <zbr@ioremap.net>
3 */
5 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7 #include <linux/buffer_head.h>
8 #include <linux/cred.h>
9 #include <linux/fiemap.h>
10 #include <linux/fs.h>
11 #include <linux/fs_struct.h>
12 #include <linux/mpage.h>
13 #include <linux/mount.h>
14 #include <linux/mm.h>
15 #include <linux/namei.h>
16 #include <linux/pagevec.h>
17 #include <linux/pagemap.h>
18 #include <linux/random.h>
19 #include <linux/scatterlist.h>
20 #include <linux/slab.h>
21 #include <linux/time.h>
22 #include <linux/writeback.h>
24 #include "pohmelfs.h"
26 char *pohmelfs_dump_id_len_raw(const unsigned char *id, unsigned int len, char *dst)
28 unsigned int i;
30 if (len > SHA512_DIGEST_SIZE)
31 len = SHA512_DIGEST_SIZE;
33 for (i=0; i<len; ++i)
34 sprintf(&dst[2*i], "%02x", id[i]);
35 return dst;
/* Number of id bytes rendered by pohmelfs_dump_id() (2 hex chars each). */
#define pohmelfs_dump_len 6
typedef struct {
	/* hex representation plus trailing NUL */
	char id_str[pohmelfs_dump_len * 2 + 1];
} pohmelfs_dump_t;
/* per-CPU scratch buffers for lock-free id formatting in debug output */
static DEFINE_PER_CPU(pohmelfs_dump_t, pohmelfs_dump_per_cpu);
/*
 * Format the first pohmelfs_dump_len bytes of @id as hex for logging.
 *
 * NOTE(review): the string lives in per-CPU storage and put_cpu_var()
 * re-enables preemption before the buffer is returned, so the result
 * is only safe to consume immediately (e.g. within a single printk)
 * and may be overwritten by a concurrent dump on the same CPU.
 */
char *pohmelfs_dump_id(const unsigned char *id)
{
	pohmelfs_dump_t *ptr;

	ptr = &get_cpu_var(pohmelfs_dump_per_cpu);
	pohmelfs_dump_id_len_raw(id, pohmelfs_dump_len, ptr->id_str);
	put_cpu_var(ptr);

	return ptr->id_str;
}
/* Scratch data hashed to generate pseudo-random object ids. */
#define dnet_raw_id_scratch 6
typedef struct {
	unsigned long rand;	/* accumulated per-CPU random state */
	struct timespec ts;	/* timestamp mixed in for uniqueness */
} dnet_raw_id_scratch_t;
static DEFINE_PER_CPU(dnet_raw_id_scratch_t, dnet_raw_id_scratch_per_cpu);
/*
 * Generate a new object id by hashing per-CPU scratch state: fresh
 * random bytes XORed into a per-CPU accumulator plus the current time.
 * Returns 0 or a negative error from pohmelfs_hash().
 */
static int pohmelfs_gen_id(struct pohmelfs_sb *psb, struct dnet_raw_id *id)
{
	dnet_raw_id_scratch_t *sc;
	int err;
	long rand;

	get_random_bytes(&rand, sizeof(sc->rand));

	sc = &get_cpu_var(dnet_raw_id_scratch_per_cpu);
	sc->rand ^= rand;
	sc->ts = CURRENT_TIME;

	err = pohmelfs_hash(psb, sc, sizeof(dnet_raw_id_scratch_t), id);
	put_cpu_var(sc);

	return err;
}
/*
 * Create path from root for given inode.
 * Path is formed as set of structures, containing name of the object
 * and its inode data (mode, permissions and so on).
 */
static int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, char *data, int len)
{
	struct dentry *d;
	char *ptr;
	int err;

	d = d_find_alias(&pi->vfs_inode);
	if (!d) {
		err = -ENOENT;
		goto err_out_exit;
	}

	/* dentry_path_raw() builds the path at the END of @data and
	 * returns a pointer to the first byte of the string */
	ptr = dentry_path_raw(d, data, len);
	if (IS_ERR(ptr)) {
		err = PTR_ERR(ptr);
		goto err_out_put;
	}

	/*
	 * NOTE(review): this is the offset of the path inside the buffer,
	 * not the length of the path string (which would be
	 * len - (ptr - data) - 1, since dentry_path_raw() fills from the
	 * end).  Callers use the return value as a byte count into @data
	 * (e.g. for hashing) - verify this is intentional.
	 */
	err = ptr - data - 1; /* not including 0-byte */

	pr_debug("dname: '%s', len: %u, maxlen: %u, name: '%s', strlen: %d\n",
			d->d_name.name, d->d_name.len, len, data, err);

err_out_put:
	dput(d);
err_out_exit:
	return err;
}
/*
 * Recompute the inode id the HTTP-compatibility way: hash the object's
 * path string instead of random scratch data.  One of psb->path[]
 * buffers is picked pseudo-randomly (by current nanoseconds) and is
 * protected by its own mutex.
 * Returns the pohmelfs_construct_path_string() result: the path byte
 * count on success or a negative error.
 */
int pohmelfs_http_compat_id(struct pohmelfs_inode *pi)
{
	struct pohmelfs_sb *psb = pohmelfs_sb(pi->vfs_inode.i_sb);
	struct timespec ts = CURRENT_TIME;
	int idx = ts.tv_nsec % psb->http_compat;
	struct pohmelfs_path *p = &psb->path[idx];
	int err;

	mutex_lock(&p->lock);
	err = pohmelfs_construct_path_string(pi, p->data, PAGE_SIZE);
	if (err > 0) {
		/* NOTE(review): hash errors are ignored here - id is left
		 * unchanged on failure.  TODO confirm this is intended. */
		pohmelfs_hash(psb, p->data, err, &pi->id);
	}
	mutex_unlock(&p->lock);

	return err;
}
132 static int pohmelfs_sb_inode_insert(struct pohmelfs_sb *psb, struct pohmelfs_inode *pi)
134 struct rb_node **n = &psb->inode_root.rb_node, *parent = NULL;
135 struct pohmelfs_inode *tmp;
136 int cmp, err = 0;
138 spin_lock(&psb->inode_lock);
139 while (*n) {
140 parent = *n;
142 tmp = rb_entry(parent, struct pohmelfs_inode, node);
144 cmp = dnet_id_cmp_str(tmp->id.id, pi->id.id);
145 if (cmp < 0)
146 n = &parent->rb_left;
147 else if (cmp > 0)
148 n = &parent->rb_right;
149 else {
150 err = -EEXIST;
151 goto err_out_unlock;
155 rb_link_node(&pi->node, parent, n);
156 rb_insert_color(&pi->node, &psb->inode_root);
158 err_out_unlock:
159 spin_unlock(&psb->inode_lock);
161 return err;
164 struct pohmelfs_inode *pohmelfs_sb_inode_lookup(struct pohmelfs_sb *psb, struct dnet_raw_id *id)
166 struct rb_node *n = psb->inode_root.rb_node;
167 struct pohmelfs_inode *pi, *found = NULL;
168 int cmp;
170 spin_lock(&psb->inode_lock);
171 while (n) {
172 pi = rb_entry(n, struct pohmelfs_inode, node);
174 cmp = dnet_id_cmp_str(pi->id.id, id->id);
175 if (cmp < 0) {
176 n = n->rb_left;
177 } else if (cmp > 0)
178 n = n->rb_right;
179 else {
180 found = pi;
181 break;
184 if (found) {
185 if (!igrab(&found->vfs_inode))
186 found = NULL;
188 spin_unlock(&psb->inode_lock);
190 return found;
193 struct inode *pohmelfs_alloc_inode(struct super_block *sb)
195 struct pohmelfs_inode *pi;
197 pi = kmem_cache_zalloc(pohmelfs_inode_cache, GFP_NOIO);
198 if (!pi)
199 goto err_out_exit;
201 inode_init_once(&pi->vfs_inode);
203 rb_init_node(&pi->node);
204 mutex_init(&pi->lock);
206 return &pi->vfs_inode;
208 err_out_exit:
209 return NULL;
/*
 * ->destroy_inode: free the optional per-inode group array (may be
 * NULL) and return the container to its slab cache.
 */
void pohmelfs_destroy_inode(struct inode *inode)
{
	struct pohmelfs_inode *pi = pohmelfs_inode(inode);

	pr_debug("%s: ino: %ld, dirty: %lx\n",
			pohmelfs_dump_id(pi->id.id), inode->i_ino,
			inode->i_state & I_DIRTY);

	kfree(pi->groups);
	kmem_cache_free(pohmelfs_inode_cache, pi);
}
224 int pohmelfs_hash(struct pohmelfs_sb *psb, const void *data, const size_t size, struct dnet_raw_id *id)
226 struct scatterlist sg;
227 struct hash_desc desc;
229 sg_init_table(&sg, 1);
230 sg_set_buf(&sg, data, size);
232 desc.tfm = psb->hash;
233 desc.flags = 0;
235 return crypto_hash_digest(&desc, &sg, size, id->id);
238 struct pohmelfs_readpages_priv {
239 struct pohmelfs_wait wait;
240 struct kref refcnt;
241 int page_num, page_index;
242 struct page *pages[0];
/*
 * Final release of the readpages request: flush, mark up to date,
 * unlock and release every page, then drop the inode reference taken
 * in pohmelfs_wait_init() and free the request.
 *
 * NOTE(review): pages are marked up to date even when fewer bytes than
 * requested were received - presumably the wait/condition logic in the
 * callers guards against acting on short reads.  TODO confirm.
 */
static void pohmelfs_readpages_free(struct kref *kref)
{
	struct pohmelfs_readpages_priv *rp = container_of(kref, struct pohmelfs_readpages_priv, refcnt);
	struct pohmelfs_inode *pi = rp->wait.pi;
	int i;

	pr_debug("%s: read: %ld/%ld, wait: %d\n",
			pohmelfs_dump_id(pi->id.id), atomic_long_read(&rp->wait.count),
			rp->page_num * PAGE_CACHE_SIZE, rp->wait.condition);

	for (i = 0; i < rp->page_num; ++i) {
		struct page *page = rp->pages[i];

		flush_dcache_page(page);
		SetPageUptodate(page);
		unlock_page(page);
		page_cache_release(page);
	}

	iput(&rp->wait.pi->vfs_inode);
	kfree(rp);
}
/*
 * Transaction destroy callback: force the wait condition non-zero so
 * any sleeper in pohmelfs_readpages_group() is released, wake it, and
 * drop the transaction's reference on the request state.
 */
static void pohmelfs_readpages_destroy(struct pohmelfs_trans *t)
{
	struct pohmelfs_readpages_priv *rp = t->priv;
	struct pohmelfs_wait *wait = &rp->wait;

	if (!wait->condition)
		wait->condition = 1;

	wake_up(&wait->wq);
	kref_put(&rp->refcnt, pohmelfs_readpages_free);
}
/*
 * Transaction completion callback, invoked per received reply.
 * On the final reply (no DNET_FLAGS_MORE) the wait condition is set to
 * the command status - or to 1 on success, so it is always non-zero -
 * and the sleeper in pohmelfs_readpages_group() is woken.
 */
static int pohmelfs_readpages_complete(struct pohmelfs_trans *t, struct pohmelfs_state *recv)
{
	struct pohmelfs_readpages_priv *rp = t->priv;
	struct pohmelfs_wait *wait = &rp->wait;
	struct dnet_cmd *cmd = &recv->cmd;

	if (!(cmd->flags & DNET_FLAGS_MORE)) {
		if (!wait->condition) {
			wait->condition = cmd->status;
			if (!wait->condition)
				wait->condition = 1;
			wake_up(&rp->wait.wq);
		}
	}

	pr_debug("%d:%s: read: %ld, wait: %d\n",
			cmd->id.group_id, pohmelfs_dump_id(wait->pi->id.id),
			atomic_long_read(&wait->count), wait->condition);

	return 0;
}
/*
 * Transaction init callback: pin the request state for the lifetime of
 * the transaction; released in pohmelfs_readpages_destroy().
 */
static int pohmelfs_readpages_init(struct pohmelfs_trans *t)
{
	struct pohmelfs_readpages_priv *rp = t->priv;

	kref_get(&rp->refcnt);
	return 0;
}
/*
 * Streaming receive callback: consumes reply bytes as they arrive.
 *
 * The dnet_attr/dnet_io_attr header is read first (into the reused
 * send buffer), then payload is copied page by page into rp->pages[]
 * at the matching in-page offsets.  t->io_offset tracks how much of
 * cmd->size has been consumed across invocations, so this function is
 * restartable after short receives.
 *
 * Returns 0 once the whole reply is consumed, or a negative error
 * (-EAGAIN just means no more data is available yet).
 */
static int pohmelfs_readpages_recv_reply(struct pohmelfs_trans *t, struct pohmelfs_state *recv)
{
	struct pohmelfs_readpages_priv *rp = t->priv;
	struct pohmelfs_wait *wait = &rp->wait;
	struct pohmelfs_inode *pi = wait->pi;
	unsigned int asize = sizeof(struct dnet_attr) + sizeof(struct dnet_io_attr);
	void *data = &t->cmd.attr; /* overwrite send buffer used for attr/ioattr */
	struct dnet_cmd *cmd = &recv->cmd;
	struct page *page;
	pgoff_t offset;
	int err, size;

	/* finish reading the attribute headers first */
	if (t->io_offset < asize) {
		size = asize - t->io_offset;
		data += t->io_offset;
		err = pohmelfs_recv(t, recv, data, size);
		if (err < 0)
			goto err_out_exit;

		dnet_convert_io_attr(&t->cmd.p.io);
	}

	/* copy payload into the request pages */
	while (t->io_offset != cmd->size) {
		offset = (t->io_offset - asize) & (PAGE_CACHE_SIZE - 1);
		size = PAGE_CACHE_SIZE - offset;
		page = rp->pages[rp->page_index];

		if (size > cmd->size - t->io_offset)
			size = cmd->size - t->io_offset;

		data = kmap(page);
		err = pohmelfs_recv(t, recv, data + offset, size);
		kunmap(page);

		/* advance to the next page when this one is full or the
		 * reply is exhausted */
		if (err > 0 && ((err + offset == PAGE_CACHE_SIZE) || (t->io_offset == cmd->size))) {
			rp->page_index++;
		}

		if (err < 0)
			goto err_out_exit;

		atomic_long_add(err, &wait->count);
	}

	err = 0;

err_out_exit:
	if ((err < 0) && (err != -ENOENT) && (err != -EAGAIN))
		pr_err("%d:%s: offset: %lld, data size: %llu, err: %d\n",
				cmd->id.group_id, pohmelfs_dump_id(pi->id.id),
				t->io_offset - asize + t->cmd.p.io.offset,
				(unsigned long long)cmd->size - asize, err);

	return err;
}
/*
 * Send a read command covering the whole page run to a single group
 * and wait (interruptibly, bounded by psb->read_wait_timeout) for the
 * transaction callbacks to signal completion.
 *
 * Returns the number of bytes received on success, -ETIMEDOUT on
 * timeout, or another negative error.
 */
static int pohmelfs_readpages_group(struct pohmelfs_inode *pi, struct pohmelfs_readpages_priv *rp, int group_id)
{
	struct pohmelfs_sb *psb = pohmelfs_sb(pi->vfs_inode.i_sb);
	struct pohmelfs_wait *wait = &rp->wait;
	struct pohmelfs_io *io;
	long ret;
	int err;

	io = kmem_cache_zalloc(pohmelfs_io_cache, GFP_NOIO);
	if (!io) {
		err = -ENOMEM;
		goto err_out_exit;
	}

	io->pi = pi;
	io->id = &pi->id;
	io->cmd = DNET_CMD_READ;
	/*
	 * We send the read command with lock, so it will be picked up by
	 * the same threads that process bulk write commands, leaving the
	 * nonblocking threads free for metadata commands like directory
	 * reading, lookup and so on.
	 */
	//io->cflags = DNET_FLAGS_NEED_ACK | DNET_FLAGS_NOLOCK;
	io->cflags = DNET_FLAGS_NEED_ACK;
	io->offset = page_offset(rp->pages[0]);
	io->size = rp->page_num * PAGE_CACHE_SIZE;
	if (psb->no_read_csum)
		io->ioflags = DNET_IO_FLAGS_NOCSUM;
	io->cb.init = pohmelfs_readpages_init;
	io->cb.complete = pohmelfs_readpages_complete;
	io->cb.destroy = pohmelfs_readpages_destroy;
	io->cb.recv_reply = pohmelfs_readpages_recv_reply;
	io->priv = rp;

	err = pohmelfs_send_io_group(io, group_id);
	if (err)
		goto err_out_free;

	/* woken by complete/destroy callbacks setting ->condition */
	ret = wait_event_interruptible_timeout(wait->wq, wait->condition != 0, msecs_to_jiffies(psb->read_wait_timeout));
	if (ret <= 0) {
		err = ret;
		if (ret == 0)
			err = -ETIMEDOUT;
		goto err_out_free;
	}

	/* negative condition carries the remote error status */
	if (wait->condition < 0) {
		err = wait->condition;
		goto err_out_free;
	}

	err = atomic_long_read(&wait->count);

err_out_free:
	kmem_cache_free(pohmelfs_io_cache, io);
err_out_exit:
	return err;
}
425 static int pohmelfs_readpages_groups(struct pohmelfs_inode *pi, struct pohmelfs_readpages_priv *rp,
426 int *groups, int group_num)
428 int err = -ENOENT;
429 int i;
431 for (i = 0; i < group_num; ++i) {
432 err = pohmelfs_readpages_group(pi, rp, groups[i]);
433 if (err < 0)
434 continue;
436 break;
439 pi->update = get_seconds();
440 return err;
443 static struct pohmelfs_readpages_priv *pohmelfs_readpages_alloc(struct pohmelfs_inode *pi, int page_num)
445 struct pohmelfs_readpages_priv *rp;
446 int err;
448 rp = kzalloc(sizeof(struct pohmelfs_readpages_priv) + page_num * sizeof(struct page *), GFP_NOIO);
449 if (!rp) {
450 err = -ENOMEM;
451 goto err_out_exit;
454 err = pohmelfs_wait_init(&rp->wait, pi);
455 if (err)
456 goto err_out_free;
458 rp->page_num = page_num;
459 kref_init(&rp->refcnt);
460 return rp;
462 err_out_free:
463 kfree(rp);
464 err_out_exit:
465 return ERR_PTR(err);
468 static int pohmelfs_readpages_send(struct pohmelfs_inode *pi, struct pohmelfs_readpages_priv *rp)
470 struct pohmelfs_sb *psb = pohmelfs_sb(pi->vfs_inode.i_sb);
471 int err;
473 if (pi->group_num) {
474 err = pohmelfs_readpages_groups(pi, rp, pi->groups, pi->group_num);
475 } else {
476 err = pohmelfs_readpages_groups(pi, rp, psb->groups, psb->group_num);
479 return err;
/*
 * Add up to @num pages from @page_list to the page cache and send one
 * read request covering the ones that made it in.  Pages that could
 * not be added are released; a partially filled request is still sent.
 * Returns a negative error or the (>= 0) result of the send.
 */
static int pohmelfs_readpages_send_list(struct address_space *mapping, struct list_head *page_list, int num)
{
	struct inode *inode = mapping->host;
	struct pohmelfs_inode *pi = pohmelfs_inode(inode);
	int err = 0, i;
	struct pohmelfs_readpages_priv *rp;
	struct page *tmp, *page;

	if (list_empty(page_list))
		goto err_out_exit;

	rp = pohmelfs_readpages_alloc(pi, num);
	if (IS_ERR(rp)) {
		err = PTR_ERR(rp);
		goto err_out_exit;
	}

	i = 0;
	list_for_each_entry_safe(page, tmp, page_list, lru) {
		list_del(&page->lru);

		if (add_to_page_cache_lru(page, mapping, page->index, GFP_KERNEL)) {
			/* Failed - free current page, optionally send already grabbed and free others */
			page_cache_release(page);
			break;
		}

		rp->pages[i] = page;
		i++;
	}

	if (i > 0) {
		/* shrink the request to the pages actually grabbed */
		rp->page_num = i;
		err = pohmelfs_readpages_send(pi, rp);

		pr_debug("%s: ino: %lu, offset: %lu, pages: %u/%u: %d\n",
				pohmelfs_dump_id(pi->id.id), inode->i_ino,
				(long)page_offset(rp->pages[0]),
				rp->page_num, num, err);
	}

	kref_put(&rp->refcnt, pohmelfs_readpages_free);

	/* Cleanup pages which were not added into page cache */
	list_for_each_entry_safe(page, tmp, page_list, lru) {
		list_del(&page->lru);
		page_cache_release(page);
	}

err_out_exit:
	return err;
}
/*
 * ->readpages: split the readahead list into runs of contiguous page
 * indices and issue one read request per run.  The list arrives with
 * the lowest index last, hence the reverse iteration.
 */
static int pohmelfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *page_list, unsigned nr_pages)
{
	struct page *tmp, *page;
	pgoff_t idx;
	LIST_HEAD(head);
	int err = 0, i = 0;

	while (!list_empty(page_list)) {
		/* last entry has the lowest index - start a new run there */
		page = list_entry(page_list->prev, struct page, lru);
		idx = page->index;
		i = 0;

		INIT_LIST_HEAD(&head);

		/* collect the longest contiguous run starting at idx */
		list_for_each_entry_safe_reverse(page, tmp, page_list, lru) {
			if (idx != page->index) {
				struct pohmelfs_inode *pi = pohmelfs_inode(mapping->host);
				pr_debug("%s: index mismatch: want: %ld, page-index: %ld, total: %d\n",
						pohmelfs_dump_id(pi->id.id),
						(long)idx, (long)page->index,
						nr_pages);
				break;
			}

			list_move_tail(&page->lru, &head);
			i++;
			idx++;
		}

		err = pohmelfs_readpages_send_list(mapping, &head, i);
	}

	if (err >= 0)
		err = 0;

	return err;
}
573 static int pohmelfs_readpage(struct file *file, struct page *page)
575 struct inode *inode = page->mapping->host;
576 struct pohmelfs_inode *pi = pohmelfs_inode(inode);
577 struct pohmelfs_readpages_priv *rp;
578 int err;
580 if (inode->i_size <= page->index << PAGE_CACHE_SHIFT) {
581 SetPageUptodate(page);
582 unlock_page(page);
583 return 0;
586 rp = pohmelfs_readpages_alloc(pi, 1);
587 if (IS_ERR(rp)) {
588 err = PTR_ERR(rp);
589 goto err_out_exit;
592 rp->pages[0] = page;
593 page_cache_get(page);
595 err = pohmelfs_readpages_send(pi, rp);
596 if (err >= 0)
597 err = 0;
599 kref_put(&rp->refcnt, pohmelfs_readpages_free);
600 err_out_exit:
601 if (err < 0)
602 pr_err("%s: %s: ino: %lu, offset: %lu, uptodate: %d, err: %d\n",
603 __func__, pohmelfs_dump_id(pi->id.id), inode->i_ino,
604 (long)page_offset(page), PageUptodate(page), err);
606 return err;
/*
 * Final release of a write control block.
 *
 * A write is "bad" when fewer than a majority of the groups (or, when
 * configured, psb->successful_write_count) acknowledged it; the
 * mapping is then flagged -EIO and the pages marked with an error.
 * All still-locked pages have their writeback completed and are
 * unlocked here.
 */
void pohmelfs_write_ctl_release(struct kref *kref)
{
	struct pohmelfs_write_ctl *ctl = container_of(kref, struct pohmelfs_write_ctl, refcnt);
	struct address_space *mapping = ctl->pvec.pages[0]->mapping;
	struct inode *inode = mapping->host;
	struct pohmelfs_sb *psb = pohmelfs_sb(inode->i_sb);
	/* quorum: more than half of the groups must have acked */
	int bad_write = atomic_read(&ctl->good_writes) < psb->group_num / 2 + 1;
	struct page *page;
	unsigned int i;

	if (psb->successful_write_count && (atomic_read(&ctl->good_writes) >= psb->successful_write_count))
		bad_write = 0;

	if (bad_write) {
		struct pohmelfs_inode *pi = pohmelfs_inode(inode);
		unsigned long long offset = page_offset(ctl->pvec.pages[0]);

		pr_debug("%s: bad write: ino: %lu, isize: %llu, offset: %llu: writes: %d/%d\n",
				pohmelfs_dump_id(pi->id.id),
				inode->i_ino, inode->i_size, offset,
				atomic_read(&ctl->good_writes), psb->group_num);
		mapping_set_error(mapping, -EIO);
	}

	for (i = 0; i < pagevec_count(&ctl->pvec); ++i) {
		page = ctl->pvec.pages[i];

		if (PageLocked(page)) {
			end_page_writeback(page);

			if (bad_write) {
				SetPageError(page);
				ClearPageUptodate(page);
				/*
				 * Do not reschedule failed write page again
				 * This may explode systems with large caches
				 * when there is no connection to elliptics cluster
				 */
				//set_page_dirty(page);
			}
			unlock_page(page);
		}
	}

	pagevec_release(&ctl->pvec);
	kmem_cache_free(pohmelfs_write_cache, ctl);
}
/*
 * Lock and prepare the pages in @ctl->pvec for writeback, then issue
 * one write command covering them.
 *
 * Pages that changed mapping or are no longer dirty are skipped; a
 * page still under writeback stops the scan (for sync writeback it is
 * waited on first).  Consumes the caller's reference on @ctl.
 */
static int pohmelfs_writepages_chunk(struct pohmelfs_inode *pi, struct pohmelfs_write_ctl *ctl,
		struct writeback_control *wbc, struct address_space *mapping)
{
	struct inode *inode = &pi->vfs_inode;
	uint64_t offset, size;
	unsigned i;
	int err = 0, good = 0;

	offset = page_offset(ctl->pvec.pages[0]);

	size = 0;
	/* we will lookup them again when doing actual send */
	for (i = 0; i< pagevec_count(&ctl->pvec); ++i) {
		struct page *page = ctl->pvec.pages[i];

		lock_page(page);
#if 1
		if (unlikely(page->mapping != mapping)) {
continue_unlock:
			unlock_page(page);
			continue;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page)) {
			unlock_page(page);
			break;
		}

		if (!PageDirty(page))
			goto continue_unlock;

		if (!clear_page_dirty_for_io(page))
			goto continue_unlock;
#else
		clear_page_dirty_for_io(page);
#endif

		set_page_writeback(page);

		good++;
		size += PAGE_CACHE_SIZE;
		wbc->nr_to_write--;
	}

	if (good != 0) {
		/*
		 * NOTE(review): size is recomputed from the full pagevec
		 * here, even when pages above were skipped, then clamped
		 * to i_size - verify the server side tolerates the
		 * over-wide range.
		 */
		size = pagevec_count(&ctl->pvec) * PAGE_CACHE_SIZE;
		if (offset + size > inode->i_size)
			size = inode->i_size - offset;

		err = pohmelfs_write_command(pi, ctl, offset, size);
		if (err)
			goto err_out_exit;
	}

err_out_exit:
	kref_put(&ctl->refcnt, pohmelfs_write_ctl_release);
	return err;
}
/*
 * Build a write control block holding pages [@start, @end) of @pvec
 * and hand it to pohmelfs_writepages_chunk(), which consumes the
 * control block's reference.
 */
static int pohmelfs_writepages_send(struct address_space *mapping, struct writeback_control *wbc, struct pagevec *pvec, int start, int end)
{
	struct inode *inode = mapping->host;
	struct pohmelfs_inode *pi = pohmelfs_inode(inode);
	struct pohmelfs_write_ctl *ctl;
	int err, i;

	ctl = kmem_cache_zalloc(pohmelfs_write_cache, GFP_NOIO);
	if (!ctl) {
		err = -ENOMEM;
		goto err_out_exit;
	}

	kref_init(&ctl->refcnt);
	atomic_set(&ctl->good_writes, 0);

	for (i = start; i < end; ++i)
		pagevec_add(&ctl->pvec, pvec->pages[i]);

	err = pohmelfs_writepages_chunk(pi, ctl, wbc, mapping);
	if (err)
		goto err_out_exit;

err_out_exit:
	return err;
}
/*
 * ->writepages: walk dirty pages (cyclically when requested), group
 * contiguous index runs and send each run as one write command.
 * After any data was written, the inode metadata is written out too.
 */
static int pohmelfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	struct pohmelfs_inode *pi = pohmelfs_inode(inode);
	pgoff_t index, start, end /* inclusive */, idx;
	int done = 0;
	int range_whole = 0;
	int should_loop = 1;
	int nr_pages, err = 0, i, start_idx;
	struct pagevec pvec;
	int written = 0;

	index = wbc->range_start >> PAGE_CACHE_SHIFT;
	end = wbc->range_end >> PAGE_CACHE_SHIFT;

	pr_debug("%s: ino: %ld, nr: %ld, index: %llu, end: %llu, total_size: %lu, sync: %d\n",
			pohmelfs_dump_id(pohmelfs_inode(inode)->id.id), inode->i_ino,
			wbc->nr_to_write, wbc->range_start, wbc->range_end,
			(unsigned long)inode->i_size, wbc->sync_mode);

	if (wbc->range_cyclic) {
		start = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		start = wbc->range_start >> PAGE_CACHE_SHIFT;
		end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		should_loop = 0;
	}
	index = start;

retry:
	while (!done && index <= end) {
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (!nr_pages) {
			err = 0;
			break;
		}

		/* flush each contiguous run of indices separately */
		idx = pvec.pages[0]->index;
		for (start_idx = 0, i = 0; i< nr_pages; ++i) {
			struct page *page = pvec.pages[i];

			/* non-contiguous pages detected */
			if (idx != page->index) {
				err = pohmelfs_writepages_send(mapping, wbc, &pvec, start_idx, i);
				if (err)
					goto err_out_exit;
				start_idx = i;
			}

			idx++;
		}

		/* flush the final (or only) run */
		err = pohmelfs_writepages_send(mapping, wbc, &pvec, start_idx, nr_pages);
		if (err)
			goto err_out_exit;

		if (wbc->nr_to_write <= 0)
			done = 1;

		written += nr_pages;
	}

	if (should_loop && !done) {
		/* more to do; loop back to beginning of file */
		should_loop = 0;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	if (written) {
		err = pohmelfs_metadata_inode(pi, wbc->sync_mode != WB_SYNC_NONE);
		if (err)
			goto err_out_exit;
	}

	if (test_and_clear_bit(AS_EIO, &mapping->flags))
		err = -EIO;
err_out_exit:
	pr_debug("%s: metadata write complete: %d\n",
			pohmelfs_dump_id(pi->id.id), err);
	return err;
}
/*
 * Address-space operations: generic write_begin/write_end helpers plus
 * the pohmelfs read/write paths defined above.
 */
static const struct address_space_operations pohmelfs_aops = {
	.write_begin = simple_write_begin,
	.write_end = simple_write_end,
	.writepages = pohmelfs_writepages,
	.readpage = pohmelfs_readpage,
	.readpages = pohmelfs_readpages,
	.set_page_dirty = __set_page_dirty_nobuffers,
};
/*
 * Byte-swap an inode info block to little-endian wire format in place
 * (the inverse also works, as the swaps are symmetric).
 */
void pohmelfs_convert_inode_info(struct pohmelfs_inode_info *info)
{
	info->ino = cpu_to_le64(info->ino);
	info->mode = cpu_to_le64(info->mode);
	info->nlink = cpu_to_le64(info->nlink);
	info->uid = cpu_to_le32(info->uid);
	info->gid = cpu_to_le32(info->gid);
	info->namelen = cpu_to_le32(info->namelen);
	info->blocks = cpu_to_le64(info->blocks);
	info->rdev = cpu_to_le64(info->rdev);
	info->size = cpu_to_le64(info->size);
	info->version = cpu_to_le64(info->version);
	info->blocksize = cpu_to_le64(info->blocksize);
	info->flags = cpu_to_le64(info->flags);

	dnet_convert_time(&info->ctime);
	dnet_convert_time(&info->mtime);
	dnet_convert_time(&info->atime);
}
/*
 * Copy current VFS inode attributes into a wire info structure (host
 * byte order; pohmelfs_convert_inode_info() swaps it before sending).
 * info->namelen is left untouched; flags are cleared.
 */
void pohmelfs_fill_inode_info(struct inode *inode, struct pohmelfs_inode_info *info)
{
	struct pohmelfs_inode *pi = pohmelfs_inode(inode);

	memcpy(info->id.id, pi->id.id, DNET_ID_SIZE);

	info->ino = inode->i_ino;
	info->mode = inode->i_mode;
	info->nlink = inode->i_nlink;
	info->uid = inode->i_uid;
	info->gid = inode->i_gid;
	info->blocks = inode->i_blocks;
	info->rdev = inode->i_rdev;
	info->size = inode->i_size;
	info->version = inode->i_version;
	info->blocksize = 1 << inode->i_blkbits;

	info->ctime.tsec = inode->i_ctime.tv_sec;
	info->ctime.tnsec = inode->i_ctime.tv_nsec;

	info->mtime.tsec = inode->i_mtime.tv_sec;
	info->mtime.tnsec = inode->i_mtime.tv_nsec;

	info->atime.tsec = inode->i_atime.tv_sec;
	info->atime.tnsec = inode->i_atime.tv_nsec;

	info->flags = 0;
}
894 void pohmelfs_fill_inode(struct inode *inode, struct pohmelfs_inode_info *info)
896 pr_debug("%s: ino: %lu inode is regular: %d, dir: %d, link: %d, mode: %o, "
897 "namelen: %u, size: %llu, state: %lx, mtime: %llu.%llu/%lu.%lu\n",
898 pohmelfs_dump_id(info->id.id), inode->i_ino,
899 S_ISREG(inode->i_mode), S_ISDIR(inode->i_mode),
900 S_ISLNK(inode->i_mode), inode->i_mode, info->namelen,
901 inode->i_size, inode->i_state,
902 (unsigned long long)info->mtime.tsec,
903 (unsigned long long)info->mtime.tnsec,
904 inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec);
906 if (info->mtime.tsec < inode->i_mtime.tv_sec)
907 return;
908 if ((info->mtime.tsec == inode->i_mtime.tv_sec) &&
909 (info->mtime.tnsec < inode->i_mtime.tv_nsec))
910 return;
912 pohmelfs_inode(inode)->id = info->id;
914 inode->i_mode = info->mode;
915 set_nlink(inode, info->nlink);
916 inode->i_uid = info->uid;
917 inode->i_gid = info->gid;
918 inode->i_blocks = info->blocks;
919 inode->i_rdev = info->rdev;
920 inode->i_size = info->size;
921 inode->i_version = info->version;
922 inode->i_blkbits = ffs(info->blocksize);
924 inode->i_mtime = pohmelfs_date(&info->mtime);
925 inode->i_atime = pohmelfs_date(&info->atime);
926 inode->i_ctime = pohmelfs_date(&info->ctime);
/*
 * Populate a fresh info structure for a newly created object: current
 * fs credentials, current time stamps, zero size and a newly generated
 * object id.
 */
static void pohmelfs_inode_info_current(struct pohmelfs_sb *psb, struct pohmelfs_inode_info *info)
{
	struct timespec ts = CURRENT_TIME;
	struct dnet_time dtime;

	/* directories start at 2 ("." and "..") */
	info->nlink = S_ISDIR(info->mode) ? 2 : 1;
	info->uid = current_fsuid();
	info->gid = current_fsgid();
	info->size = 0;
	info->blocksize = PAGE_SIZE;
	info->blocks = 0;
	info->rdev = 0;
	info->version = 0;

	dtime.tsec = ts.tv_sec;
	dtime.tnsec = ts.tv_nsec;

	info->ctime = dtime;
	info->mtime = dtime;
	info->atime = dtime;

	/* NOTE(review): pohmelfs_gen_id() can fail but the error is
	 * ignored here, leaving info->id as-is.  TODO confirm. */
	pohmelfs_gen_id(psb, &info->id);
}
/* Special files (devices, fifos, sockets) only need generic setattr. */
const struct inode_operations pohmelfs_special_inode_operations = {
	.setattr = simple_setattr,
};
/*
 * Instantiate an in-memory inode from an info structure.  A locally
 * unique inode number comes from a superblock counter; for a genuinely
 * new inode the ops are chosen by file type and the inode is inserted
 * into the superblock id rbtree.
 * Returns ERR_PTR() on failure.
 */
struct pohmelfs_inode *pohmelfs_existing_inode(struct pohmelfs_sb *psb, struct pohmelfs_inode_info *info)
{
	struct pohmelfs_inode *pi;
	struct inode *inode;
	int err;

	inode = iget_locked(psb->sb, atomic_long_inc_return(&psb->ino));
	if (!inode) {
		err = -ENOMEM;
		goto err_out_exit;
	}

	pi = pohmelfs_inode(inode);

	if (inode->i_state & I_NEW) {
		pohmelfs_fill_inode(inode, info);
		/*
		 * i_mapping is a pointer to i_data during inode initialization.
		 */
		inode->i_data.a_ops = &pohmelfs_aops;

		if (S_ISREG(inode->i_mode)) {
			inode->i_fop = &pohmelfs_file_ops;
			inode->i_op = &pohmelfs_file_inode_operations;
		} else if (S_ISDIR(inode->i_mode)) {
			inode->i_fop = &pohmelfs_dir_fops;
			inode->i_op = &pohmelfs_dir_inode_operations;
		} else if (S_ISLNK(inode->i_mode)) {
			inode->i_op = &pohmelfs_symlink_inode_operations;
			inode->i_mapping->a_ops = &pohmelfs_aops;
		} else {
			inode->i_op = &pohmelfs_special_inode_operations;
		}

		err = pohmelfs_sb_inode_insert(psb, pi);
		if (err)
			goto err_out_put;

		unlock_new_inode(inode);
	}

	return pi;

err_out_put:
	unlock_new_inode(inode);
	iput(inode);
err_out_exit:
	return ERR_PTR(err);
}
1007 struct pohmelfs_inode *pohmelfs_new_inode(struct pohmelfs_sb *psb, int mode)
1009 struct pohmelfs_inode *pi;
1010 struct pohmelfs_inode_info *info;
1011 int err;
1013 info = kmem_cache_zalloc(pohmelfs_inode_info_cache, GFP_NOIO);
1014 if (!info) {
1015 err = -ENOMEM;
1016 goto err_out_exit;
1019 info->mode = mode;
1021 pohmelfs_inode_info_current(psb, info);
1023 pi = pohmelfs_existing_inode(psb, info);
1024 if (IS_ERR(pi)) {
1025 err = PTR_ERR(pi);
1026 goto err_out_free;
1029 kmem_cache_free(pohmelfs_inode_info_cache, info);
1030 return pi;
1032 err_out_free:
1033 kmem_cache_free(pohmelfs_inode_info_cache, info);
1034 err_out_exit:
1035 return ERR_PTR(err);
/*
 * Initialize a waiter bound to @pi, pinning the inode with igrab().
 * Returns -EINVAL when the inode is already being torn down;
 * the reference is dropped by the waiter's release path.
 */
int pohmelfs_wait_init(struct pohmelfs_wait *wait, struct pohmelfs_inode *pi)
{
	if (!igrab(&pi->vfs_inode))
		return -EINVAL;

	wait->pi = pi;

	atomic_long_set(&wait->count, 0);
	init_waitqueue_head(&wait->wq);
	kref_init(&wait->refcnt);

	return 0;
}
1052 struct pohmelfs_wait *pohmelfs_wait_alloc(struct pohmelfs_inode *pi)
1054 struct pohmelfs_wait *wait;
1056 wait = kmem_cache_zalloc(pohmelfs_wait_cache, GFP_NOIO);
1057 if (!wait) {
1058 goto err_out_exit;
1061 if (pohmelfs_wait_init(wait, pi))
1062 goto err_out_free;
1064 return wait;
1066 err_out_free:
1067 kmem_cache_free(pohmelfs_wait_cache, wait);
1068 err_out_exit:
1069 return NULL;
/*
 * Last-reference destructor for a waiter: drop the inode reference
 * taken in pohmelfs_wait_init() and free the waiter object.
 */
static void pohmelfs_wait_free(struct kref *kref)
{
	struct pohmelfs_wait *wait = container_of(kref, struct pohmelfs_wait, refcnt);
	struct inode *inode = &wait->pi->vfs_inode;

	iput(inode);
	kmem_cache_free(pohmelfs_wait_cache, wait);
}
/* Drop a waiter reference; the last put frees it via pohmelfs_wait_free(). */
void pohmelfs_wait_put(struct pohmelfs_wait *wait)
{
	kref_put(&wait->refcnt, pohmelfs_wait_free);
}