On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / plugin / file / file.c
blob84f812235dd624d45fde32b6738c6b3d22c7f34c
/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
 * reiser4/README */

/*
 * This file contains implementations of inode/file/address_space/file plugin
 * operations specific for "unix file plugin" (plugin id is
 * UNIX_FILE_PLUGIN_ID). "Unix file" is either built of tail items only
 * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (has
 * no items but stat data).
 */
12 #include "../../inode.h"
13 #include "../../super.h"
14 #include "../../tree_walk.h"
15 #include "../../carry.h"
16 #include "../../page_cache.h"
17 #include "../../ioctl.h"
18 #include "../object.h"
19 #include "../cluster.h"
20 #include "../../safe_link.h"
22 #include <linux/writeback.h>
23 #include <linux/pagevec.h>
24 #include <linux/syscalls.h>
27 static int unpack(struct file *file, struct inode *inode, int forever);
28 static void drop_access(struct unix_file_info *);
29 static int hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
30 znode_lock_mode lock_mode);
32 /* Get exclusive access and make sure that file is not partially
33 * converted (It may happen that another process is doing tail
34 * conversion. If so, wait until it completes)
36 static inline void get_exclusive_access_careful(struct unix_file_info * uf_info,
37 struct inode *inode)
39 do {
40 get_exclusive_access(uf_info);
41 if (!reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))
42 break;
43 drop_exclusive_access(uf_info);
44 schedule();
45 } while (1);
48 /* get unix file plugin specific portion of inode */
49 struct unix_file_info *unix_file_inode_data(const struct inode *inode)
51 return &reiser4_inode_data(inode)->file_plugin_data.unix_file_info;
54 /**
55 * equal_to_rdk - compare key and znode's right delimiting key
56 * @node: node whose right delimiting key to compare with @key
57 * @key: key to compare with @node's right delimiting key
59 * Returns true if @key is equal to right delimiting key of @node.
61 int equal_to_rdk(znode *node, const reiser4_key *key)
63 int result;
65 read_lock_dk(znode_get_tree(node));
66 result = keyeq(key, znode_get_rd_key(node));
67 read_unlock_dk(znode_get_tree(node));
68 return result;
71 #if REISER4_DEBUG
73 /**
74 * equal_to_ldk - compare key and znode's left delimiting key
75 * @node: node whose left delimiting key to compare with @key
76 * @key: key to compare with @node's left delimiting key
78 * Returns true if @key is equal to left delimiting key of @node.
80 int equal_to_ldk(znode *node, const reiser4_key *key)
82 int result;
84 read_lock_dk(znode_get_tree(node));
85 result = keyeq(key, znode_get_ld_key(node));
86 read_unlock_dk(znode_get_tree(node));
87 return result;
90 /**
91 * check_coord - check whether coord corresponds to key
92 * @coord: coord to check
93 * @key: key @coord has to correspond to
95 * Returns true if @coord is set as if it was set as result of lookup with @key
96 * in coord->node.
98 static int check_coord(const coord_t *coord, const reiser4_key *key)
100 coord_t twin;
102 node_plugin_by_node(coord->node)->lookup(coord->node, key,
103 FIND_MAX_NOT_MORE_THAN, &twin);
104 return coords_equal(coord, &twin);
107 #endif /* REISER4_DEBUG */
110 * init_uf_coord - initialize extended coord
111 * @uf_coord:
112 * @lh:
116 void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh)
118 coord_init_zero(&uf_coord->coord);
119 coord_clear_iplug(&uf_coord->coord);
120 uf_coord->lh = lh;
121 init_lh(lh);
122 memset(&uf_coord->extension, 0, sizeof(uf_coord->extension));
123 uf_coord->valid = 0;
126 static void validate_extended_coord(uf_coord_t *uf_coord, loff_t offset)
128 assert("vs-1333", uf_coord->valid == 0);
130 if (coord_is_between_items(&uf_coord->coord))
131 return;
133 assert("vs-1348",
134 item_plugin_by_coord(&uf_coord->coord)->s.file.
135 init_coord_extension);
137 item_body_by_coord(&uf_coord->coord);
138 item_plugin_by_coord(&uf_coord->coord)->s.file.
139 init_coord_extension(uf_coord, offset);
143 * goto_right_neighbor - lock right neighbor, drop current node lock
144 * @coord:
145 * @lh:
147 * Obtain lock on right neighbor and drop lock on current node.
149 int goto_right_neighbor(coord_t *coord, lock_handle *lh)
151 int result;
152 lock_handle lh_right;
154 assert("vs-1100", znode_is_locked(coord->node));
156 init_lh(&lh_right);
157 result = reiser4_get_right_neighbor(&lh_right, coord->node,
158 znode_is_wlocked(coord->node) ?
159 ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
160 GN_CAN_USE_UPPER_LEVELS);
161 if (result) {
162 done_lh(&lh_right);
163 return result;
167 * we hold two longterm locks on neighboring nodes. Unlock left of
168 * them
170 done_lh(lh);
172 coord_init_first_unit_nocheck(coord, lh_right.node);
173 move_lh(lh, &lh_right);
175 return 0;
180 * set_file_state
181 * @uf_info:
182 * @cbk_result:
183 * @level:
185 * This is to be used by find_file_item and in find_file_state to
186 * determine real state of file
188 static void set_file_state(struct unix_file_info *uf_info, int cbk_result,
189 tree_level level)
191 if (cbk_errored(cbk_result))
192 /* error happened in find_file_item */
193 return;
195 assert("vs-1164", level == LEAF_LEVEL || level == TWIG_LEVEL);
197 if (uf_info->container == UF_CONTAINER_UNKNOWN) {
198 if (cbk_result == CBK_COORD_NOTFOUND)
199 uf_info->container = UF_CONTAINER_EMPTY;
200 else if (level == LEAF_LEVEL)
201 uf_info->container = UF_CONTAINER_TAILS;
202 else
203 uf_info->container = UF_CONTAINER_EXTENTS;
204 } else {
206 * file state is known, check whether it is set correctly if
207 * file is not being tail converted
209 if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info),
210 REISER4_PART_IN_CONV)) {
211 assert("vs-1162",
212 ergo(level == LEAF_LEVEL &&
213 cbk_result == CBK_COORD_FOUND,
214 uf_info->container == UF_CONTAINER_TAILS));
215 assert("vs-1165",
216 ergo(level == TWIG_LEVEL &&
217 cbk_result == CBK_COORD_FOUND,
218 uf_info->container == UF_CONTAINER_EXTENTS));
223 int find_file_item_nohint(coord_t *coord, lock_handle *lh,
224 const reiser4_key *key, znode_lock_mode lock_mode,
225 struct inode *inode)
227 return reiser4_object_lookup(inode, key, coord, lh, lock_mode,
228 FIND_MAX_NOT_MORE_THAN,
229 TWIG_LEVEL, LEAF_LEVEL,
230 (lock_mode == ZNODE_READ_LOCK) ? CBK_UNIQUE :
231 (CBK_UNIQUE | CBK_FOR_INSERT),
232 NULL /* ra_info */ );
236 * find_file_item - look for file item in the tree
237 * @hint: provides coordinate, lock handle, seal
238 * @key: key for search
239 * @mode: mode of lock to put on returned node
240 * @ra_info:
241 * @inode:
243 * This finds position in the tree corresponding to @key. It first tries to use
244 * @hint's seal if it is set.
246 int find_file_item(hint_t *hint, const reiser4_key *key,
247 znode_lock_mode lock_mode,
248 struct inode *inode)
250 int result;
251 coord_t *coord;
252 lock_handle *lh;
254 assert("nikita-3030", reiser4_schedulable());
255 assert("vs-1707", hint != NULL);
256 assert("vs-47", inode != NULL);
258 coord = &hint->ext_coord.coord;
259 lh = hint->ext_coord.lh;
260 init_lh(lh);
262 result = hint_validate(hint, key, 1 /* check key */, lock_mode);
263 if (!result) {
264 if (coord->between == AFTER_UNIT &&
265 equal_to_rdk(coord->node, key)) {
266 result = goto_right_neighbor(coord, lh);
267 if (result == -E_NO_NEIGHBOR)
268 return RETERR(-EIO);
269 if (result)
270 return result;
271 assert("vs-1152", equal_to_ldk(coord->node, key));
273 * we moved to different node. Invalidate coord
274 * extension, zload is necessary to init it again
276 hint->ext_coord.valid = 0;
279 set_file_state(unix_file_inode_data(inode), CBK_COORD_FOUND,
280 znode_get_level(coord->node));
282 return CBK_COORD_FOUND;
285 coord_init_zero(coord);
286 result = find_file_item_nohint(coord, lh, key, lock_mode, inode);
287 set_file_state(unix_file_inode_data(inode), result,
288 znode_get_level(coord->node));
290 /* FIXME: we might already have coord extension initialized */
291 hint->ext_coord.valid = 0;
292 return result;
/* plugin->u.file.write_flow = NULL
   plugin->u.file.read_flow = NULL */
298 void hint_init_zero(hint_t * hint)
300 memset(hint, 0, sizeof(*hint));
301 init_lh(&hint->lh);
302 hint->ext_coord.lh = &hint->lh;
305 static int find_file_state(struct inode *inode, struct unix_file_info *uf_info)
307 int result;
308 reiser4_key key;
309 coord_t coord;
310 lock_handle lh;
312 assert("vs-1628", ea_obtained(uf_info));
314 if (uf_info->container == UF_CONTAINER_UNKNOWN) {
315 key_by_inode_and_offset_common(inode, 0, &key);
316 init_lh(&lh);
317 result = find_file_item_nohint(&coord, &lh, &key,
318 ZNODE_READ_LOCK, inode);
319 set_file_state(uf_info, result, znode_get_level(coord.node));
320 done_lh(&lh);
321 if (!cbk_errored(result))
322 result = 0;
323 } else
324 result = 0;
325 assert("vs-1074",
326 ergo(result == 0, uf_info->container != UF_CONTAINER_UNKNOWN));
327 reiser4_txn_restart_current();
328 return result;
332 * Estimate and reserve space needed to truncate page
333 * which gets partially truncated: one block for page
334 * itself, stat-data update (estimate_one_insert_into_item)
335 * and one item insertion (estimate_one_insert_into_item)
336 * which may happen if page corresponds to hole extent and
337 * unallocated one will have to be created
339 static int reserve_partial_page(reiser4_tree * tree)
341 grab_space_enable();
342 return reiser4_grab_reserved(reiser4_get_current_sb(),
344 2 * estimate_one_insert_into_item(tree),
345 BA_CAN_COMMIT);
348 /* estimate and reserve space needed to cut one item and update one stat data */
349 static int reserve_cut_iteration(reiser4_tree * tree)
351 __u64 estimate = estimate_one_item_removal(tree)
352 + estimate_one_insert_into_item(tree);
354 assert("nikita-3172", lock_stack_isclean(get_current_lock_stack()));
356 grab_space_enable();
357 /* We need to double our estimate now that we can delete more than one
358 node. */
359 return reiser4_grab_reserved(reiser4_get_current_sb(), estimate * 2,
360 BA_CAN_COMMIT);
363 int reiser4_update_file_size(struct inode *inode, loff_t new_size,
364 int update_sd)
366 int result = 0;
368 INODE_SET_SIZE(inode, new_size);
369 if (update_sd) {
370 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
371 result = reiser4_update_sd(inode);
373 return result;
377 * Cut file items one by one starting from the last one until
378 * new file size (inode->i_size) is reached. Reserve space
379 * and update file stat data on every single cut from the tree
381 int cut_file_items(struct inode *inode, loff_t new_size,
382 int update_sd, loff_t cur_size,
383 int (*update_actor) (struct inode *, loff_t, int))
385 reiser4_key from_key, to_key;
386 reiser4_key smallest_removed;
387 file_plugin *fplug = inode_file_plugin(inode);
388 int result;
389 int progress = 0;
391 assert("vs-1248",
392 fplug == file_plugin_by_id(UNIX_FILE_PLUGIN_ID) ||
393 fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
395 fplug->key_by_inode(inode, new_size, &from_key);
396 to_key = from_key;
397 set_key_offset(&to_key, cur_size - 1 /*get_key_offset(reiser4_max_key()) */ );
398 /* this loop normally runs just once */
399 while (1) {
400 result = reserve_cut_iteration(reiser4_tree_by_inode(inode));
401 if (result)
402 break;
404 result = reiser4_cut_tree_object(current_tree, &from_key, &to_key,
405 &smallest_removed, inode, 1,
406 &progress);
407 if (result == -E_REPEAT) {
409 * -E_REPEAT is a signal to interrupt a long
410 * file truncation process
412 if (progress) {
413 result = update_actor(inode,
414 get_key_offset(&smallest_removed),
415 update_sd);
416 if (result)
417 break;
419 /* the below does up(sbinfo->delete_mutex).
420 * Do not get folled */
421 reiser4_release_reserved(inode->i_sb);
423 * reiser4_cut_tree_object() was interrupted probably
424 * because current atom requires commit, we have to
425 * release transaction handle to allow atom commit.
427 reiser4_txn_restart_current();
428 continue;
430 if (result
431 && !(result == CBK_COORD_NOTFOUND && new_size == 0
432 && inode->i_size == 0))
433 break;
435 set_key_offset(&smallest_removed, new_size);
436 /* Final sd update after the file gets its correct size */
437 result = update_actor(inode, get_key_offset(&smallest_removed),
438 update_sd);
439 break;
442 /* the below does up(sbinfo->delete_mutex). Do not get folled */
443 reiser4_release_reserved(inode->i_sb);
445 return result;
448 int find_or_create_extent(struct page *page);
450 /* part of truncate_file_body: it is called when truncate is used to make file
451 shorter */
452 static int shorten_file(struct inode *inode, loff_t new_size)
454 int result;
455 struct page *page;
456 int padd_from;
457 unsigned long index;
458 struct unix_file_info *uf_info;
461 * all items of ordinary reiser4 file are grouped together. That is why
462 * we can use reiser4_cut_tree. Plan B files (for instance) can not be
463 * truncated that simply
465 result = cut_file_items(inode, new_size, 1 /*update_sd */ ,
466 get_key_offset(reiser4_max_key()),
467 reiser4_update_file_size);
468 if (result)
469 return result;
471 uf_info = unix_file_inode_data(inode);
472 assert("vs-1105", new_size == inode->i_size);
473 if (new_size == 0) {
474 uf_info->container = UF_CONTAINER_EMPTY;
475 return 0;
478 result = find_file_state(inode, uf_info);
479 if (result)
480 return result;
481 if (uf_info->container == UF_CONTAINER_TAILS)
483 * No need to worry about zeroing last page after new file
484 * end
486 return 0;
488 padd_from = inode->i_size & (PAGE_CACHE_SIZE - 1);
489 if (!padd_from)
490 /* file is truncated to page boundary */
491 return 0;
493 result = reserve_partial_page(reiser4_tree_by_inode(inode));
494 if (result) {
495 reiser4_release_reserved(inode->i_sb);
496 return result;
499 /* last page is partially truncated - zero its content */
500 index = (inode->i_size >> PAGE_CACHE_SHIFT);
501 page = read_mapping_page(inode->i_mapping, index, NULL);
502 if (IS_ERR(page)) {
504 * the below does up(sbinfo->delete_mutex). Do not get
505 * confused
507 reiser4_release_reserved(inode->i_sb);
508 if (likely(PTR_ERR(page) == -EINVAL)) {
509 /* looks like file is built of tail items */
510 return 0;
512 return PTR_ERR(page);
514 wait_on_page_locked(page);
515 if (!PageUptodate(page)) {
516 page_cache_release(page);
518 * the below does up(sbinfo->delete_mutex). Do not get
519 * confused
521 reiser4_release_reserved(inode->i_sb);
522 return RETERR(-EIO);
526 * if page correspons to hole extent unit - unallocated one will be
527 * created here. This is not necessary
529 result = find_or_create_extent(page);
532 * FIXME: cut_file_items has already updated inode. Probably it would
533 * be better to update it here when file is really truncated
535 if (result) {
536 page_cache_release(page);
538 * the below does up(sbinfo->delete_mutex). Do not get
539 * confused
541 reiser4_release_reserved(inode->i_sb);
542 return result;
545 lock_page(page);
546 assert("vs-1066", PageLocked(page));
547 zero_user_segment(page, padd_from, PAGE_CACHE_SIZE);
548 unlock_page(page);
549 page_cache_release(page);
550 /* the below does up(sbinfo->delete_mutex). Do not get confused */
551 reiser4_release_reserved(inode->i_sb);
552 return 0;
556 * should_have_notail
557 * @uf_info:
558 * @new_size:
560 * Calls formatting plugin to see whether file of size @new_size has to be
561 * stored in unformatted nodes or in tail items. 0 is returned for later case.
563 static int should_have_notail(const struct unix_file_info *uf_info, loff_t new_size)
565 if (!uf_info->tplug)
566 return 1;
567 return !uf_info->tplug->have_tail(unix_file_info_to_inode(uf_info),
568 new_size);
573 * truncate_file_body - change length of file
574 * @inode: inode of file
575 * @new_size: new file length
577 * Adjusts items file @inode is built of to match @new_size. It may either cut
578 * items or add them to represent a hole at the end of file. The caller has to
579 * obtain exclusive access to the file.
581 static int truncate_file_body(struct inode *inode, struct iattr *attr)
583 int result;
584 loff_t new_size = attr->ia_size;
586 if (inode->i_size < new_size) {
587 /* expanding truncate */
588 struct unix_file_info *uf_info = unix_file_inode_data(inode);
590 result = find_file_state(inode, uf_info);
591 if (result)
592 return result;
594 if (should_have_notail(uf_info, new_size)) {
596 * file of size @new_size has to be built of
597 * extents. If it is built of tails - convert to
598 * extents
600 if (uf_info->container == UF_CONTAINER_TAILS) {
602 * if file is being convered by another process
603 * - wait until it completes
605 while (1) {
606 if (reiser4_inode_get_flag(inode,
607 REISER4_PART_IN_CONV)) {
608 drop_exclusive_access(uf_info);
609 schedule();
610 get_exclusive_access(uf_info);
611 continue;
613 break;
616 if (uf_info->container == UF_CONTAINER_TAILS) {
617 result = tail2extent(uf_info);
618 if (result)
619 return result;
622 result = reiser4_write_extent(NULL, inode, NULL,
623 0, &new_size);
624 if (result)
625 return result;
626 uf_info->container = UF_CONTAINER_EXTENTS;
627 } else {
628 if (uf_info->container == UF_CONTAINER_EXTENTS) {
629 result = reiser4_write_extent(NULL, inode, NULL,
630 0, &new_size);
631 if (result)
632 return result;
633 } else {
634 result = reiser4_write_tail(NULL, inode, NULL,
635 0, &new_size);
636 if (result)
637 return result;
638 uf_info->container = UF_CONTAINER_TAILS;
641 BUG_ON(result > 0);
642 result = reiser4_update_file_size(inode, new_size, 1);
643 BUG_ON(result != 0);
644 } else
645 result = shorten_file(inode, new_size);
646 return result;
649 /* plugin->u.write_sd_by_inode = write_sd_by_inode_common */
652 * load_file_hint - copy hint from struct file to local variable
653 * @file: file to get hint from
654 * @hint: structure to fill
656 * Reiser4 specific portion of struct file may contain information (hint)
657 * stored on exiting from previous read or write. That information includes
658 * seal of znode and coord within that znode where previous read or write
659 * stopped. This function copies that information to @hint if it was stored or
660 * initializes @hint by 0s otherwise.
662 int load_file_hint(struct file *file, hint_t *hint)
664 reiser4_file_fsdata *fsdata;
666 if (file) {
667 fsdata = reiser4_get_file_fsdata(file);
668 if (IS_ERR(fsdata))
669 return PTR_ERR(fsdata);
671 spin_lock_inode(file->f_dentry->d_inode);
672 if (reiser4_seal_is_set(&fsdata->reg.hint.seal)) {
673 *hint = fsdata->reg.hint;
674 init_lh(&hint->lh);
675 hint->ext_coord.lh = &hint->lh;
676 spin_unlock_inode(file->f_dentry->d_inode);
678 * force re-validation of the coord on the first
679 * iteration of the read/write loop.
681 hint->ext_coord.valid = 0;
682 assert("nikita-19892", coords_equal(&hint->seal.coord1,
683 &hint->ext_coord.
684 coord));
685 return 0;
687 memset(&fsdata->reg.hint, 0, sizeof(hint_t));
688 spin_unlock_inode(file->f_dentry->d_inode);
690 hint_init_zero(hint);
691 return 0;
695 * save_file_hint - copy hint to reiser4 private struct file's part
696 * @file: file to save hint in
697 * @hint: hint to save
699 * This copies @hint to reiser4 private part of struct file. It can help
700 * speedup future accesses to the file.
702 void save_file_hint(struct file *file, const hint_t *hint)
704 reiser4_file_fsdata *fsdata;
706 assert("edward-1337", hint != NULL);
708 if (!file || !reiser4_seal_is_set(&hint->seal))
709 return;
710 fsdata = reiser4_get_file_fsdata(file);
711 assert("vs-965", !IS_ERR(fsdata));
712 assert("nikita-19891",
713 coords_equal(&hint->seal.coord1, &hint->ext_coord.coord));
714 assert("vs-30", hint->lh.owner == NULL);
715 spin_lock_inode(file->f_dentry->d_inode);
716 fsdata->reg.hint = *hint;
717 spin_unlock_inode(file->f_dentry->d_inode);
718 return;
721 void reiser4_unset_hint(hint_t * hint)
723 assert("vs-1315", hint);
724 hint->ext_coord.valid = 0;
725 reiser4_seal_done(&hint->seal);
726 done_lh(&hint->lh);
729 /* coord must be set properly. So, that reiser4_set_hint
730 has nothing to do */
731 void reiser4_set_hint(hint_t * hint, const reiser4_key * key,
732 znode_lock_mode mode)
734 ON_DEBUG(coord_t * coord = &hint->ext_coord.coord);
735 assert("vs-1207", WITH_DATA(coord->node, check_coord(coord, key)));
737 reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, key);
738 hint->offset = get_key_offset(key);
739 hint->mode = mode;
740 done_lh(&hint->lh);
743 int hint_is_set(const hint_t * hint)
745 return reiser4_seal_is_set(&hint->seal);
748 #if REISER4_DEBUG
749 static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
751 return (get_key_locality(k1) == get_key_locality(k2) &&
752 get_key_type(k1) == get_key_type(k2) &&
753 get_key_band(k1) == get_key_band(k2) &&
754 get_key_ordering(k1) == get_key_ordering(k2) &&
755 get_key_objectid(k1) == get_key_objectid(k2));
757 #endif
759 static int
760 hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
761 znode_lock_mode lock_mode)
763 if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
764 /* hint either not set or set by different operation */
765 return RETERR(-E_REPEAT);
767 assert("vs-1277", all_but_offset_key_eq(key, &hint->seal.key));
769 if (check_key && get_key_offset(key) != hint->offset)
770 /* hint is set for different key */
771 return RETERR(-E_REPEAT);
773 assert("vs-31", hint->ext_coord.lh == &hint->lh);
774 return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, key,
775 hint->ext_coord.lh, lock_mode,
776 ZNODE_LOCK_LOPRI);
780 * Look for place at twig level for extent corresponding to page,
781 * call extent's writepage method to create unallocated extent if
782 * it does not exist yet, initialize jnode, capture page
784 int find_or_create_extent(struct page *page)
786 int result;
787 struct inode *inode;
788 int plugged_hole;
790 jnode *node;
792 assert("vs-1065", page->mapping && page->mapping->host);
793 inode = page->mapping->host;
795 lock_page(page);
796 node = jnode_of_page(page);
797 if (IS_ERR(node)) {
798 unlock_page(page);
799 return PTR_ERR(node);
801 JF_SET(node, JNODE_WRITE_PREPARED);
802 unlock_page(page);
804 if (node->blocknr == 0) {
805 plugged_hole = 0;
806 result = reiser4_update_extent(inode, node, page_offset(page),
807 &plugged_hole);
808 if (result) {
809 JF_CLR(node, JNODE_WRITE_PREPARED);
810 jput(node);
811 warning("edward-1549",
812 "reiser4_update_extent failed: %d", result);
813 return result;
815 if (plugged_hole)
816 reiser4_update_sd(inode);
817 } else {
818 spin_lock_jnode(node);
819 result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
820 BUG_ON(result != 0);
821 jnode_make_dirty_locked(node);
822 spin_unlock_jnode(node);
825 BUG_ON(node->atom == NULL);
826 JF_CLR(node, JNODE_WRITE_PREPARED);
827 jput(node);
829 if (get_current_context()->entd) {
830 entd_context *ent = get_entd_context(node->tree->super);
832 if (ent->cur_request->page == page)
833 ent->cur_request->node = node;
835 return 0;
839 * has_anonymous_pages - check whether inode has pages dirtied via mmap
840 * @inode: inode to check
842 * Returns true if inode's mapping has dirty pages which do not belong to any
843 * atom. Those are either tagged PAGECACHE_TAG_REISER4_MOVED in mapping's page
844 * tree or were eflushed and can be found via jnodes tagged
845 * EFLUSH_TAG_ANONYMOUS in radix tree of jnodes.
847 static int has_anonymous_pages(struct inode *inode)
849 int result;
851 spin_lock_irq(&inode->i_mapping->tree_lock);
852 result = radix_tree_tagged(&inode->i_mapping->page_tree, PAGECACHE_TAG_REISER4_MOVED);
853 spin_unlock_irq(&inode->i_mapping->tree_lock);
854 return result;
858 * capture_page_and_create_extent -
859 * @page: page to be captured
861 * Grabs space for extent creation and stat data update and calls function to
862 * do actual work.
864 static int capture_page_and_create_extent(struct page *page)
866 int result;
867 struct inode *inode;
869 assert("vs-1084", page->mapping && page->mapping->host);
870 inode = page->mapping->host;
871 assert("vs-1139",
872 unix_file_inode_data(inode)->container == UF_CONTAINER_EXTENTS);
873 /* page belongs to file */
874 assert("vs-1393",
875 inode->i_size > page_offset(page));
877 /* page capture may require extent creation (if it does not exist yet)
878 and stat data's update (number of blocks changes on extent
879 creation) */
880 grab_space_enable();
881 result = reiser4_grab_space(2 * estimate_one_insert_into_item
882 (reiser4_tree_by_inode(inode)),
883 BA_CAN_COMMIT);
884 if (likely(!result))
885 result = find_or_create_extent(page);
887 if (result != 0)
888 SetPageError(page);
889 return result;
/*
 * plugin->write_end()
 *
 * Unlocks @page and captures it (creating its extent if needed). @file,
 * @from and @to are not used here.
 */
int write_end_unix_file(struct file *file, struct page *page,
			unsigned from, unsigned to)
{
	int result;

	unlock_page(page);
	result = capture_page_and_create_extent(page);
	return result;
}
/*
 * Support for "anonymous" pages and jnodes.
 *
 * When a file is write-accessed through mmap, pages can be dirtied from the
 * user level. In this case the kernel is not notified until one of the
 * following happens:
 *
 *     (1) msync()
 *
 *     (2) truncate() (either explicit or through unlink)
 *
 *     (3) VM scanner starts reclaiming mapped pages, dirtying them before
 *     starting write-back.
 *
 * As a result of (3) ->writepage may be called on a dirty page without
 * jnode. Such page is called "anonymous" in reiser4. Certain work-loads
 * (iozone) generate huge number of anonymous pages.
 *
 * reiser4_sync_sb() method tries to insert anonymous pages into
 * tree. This is done by capture_anonymous_*() functions below.
 */
/**
 * capture_anonymous_page - involve page into transaction
 * @page: page to deal with
 *
 * Takes care that @page has corresponding metadata in the tree, creates jnode
 * for @page and captures it. On success 1 is returned. 0 is returned, and
 * nothing is done, for a page under writeback. On failure a warning is
 * printed and the error is returned.
 */
static int capture_anonymous_page(struct page *page)
{
	int err;

	/* FIXME: do nothing? */
	if (PageWriteback(page))
		return 0;

	err = capture_page_and_create_extent(page);
	if (err == 0)
		return 1;

	warning("nikita-3329",
		"Cannot capture anon page: %i", err);
	return err;
}
947 * capture_anonymous_pages - find and capture pages dirtied via mmap
948 * @mapping: address space where to look for pages
949 * @index: start index
950 * @to_capture: maximum number of pages to capture
952 * Looks for pages tagged REISER4_MOVED starting from the *@index-th page,
953 * captures (involves into atom) them, returns number of captured pages,
954 * updates @index to next page after the last captured one.
956 static int
957 capture_anonymous_pages(struct address_space *mapping, pgoff_t *index,
958 unsigned int to_capture)
960 int result;
961 struct pagevec pvec;
962 unsigned int i, count;
963 int nr;
965 pagevec_init(&pvec, 0);
966 count = min(pagevec_space(&pvec), to_capture);
967 nr = 0;
969 /* find pages tagged MOVED */
970 spin_lock_irq(&mapping->tree_lock);
971 pvec.nr = radix_tree_gang_lookup_tag(&mapping->page_tree,
972 (void **)pvec.pages, *index, count,
973 PAGECACHE_TAG_REISER4_MOVED);
974 if (pagevec_count(&pvec) == 0) {
976 * there are no pages tagged MOVED in mapping->page_tree
977 * starting from *index
979 spin_unlock_irq(&mapping->tree_lock);
980 *index = (pgoff_t)-1;
981 return 0;
984 /* clear MOVED tag for all found pages */
985 for (i = 0; i < pagevec_count(&pvec); i++) {
986 page_cache_get(pvec.pages[i]);
987 radix_tree_tag_clear(&mapping->page_tree, pvec.pages[i]->index,
988 PAGECACHE_TAG_REISER4_MOVED);
990 spin_unlock_irq(&mapping->tree_lock);
993 *index = pvec.pages[i - 1]->index + 1;
995 for (i = 0; i < pagevec_count(&pvec); i++) {
996 result = capture_anonymous_page(pvec.pages[i]);
997 if (result == 1)
998 nr++;
999 else {
1000 if (result < 0) {
1001 warning("vs-1454",
1002 "failed to capture page: "
1003 "result=%d, captured=%d)\n",
1004 result, i);
1007 * set MOVED tag to all pages which left not
1008 * captured
1010 spin_lock_irq(&mapping->tree_lock);
1011 for (; i < pagevec_count(&pvec); i ++) {
1012 radix_tree_tag_set(&mapping->page_tree,
1013 pvec.pages[i]->index,
1014 PAGECACHE_TAG_REISER4_MOVED);
1016 spin_unlock_irq(&mapping->tree_lock);
1018 pagevec_release(&pvec);
1019 return result;
1020 } else {
1022 * result == 0. capture_anonymous_page returns
1023 * 0 for Writeback-ed page. Set MOVED tag on
1024 * that page
1026 spin_lock_irq(&mapping->tree_lock);
1027 radix_tree_tag_set(&mapping->page_tree,
1028 pvec.pages[i]->index,
1029 PAGECACHE_TAG_REISER4_MOVED);
1030 spin_unlock_irq(&mapping->tree_lock);
1031 if (i == 0)
1032 *index = pvec.pages[0]->index;
1033 else
1034 *index = pvec.pages[i - 1]->index + 1;
1038 pagevec_release(&pvec);
1039 return nr;
1043 * capture_anonymous_jnodes - find and capture anonymous jnodes
1044 * @mapping: address space where to look for jnodes
1045 * @from: start index
1046 * @to: end index
1047 * @to_capture: maximum number of jnodes to capture
1049 * Looks for jnodes tagged EFLUSH_TAG_ANONYMOUS in inode's tree of jnodes in
1050 * the range of indexes @from-@to and captures them, returns number of captured
1051 * jnodes, updates @from to next jnode after the last captured one.
1053 static int
1054 capture_anonymous_jnodes(struct address_space *mapping,
1055 pgoff_t *from, pgoff_t to, int to_capture)
1057 *from = to;
1058 return 0;
1062 * Commit atom of the jnode of a page.
1064 static int sync_page(struct page *page)
1066 int result;
1067 do {
1068 jnode *node;
1069 txn_atom *atom;
1071 lock_page(page);
1072 node = jprivate(page);
1073 if (node != NULL) {
1074 spin_lock_jnode(node);
1075 atom = jnode_get_atom(node);
1076 spin_unlock_jnode(node);
1077 } else
1078 atom = NULL;
1079 unlock_page(page);
1080 result = reiser4_sync_atom(atom);
1081 } while (result == -E_REPEAT);
1083 * ZAM-FIXME-HANS: document the logic of this loop, is it just to
1084 * handle the case where more pages get added to the atom while we are
1085 * syncing it?
1087 assert("nikita-3485", ergo(result == 0,
1088 get_current_context()->trans->atom == NULL));
1089 return result;
1093 * Commit atoms of pages on @pages list.
1094 * call sync_page for each page from mapping's page tree
1096 static int sync_page_list(struct inode *inode)
1098 int result;
1099 struct address_space *mapping;
1100 unsigned long from; /* start index for radix_tree_gang_lookup */
1101 unsigned int found; /* return value for radix_tree_gang_lookup */
1103 mapping = inode->i_mapping;
1104 from = 0;
1105 result = 0;
1106 spin_lock_irq(&mapping->tree_lock);
1107 while (result == 0) {
1108 struct page *page;
1110 found =
1111 radix_tree_gang_lookup(&mapping->page_tree, (void **)&page,
1112 from, 1);
1113 assert("edward-1550", found < 2);
1114 if (found == 0)
1115 break;
1117 * page may not leave radix tree because it is protected from
1118 * truncating by inode->i_mutex locked by sys_fsync
1120 page_cache_get(page);
1121 spin_unlock_irq(&mapping->tree_lock);
1123 from = page->index + 1;
1125 result = sync_page(page);
1127 page_cache_release(page);
1128 spin_lock_irq(&mapping->tree_lock);
1131 spin_unlock_irq(&mapping->tree_lock);
1132 return result;
1135 static int commit_file_atoms(struct inode *inode)
1137 int result;
1138 struct unix_file_info *uf_info;
1140 uf_info = unix_file_inode_data(inode);
1142 get_exclusive_access(uf_info);
1144 * find what items file is made from
1146 result = find_file_state(inode, uf_info);
1147 drop_exclusive_access(uf_info);
1148 if (result != 0)
1149 return result;
1152 * file state cannot change because we are under ->i_mutex
1154 switch (uf_info->container) {
1155 case UF_CONTAINER_EXTENTS:
1156 /* find_file_state might open join an atom */
1157 reiser4_txn_restart_current();
1158 result =
1160 * when we are called by
1161 * filemap_fdatawrite->
1162 * do_writepages()->
1163 * reiser4_writepages()
1165 * inode->i_mapping->dirty_pages are spices into
1166 * ->io_pages, leaving ->dirty_pages dirty.
1168 * When we are called from
1169 * reiser4_fsync()->sync_unix_file(), we have to
1170 * commit atoms of all pages on the ->dirty_list.
1172 * So for simplicity we just commit ->io_pages and
1173 * ->dirty_pages.
1175 sync_page_list(inode);
1176 break;
1177 case UF_CONTAINER_TAILS:
1179 * NOTE-NIKITA probably we can be smarter for tails. For now
1180 * just commit all existing atoms.
1182 result = txnmgr_force_commit_all(inode->i_sb, 0);
1183 break;
1184 case UF_CONTAINER_EMPTY:
1185 result = 0;
1186 break;
1187 case UF_CONTAINER_UNKNOWN:
1188 default:
1189 result = -EIO;
1190 break;
1194 * commit current transaction: there can be captured nodes from
1195 * find_file_state() and finish_conversion().
1197 reiser4_txn_restart_current();
1198 return result;
1202 * writepages_unix_file - writepages of struct address_space_operations
1203 * @mapping:
1204 * @wbc:
1206 * This captures anonymous pages and anonymous jnodes. Anonymous pages are
1207 * pages which are dirtied via mmapping. Anonymous jnodes are ones which were
1208 * created by reiser4_writepage.
1210 int writepages_unix_file(struct address_space *mapping,
1211 struct writeback_control *wbc)
1213 int result;
1214 struct unix_file_info *uf_info;
1215 pgoff_t pindex, jindex, nr_pages;
1216 long to_capture;
1217 struct inode *inode;
1219 inode = mapping->host;
1220 if (!has_anonymous_pages(inode)) {
1221 result = 0;
1222 goto end;
1224 jindex = pindex = wbc->range_start >> PAGE_CACHE_SHIFT;
1225 result = 0;
1226 nr_pages = size_in_pages(i_size_read(inode));
1228 uf_info = unix_file_inode_data(inode);
1230 do {
1231 reiser4_context *ctx;
1233 if (wbc->sync_mode != WB_SYNC_ALL)
1234 to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST);
1235 else
1236 to_capture = CAPTURE_APAGE_BURST;
1238 ctx = reiser4_init_context(inode->i_sb);
1239 if (IS_ERR(ctx)) {
1240 result = PTR_ERR(ctx);
1241 break;
1243 /* avoid recursive calls to ->sync_inodes */
1244 ctx->nobalance = 1;
1245 assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
1246 assert("edward-1551", LOCK_CNT_NIL(inode_sem_w));
1247 assert("edward-1552", LOCK_CNT_NIL(inode_sem_r));
1249 reiser4_txn_restart_current();
1251 /* we have to get nonexclusive access to the file */
1252 if (get_current_context()->entd) {
1254 * use nonblocking version of nonexclusive_access to
1255 * avoid deadlock which might look like the following:
1256 * process P1 holds NEA on file F1 and called entd to
1257 * reclaim some memory. Entd works for P1 and is going
1258 * to capture pages of file F2. To do that entd has to
1259 * get NEA to F2. F2 is held by process P2 which also
1260 * called entd. But entd is serving P1 at the moment
1261 * and P2 has to wait. Process P3 trying to get EA to
1262 * file F2. Existence of pending EA request to file F2
1263 * makes impossible for entd to get NEA to file
1264 * F2. Neither of these process can continue. Using
1265 * nonblocking version of gettign NEA is supposed to
1266 * avoid this deadlock.
1268 if (try_to_get_nonexclusive_access(uf_info) == 0) {
1269 result = RETERR(-EBUSY);
1270 reiser4_exit_context(ctx);
1271 break;
1273 } else
1274 get_nonexclusive_access(uf_info);
1276 while (to_capture > 0) {
1277 pgoff_t start;
1279 assert("vs-1727", jindex <= pindex);
1280 if (pindex == jindex) {
1281 start = pindex;
1282 result =
1283 capture_anonymous_pages(inode->i_mapping,
1284 &pindex,
1285 to_capture);
1286 if (result <= 0)
1287 break;
1288 to_capture -= result;
1289 wbc->nr_to_write -= result;
1290 if (start + result == pindex) {
1291 jindex = pindex;
1292 continue;
1294 if (to_capture <= 0)
1295 break;
1297 /* deal with anonymous jnodes between jindex and pindex */
1298 result =
1299 capture_anonymous_jnodes(inode->i_mapping, &jindex,
1300 pindex, to_capture);
1301 if (result < 0)
1302 break;
1303 to_capture -= result;
1304 get_current_context()->nr_captured += result;
1306 if (jindex == (pgoff_t) - 1) {
1307 assert("vs-1728", pindex == (pgoff_t) - 1);
1308 break;
1311 if (to_capture <= 0)
1312 /* there may be left more pages */
1313 __mark_inode_dirty(inode, I_DIRTY_PAGES);
1315 drop_nonexclusive_access(uf_info);
1316 if (result < 0) {
1317 /* error happened */
1318 reiser4_exit_context(ctx);
1319 return result;
1321 if (wbc->sync_mode != WB_SYNC_ALL) {
1322 reiser4_exit_context(ctx);
1323 return 0;
1325 result = commit_file_atoms(inode);
1326 reiser4_exit_context(ctx);
1327 if (pindex >= nr_pages && jindex == pindex)
1328 break;
1329 } while (1);
1331 end:
1332 if (is_in_reiser4_context()) {
1333 if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
1335 * there are already pages to flush, flush them out, do
1336 * not delay until end of reiser4_sync_inodes
1338 reiser4_writeout(inode->i_sb, wbc);
1339 get_current_context()->nr_captured = 0;
1342 return result;
1346 * readpage_unix_file_nolock - readpage of struct address_space_operations
1347 * @file:
1348 * @page:
1350 * Compose a key and search for item containing information about @page
1351 * data. If item is found - its readpage method is called.
1353 int readpage_unix_file(struct file *file, struct page *page)
1355 reiser4_context *ctx;
1356 int result;
1357 struct inode *inode;
1358 reiser4_key key;
1359 item_plugin *iplug;
1360 hint_t *hint;
1361 lock_handle *lh;
1362 coord_t *coord;
1364 assert("vs-1062", PageLocked(page));
1365 assert("vs-976", !PageUptodate(page));
1366 assert("vs-1061", page->mapping && page->mapping->host);
1368 if (page->mapping->host->i_size <= page_offset(page)) {
1369 /* page is out of file */
1370 zero_user(page, 0, PAGE_CACHE_SIZE);
1371 SetPageUptodate(page);
1372 unlock_page(page);
1373 return 0;
1376 inode = page->mapping->host;
1377 ctx = reiser4_init_context(inode->i_sb);
1378 if (IS_ERR(ctx)) {
1379 unlock_page(page);
1380 return PTR_ERR(ctx);
1383 hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
1384 if (hint == NULL) {
1385 unlock_page(page);
1386 reiser4_exit_context(ctx);
1387 return RETERR(-ENOMEM);
1390 result = load_file_hint(file, hint);
1391 if (result) {
1392 kfree(hint);
1393 unlock_page(page);
1394 reiser4_exit_context(ctx);
1395 return result;
1397 lh = &hint->lh;
1399 /* get key of first byte of the page */
1400 key_by_inode_and_offset_common(inode, page_offset(page), &key);
1402 /* look for file metadata corresponding to first byte of page */
1403 page_cache_get(page);
1404 unlock_page(page);
1405 result = find_file_item(hint, &key, ZNODE_READ_LOCK, inode);
1406 lock_page(page);
1407 page_cache_release(page);
1409 if (page->mapping == NULL) {
1411 * readpage allows truncate to run concurrently. Page was
1412 * truncated while it was not locked
1414 done_lh(lh);
1415 kfree(hint);
1416 unlock_page(page);
1417 reiser4_txn_restart(ctx);
1418 reiser4_exit_context(ctx);
1419 return -EINVAL;
1422 if (result != CBK_COORD_FOUND || hint->ext_coord.coord.between != AT_UNIT) {
1423 if (result == CBK_COORD_FOUND &&
1424 hint->ext_coord.coord.between != AT_UNIT)
1425 /* file is truncated */
1426 result = -EINVAL;
1427 done_lh(lh);
1428 kfree(hint);
1429 unlock_page(page);
1430 reiser4_txn_restart(ctx);
1431 reiser4_exit_context(ctx);
1432 return result;
1436 * item corresponding to page is found. It can not be removed because
1437 * znode lock is held
1439 if (PageUptodate(page)) {
1440 done_lh(lh);
1441 kfree(hint);
1442 unlock_page(page);
1443 reiser4_txn_restart(ctx);
1444 reiser4_exit_context(ctx);
1445 return 0;
1448 coord = &hint->ext_coord.coord;
1449 result = zload(coord->node);
1450 if (result) {
1451 done_lh(lh);
1452 kfree(hint);
1453 unlock_page(page);
1454 reiser4_txn_restart(ctx);
1455 reiser4_exit_context(ctx);
1456 return result;
1459 validate_extended_coord(&hint->ext_coord, page_offset(page));
1461 if (!coord_is_existing_unit(coord)) {
1462 /* this indicates corruption */
1463 warning("vs-280",
1464 "Looking for page %lu of file %llu (size %lli). "
1465 "No file items found (%d). File is corrupted?\n",
1466 page->index, (unsigned long long)get_inode_oid(inode),
1467 inode->i_size, result);
1468 zrelse(coord->node);
1469 done_lh(lh);
1470 kfree(hint);
1471 unlock_page(page);
1472 reiser4_txn_restart(ctx);
1473 reiser4_exit_context(ctx);
1474 return RETERR(-EIO);
1478 * get plugin of found item or use plugin if extent if there are no
1479 * one
1481 iplug = item_plugin_by_coord(coord);
1482 if (iplug->s.file.readpage)
1483 result = iplug->s.file.readpage(coord, page);
1484 else
1485 result = RETERR(-EINVAL);
1487 if (!result) {
1488 set_key_offset(&key,
1489 (loff_t) (page->index + 1) << PAGE_CACHE_SHIFT);
1490 /* FIXME should call reiser4_set_hint() */
1491 reiser4_unset_hint(hint);
1492 } else {
1493 unlock_page(page);
1494 reiser4_unset_hint(hint);
1496 assert("vs-979",
1497 ergo(result == 0, (PageLocked(page) || PageUptodate(page))));
1498 assert("vs-9791", ergo(result != 0, !PageLocked(page)));
1500 zrelse(coord->node);
1501 done_lh(lh);
1503 save_file_hint(file, hint);
1504 kfree(hint);
1507 * FIXME: explain why it is needed. HINT: page allocation in write can
1508 * not be done when atom is not NULL because reiser4_writepage can not
1509 * kick entd and have to eflush
1511 reiser4_txn_restart(ctx);
1512 reiser4_exit_context(ctx);
1513 return result;
1516 struct uf_readpages_context {
1517 lock_handle lh;
1518 coord_t coord;
1521 /* A callback function for readpages_unix_file/read_cache_pages.
1522 * If the file is build of tails, then return error (-ENOENT).
1524 * @data -- a pointer to reiser4_readpages_context object,
1525 * to save the twig lock and the coord between
1526 * read_cache_page iterations.
1527 * @page -- page to start read.
1529 static int uf_readpages_filler(void * data, struct page * page)
1531 struct uf_readpages_context *rc = data;
1532 jnode * node;
1533 int ret = 0;
1534 reiser4_extent *ext;
1535 __u64 ext_index;
1536 int cbk_done = 0;
1537 struct address_space * mapping = page->mapping;
1539 if (PageUptodate(page)) {
1540 unlock_page(page);
1541 return 0;
1543 page_cache_get(page);
1545 if (rc->lh.node == 0) {
1546 /* no twig lock - have to do tree search. */
1547 reiser4_key key;
1548 repeat:
1549 unlock_page(page);
1550 key_by_inode_and_offset_common(
1551 mapping->host, page_offset(page), &key);
1552 ret = coord_by_key(
1553 &get_super_private(mapping->host->i_sb)->tree,
1554 &key, &rc->coord, &rc->lh,
1555 ZNODE_READ_LOCK, FIND_EXACT,
1556 TWIG_LEVEL, TWIG_LEVEL, CBK_UNIQUE, NULL);
1557 if (unlikely(ret))
1558 goto exit;
1559 lock_page(page);
1560 if (PageUptodate(page))
1561 goto unlock;
1562 cbk_done = 1;
1564 ret = zload(rc->coord.node);
1565 if (unlikely(ret))
1566 goto unlock;
1567 if (!coord_is_existing_item(&rc->coord) ||
1568 !item_is_extent(&rc->coord)) {
1569 zrelse(rc->coord.node);
1570 ret = RETERR(-EIO);
1571 goto unlock;
1573 ext = extent_by_coord(&rc->coord);
1574 ext_index = extent_unit_index(&rc->coord);
1575 if (page->index < ext_index ||
1576 page->index >= ext_index + extent_get_width(ext)) {
1577 /* the page index doesn't belong to the extent unit
1578 which the coord points to - release the lock and
1579 repeat with tree search. */
1580 zrelse(rc->coord.node);
1581 done_lh(&rc->lh);
1582 /* we can be here after a CBK call only in case of
1583 corruption of the tree or the tree lookup algorithm bug. */
1584 if (unlikely(cbk_done)) {
1585 ret = RETERR(-EIO);
1586 goto unlock;
1588 goto repeat;
1590 node = jnode_of_page(page);
1591 if (unlikely(IS_ERR(node))) {
1592 zrelse(rc->coord.node);
1593 ret = PTR_ERR(node);
1594 goto unlock;
1596 ret = reiser4_do_readpage_extent(ext, page->index - ext_index, page);
1597 jput(node);
1598 zrelse(rc->coord.node);
1599 if (likely(!ret))
1600 goto exit;
1601 unlock:
1602 unlock_page(page);
1603 exit:
1604 page_cache_release(page);
1605 return ret;
1609 * readpages_unix_file - called by the readahead code, starts reading for each
1610 * page of given list of pages
1612 int readpages_unix_file(
1613 struct file *file, struct address_space *mapping,
1614 struct list_head *pages, unsigned nr_pages)
1616 reiser4_context *ctx;
1617 struct uf_readpages_context rc;
1618 int ret;
1620 ctx = reiser4_init_context(mapping->host->i_sb);
1621 if (IS_ERR(ctx)) {
1622 put_pages_list(pages);
1623 return PTR_ERR(ctx);
1625 init_lh(&rc.lh);
1626 ret = read_cache_pages(mapping, pages, uf_readpages_filler, &rc);
1627 done_lh(&rc.lh);
1628 context_set_commit_async(ctx);
1629 /* close the transaction to protect further page allocation from deadlocks */
1630 reiser4_txn_restart(ctx);
1631 reiser4_exit_context(ctx);
1632 return ret;
1635 static reiser4_block_nr unix_file_estimate_read(struct inode *inode,
1636 loff_t count UNUSED_ARG)
1638 /* We should reserve one block, because of updating of the stat data
1639 item */
1640 assert("vs-1249",
1641 inode_file_plugin(inode)->estimate.update ==
1642 estimate_update_common);
1643 return estimate_update_common(inode);
1646 /* this is called with nonexclusive access obtained, file's container can not change */
1647 static ssize_t read_file(hint_t *hint, struct file *file, /* file to read from to */
1648 char __user *buf, /* address of user-space buffer */
1649 size_t count, /* number of bytes to read */
1650 loff_t *off)
1652 int result;
1653 struct inode *inode;
1654 flow_t flow;
1655 int (*read_f) (struct file *, flow_t *, hint_t *);
1656 coord_t *coord;
1657 znode *loaded;
1659 inode = file->f_dentry->d_inode;
1661 /* build flow */
1662 assert("vs-1250",
1663 inode_file_plugin(inode)->flow_by_inode ==
1664 flow_by_inode_unix_file);
1665 result =
1666 flow_by_inode_unix_file(inode, buf, 1 /* user space */ , count,
1667 *off, READ_OP, &flow);
1668 if (unlikely(result))
1669 return result;
1671 /* get seal and coord sealed with it from reiser4 private data
1672 of struct file. The coord will tell us where our last read
1673 of this file finished, and the seal will help to determine
1674 if that location is still valid.
1676 coord = &hint->ext_coord.coord;
1677 while (flow.length && result == 0) {
1678 result =
1679 find_file_item(hint, &flow.key, ZNODE_READ_LOCK, inode);
1680 if (cbk_errored(result))
1681 /* error happened */
1682 break;
1684 if (coord->between != AT_UNIT) {
1685 /* there were no items corresponding to given offset */
1686 done_lh(hint->ext_coord.lh);
1687 break;
1690 loaded = coord->node;
1691 result = zload(loaded);
1692 if (unlikely(result)) {
1693 done_lh(hint->ext_coord.lh);
1694 break;
1697 if (hint->ext_coord.valid == 0)
1698 validate_extended_coord(&hint->ext_coord,
1699 get_key_offset(&flow.key));
1701 assert("vs-4", hint->ext_coord.valid == 1);
1702 assert("vs-33", hint->ext_coord.lh == &hint->lh);
1703 /* call item's read method */
1704 read_f = item_plugin_by_coord(coord)->s.file.read;
1705 result = read_f(file, &flow, hint);
1706 zrelse(loaded);
1707 done_lh(hint->ext_coord.lh);
1710 return (count - flow.length) ? (count - flow.length) : result;
1713 static ssize_t read_unix_file_container_tails(struct file*, char __user*, size_t, loff_t*);
1716 * read_unix_file - read of struct file_operations
1717 * @file: file to read from
1718 * @buf: address of user-space buffer
1719 * @read_amount: number of bytes to read
1720 * @off: position in file to read from
1722 * This is implementation of vfs's read method of struct file_operations for
1723 * unix file plugin.
1725 ssize_t read_unix_file(struct file *file, char __user *buf, size_t read_amount,
1726 loff_t *off)
1728 reiser4_context *ctx;
1729 ssize_t result;
1730 struct inode *inode;
1731 struct unix_file_info *uf_info;
1733 if (unlikely(read_amount == 0))
1734 return 0;
1736 assert("umka-072", file != NULL);
1737 assert("umka-074", off != NULL);
1738 inode = file->f_dentry->d_inode;
1739 assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
1741 ctx = reiser4_init_context(inode->i_sb);
1742 if (IS_ERR(ctx))
1743 return PTR_ERR(ctx);
1744 uf_info = unix_file_inode_data(inode);
1745 if (uf_info->container == UF_CONTAINER_UNKNOWN) {
1746 get_exclusive_access(uf_info);
1747 result = find_file_state(inode, uf_info);
1748 if (unlikely(result != 0))
1749 goto out;
1750 } else
1751 get_nonexclusive_access(uf_info);
1752 result = reiser4_grab_space_force(unix_file_estimate_read(inode, read_amount),
1753 BA_CAN_COMMIT);
1754 if (unlikely(result != 0))
1755 goto out;
1756 if (uf_info->container == UF_CONTAINER_EXTENTS){
1757 result = do_sync_read(file, buf, read_amount, off);
1758 } else if (uf_info->container == UF_CONTAINER_TAILS ||
1759 reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV) ||
1760 reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
1761 result = read_unix_file_container_tails(file, buf, read_amount, off);
1762 } else {
1763 assert("zam-1085", uf_info->container == UF_CONTAINER_EMPTY);
1764 result = 0;
1766 out:
1767 drop_access(uf_info);
1768 context_set_commit_async(ctx);
1769 reiser4_exit_context(ctx);
1770 return result;
1773 static ssize_t read_unix_file_container_tails(
1774 struct file *file, char __user *buf, size_t read_amount, loff_t *off)
1776 int result;
1777 struct inode *inode;
1778 hint_t *hint;
1779 struct unix_file_info *uf_info;
1780 size_t count, read, left;
1781 loff_t size;
1783 assert("umka-072", file != NULL);
1784 assert("umka-074", off != NULL);
1785 inode = file->f_dentry->d_inode;
1786 assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
1788 hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
1789 if (hint == NULL)
1790 return RETERR(-ENOMEM);
1792 result = load_file_hint(file, hint);
1793 if (result) {
1794 kfree(hint);
1795 return result;
1798 left = read_amount;
1799 count = 0;
1800 uf_info = unix_file_inode_data(inode);
1801 while (left > 0) {
1802 reiser4_txn_restart_current();
1803 size = i_size_read(inode);
1804 if (*off >= size)
1805 /* position to read from is past the end of file */
1806 break;
1807 if (*off + left > size)
1808 left = size - *off;
1809 /* faultin user page */
1810 result = fault_in_pages_writeable(buf, left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left);
1811 if (result)
1812 return RETERR(-EFAULT);
1814 read = read_file(hint, file, buf,
1815 left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left,
1816 off);
1817 if (read < 0) {
1818 result = read;
1819 break;
1821 left -= read;
1822 buf += read;
1824 /* update position in a file */
1825 *off += read;
1826 /* total number of read bytes */
1827 count += read;
1829 done_lh(&hint->lh);
1830 save_file_hint(file, hint);
1831 kfree(hint);
1832 if (count)
1833 file_accessed(file);
1834 /* return number of read bytes or error code if nothing is read */
1835 return count ? count : result;
1838 /* This function takes care about @file's pages. First of all it checks if
1839 filesystems readonly and if so gets out. Otherwise, it throws out all
1840 pages of file if it was mapped for read and going to be mapped for write
1841 and consists of tails. This is done in order to not manage few copies
1842 of the data (first in page cache and second one in tails them selves)
1843 for the case of mapping files consisting tails.
1845 Here also tail2extent conversion is performed if it is allowed and file
1846 is going to be written or mapped for write. This functions may be called
1847 from write_unix_file() or mmap_unix_file(). */
1848 static int check_pages_unix_file(struct file *file, struct inode *inode)
1850 reiser4_invalidate_pages(inode->i_mapping, 0,
1851 (inode->i_size + PAGE_CACHE_SIZE -
1852 1) >> PAGE_CACHE_SHIFT, 0);
1853 return unpack(file, inode, 0 /* not forever */ );
1857 * mmap_unix_file - mmap of struct file_operations
1858 * @file: file to mmap
1859 * @vma:
1861 * This is implementation of vfs's mmap method of struct file_operations for
1862 * unix file plugin. It converts file to extent if necessary. Sets
1863 * reiser4_inode's flag - REISER4_HAS_MMAP.
1865 int mmap_unix_file(struct file *file, struct vm_area_struct *vma)
1867 reiser4_context *ctx;
1868 int result;
1869 struct inode *inode;
1870 struct unix_file_info *uf_info;
1871 reiser4_block_nr needed;
1873 inode = file->f_dentry->d_inode;
1874 ctx = reiser4_init_context(inode->i_sb);
1875 if (IS_ERR(ctx))
1876 return PTR_ERR(ctx);
1878 uf_info = unix_file_inode_data(inode);
1880 get_exclusive_access_careful(uf_info, inode);
1882 if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
1884 * we need file built of extent items. If it is still built of
1885 * tail items we have to convert it. Find what items the file
1886 * is built of
1888 result = find_file_state(inode, uf_info);
1889 if (result != 0) {
1890 drop_exclusive_access(uf_info);
1891 reiser4_exit_context(ctx);
1892 return result;
1895 assert("vs-1648", (uf_info->container == UF_CONTAINER_TAILS ||
1896 uf_info->container == UF_CONTAINER_EXTENTS ||
1897 uf_info->container == UF_CONTAINER_EMPTY));
1898 if (uf_info->container == UF_CONTAINER_TAILS) {
1900 * invalidate all pages and convert file from tails to
1901 * extents
1903 result = check_pages_unix_file(file, inode);
1904 if (result) {
1905 drop_exclusive_access(uf_info);
1906 reiser4_exit_context(ctx);
1907 return result;
1913 * generic_file_mmap will do update_atime. Grab space for stat data
1914 * update.
1916 needed = inode_file_plugin(inode)->estimate.update(inode);
1917 result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
1918 if (result) {
1919 drop_exclusive_access(uf_info);
1920 reiser4_exit_context(ctx);
1921 return result;
1924 result = generic_file_mmap(file, vma);
1925 if (result == 0) {
1926 /* mark file as having mapping. */
1927 reiser4_inode_set_flag(inode, REISER4_HAS_MMAP);
1930 drop_exclusive_access(uf_info);
1931 reiser4_exit_context(ctx);
1932 return result;
1936 * find_first_item
1937 * @inode:
1939 * Finds file item which is responsible for first byte in the file.
1941 static int find_first_item(struct inode *inode)
1943 coord_t coord;
1944 lock_handle lh;
1945 reiser4_key key;
1946 int result;
1948 coord_init_zero(&coord);
1949 init_lh(&lh);
1950 inode_file_plugin(inode)->key_by_inode(inode, 0, &key);
1951 result = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
1952 inode);
1953 if (result == CBK_COORD_FOUND) {
1954 if (coord.between == AT_UNIT) {
1955 result = zload(coord.node);
1956 if (result == 0) {
1957 result = item_id_by_coord(&coord);
1958 zrelse(coord.node);
1959 if (result != EXTENT_POINTER_ID &&
1960 result != FORMATTING_ID)
1961 result = RETERR(-EIO);
1963 } else
1964 result = RETERR(-EIO);
1966 done_lh(&lh);
1967 return result;
1971 * open_unix_file
1972 * @inode:
1973 * @file:
1975 * If filesystem is not readonly - complete uncompleted tail conversion if
1976 * there was one
1978 int open_unix_file(struct inode *inode, struct file *file)
1980 int result;
1981 reiser4_context *ctx;
1982 struct unix_file_info *uf_info;
1984 if (IS_RDONLY(inode))
1985 return 0;
1987 if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
1988 return 0;
1990 ctx = reiser4_init_context(inode->i_sb);
1991 if (IS_ERR(ctx))
1992 return PTR_ERR(ctx);
1994 uf_info = unix_file_inode_data(inode);
1996 get_exclusive_access_careful(uf_info, inode);
1998 if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
2000 * other process completed the conversion
2002 drop_exclusive_access(uf_info);
2003 reiser4_exit_context(ctx);
2004 return 0;
2008 * file left in semi converted state after unclean shutdown or another
2009 * thread is doing conversion and dropped exclusive access which doing
2010 * balance dirty pages. Complete the conversion
2012 result = find_first_item(inode);
2013 if (result == EXTENT_POINTER_ID)
2015 * first item is extent, therefore there was incomplete
2016 * tail2extent conversion. Complete it
2018 result = tail2extent(unix_file_inode_data(inode));
2019 else if (result == FORMATTING_ID)
2021 * first item is formatting item, therefore there was
2022 * incomplete extent2tail conversion. Complete it
2024 result = extent2tail(file, unix_file_inode_data(inode));
2025 else
2026 result = -EIO;
2028 assert("vs-1712",
2029 ergo(result == 0,
2030 (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED) &&
2031 !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))));
2032 drop_exclusive_access(uf_info);
2033 reiser4_exit_context(ctx);
2034 return result;
2037 #define NEITHER_OBTAINED 0
2038 #define EA_OBTAINED 1
2039 #define NEA_OBTAINED 2
2041 static void drop_access(struct unix_file_info *uf_info)
2043 if (uf_info->exclusive_use)
2044 drop_exclusive_access(uf_info);
2045 else
2046 drop_nonexclusive_access(uf_info);
2049 #define debug_wuf(format, ...) printk("%s: %d: %s: " format "\n", \
2050 __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
2053 * write_unix_file - private ->write() method of unix_file plugin.
2055 * @file: file to write to
2056 * @buf: address of user-space buffer
2057 * @count: number of bytes to write
2058 * @pos: position in file to write to
2059 * @cont: unused argument, as we don't perform plugin conversion when being
2060 * managed by unix_file plugin.
2062 ssize_t write_unix_file(struct file *file,
2063 const char __user *buf,
2064 size_t count, loff_t *pos,
2065 struct dispatch_context *cont)
2067 int result;
2068 reiser4_context *ctx;
2069 struct inode *inode;
2070 struct unix_file_info *uf_info;
2071 ssize_t written;
2072 int try_free_space;
2073 int to_write = PAGE_CACHE_SIZE * WRITE_GRANULARITY;
2074 size_t left;
2075 ssize_t (*write_op)(struct file *, struct inode *,
2076 const char __user *, size_t,
2077 loff_t *pos);
2078 int ea;
2079 loff_t new_size;
2081 ctx = get_current_context();
2082 inode = file->f_dentry->d_inode;
2084 assert("vs-947", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
2085 assert("vs-9471", (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)));
2087 /* check amount of bytes to write and writing position */
2088 result = generic_write_checks(file, pos, &count, 0);
2089 if (result) {
2090 context_set_commit_async(ctx);
2091 return result;
2094 result = file_remove_suid(file);
2095 if (result) {
2096 context_set_commit_async(ctx);
2097 return result;
2099 /* remove_suid might create a transaction */
2100 reiser4_txn_restart(ctx);
2102 uf_info = unix_file_inode_data(inode);
2104 current->backing_dev_info = inode->i_mapping->backing_dev_info;
2105 written = 0;
2106 try_free_space = 0;
2107 left = count;
2108 ea = NEITHER_OBTAINED;
2110 new_size = i_size_read(inode);
2111 if (*pos + count > new_size)
2112 new_size = *pos + count;
2114 while (left) {
2115 if (left < to_write)
2116 to_write = left;
2118 if (uf_info->container == UF_CONTAINER_EMPTY) {
2119 get_exclusive_access(uf_info);
2120 ea = EA_OBTAINED;
2121 if (uf_info->container != UF_CONTAINER_EMPTY) {
2122 /* file is made not empty by another process */
2123 drop_exclusive_access(uf_info);
2124 ea = NEITHER_OBTAINED;
2125 continue;
2127 } else if (uf_info->container == UF_CONTAINER_UNKNOWN) {
2129 * get exclusive access directly just to not have to
2130 * re-obtain it if file will appear empty
2132 get_exclusive_access(uf_info);
2133 ea = EA_OBTAINED;
2134 result = find_file_state(inode, uf_info);
2135 if (result) {
2136 drop_exclusive_access(uf_info);
2137 ea = NEITHER_OBTAINED;
2138 break;
2140 } else {
2141 get_nonexclusive_access(uf_info);
2142 ea = NEA_OBTAINED;
2145 /* either EA or NEA is obtained. Choose item write method */
2146 if (uf_info->container == UF_CONTAINER_EXTENTS) {
2147 /* file is built of extent items */
2148 write_op = reiser4_write_extent;
2149 } else if (uf_info->container == UF_CONTAINER_EMPTY) {
2150 /* file is empty */
2151 if (should_have_notail(uf_info, new_size))
2152 write_op = reiser4_write_extent;
2153 else
2154 write_op = reiser4_write_tail;
2155 } else {
2156 /* file is built of tail items */
2157 if (should_have_notail(uf_info, new_size)) {
2158 if (ea == NEA_OBTAINED) {
2159 drop_nonexclusive_access(uf_info);
2160 get_exclusive_access(uf_info);
2161 ea = EA_OBTAINED;
2163 if (uf_info->container == UF_CONTAINER_TAILS) {
2165 * if file is being convered by another
2166 * process - wait until it completes
2168 while (1) {
2169 if (reiser4_inode_get_flag(inode,
2170 REISER4_PART_IN_CONV)) {
2171 drop_exclusive_access(uf_info);
2172 schedule();
2173 get_exclusive_access(uf_info);
2174 continue;
2176 break;
2178 if (uf_info->container == UF_CONTAINER_TAILS) {
2179 result = tail2extent(uf_info);
2180 if (result) {
2181 drop_exclusive_access(uf_info);
2182 context_set_commit_async(ctx);
2183 break;
2187 drop_exclusive_access(uf_info);
2188 ea = NEITHER_OBTAINED;
2189 continue;
2191 write_op = reiser4_write_tail;
2194 written = write_op(file, inode, buf, to_write, pos);
2195 if (written == -ENOSPC && try_free_space) {
2196 drop_access(uf_info);
2197 txnmgr_force_commit_all(inode->i_sb, 0);
2198 try_free_space = 0;
2199 continue;
2201 if (written < 0) {
2202 drop_access(uf_info);
2203 result = written;
2204 break;
2206 /* something is written. */
2207 if (uf_info->container == UF_CONTAINER_EMPTY) {
2208 assert("edward-1553", ea == EA_OBTAINED);
2209 uf_info->container =
2210 (write_op == reiser4_write_extent) ?
2211 UF_CONTAINER_EXTENTS : UF_CONTAINER_TAILS;
2212 } else {
2213 assert("edward-1554", ergo(uf_info->container == UF_CONTAINER_EXTENTS,
2214 write_op == reiser4_write_extent));
2215 assert("edward-1555", ergo(uf_info->container == UF_CONTAINER_TAILS,
2216 write_op == reiser4_write_tail));
2218 if (*pos + written > inode->i_size)
2219 INODE_SET_FIELD(inode, i_size, *pos + written);
2220 file_update_time(file);
2221 result = reiser4_update_sd(inode);
2222 if (result) {
2223 current->backing_dev_info = NULL;
2224 drop_access(uf_info);
2225 context_set_commit_async(ctx);
2226 break;
2228 drop_access(uf_info);
2229 ea = NEITHER_OBTAINED;
2232 * tell VM how many pages were dirtied. Maybe number of pages
2233 * which were dirty already should not be counted
2235 reiser4_throttle_write(inode,
2236 (written + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE);
2237 left -= written;
2238 buf += written;
2239 *pos += written;
2241 if (result == 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2242 reiser4_txn_restart_current();
2243 grab_space_enable();
2244 result = reiser4_sync_file_common(file, file->f_dentry,
2245 0 /* data and stat data */);
2246 if (result)
2247 warning("reiser4-7", "failed to sync file %llu",
2248 (unsigned long long)get_inode_oid(inode));
2251 current->backing_dev_info = NULL;
2254 * return number of written bytes or error code if nothing is
2255 * written. Note, that it does not work correctly in case when
2256 * sync_unix_file returns error
2258 return (count - left) ? (count - left) : result;
2262 * release_unix_file - release of struct file_operations
2263 * @inode: inode of released file
2264 * @file: file to release
2266 * Implementation of release method of struct file_operations for unix file
2267 * plugin. If last reference to indode is released - convert all extent items
2268 * into tail items if necessary. Frees reiser4 specific file data.
2270 int release_unix_file(struct inode *inode, struct file *file)
2272 reiser4_context *ctx;
2273 struct unix_file_info *uf_info;
2274 int result;
2275 int in_reiser4;
2277 in_reiser4 = is_in_reiser4_context();
2279 ctx = reiser4_init_context(inode->i_sb);
2280 if (IS_ERR(ctx))
2281 return PTR_ERR(ctx);
2283 result = 0;
2284 if (in_reiser4 == 0) {
2285 uf_info = unix_file_inode_data(inode);
2287 get_exclusive_access_careful(uf_info, inode);
2288 if (atomic_read(&file->f_dentry->d_count) == 1 &&
2289 uf_info->container == UF_CONTAINER_EXTENTS &&
2290 !should_have_notail(uf_info, inode->i_size) &&
2291 !rofs_inode(inode)) {
2292 result = extent2tail(file, uf_info);
2293 if (result != 0) {
2294 context_set_commit_async(ctx);
2295 warning("nikita-3233",
2296 "Failed (%d) to convert in %s (%llu)",
2297 result, __FUNCTION__,
2298 (unsigned long long)
2299 get_inode_oid(inode));
2302 drop_exclusive_access(uf_info);
2303 } else {
2305 we are within reiser4 context already. How latter is
2306 possible? Simple:
2308 (gdb) bt
2309 #0 get_exclusive_access ()
2310 #2 0xc01e56d3 in release_unix_file ()
2311 #3 0xc01c3643 in reiser4_release ()
2312 #4 0xc014cae0 in __fput ()
2313 #5 0xc013ffc3 in remove_vm_struct ()
2314 #6 0xc0141786 in exit_mmap ()
2315 #7 0xc0118480 in mmput ()
2316 #8 0xc0133205 in oom_kill ()
2317 #9 0xc01332d1 in out_of_memory ()
2318 #10 0xc013bc1d in try_to_free_pages ()
2319 #11 0xc013427b in __alloc_pages ()
2320 #12 0xc013f058 in do_anonymous_page ()
2321 #13 0xc013f19d in do_no_page ()
2322 #14 0xc013f60e in handle_mm_fault ()
2323 #15 0xc01131e5 in do_page_fault ()
2324 #16 0xc0104935 in error_code ()
2325 #17 0xc025c0c6 in __copy_to_user_ll ()
2326 #18 0xc01d496f in reiser4_read_tail ()
2327 #19 0xc01e4def in read_unix_file ()
2328 #20 0xc01c3504 in reiser4_read ()
2329 #21 0xc014bd4f in vfs_read ()
2330 #22 0xc014bf66 in sys_read ()
2332 warning("vs-44", "out of memory?");
2335 reiser4_free_file_fsdata(file);
2337 reiser4_exit_context(ctx);
2338 return result;
2341 static void set_file_notail(struct inode *inode)
2343 reiser4_inode *state;
2344 formatting_plugin *tplug;
2346 state = reiser4_inode_data(inode);
2347 tplug = formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID);
2348 force_plugin_pset(inode, PSET_FORMATTING, (reiser4_plugin *)tplug);
2351 /* if file is built of tails - convert it to extents */
2352 static int unpack(struct file *filp, struct inode *inode, int forever)
2354 int result = 0;
2355 struct unix_file_info *uf_info;
2357 uf_info = unix_file_inode_data(inode);
2358 assert("vs-1628", ea_obtained(uf_info));
2360 result = find_file_state(inode, uf_info);
2361 if (result)
2362 return result;
2363 assert("vs-1074", uf_info->container != UF_CONTAINER_UNKNOWN);
2365 if (uf_info->container == UF_CONTAINER_TAILS) {
2367 * if file is being convered by another process - wait until it
2368 * completes
2370 while (1) {
2371 if (reiser4_inode_get_flag(inode,
2372 REISER4_PART_IN_CONV)) {
2373 drop_exclusive_access(uf_info);
2374 schedule();
2375 get_exclusive_access(uf_info);
2376 continue;
2378 break;
2380 if (uf_info->container == UF_CONTAINER_TAILS) {
2381 result = tail2extent(uf_info);
2382 if (result)
2383 return result;
2386 if (forever) {
2387 /* safe new formatting plugin in stat data */
2388 __u64 tograb;
2390 set_file_notail(inode);
2392 grab_space_enable();
2393 tograb = inode_file_plugin(inode)->estimate.update(inode);
2394 result = reiser4_grab_space(tograb, BA_CAN_COMMIT);
2395 result = reiser4_update_sd(inode);
2398 return result;
2401 /* implentation of vfs' ioctl method of struct file_operations for unix file
2402 plugin
2405 ioctl_unix_file(struct inode *inode, struct file *filp,
2406 unsigned int cmd, unsigned long arg UNUSED_ARG)
2408 reiser4_context *ctx;
2409 int result;
2411 ctx = reiser4_init_context(inode->i_sb);
2412 if (IS_ERR(ctx))
2413 return PTR_ERR(ctx);
2415 switch (cmd) {
2416 case REISER4_IOC_UNPACK:
2417 get_exclusive_access(unix_file_inode_data(inode));
2418 result = unpack(filp, inode, 1 /* forever */ );
2419 drop_exclusive_access(unix_file_inode_data(inode));
2420 break;
2422 default:
2423 result = RETERR(-ENOSYS);
2424 break;
2426 reiser4_exit_context(ctx);
2427 return result;
2430 /* implentation of vfs' bmap method of struct address_space_operations for unix
2431 file plugin
2433 sector_t bmap_unix_file(struct address_space * mapping, sector_t lblock)
2435 reiser4_context *ctx;
2436 sector_t result;
2437 reiser4_key key;
2438 coord_t coord;
2439 lock_handle lh;
2440 struct inode *inode;
2441 item_plugin *iplug;
2442 sector_t block;
2444 inode = mapping->host;
2446 ctx = reiser4_init_context(inode->i_sb);
2447 if (IS_ERR(ctx))
2448 return PTR_ERR(ctx);
2449 key_by_inode_and_offset_common(inode,
2450 (loff_t) lblock * current_blocksize,
2451 &key);
2453 init_lh(&lh);
2454 result =
2455 find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK, inode);
2456 if (cbk_errored(result)) {
2457 done_lh(&lh);
2458 reiser4_exit_context(ctx);
2459 return result;
2462 result = zload(coord.node);
2463 if (result) {
2464 done_lh(&lh);
2465 reiser4_exit_context(ctx);
2466 return result;
2469 iplug = item_plugin_by_coord(&coord);
2470 if (iplug->s.file.get_block) {
2471 result = iplug->s.file.get_block(&coord, lblock, &block);
2472 if (result == 0)
2473 result = block;
2474 } else
2475 result = RETERR(-EINVAL);
2477 zrelse(coord.node);
2478 done_lh(&lh);
2479 reiser4_exit_context(ctx);
2480 return result;
2484 * flow_by_inode_unix_file - initizlize structure flow
2485 * @inode: inode of file for which read or write is abou
2486 * @buf: buffer to perform read to or write from
2487 * @user: flag showing whether @buf is user space or kernel space
2488 * @size: size of buffer @buf
2489 * @off: start offset fro read or write
2490 * @op: READ or WRITE
2491 * @flow:
2493 * Initializes fields of @flow: key, size of data, i/o mode (read or write).
2495 int flow_by_inode_unix_file(struct inode *inode,
2496 const char __user *buf, int user,
2497 loff_t size, loff_t off,
2498 rw_op op, flow_t *flow)
2500 assert("nikita-1100", inode != NULL);
2502 flow->length = size;
2503 memcpy(&flow->data, &buf, sizeof(buf));
2504 flow->user = user;
2505 flow->op = op;
2506 assert("nikita-1931", inode_file_plugin(inode) != NULL);
2507 assert("nikita-1932",
2508 inode_file_plugin(inode)->key_by_inode ==
2509 key_by_inode_and_offset_common);
2510 /* calculate key of write position and insert it into flow->key */
2511 return key_by_inode_and_offset_common(inode, off, &flow->key);
/* plugin->u.file.set_plug_in_sd = NULL
   plugin->u.file.set_plug_in_inode = NULL
   plugin->u.file.create_blank_sd = NULL */
/* plugin->u.file.delete */
/*
   plugin->u.file.add_link = reiser4_add_link_common
   plugin->u.file.rem_link = NULL */
2522 /* plugin->u.file.owns_item
2523 this is common_file_owns_item with assertion */
2524 /* Audited by: green(2002.06.15) */
2526 owns_item_unix_file(const struct inode *inode /* object to check against */ ,
2527 const coord_t * coord /* coord to check */ )
2529 int result;
2531 result = owns_item_common(inode, coord);
2532 if (!result)
2533 return 0;
2534 if (!plugin_of_group(item_plugin_by_coord(coord),
2535 UNIX_FILE_METADATA_ITEM_TYPE))
2536 return 0;
2537 assert("vs-547",
2538 item_id_by_coord(coord) == EXTENT_POINTER_ID ||
2539 item_id_by_coord(coord) == FORMATTING_ID);
2540 return 1;
2543 static int setattr_truncate(struct inode *inode, struct iattr *attr)
2545 int result;
2546 int s_result;
2547 loff_t old_size;
2548 reiser4_tree *tree;
2550 inode_check_scale(inode, inode->i_size, attr->ia_size);
2552 old_size = inode->i_size;
2553 tree = reiser4_tree_by_inode(inode);
2555 result = safe_link_grab(tree, BA_CAN_COMMIT);
2556 if (result == 0)
2557 result = safe_link_add(inode, SAFE_TRUNCATE);
2558 if (result == 0)
2559 result = truncate_file_body(inode, attr);
2560 if (result)
2561 warning("vs-1588", "truncate_file failed: oid %lli, "
2562 "old size %lld, new size %lld, retval %d",
2563 (unsigned long long)get_inode_oid(inode),
2564 old_size, attr->ia_size, result);
2566 s_result = safe_link_grab(tree, BA_CAN_COMMIT);
2567 if (s_result == 0)
2568 s_result =
2569 safe_link_del(tree, get_inode_oid(inode), SAFE_TRUNCATE);
2570 if (s_result != 0) {
2571 warning("nikita-3417", "Cannot kill safelink %lli: %i",
2572 (unsigned long long)get_inode_oid(inode), s_result);
2574 safe_link_release(tree);
2575 return result;
2578 /* plugin->u.file.setattr method */
2579 /* This calls inode_setattr and if truncate is in effect it also takes
2580 exclusive inode access to avoid races */
2581 int setattr_unix_file(struct dentry *dentry, /* Object to change attributes */
2582 struct iattr *attr /* change description */ )
2584 int result;
2586 if (attr->ia_valid & ATTR_SIZE) {
2587 reiser4_context *ctx;
2588 struct unix_file_info *uf_info;
2590 /* truncate does reservation itself and requires exclusive
2591 access obtained */
2592 ctx = reiser4_init_context(dentry->d_inode->i_sb);
2593 if (IS_ERR(ctx))
2594 return PTR_ERR(ctx);
2596 uf_info = unix_file_inode_data(dentry->d_inode);
2597 get_exclusive_access_careful(uf_info, dentry->d_inode);
2598 result = setattr_truncate(dentry->d_inode, attr);
2599 drop_exclusive_access(uf_info);
2600 context_set_commit_async(ctx);
2601 reiser4_exit_context(ctx);
2602 } else
2603 result = reiser4_setattr_common(dentry, attr);
2605 return result;
2608 /* plugin->u.file.init_inode_data */
2609 void
2610 init_inode_data_unix_file(struct inode *inode,
2611 reiser4_object_create_data * crd, int create)
2613 struct unix_file_info *data;
2615 data = unix_file_inode_data(inode);
2616 data->container = create ? UF_CONTAINER_EMPTY : UF_CONTAINER_UNKNOWN;
2617 init_rwsem(&data->latch);
2618 data->tplug = inode_formatting_plugin(inode);
2619 data->exclusive_use = 0;
2621 #if REISER4_DEBUG
2622 data->ea_owner = NULL;
2623 atomic_set(&data->nr_neas, 0);
2624 #endif
2625 init_inode_ordering(inode, crd, create);
2629 * delete_unix_file - delete_object of file_plugin
2630 * @inode: inode to be deleted
2632 * Truncates file to length 0, removes stat data and safe link.
2634 int delete_object_unix_file(struct inode *inode)
2636 struct unix_file_info *uf_info;
2637 int result;
2639 if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
2640 return 0;
2642 /* truncate file bogy first */
2643 uf_info = unix_file_inode_data(inode);
2644 get_exclusive_access(uf_info);
2645 result = shorten_file(inode, 0 /* size */ );
2646 drop_exclusive_access(uf_info);
2648 if (result)
2649 warning("edward-1556",
2650 "failed to truncate file (%llu) on removal: %d",
2651 get_inode_oid(inode), result);
2653 /* remove stat data and safe link */
2654 return reiser4_delete_object_common(inode);
2657 /* plugin->write_begin() */
2658 int write_begin_unix_file(struct file *file, struct page *page,
2659 unsigned from, unsigned to)
2661 int ret;
2662 struct unix_file_info *info;
2664 info = unix_file_inode_data(file->f_dentry->d_inode);
2665 get_exclusive_access(info);
2666 ret = find_file_state(file->f_dentry->d_inode, info);
2667 if (likely(ret == 0)) {
2668 if (info->container == UF_CONTAINER_TAILS)
2669 ret = -EINVAL;
2670 else
2671 ret = do_prepare_write(file, page, from, to);
2673 drop_exclusive_access(info);
2674 return ret;
/*
 * Local variables:
 * c-indentation-style: "K&R"
 * mode-name: "LC"
 * c-basic-offset: 8
 * tab-width: 8
 * fill-column: 79
 * scroll-step: 1
 * End:
 */