revert-mm-fix-blkdev-size-calculation-in-generic_write_checks
[linux-2.6/linux-trees-mm.git] / fs / reiser4 / plugin / file / file.c
blob99851295bdc9055102d4e92d65648bffb45d801a
/* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
 * reiser4/README */

/*
 * This file contains implementations of inode/file/address_space/file plugin
 * operations specific for "unix file plugin" (plugin id is
 * UNIX_FILE_PLUGIN_ID). "Unix file" is either built of tail items only
 * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (has
 * no items but stat data).
 */
12 #include "../../inode.h"
13 #include "../../super.h"
14 #include "../../tree_walk.h"
15 #include "../../carry.h"
16 #include "../../page_cache.h"
17 #include "../../ioctl.h"
18 #include "../object.h"
19 #include "../cluster.h"
20 #include "../../safe_link.h"
22 #include <linux/writeback.h>
23 #include <linux/pagevec.h>
24 #include <linux/syscalls.h>
27 static int unpack(struct file *file, struct inode *inode, int forever);
28 static void drop_access(struct unix_file_info *);
29 static int hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
30 znode_lock_mode lock_mode);
32 /* Get exclusive access and make sure that file is not partially
33 * converted (It may happen that another process is doing tail
34 * conversion. If so, wait until it completes)
36 static inline void get_exclusive_access_careful(struct unix_file_info * uf_info,
37 struct inode *inode)
39 do {
40 get_exclusive_access(uf_info);
41 if (!reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))
42 break;
43 drop_exclusive_access(uf_info);
44 schedule();
45 } while (1);
48 /* get unix file plugin specific portion of inode */
49 struct unix_file_info *unix_file_inode_data(const struct inode *inode)
51 return &reiser4_inode_data(inode)->file_plugin_data.unix_file_info;
54 /**
55 * equal_to_rdk - compare key and znode's right delimiting key
56 * @node: node whose right delimiting key to compare with @key
57 * @key: key to compare with @node's right delimiting key
59 * Returns true if @key is equal to right delimiting key of @node.
61 int equal_to_rdk(znode *node, const reiser4_key *key)
63 int result;
65 read_lock_dk(znode_get_tree(node));
66 result = keyeq(key, znode_get_rd_key(node));
67 read_unlock_dk(znode_get_tree(node));
68 return result;
#if REISER4_DEBUG

/**
 * equal_to_ldk - compare key and znode's left delimiting key
 * @node: node whose left delimiting key to compare with @key
 * @key: key to compare with @node's left delimiting key
 *
 * The comparison is done under the tree's delimiting-key read lock.
 * Returns true if @key is equal to left delimiting key of @node.
 */
int equal_to_ldk(znode *node, const reiser4_key *key)
{
	int result;

	read_lock_dk(znode_get_tree(node));
	result = keyeq(key, znode_get_ld_key(node));
	read_unlock_dk(znode_get_tree(node));
	return result;
}

/**
 * check_coord - check whether coord corresponds to key
 * @coord: coord to check
 * @key: key @coord has to correspond to
 *
 * Repeats the node-level lookup of @key in coord->node into a scratch coord
 * and compares the two. Returns true if @coord is set as if it was set as
 * result of lookup with @key in coord->node.
 */
static int check_coord(const coord_t *coord, const reiser4_key *key)
{
	coord_t twin;

	/* only the resulting position matters, the lookup status is unused */
	node_plugin_by_node(coord->node)->lookup(coord->node, key,
						 FIND_MAX_NOT_MORE_THAN, &twin);
	return coords_equal(coord, &twin);
}

#endif /* REISER4_DEBUG */
110 * init_uf_coord - initialize extended coord
111 * @uf_coord:
112 * @lh:
116 void init_uf_coord(uf_coord_t *uf_coord, lock_handle *lh)
118 coord_init_zero(&uf_coord->coord);
119 coord_clear_iplug(&uf_coord->coord);
120 uf_coord->lh = lh;
121 init_lh(lh);
122 memset(&uf_coord->extension, 0, sizeof(uf_coord->extension));
123 uf_coord->valid = 0;
126 static void validate_extended_coord(uf_coord_t *uf_coord, loff_t offset)
128 assert("vs-1333", uf_coord->valid == 0);
130 if (coord_is_between_items(&uf_coord->coord))
131 return;
133 assert("vs-1348",
134 item_plugin_by_coord(&uf_coord->coord)->s.file.
135 init_coord_extension);
137 item_body_by_coord(&uf_coord->coord);
138 item_plugin_by_coord(&uf_coord->coord)->s.file.
139 init_coord_extension(uf_coord, offset);
143 * goto_right_neighbor - lock right neighbor, drop current node lock
144 * @coord:
145 * @lh:
147 * Obtain lock on right neighbor and drop lock on current node.
149 int goto_right_neighbor(coord_t *coord, lock_handle *lh)
151 int result;
152 lock_handle lh_right;
154 assert("vs-1100", znode_is_locked(coord->node));
156 init_lh(&lh_right);
157 result = reiser4_get_right_neighbor(&lh_right, coord->node,
158 znode_is_wlocked(coord->node) ?
159 ZNODE_WRITE_LOCK : ZNODE_READ_LOCK,
160 GN_CAN_USE_UPPER_LEVELS);
161 if (result) {
162 done_lh(&lh_right);
163 return result;
167 * we hold two longterm locks on neighboring nodes. Unlock left of
168 * them
170 done_lh(lh);
172 coord_init_first_unit_nocheck(coord, lh_right.node);
173 move_lh(lh, &lh_right);
175 return 0;
180 * set_file_state
181 * @uf_info:
182 * @cbk_result:
183 * @level:
185 * This is to be used by find_file_item and in find_file_state to
186 * determine real state of file
188 static void set_file_state(struct unix_file_info *uf_info, int cbk_result,
189 tree_level level)
191 if (cbk_errored(cbk_result))
192 /* error happened in find_file_item */
193 return;
195 assert("vs-1164", level == LEAF_LEVEL || level == TWIG_LEVEL);
197 if (uf_info->container == UF_CONTAINER_UNKNOWN) {
198 if (cbk_result == CBK_COORD_NOTFOUND)
199 uf_info->container = UF_CONTAINER_EMPTY;
200 else if (level == LEAF_LEVEL)
201 uf_info->container = UF_CONTAINER_TAILS;
202 else
203 uf_info->container = UF_CONTAINER_EXTENTS;
204 } else {
206 * file state is known, check whether it is set correctly if
207 * file is not being tail converted
209 if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info),
210 REISER4_PART_IN_CONV)) {
211 assert("vs-1162",
212 ergo(level == LEAF_LEVEL &&
213 cbk_result == CBK_COORD_FOUND,
214 uf_info->container == UF_CONTAINER_TAILS));
215 assert("vs-1165",
216 ergo(level == TWIG_LEVEL &&
217 cbk_result == CBK_COORD_FOUND,
218 uf_info->container == UF_CONTAINER_EXTENTS));
223 int find_file_item_nohint(coord_t *coord, lock_handle *lh,
224 const reiser4_key *key, znode_lock_mode lock_mode,
225 struct inode *inode)
227 return reiser4_object_lookup(inode, key, coord, lh, lock_mode,
228 FIND_MAX_NOT_MORE_THAN,
229 TWIG_LEVEL, LEAF_LEVEL,
230 (lock_mode == ZNODE_READ_LOCK) ? CBK_UNIQUE :
231 (CBK_UNIQUE | CBK_FOR_INSERT),
232 NULL /* ra_info */ );
236 * find_file_item - look for file item in the tree
237 * @hint: provides coordinate, lock handle, seal
238 * @key: key for search
239 * @mode: mode of lock to put on returned node
240 * @ra_info:
241 * @inode:
243 * This finds position in the tree corresponding to @key. It first tries to use
244 * @hint's seal if it is set.
246 int find_file_item(hint_t *hint, const reiser4_key *key,
247 znode_lock_mode lock_mode,
248 struct inode *inode)
250 int result;
251 coord_t *coord;
252 lock_handle *lh;
254 assert("nikita-3030", reiser4_schedulable());
255 assert("vs-1707", hint != NULL);
256 assert("vs-47", inode != NULL);
258 coord = &hint->ext_coord.coord;
259 lh = hint->ext_coord.lh;
260 init_lh(lh);
262 result = hint_validate(hint, key, 1 /* check key */, lock_mode);
263 if (!result) {
264 if (coord->between == AFTER_UNIT &&
265 equal_to_rdk(coord->node, key)) {
266 result = goto_right_neighbor(coord, lh);
267 if (result == -E_NO_NEIGHBOR)
268 return RETERR(-EIO);
269 if (result)
270 return result;
271 assert("vs-1152", equal_to_ldk(coord->node, key));
273 * we moved to different node. Invalidate coord
274 * extension, zload is necessary to init it again
276 hint->ext_coord.valid = 0;
279 set_file_state(unix_file_inode_data(inode), CBK_COORD_FOUND,
280 znode_get_level(coord->node));
282 return CBK_COORD_FOUND;
285 coord_init_zero(coord);
286 result = find_file_item_nohint(coord, lh, key, lock_mode, inode);
287 set_file_state(unix_file_inode_data(inode), result,
288 znode_get_level(coord->node));
290 /* FIXME: we might already have coord extension initialized */
291 hint->ext_coord.valid = 0;
292 return result;
295 /* plugin->u.file.write_flowom = NULL
296 plugin->u.file.read_flow = NULL */
298 void hint_init_zero(hint_t * hint)
300 memset(hint, 0, sizeof(*hint));
301 init_lh(&hint->lh);
302 hint->ext_coord.lh = &hint->lh;
305 static int find_file_state(struct inode *inode, struct unix_file_info *uf_info)
307 int result;
308 reiser4_key key;
309 coord_t coord;
310 lock_handle lh;
312 assert("vs-1628", ea_obtained(uf_info));
314 if (uf_info->container == UF_CONTAINER_UNKNOWN) {
315 key_by_inode_and_offset_common(inode, 0, &key);
316 init_lh(&lh);
317 result = find_file_item_nohint(&coord, &lh, &key,
318 ZNODE_READ_LOCK, inode);
319 set_file_state(uf_info, result, znode_get_level(coord.node));
320 done_lh(&lh);
321 if (!cbk_errored(result))
322 result = 0;
323 } else
324 result = 0;
325 assert("vs-1074",
326 ergo(result == 0, uf_info->container != UF_CONTAINER_UNKNOWN));
327 reiser4_txn_restart_current();
328 return result;
331 /* estimate and reserve space needed to truncate page which gets partially truncated: one block for page itself, stat
332 data update (estimate_one_insert_into_item) and one item insertion (estimate_one_insert_into_item) which may happen
333 if page corresponds to hole extent and unallocated one will have to be created */
334 static int reserve_partial_page(reiser4_tree * tree)
336 grab_space_enable();
337 return reiser4_grab_reserved(reiser4_get_current_sb(),
339 2 * estimate_one_insert_into_item(tree),
340 BA_CAN_COMMIT);
343 /* estimate and reserve space needed to cut one item and update one stat data */
344 static int reserve_cut_iteration(reiser4_tree * tree)
346 __u64 estimate = estimate_one_item_removal(tree)
347 + estimate_one_insert_into_item(tree);
349 assert("nikita-3172", lock_stack_isclean(get_current_lock_stack()));
351 grab_space_enable();
352 /* We need to double our estimate now that we can delete more than one
353 node. */
354 return reiser4_grab_reserved(reiser4_get_current_sb(), estimate * 2,
355 BA_CAN_COMMIT);
358 int reiser4_update_file_size(struct inode *inode, reiser4_key * key,
359 int update_sd)
361 int result = 0;
363 INODE_SET_SIZE(inode, get_key_offset(key));
364 if (update_sd) {
365 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
366 result = reiser4_update_sd(inode);
368 return result;
371 /* cut file items one by one starting from the last one until new file size (inode->i_size) is reached. Reserve space
372 and update file stat data on every single cut from the tree */
374 cut_file_items(struct inode *inode, loff_t new_size, int update_sd,
375 loff_t cur_size, int (*update_actor) (struct inode *,
376 reiser4_key *, int))
378 reiser4_key from_key, to_key;
379 reiser4_key smallest_removed;
380 file_plugin *fplug = inode_file_plugin(inode);
381 int result;
382 int progress = 0;
384 assert("vs-1248",
385 fplug == file_plugin_by_id(UNIX_FILE_PLUGIN_ID) ||
386 fplug == file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
388 fplug->key_by_inode(inode, new_size, &from_key);
389 to_key = from_key;
390 set_key_offset(&to_key, cur_size - 1 /*get_key_offset(reiser4_max_key()) */ );
391 /* this loop normally runs just once */
392 while (1) {
393 result = reserve_cut_iteration(reiser4_tree_by_inode(inode));
394 if (result)
395 break;
397 result = reiser4_cut_tree_object(current_tree, &from_key, &to_key,
398 &smallest_removed, inode, 1,
399 &progress);
400 if (result == -E_REPEAT) {
401 /* -E_REPEAT is a signal to interrupt a long file truncation process */
402 if (progress) {
403 result =
404 update_actor(inode, &smallest_removed,
405 update_sd);
406 if (result)
407 break;
410 /* the below does up(sbinfo->delete_mutex). Do not get folled */
411 reiser4_release_reserved(inode->i_sb);
413 /* reiser4_cut_tree_object() was interrupted probably because
414 * current atom requires commit, we have to release
415 * transaction handle to allow atom commit. */
416 reiser4_txn_restart_current();
417 continue;
419 if (result
420 && !(result == CBK_COORD_NOTFOUND && new_size == 0
421 && inode->i_size == 0))
422 break;
424 set_key_offset(&smallest_removed, new_size);
425 /* Final sd update after the file gets its correct size */
426 result = update_actor(inode, &smallest_removed, update_sd);
427 break;
430 /* the below does up(sbinfo->delete_mutex). Do not get folled */
431 reiser4_release_reserved(inode->i_sb);
433 return result;
436 int find_or_create_extent(struct page *page);
438 /* part of truncate_file_body: it is called when truncate is used to make file
439 shorter */
440 static int shorten_file(struct inode *inode, loff_t new_size)
442 int result;
443 struct page *page;
444 int padd_from;
445 unsigned long index;
446 struct unix_file_info *uf_info;
449 * all items of ordinary reiser4 file are grouped together. That is why
450 * we can use reiser4_cut_tree. Plan B files (for instance) can not be
451 * truncated that simply
453 result = cut_file_items(inode, new_size, 1 /*update_sd */ ,
454 get_key_offset(reiser4_max_key()),
455 reiser4_update_file_size);
456 if (result)
457 return result;
459 uf_info = unix_file_inode_data(inode);
460 assert("vs-1105", new_size == inode->i_size);
461 if (new_size == 0) {
462 uf_info->container = UF_CONTAINER_EMPTY;
463 return 0;
466 result = find_file_state(inode, uf_info);
467 if (result)
468 return result;
469 if (uf_info->container == UF_CONTAINER_TAILS)
471 * No need to worry about zeroing last page after new file
472 * end
474 return 0;
476 padd_from = inode->i_size & (PAGE_CACHE_SIZE - 1);
477 if (!padd_from)
478 /* file is truncated to page boundary */
479 return 0;
481 result = reserve_partial_page(reiser4_tree_by_inode(inode));
482 if (result) {
483 reiser4_release_reserved(inode->i_sb);
484 return result;
487 /* last page is partially truncated - zero its content */
488 index = (inode->i_size >> PAGE_CACHE_SHIFT);
489 page = read_mapping_page(inode->i_mapping, index, NULL);
490 if (IS_ERR(page)) {
492 * the below does up(sbinfo->delete_mutex). Do not get
493 * confused
495 reiser4_release_reserved(inode->i_sb);
496 if (likely(PTR_ERR(page) == -EINVAL)) {
497 /* looks like file is built of tail items */
498 return 0;
500 return PTR_ERR(page);
502 wait_on_page_locked(page);
503 if (!PageUptodate(page)) {
504 page_cache_release(page);
506 * the below does up(sbinfo->delete_mutex). Do not get
507 * confused
509 reiser4_release_reserved(inode->i_sb);
510 return RETERR(-EIO);
514 * if page correspons to hole extent unit - unallocated one will be
515 * created here. This is not necessary
517 result = find_or_create_extent(page);
520 * FIXME: cut_file_items has already updated inode. Probably it would
521 * be better to update it here when file is really truncated
523 if (result) {
524 page_cache_release(page);
526 * the below does up(sbinfo->delete_mutex). Do not get
527 * confused
529 reiser4_release_reserved(inode->i_sb);
530 return result;
533 lock_page(page);
534 assert("vs-1066", PageLocked(page));
535 zero_user_segment(page, padd_from, PAGE_CACHE_SIZE);
536 unlock_page(page);
537 page_cache_release(page);
538 /* the below does up(sbinfo->delete_mutex). Do not get confused */
539 reiser4_release_reserved(inode->i_sb);
540 return 0;
544 * should_have_notail
545 * @uf_info:
546 * @new_size:
548 * Calls formatting plugin to see whether file of size @new_size has to be
549 * stored in unformatted nodes or in tail items. 0 is returned for later case.
551 static int should_have_notail(const struct unix_file_info *uf_info, loff_t new_size)
553 if (!uf_info->tplug)
554 return 1;
555 return !uf_info->tplug->have_tail(unix_file_info_to_inode(uf_info),
556 new_size);
561 * truncate_file_body - change length of file
562 * @inode: inode of file
563 * @new_size: new file length
565 * Adjusts items file @inode is built of to match @new_size. It may either cut
566 * items or add them to represent a hole at the end of file. The caller has to
567 * obtain exclusive access to the file.
569 static int truncate_file_body(struct inode *inode, struct iattr *attr)
571 int result;
572 loff_t new_size = attr->ia_size;
574 if (inode->i_size < new_size) {
575 /* expanding truncate */
576 struct file * file = attr->ia_file;
577 struct unix_file_info *uf_info = unix_file_inode_data(inode);
579 assert("edward-1532", attr->ia_valid & ATTR_FILE);
581 result = find_file_state(inode, uf_info);
582 if (result)
583 return result;
585 if (should_have_notail(uf_info, new_size)) {
587 * file of size @new_size has to be built of
588 * extents. If it is built of tails - convert to
589 * extents
591 if (uf_info->container == UF_CONTAINER_TAILS) {
593 * if file is being convered by another process
594 * - wait until it completes
596 while (1) {
597 if (reiser4_inode_get_flag(inode,
598 REISER4_PART_IN_CONV)) {
599 drop_exclusive_access(uf_info);
600 schedule();
601 get_exclusive_access(uf_info);
602 continue;
604 break;
607 if (uf_info->container == UF_CONTAINER_TAILS) {
608 result = tail2extent(uf_info);
609 if (result)
610 return result;
613 result = reiser4_write_extent(file, NULL, 0,
614 &new_size);
615 if (result)
616 return result;
617 uf_info->container = UF_CONTAINER_EXTENTS;
618 } else {
619 if (uf_info->container == UF_CONTAINER_EXTENTS) {
620 result = reiser4_write_extent(file, NULL, 0,
621 &new_size);
622 if (result)
623 return result;
624 } else {
625 result = reiser4_write_tail(file, NULL, 0,
626 &new_size);
627 if (result)
628 return result;
629 uf_info->container = UF_CONTAINER_TAILS;
632 BUG_ON(result > 0);
633 INODE_SET_FIELD(inode, i_size, new_size);
634 file_update_time(file);
635 result = reiser4_update_sd(inode);
636 BUG_ON(result != 0);
637 reiser4_free_file_fsdata(file);
638 } else
639 result = shorten_file(inode, new_size);
640 return result;
/* plugin->u.write_sd_by_inode = write_sd_by_inode_common */
646 * load_file_hint - copy hint from struct file to local variable
647 * @file: file to get hint from
648 * @hint: structure to fill
650 * Reiser4 specific portion of struct file may contain information (hint)
651 * stored on exiting from previous read or write. That information includes
652 * seal of znode and coord within that znode where previous read or write
653 * stopped. This function copies that information to @hint if it was stored or
654 * initializes @hint by 0s otherwise.
656 int load_file_hint(struct file *file, hint_t *hint)
658 reiser4_file_fsdata *fsdata;
660 if (file) {
661 fsdata = reiser4_get_file_fsdata(file);
662 if (IS_ERR(fsdata))
663 return PTR_ERR(fsdata);
665 spin_lock_inode(file->f_dentry->d_inode);
666 if (reiser4_seal_is_set(&fsdata->reg.hint.seal)) {
667 *hint = fsdata->reg.hint;
668 init_lh(&hint->lh);
669 hint->ext_coord.lh = &hint->lh;
670 spin_unlock_inode(file->f_dentry->d_inode);
672 * force re-validation of the coord on the first
673 * iteration of the read/write loop.
675 hint->ext_coord.valid = 0;
676 assert("nikita-19892", coords_equal(&hint->seal.coord1,
677 &hint->ext_coord.
678 coord));
679 return 0;
681 memset(&fsdata->reg.hint, 0, sizeof(hint_t));
682 spin_unlock_inode(file->f_dentry->d_inode);
684 hint_init_zero(hint);
685 return 0;
689 * save_file_hint - copy hint to reiser4 private struct file's part
690 * @file: file to save hint in
691 * @hint: hint to save
693 * This copies @hint to reiser4 private part of struct file. It can help
694 * speedup future accesses to the file.
696 void save_file_hint(struct file *file, const hint_t *hint)
698 reiser4_file_fsdata *fsdata;
700 assert("edward-1337", hint != NULL);
702 if (!file || !reiser4_seal_is_set(&hint->seal))
703 return;
704 fsdata = reiser4_get_file_fsdata(file);
705 assert("vs-965", !IS_ERR(fsdata));
706 assert("nikita-19891",
707 coords_equal(&hint->seal.coord1, &hint->ext_coord.coord));
708 assert("vs-30", hint->lh.owner == NULL);
709 spin_lock_inode(file->f_dentry->d_inode);
710 fsdata->reg.hint = *hint;
711 spin_unlock_inode(file->f_dentry->d_inode);
712 return;
715 void reiser4_unset_hint(hint_t * hint)
717 assert("vs-1315", hint);
718 hint->ext_coord.valid = 0;
719 reiser4_seal_done(&hint->seal);
720 done_lh(&hint->lh);
723 /* coord must be set properly. So, that reiser4_set_hint
724 has nothing to do */
725 void reiser4_set_hint(hint_t * hint, const reiser4_key * key,
726 znode_lock_mode mode)
728 ON_DEBUG(coord_t * coord = &hint->ext_coord.coord);
729 assert("vs-1207", WITH_DATA(coord->node, check_coord(coord, key)));
731 reiser4_seal_init(&hint->seal, &hint->ext_coord.coord, key);
732 hint->offset = get_key_offset(key);
733 hint->mode = mode;
734 done_lh(&hint->lh);
737 int hint_is_set(const hint_t * hint)
739 return reiser4_seal_is_set(&hint->seal);
#if REISER4_DEBUG
/*
 * Returns true if @k1 and @k2 agree in every key component except the
 * offset: locality, type, band, ordering and objectid.
 */
static int all_but_offset_key_eq(const reiser4_key * k1, const reiser4_key * k2)
{
	return (get_key_locality(k1) == get_key_locality(k2) &&
		get_key_type(k1) == get_key_type(k2) &&
		get_key_band(k1) == get_key_band(k2) &&
		get_key_ordering(k1) == get_key_ordering(k2) &&
		get_key_objectid(k1) == get_key_objectid(k2));
}
#endif
753 static int
754 hint_validate(hint_t * hint, const reiser4_key * key, int check_key,
755 znode_lock_mode lock_mode)
757 if (!hint || !hint_is_set(hint) || hint->mode != lock_mode)
758 /* hint either not set or set by different operation */
759 return RETERR(-E_REPEAT);
761 assert("vs-1277", all_but_offset_key_eq(key, &hint->seal.key));
763 if (check_key && get_key_offset(key) != hint->offset)
764 /* hint is set for different key */
765 return RETERR(-E_REPEAT);
767 assert("vs-31", hint->ext_coord.lh == &hint->lh);
768 return reiser4_seal_validate(&hint->seal, &hint->ext_coord.coord, key,
769 hint->ext_coord.lh, lock_mode,
770 ZNODE_LOCK_LOPRI);
774 * find_or_create_extent -
775 * @page:
779 /* look for place at twig level for extent corresponding to page, call extent's writepage method to create
780 unallocated extent if it does not exist yet, initialize jnode, capture page */
781 int find_or_create_extent(struct page *page)
783 int result;
784 struct inode *inode;
785 int plugged_hole;
787 jnode *node;
789 assert("vs-1065", page->mapping && page->mapping->host);
790 inode = page->mapping->host;
792 lock_page(page);
793 node = jnode_of_page(page);
794 if (IS_ERR(node)) {
795 unlock_page(page);
796 return PTR_ERR(node);
798 JF_SET(node, JNODE_WRITE_PREPARED);
799 unlock_page(page);
801 if (node->blocknr == 0) {
802 plugged_hole = 0;
803 result = reiser4_update_extent(inode, node, page_offset(page),
804 &plugged_hole);
805 if (result) {
806 JF_CLR(node, JNODE_WRITE_PREPARED);
807 jput(node);
808 warning("", "reiser4_update_extent failed: %d", result);
809 return result;
811 if (plugged_hole)
812 reiser4_update_sd(inode);
813 } else {
814 spin_lock_jnode(node);
815 result = reiser4_try_capture(node, ZNODE_WRITE_LOCK, 0);
816 BUG_ON(result != 0);
817 jnode_make_dirty_locked(node);
818 spin_unlock_jnode(node);
821 BUG_ON(node->atom == NULL);
822 JF_CLR(node, JNODE_WRITE_PREPARED);
823 jput(node);
825 if (get_current_context()->entd) {
826 entd_context *ent = get_entd_context(node->tree->super);
828 if (ent->cur_request->page == page)
829 ent->cur_request->node = node;
831 return 0;
835 * has_anonymous_pages - check whether inode has pages dirtied via mmap
836 * @inode: inode to check
838 * Returns true if inode's mapping has dirty pages which do not belong to any
839 * atom. Those are either tagged PAGECACHE_TAG_REISER4_MOVED in mapping's page
840 * tree or were eflushed and can be found via jnodes tagged
841 * EFLUSH_TAG_ANONYMOUS in radix tree of jnodes.
843 static int has_anonymous_pages(struct inode *inode)
845 int result;
847 read_lock_irq(&inode->i_mapping->tree_lock);
848 result = radix_tree_tagged(&inode->i_mapping->page_tree, PAGECACHE_TAG_REISER4_MOVED);
849 read_unlock_irq(&inode->i_mapping->tree_lock);
850 return result;
854 * capture_page_and_create_extent -
855 * @page: page to be captured
857 * Grabs space for extent creation and stat data update and calls function to
858 * do actual work.
860 static int capture_page_and_create_extent(struct page *page)
862 int result;
863 struct inode *inode;
865 assert("vs-1084", page->mapping && page->mapping->host);
866 inode = page->mapping->host;
867 assert("vs-1139",
868 unix_file_inode_data(inode)->container == UF_CONTAINER_EXTENTS);
869 /* page belongs to file */
870 assert("vs-1393",
871 inode->i_size > page_offset(page));
873 /* page capture may require extent creation (if it does not exist yet)
874 and stat data's update (number of blocks changes on extent
875 creation) */
876 grab_space_enable();
877 result = reiser4_grab_space(2 * estimate_one_insert_into_item
878 (reiser4_tree_by_inode(inode)),
879 BA_CAN_COMMIT);
880 if (likely(!result))
881 result = find_or_create_extent(page);
883 if (result != 0)
884 SetPageError(page);
885 return result;
888 /* this is implementation of method commit_write of struct
889 address_space_operations for unix file plugin */
891 commit_write_unix_file(struct file *file, struct page *page,
892 unsigned from, unsigned to)
894 reiser4_context *ctx;
895 struct inode *inode;
896 int result;
898 assert("umka-3101", file != NULL);
899 assert("umka-3102", page != NULL);
900 assert("umka-3093", PageLocked(page));
902 SetPageUptodate(page);
904 inode = page->mapping->host;
905 ctx = reiser4_init_context(page->mapping->host->i_sb);
906 if (IS_ERR(ctx))
907 return PTR_ERR(ctx);
908 page_cache_get(page);
909 unlock_page(page);
910 result = capture_page_and_create_extent(page);
911 lock_page(page);
912 page_cache_release(page);
914 /* don't commit transaction under inode semaphore */
915 context_set_commit_async(ctx);
916 reiser4_exit_context(ctx);
917 return result;
/*
 * Support for "anonymous" pages and jnodes.
 *
 * When a file is write-accessed through mmap, pages can be dirtied from the
 * user level. In this case the kernel is not notified until one of the
 * following happens:
 *
 *     (1) msync()
 *
 *     (2) truncate() (either explicit or through unlink)
 *
 *     (3) VM scanner starts reclaiming mapped pages, dirtying them before
 *     starting write-back.
 *
 * As a result of (3) ->writepage may be called on a dirty page without
 * jnode. Such a page is called "anonymous" in reiser4. Certain work-loads
 * (iozone) generate a huge number of anonymous pages. Emergency flush handles
 * this situation by creating a jnode for the anonymous page, starting IO on
 * the page, and marking the jnode with the JNODE_KEEPME bit so that it's not
 * thrown out of memory. Such a jnode is also called anonymous.
 *
 * reiser4_sync_sb() method tries to insert anonymous pages and jnodes into
 * tree. This is done by capture_anonymous_*() functions below.
 */
/**
 * capture_anonymous_page - involve page into transaction
 * @page: page to deal with
 *
 * Takes care that @page has corresponding metadata in the tree, creates jnode
 * for @page and captures it. On success 1 is returned. 0 is returned, and
 * nothing is done, for a page under writeback. On failure a warning is
 * printed and the error code is returned.
 */
static int capture_anonymous_page(struct page *page)
{
	int result;

	if (PageWriteback(page))
		/* FIXME: do nothing? */
		return 0;

	result = capture_page_and_create_extent(page);
	if (result != 0) {
		warning("nikita-3329",
			"Cannot capture anon page: %i", result);
		return result;
	}
	return 1;
}
970 * capture_anonymous_pages - find and capture pages dirtied via mmap
971 * @mapping: address space where to look for pages
972 * @index: start index
973 * @to_capture: maximum number of pages to capture
975 * Looks for pages tagged REISER4_MOVED starting from the *@index-th page,
976 * captures (involves into atom) them, returns number of captured pages,
977 * updates @index to next page after the last captured one.
979 static int
980 capture_anonymous_pages(struct address_space *mapping, pgoff_t *index,
981 unsigned int to_capture)
983 int result;
984 struct pagevec pvec;
985 unsigned int i, count;
986 int nr;
988 pagevec_init(&pvec, 0);
989 count = min(pagevec_space(&pvec), to_capture);
990 nr = 0;
992 /* find pages tagged MOVED */
993 write_lock_irq(&mapping->tree_lock);
994 pvec.nr = radix_tree_gang_lookup_tag(&mapping->page_tree,
995 (void **)pvec.pages, *index, count,
996 PAGECACHE_TAG_REISER4_MOVED);
997 if (pagevec_count(&pvec) == 0) {
999 * there are no pages tagged MOVED in mapping->page_tree
1000 * starting from *index
1002 write_unlock_irq(&mapping->tree_lock);
1003 *index = (pgoff_t)-1;
1004 return 0;
1007 /* clear MOVED tag for all found pages */
1008 for (i = 0; i < pagevec_count(&pvec); i++) {
1009 void *p;
1011 page_cache_get(pvec.pages[i]);
1012 p = radix_tree_tag_clear(&mapping->page_tree, pvec.pages[i]->index,
1013 PAGECACHE_TAG_REISER4_MOVED);
1014 assert("vs-49", p == pvec.pages[i]);
1016 write_unlock_irq(&mapping->tree_lock);
1019 *index = pvec.pages[i - 1]->index + 1;
1021 for (i = 0; i < pagevec_count(&pvec); i++) {
1023 * tag PAGECACHE_TAG_REISER4_MOVED will be cleared by
1024 * reiser4_set_page_dirty_internal which is called when jnode is
1025 * captured
1027 result = capture_anonymous_page(pvec.pages[i]);
1028 if (result == 1)
1029 nr++;
1030 else {
1031 if (result < 0) {
1032 warning("vs-1454",
1033 "failed to capture page: "
1034 "result=%d, captured=%d)\n",
1035 result, i);
1038 * set MOVED tag to all pages which left not
1039 * captured
1041 write_lock_irq(&mapping->tree_lock);
1042 for (; i < pagevec_count(&pvec); i ++) {
1043 radix_tree_tag_set(&mapping->page_tree,
1044 pvec.pages[i]->index,
1045 PAGECACHE_TAG_REISER4_MOVED);
1047 write_unlock_irq(&mapping->tree_lock);
1049 pagevec_release(&pvec);
1050 return result;
1051 } else {
1053 * result == 0. capture_anonymous_page returns
1054 * 0 for Writeback-ed page. Set MOVED tag on
1055 * that page
1057 write_lock_irq(&mapping->tree_lock);
1058 radix_tree_tag_set(&mapping->page_tree,
1059 pvec.pages[i]->index,
1060 PAGECACHE_TAG_REISER4_MOVED);
1061 write_unlock_irq(&mapping->tree_lock);
1062 if (i == 0)
1063 *index = pvec.pages[0]->index;
1064 else
1065 *index = pvec.pages[i - 1]->index + 1;
1069 pagevec_release(&pvec);
1070 return nr;
1074 * capture_anonymous_jnodes - find and capture anonymous jnodes
1075 * @mapping: address space where to look for jnodes
1076 * @from: start index
1077 * @to: end index
1078 * @to_capture: maximum number of jnodes to capture
1080 * Looks for jnodes tagged EFLUSH_TAG_ANONYMOUS in inode's tree of jnodes in
1081 * the range of indexes @from-@to and captures them, returns number of captured
1082 * jnodes, updates @from to next jnode after the last captured one.
1084 static int
1085 capture_anonymous_jnodes(struct address_space *mapping,
1086 pgoff_t *from, pgoff_t to, int to_capture)
1088 *from = to;
1089 return 0;
1093 * Commit atom of the jnode of a page.
1095 static int sync_page(struct page *page)
1097 int result;
1098 do {
1099 jnode *node;
1100 txn_atom *atom;
1102 lock_page(page);
1103 node = jprivate(page);
1104 if (node != NULL) {
1105 spin_lock_jnode(node);
1106 atom = jnode_get_atom(node);
1107 spin_unlock_jnode(node);
1108 } else
1109 atom = NULL;
1110 unlock_page(page);
1111 result = reiser4_sync_atom(atom);
1112 } while (result == -E_REPEAT);
1114 * ZAM-FIXME-HANS: document the logic of this loop, is it just to
1115 * handle the case where more pages get added to the atom while we are
1116 * syncing it?
1118 assert("nikita-3485", ergo(result == 0,
1119 get_current_context()->trans->atom == NULL));
1120 return result;
1124 * Commit atoms of pages on @pages list.
1125 * call sync_page for each page from mapping's page tree
1127 static int sync_page_list(struct inode *inode)
1129 int result;
1130 struct address_space *mapping;
1131 unsigned long from; /* start index for radix_tree_gang_lookup */
1132 unsigned int found; /* return value for radix_tree_gang_lookup */
1134 mapping = inode->i_mapping;
1135 from = 0;
1136 result = 0;
1137 read_lock_irq(&mapping->tree_lock);
1138 while (result == 0) {
1139 struct page *page;
1141 found =
1142 radix_tree_gang_lookup(&mapping->page_tree, (void **)&page,
1143 from, 1);
1144 assert("", found < 2);
1145 if (found == 0)
1146 break;
1148 /* page may not leave radix tree because it is protected from truncating by inode->i_mutex locked by
1149 sys_fsync */
1150 page_cache_get(page);
1151 read_unlock_irq(&mapping->tree_lock);
1153 from = page->index + 1;
1155 result = sync_page(page);
1157 page_cache_release(page);
1158 read_lock_irq(&mapping->tree_lock);
1161 read_unlock_irq(&mapping->tree_lock);
1162 return result;
1165 static int commit_file_atoms(struct inode *inode)
1167 int result;
1168 struct unix_file_info *uf_info;
1170 uf_info = unix_file_inode_data(inode);
1172 get_exclusive_access(uf_info);
1174 * find what items file is made from
1176 result = find_file_state(inode, uf_info);
1177 drop_exclusive_access(uf_info);
1178 if (result != 0)
1179 return result;
1182 * file state cannot change because we are under ->i_mutex
1184 switch (uf_info->container) {
1185 case UF_CONTAINER_EXTENTS:
1186 /* find_file_state might open join an atom */
1187 reiser4_txn_restart_current();
1188 result =
1190 * when we are called by
1191 * filemap_fdatawrite->
1192 * do_writepages()->
1193 * reiser4_writepages()
1195 * inode->i_mapping->dirty_pages are spices into
1196 * ->io_pages, leaving ->dirty_pages dirty.
1198 * When we are called from
1199 * reiser4_fsync()->sync_unix_file(), we have to
1200 * commit atoms of all pages on the ->dirty_list.
1202 * So for simplicity we just commit ->io_pages and
1203 * ->dirty_pages.
1205 sync_page_list(inode);
1206 break;
1207 case UF_CONTAINER_TAILS:
1209 * NOTE-NIKITA probably we can be smarter for tails. For now
1210 * just commit all existing atoms.
1212 result = txnmgr_force_commit_all(inode->i_sb, 0);
1213 break;
1214 case UF_CONTAINER_EMPTY:
1215 result = 0;
1216 break;
1217 case UF_CONTAINER_UNKNOWN:
1218 default:
1219 result = -EIO;
1220 break;
1224 * commit current transaction: there can be captured nodes from
1225 * find_file_state() and finish_conversion().
1227 reiser4_txn_restart_current();
1228 return result;
1232 * writepages_unix_file - writepages of struct address_space_operations
1233 * @mapping:
1234 * @wbc:
1236 * This captures anonymous pages and anonymous jnodes. Anonymous pages are
1237 * pages which are dirtied via mmapping. Anonymous jnodes are ones which were
1238 * created by reiser4_writepage.
1240 int writepages_unix_file(struct address_space *mapping,
1241 struct writeback_control *wbc)
1243 int result;
1244 struct unix_file_info *uf_info;
1245 pgoff_t pindex, jindex, nr_pages;
1246 long to_capture;
1247 struct inode *inode;
1249 inode = mapping->host;
1250 if (!has_anonymous_pages(inode)) {
1251 result = 0;
1252 goto end;
1254 jindex = pindex = wbc->range_start >> PAGE_CACHE_SHIFT;
1255 result = 0;
1256 nr_pages = size_in_pages(i_size_read(inode));
1258 uf_info = unix_file_inode_data(inode);
1260 do {
1261 reiser4_context *ctx;
1263 if (wbc->sync_mode != WB_SYNC_ALL)
1264 to_capture = min(wbc->nr_to_write, CAPTURE_APAGE_BURST);
1265 else
1266 to_capture = CAPTURE_APAGE_BURST;
1268 ctx = reiser4_init_context(inode->i_sb);
1269 if (IS_ERR(ctx)) {
1270 result = PTR_ERR(ctx);
1271 break;
1273 /* avoid recursive calls to ->sync_inodes */
1274 ctx->nobalance = 1;
1275 assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
1276 assert("", LOCK_CNT_NIL(inode_sem_w));
1277 assert("", LOCK_CNT_NIL(inode_sem_r));
1279 reiser4_txn_restart_current();
1281 /* we have to get nonexclusive access to the file */
1282 if (get_current_context()->entd) {
1284 * use nonblocking version of nonexclusive_access to
1285 * avoid deadlock which might look like the following:
1286 * process P1 holds NEA on file F1 and called entd to
1287 * reclaim some memory. Entd works for P1 and is going
1288 * to capture pages of file F2. To do that entd has to
1289 * get NEA to F2. F2 is held by process P2 which also
1290 * called entd. But entd is serving P1 at the moment
1291 * and P2 has to wait. Process P3 trying to get EA to
1292 * file F2. Existence of pending EA request to file F2
1293 * makes impossible for entd to get NEA to file
1294 * F2. Neither of these process can continue. Using
1295 * nonblocking version of gettign NEA is supposed to
1296 * avoid this deadlock.
1298 if (try_to_get_nonexclusive_access(uf_info) == 0) {
1299 result = RETERR(-EBUSY);
1300 reiser4_exit_context(ctx);
1301 break;
1303 } else
1304 get_nonexclusive_access(uf_info);
1306 while (to_capture > 0) {
1307 pgoff_t start;
1309 assert("vs-1727", jindex <= pindex);
1310 if (pindex == jindex) {
1311 start = pindex;
1312 result =
1313 capture_anonymous_pages(inode->i_mapping,
1314 &pindex,
1315 to_capture);
1316 if (result <= 0)
1317 break;
1318 to_capture -= result;
1319 wbc->nr_to_write -= result;
1320 if (start + result == pindex) {
1321 jindex = pindex;
1322 continue;
1324 if (to_capture <= 0)
1325 break;
1327 /* deal with anonymous jnodes between jindex and pindex */
1328 result =
1329 capture_anonymous_jnodes(inode->i_mapping, &jindex,
1330 pindex, to_capture);
1331 if (result < 0)
1332 break;
1333 to_capture -= result;
1334 get_current_context()->nr_captured += result;
1336 if (jindex == (pgoff_t) - 1) {
1337 assert("vs-1728", pindex == (pgoff_t) - 1);
1338 break;
1341 if (to_capture <= 0)
1342 /* there may be left more pages */
1343 __mark_inode_dirty(inode, I_DIRTY_PAGES);
1345 drop_nonexclusive_access(uf_info);
1346 if (result < 0) {
1347 /* error happened */
1348 reiser4_exit_context(ctx);
1349 return result;
1351 if (wbc->sync_mode != WB_SYNC_ALL) {
1352 reiser4_exit_context(ctx);
1353 return 0;
1355 result = commit_file_atoms(inode);
1356 reiser4_exit_context(ctx);
1357 if (pindex >= nr_pages && jindex == pindex)
1358 break;
1359 } while (1);
1361 end:
1362 if (is_in_reiser4_context()) {
1363 if (get_current_context()->nr_captured >= CAPTURE_APAGE_BURST) {
1365 * there are already pages to flush, flush them out, do
1366 * not delay until end of reiser4_sync_inodes
1368 reiser4_writeout(inode->i_sb, wbc);
1369 get_current_context()->nr_captured = 0;
1372 return result;
1376 * ->sync() method for unix file.
1378 * We are trying to be smart here. Instead of committing all atoms (original
1379 * solution), we scan dirty pages of this file and commit all atoms they are
1380 * part of.
1382 * Situation is complicated by anonymous pages: i.e., extent-less pages
1383 * dirtied through mmap. Fortunately sys_fsync() first calls
1384 * filemap_fdatawrite() that will ultimately call reiser4_writepages(), insert
1385 * all missing extents and capture anonymous pages.
1387 int sync_unix_file(struct file *file, struct dentry *dentry, int datasync)
1389 reiser4_context *ctx;
1390 txn_atom *atom;
1391 reiser4_block_nr reserve;
1393 ctx = reiser4_init_context(dentry->d_inode->i_sb);
1394 if (IS_ERR(ctx))
1395 return PTR_ERR(ctx);
1397 reserve = estimate_update_common(dentry->d_inode);
1398 if (reiser4_grab_space(reserve, BA_CAN_COMMIT)) {
1399 reiser4_exit_context(ctx);
1400 return RETERR(-ENOSPC);
1402 write_sd_by_inode_common(dentry->d_inode);
1404 atom = get_current_atom_locked();
1405 spin_lock_txnh(ctx->trans);
1406 force_commit_atom(ctx->trans);
1407 reiser4_exit_context(ctx);
1408 return 0;
1412 * readpage_unix_file_nolock - readpage of struct address_space_operations
1413 * @file:
1414 * @page:
1416 * Compose a key and search for item containing information about @page
1417 * data. If item is found - its readpage method is called.
1419 int readpage_unix_file(struct file *file, struct page *page)
1421 reiser4_context *ctx;
1422 int result;
1423 struct inode *inode;
1424 reiser4_key key;
1425 item_plugin *iplug;
1426 hint_t *hint;
1427 lock_handle *lh;
1428 coord_t *coord;
1430 assert("vs-1062", PageLocked(page));
1431 assert("vs-976", !PageUptodate(page));
1432 assert("vs-1061", page->mapping && page->mapping->host);
1434 if (page->mapping->host->i_size <= page_offset(page)) {
1435 /* page is out of file */
1436 zero_user(page, 0, PAGE_CACHE_SIZE);
1437 SetPageUptodate(page);
1438 unlock_page(page);
1439 return 0;
1442 inode = page->mapping->host;
1443 ctx = reiser4_init_context(inode->i_sb);
1444 if (IS_ERR(ctx)) {
1445 unlock_page(page);
1446 return PTR_ERR(ctx);
1449 hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
1450 if (hint == NULL) {
1451 unlock_page(page);
1452 reiser4_exit_context(ctx);
1453 return RETERR(-ENOMEM);
1456 result = load_file_hint(file, hint);
1457 if (result) {
1458 kfree(hint);
1459 unlock_page(page);
1460 reiser4_exit_context(ctx);
1461 return result;
1463 lh = &hint->lh;
1465 /* get key of first byte of the page */
1466 key_by_inode_and_offset_common(inode, page_offset(page), &key);
1468 /* look for file metadata corresponding to first byte of page */
1469 page_cache_get(page);
1470 unlock_page(page);
1471 result = find_file_item(hint, &key, ZNODE_READ_LOCK, inode);
1472 lock_page(page);
1473 page_cache_release(page);
1475 if (page->mapping == NULL) {
1477 * readpage allows truncate to run concurrently. Page was
1478 * truncated while it was not locked
1480 done_lh(lh);
1481 kfree(hint);
1482 unlock_page(page);
1483 reiser4_txn_restart(ctx);
1484 reiser4_exit_context(ctx);
1485 return -EINVAL;
1488 if (result != CBK_COORD_FOUND || hint->ext_coord.coord.between != AT_UNIT) {
1489 if (result == CBK_COORD_FOUND &&
1490 hint->ext_coord.coord.between != AT_UNIT)
1491 /* file is truncated */
1492 result = -EINVAL;
1493 done_lh(lh);
1494 kfree(hint);
1495 unlock_page(page);
1496 reiser4_txn_restart(ctx);
1497 reiser4_exit_context(ctx);
1498 return result;
1502 * item corresponding to page is found. It can not be removed because
1503 * znode lock is held
1505 if (PageUptodate(page)) {
1506 done_lh(lh);
1507 kfree(hint);
1508 unlock_page(page);
1509 reiser4_txn_restart(ctx);
1510 reiser4_exit_context(ctx);
1511 return 0;
1514 coord = &hint->ext_coord.coord;
1515 result = zload(coord->node);
1516 if (result) {
1517 done_lh(lh);
1518 kfree(hint);
1519 unlock_page(page);
1520 reiser4_txn_restart(ctx);
1521 reiser4_exit_context(ctx);
1522 return result;
1525 validate_extended_coord(&hint->ext_coord, page_offset(page));
1527 if (!coord_is_existing_unit(coord)) {
1528 /* this indicates corruption */
1529 warning("vs-280",
1530 "Looking for page %lu of file %llu (size %lli). "
1531 "No file items found (%d). File is corrupted?\n",
1532 page->index, (unsigned long long)get_inode_oid(inode),
1533 inode->i_size, result);
1534 zrelse(coord->node);
1535 done_lh(lh);
1536 kfree(hint);
1537 unlock_page(page);
1538 reiser4_txn_restart(ctx);
1539 reiser4_exit_context(ctx);
1540 return RETERR(-EIO);
1544 * get plugin of found item or use plugin if extent if there are no
1545 * one
1547 iplug = item_plugin_by_coord(coord);
1548 if (iplug->s.file.readpage)
1549 result = iplug->s.file.readpage(coord, page);
1550 else
1551 result = RETERR(-EINVAL);
1553 if (!result) {
1554 set_key_offset(&key,
1555 (loff_t) (page->index + 1) << PAGE_CACHE_SHIFT);
1556 /* FIXME should call reiser4_set_hint() */
1557 reiser4_unset_hint(hint);
1558 } else {
1559 unlock_page(page);
1560 reiser4_unset_hint(hint);
1562 assert("vs-979",
1563 ergo(result == 0, (PageLocked(page) || PageUptodate(page))));
1564 assert("vs-9791", ergo(result != 0, !PageLocked(page)));
1566 zrelse(coord->node);
1567 done_lh(lh);
1569 save_file_hint(file, hint);
1570 kfree(hint);
1573 * FIXME: explain why it is needed. HINT: page allocation in write can
1574 * not be done when atom is not NULL because reiser4_writepage can not
1575 * kick entd and have to eflush
1577 reiser4_txn_restart(ctx);
1578 reiser4_exit_context(ctx);
1579 return result;
1582 struct uf_readpages_context {
1583 lock_handle lh;
1584 coord_t coord;
1587 /* A callback function for readpages_unix_file/read_cache_pages.
1588 * If the file is build of tails, then return error (-ENOENT).
1590 * @data -- a pointer to reiser4_readpages_context object,
1591 * to save the twig lock and the coord between
1592 * read_cache_page iterations.
1593 * @page -- page to start read.
1595 static int uf_readpages_filler(void * data, struct page * page)
1597 struct uf_readpages_context *rc = data;
1598 jnode * node;
1599 int ret = 0;
1600 reiser4_extent *ext;
1601 __u64 ext_index;
1602 int cbk_done = 0;
1603 struct address_space * mapping = page->mapping;
1605 if (PageUptodate(page)) {
1606 unlock_page(page);
1607 return 0;
1609 page_cache_get(page);
1611 if (rc->lh.node == 0) {
1612 /* no twig lock - have to do tree search. */
1613 reiser4_key key;
1614 repeat:
1615 unlock_page(page);
1616 key_by_inode_and_offset_common(
1617 mapping->host, page_offset(page), &key);
1618 ret = coord_by_key(
1619 &get_super_private(mapping->host->i_sb)->tree,
1620 &key, &rc->coord, &rc->lh,
1621 ZNODE_READ_LOCK, FIND_EXACT,
1622 TWIG_LEVEL, TWIG_LEVEL, CBK_UNIQUE, NULL);
1623 if (unlikely(ret))
1624 goto exit;
1625 lock_page(page);
1626 if (PageUptodate(page))
1627 goto unlock;
1628 cbk_done = 1;
1630 ret = zload(rc->coord.node);
1631 if (unlikely(ret))
1632 goto unlock;
1633 if (!coord_is_existing_item(&rc->coord) ||
1634 !item_is_extent(&rc->coord)) {
1635 zrelse(rc->coord.node);
1636 ret = RETERR(-EIO);
1637 goto unlock;
1639 ext = extent_by_coord(&rc->coord);
1640 ext_index = extent_unit_index(&rc->coord);
1641 if (page->index < ext_index ||
1642 page->index >= ext_index + extent_get_width(ext)) {
1643 /* the page index doesn't belong to the extent unit
1644 which the coord points to - release the lock and
1645 repeat with tree search. */
1646 zrelse(rc->coord.node);
1647 done_lh(&rc->lh);
1648 /* we can be here after a CBK call only in case of
1649 corruption of the tree or the tree lookup algorithm bug. */
1650 if (unlikely(cbk_done)) {
1651 ret = RETERR(-EIO);
1652 goto unlock;
1654 goto repeat;
1656 node = jnode_of_page(page);
1657 if (unlikely(IS_ERR(node))) {
1658 zrelse(rc->coord.node);
1659 ret = PTR_ERR(node);
1660 goto unlock;
1662 ret = reiser4_do_readpage_extent(ext, page->index - ext_index, page);
1663 jput(node);
1664 zrelse(rc->coord.node);
1665 if (likely(!ret))
1666 goto exit;
1667 unlock:
1668 unlock_page(page);
1669 exit:
1670 page_cache_release(page);
1671 return ret;
1675 * readpages_unix_file - called by the readahead code, starts reading for each
1676 * page of given list of pages
1678 int readpages_unix_file(
1679 struct file *file, struct address_space *mapping,
1680 struct list_head *pages, unsigned nr_pages)
1682 reiser4_context *ctx;
1683 struct uf_readpages_context rc;
1684 int ret;
1686 ctx = reiser4_init_context(mapping->host->i_sb);
1687 if (IS_ERR(ctx)) {
1688 put_pages_list(pages);
1689 return PTR_ERR(ctx);
1691 init_lh(&rc.lh);
1692 ret = read_cache_pages(mapping, pages, uf_readpages_filler, &rc);
1693 done_lh(&rc.lh);
1694 context_set_commit_async(ctx);
1695 /* close the transaction to protect further page allocation from deadlocks */
1696 reiser4_txn_restart(ctx);
1697 reiser4_exit_context(ctx);
1698 return ret;
1701 static reiser4_block_nr unix_file_estimate_read(struct inode *inode,
1702 loff_t count UNUSED_ARG)
1704 /* We should reserve one block, because of updating of the stat data
1705 item */
1706 assert("vs-1249",
1707 inode_file_plugin(inode)->estimate.update ==
1708 estimate_update_common);
1709 return estimate_update_common(inode);
1712 /* this is called with nonexclusive access obtained, file's container can not change */
1713 static ssize_t read_file(hint_t *hint, struct file *file, /* file to read from to */
1714 char __user *buf, /* address of user-space buffer */
1715 size_t count, /* number of bytes to read */
1716 loff_t *off)
1718 int result;
1719 struct inode *inode;
1720 flow_t flow;
1721 int (*read_f) (struct file *, flow_t *, hint_t *);
1722 coord_t *coord;
1723 znode *loaded;
1725 inode = file->f_dentry->d_inode;
1727 /* build flow */
1728 assert("vs-1250",
1729 inode_file_plugin(inode)->flow_by_inode ==
1730 flow_by_inode_unix_file);
1731 result =
1732 flow_by_inode_unix_file(inode, buf, 1 /* user space */ , count,
1733 *off, READ_OP, &flow);
1734 if (unlikely(result))
1735 return result;
1737 /* get seal and coord sealed with it from reiser4 private data
1738 of struct file. The coord will tell us where our last read
1739 of this file finished, and the seal will help to determine
1740 if that location is still valid.
1742 coord = &hint->ext_coord.coord;
1743 while (flow.length && result == 0) {
1744 result =
1745 find_file_item(hint, &flow.key, ZNODE_READ_LOCK, inode);
1746 if (cbk_errored(result))
1747 /* error happened */
1748 break;
1750 if (coord->between != AT_UNIT) {
1751 /* there were no items corresponding to given offset */
1752 done_lh(hint->ext_coord.lh);
1753 break;
1756 loaded = coord->node;
1757 result = zload(loaded);
1758 if (unlikely(result)) {
1759 done_lh(hint->ext_coord.lh);
1760 break;
1763 if (hint->ext_coord.valid == 0)
1764 validate_extended_coord(&hint->ext_coord,
1765 get_key_offset(&flow.key));
1767 assert("vs-4", hint->ext_coord.valid == 1);
1768 assert("vs-33", hint->ext_coord.lh == &hint->lh);
1769 /* call item's read method */
1770 read_f = item_plugin_by_coord(coord)->s.file.read;
1771 result = read_f(file, &flow, hint);
1772 zrelse(loaded);
1773 done_lh(hint->ext_coord.lh);
1776 return (count - flow.length) ? (count - flow.length) : result;
1779 static ssize_t read_unix_file_container_tails(struct file*, char __user*, size_t, loff_t*);
1782 * read_unix_file - read of struct file_operations
1783 * @file: file to read from
1784 * @buf: address of user-space buffer
1785 * @read_amount: number of bytes to read
1786 * @off: position in file to read from
1788 * This is implementation of vfs's read method of struct file_operations for
1789 * unix file plugin.
1791 ssize_t read_unix_file(struct file *file, char __user *buf, size_t read_amount,
1792 loff_t *off)
1794 reiser4_context *ctx;
1795 ssize_t result;
1796 struct inode *inode;
1797 struct unix_file_info *uf_info;
1799 if (unlikely(read_amount == 0))
1800 return 0;
1802 assert("umka-072", file != NULL);
1803 assert("umka-074", off != NULL);
1804 inode = file->f_dentry->d_inode;
1805 assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
1807 ctx = reiser4_init_context(inode->i_sb);
1808 if (IS_ERR(ctx))
1809 return PTR_ERR(ctx);
1810 uf_info = unix_file_inode_data(inode);
1811 if (uf_info->container == UF_CONTAINER_UNKNOWN) {
1812 get_exclusive_access(uf_info);
1813 result = find_file_state(inode, uf_info);
1814 if (unlikely(result != 0))
1815 goto out;
1816 } else
1817 get_nonexclusive_access(uf_info);
1818 result = reiser4_grab_space_force(unix_file_estimate_read(inode, read_amount),
1819 BA_CAN_COMMIT);
1820 if (unlikely(result != 0))
1821 goto out;
1822 if (uf_info->container == UF_CONTAINER_EXTENTS){
1823 result = do_sync_read(file, buf, read_amount, off);
1824 } else if (uf_info->container == UF_CONTAINER_TAILS ||
1825 reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV) ||
1826 reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
1827 result = read_unix_file_container_tails(file, buf, read_amount, off);
1828 } else {
1829 assert("zam-1085", uf_info->container == UF_CONTAINER_EMPTY);
1830 result = 0;
1832 out:
1833 drop_access(uf_info);
1834 context_set_commit_async(ctx);
1835 reiser4_exit_context(ctx);
1836 return result;
1839 static ssize_t read_unix_file_container_tails(
1840 struct file *file, char __user *buf, size_t read_amount, loff_t *off)
1842 int result;
1843 struct inode *inode;
1844 hint_t *hint;
1845 struct unix_file_info *uf_info;
1846 size_t count, read, left;
1847 loff_t size;
1849 assert("umka-072", file != NULL);
1850 assert("umka-074", off != NULL);
1851 inode = file->f_dentry->d_inode;
1852 assert("vs-972", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
1854 hint = kmalloc(sizeof(*hint), reiser4_ctx_gfp_mask_get());
1855 if (hint == NULL)
1856 return RETERR(-ENOMEM);
1858 result = load_file_hint(file, hint);
1859 if (result) {
1860 kfree(hint);
1861 return result;
1864 left = read_amount;
1865 count = 0;
1866 uf_info = unix_file_inode_data(inode);
1867 while (left > 0) {
1868 reiser4_txn_restart_current();
1869 size = i_size_read(inode);
1870 if (*off >= size)
1871 /* position to read from is past the end of file */
1872 break;
1873 if (*off + left > size)
1874 left = size - *off;
1875 /* faultin user page */
1876 result = fault_in_pages_writeable(buf, left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left);
1877 if (result)
1878 return RETERR(-EFAULT);
1880 read = read_file(hint, file, buf,
1881 left > PAGE_CACHE_SIZE ? PAGE_CACHE_SIZE : left,
1882 off);
1883 if (read < 0) {
1884 result = read;
1885 break;
1887 left -= read;
1888 buf += read;
1890 /* update position in a file */
1891 *off += read;
1892 /* total number of read bytes */
1893 count += read;
1895 done_lh(&hint->lh);
1896 save_file_hint(file, hint);
1897 kfree(hint);
1898 if (count)
1899 file_accessed(file);
1900 /* return number of read bytes or error code if nothing is read */
1901 return count ? count : result;
1904 /* This function takes care about @file's pages. First of all it checks if
1905 filesystems readonly and if so gets out. Otherwise, it throws out all
1906 pages of file if it was mapped for read and going to be mapped for write
1907 and consists of tails. This is done in order to not manage few copies
1908 of the data (first in page cache and second one in tails them selves)
1909 for the case of mapping files consisting tails.
1911 Here also tail2extent conversion is performed if it is allowed and file
1912 is going to be written or mapped for write. This functions may be called
1913 from write_unix_file() or mmap_unix_file(). */
1914 static int check_pages_unix_file(struct file *file, struct inode *inode)
1916 reiser4_invalidate_pages(inode->i_mapping, 0,
1917 (inode->i_size + PAGE_CACHE_SIZE -
1918 1) >> PAGE_CACHE_SHIFT, 0);
1919 return unpack(file, inode, 0 /* not forever */ );
1923 * mmap_unix_file - mmap of struct file_operations
1924 * @file: file to mmap
1925 * @vma:
1927 * This is implementation of vfs's mmap method of struct file_operations for
1928 * unix file plugin. It converts file to extent if necessary. Sets
1929 * reiser4_inode's flag - REISER4_HAS_MMAP.
1931 int mmap_unix_file(struct file *file, struct vm_area_struct *vma)
1933 reiser4_context *ctx;
1934 int result;
1935 struct inode *inode;
1936 struct unix_file_info *uf_info;
1937 reiser4_block_nr needed;
1939 inode = file->f_dentry->d_inode;
1940 ctx = reiser4_init_context(inode->i_sb);
1941 if (IS_ERR(ctx))
1942 return PTR_ERR(ctx);
1944 uf_info = unix_file_inode_data(inode);
1946 get_exclusive_access_careful(uf_info, inode);
1948 if (!IS_RDONLY(inode) && (vma->vm_flags & (VM_MAYWRITE | VM_SHARED))) {
1950 * we need file built of extent items. If it is still built of
1951 * tail items we have to convert it. Find what items the file
1952 * is built of
1954 result = find_file_state(inode, uf_info);
1955 if (result != 0) {
1956 drop_exclusive_access(uf_info);
1957 reiser4_exit_context(ctx);
1958 return result;
1961 assert("vs-1648", (uf_info->container == UF_CONTAINER_TAILS ||
1962 uf_info->container == UF_CONTAINER_EXTENTS ||
1963 uf_info->container == UF_CONTAINER_EMPTY));
1964 if (uf_info->container == UF_CONTAINER_TAILS) {
1966 * invalidate all pages and convert file from tails to
1967 * extents
1969 result = check_pages_unix_file(file, inode);
1970 if (result) {
1971 drop_exclusive_access(uf_info);
1972 reiser4_exit_context(ctx);
1973 return result;
1979 * generic_file_mmap will do update_atime. Grab space for stat data
1980 * update.
1982 needed = inode_file_plugin(inode)->estimate.update(inode);
1983 result = reiser4_grab_space_force(needed, BA_CAN_COMMIT);
1984 if (result) {
1985 drop_exclusive_access(uf_info);
1986 reiser4_exit_context(ctx);
1987 return result;
1990 result = generic_file_mmap(file, vma);
1991 if (result == 0) {
1992 /* mark file as having mapping. */
1993 reiser4_inode_set_flag(inode, REISER4_HAS_MMAP);
1996 drop_exclusive_access(uf_info);
1997 reiser4_exit_context(ctx);
1998 return result;
2002 * find_first_item
2003 * @inode:
2005 * Finds file item which is responsible for first byte in the file.
2007 static int find_first_item(struct inode *inode)
2009 coord_t coord;
2010 lock_handle lh;
2011 reiser4_key key;
2012 int result;
2014 coord_init_zero(&coord);
2015 init_lh(&lh);
2016 inode_file_plugin(inode)->key_by_inode(inode, 0, &key);
2017 result = find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK,
2018 inode);
2019 if (result == CBK_COORD_FOUND) {
2020 if (coord.between == AT_UNIT) {
2021 result = zload(coord.node);
2022 if (result == 0) {
2023 result = item_id_by_coord(&coord);
2024 zrelse(coord.node);
2025 if (result != EXTENT_POINTER_ID &&
2026 result != FORMATTING_ID)
2027 result = RETERR(-EIO);
2029 } else
2030 result = RETERR(-EIO);
2032 done_lh(&lh);
2033 return result;
2037 * open_unix_file
2038 * @inode:
2039 * @file:
2041 * If filesystem is not readonly - complete uncompleted tail conversion if
2042 * there was one
2044 int open_unix_file(struct inode *inode, struct file *file)
2046 int result;
2047 reiser4_context *ctx;
2048 struct unix_file_info *uf_info;
2050 if (IS_RDONLY(inode))
2051 return 0;
2053 if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED))
2054 return 0;
2056 ctx = reiser4_init_context(inode->i_sb);
2057 if (IS_ERR(ctx))
2058 return PTR_ERR(ctx);
2060 uf_info = unix_file_inode_data(inode);
2062 get_exclusive_access_careful(uf_info, inode);
2064 if (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)) {
2066 * other process completed the conversion
2068 drop_exclusive_access(uf_info);
2069 reiser4_exit_context(ctx);
2070 return 0;
2074 * file left in semi converted state after unclean shutdown or another
2075 * thread is doing conversion and dropped exclusive access which doing
2076 * balance dirty pages. Complete the conversion
2078 result = find_first_item(inode);
2079 if (result == EXTENT_POINTER_ID)
2081 * first item is extent, therefore there was incomplete
2082 * tail2extent conversion. Complete it
2084 result = tail2extent(unix_file_inode_data(inode));
2085 else if (result == FORMATTING_ID)
2087 * first item is formatting item, therefore there was
2088 * incomplete extent2tail conversion. Complete it
2090 result = extent2tail(file, unix_file_inode_data(inode));
2091 else
2092 result = -EIO;
2094 assert("vs-1712",
2095 ergo(result == 0,
2096 (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED) &&
2097 !reiser4_inode_get_flag(inode, REISER4_PART_IN_CONV))));
2098 drop_exclusive_access(uf_info);
2099 reiser4_exit_context(ctx);
2100 return result;
2103 #define NEITHER_OBTAINED 0
2104 #define EA_OBTAINED 1
2105 #define NEA_OBTAINED 2
2107 static void drop_access(struct unix_file_info *uf_info)
2109 if (uf_info->exclusive_use)
2110 drop_exclusive_access(uf_info);
2111 else
2112 drop_nonexclusive_access(uf_info);
2115 #define debug_wuf(format, ...) printk("%s: %d: %s: " format "\n", \
2116 __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
2119 * write_unix_file - write of struct file_operations
2120 * @file: file to write to
2121 * @buf: address of user-space buffer
2122 * @write_amount: number of bytes to write
2123 * @off: position in file to write to
2125 * This is implementation of vfs's write method of struct file_operations for
2126 * unix file plugin.
2128 ssize_t write_unix_file(struct file *file, const char __user *buf,
2129 size_t count, loff_t *pos)
2131 int result;
2132 reiser4_context *ctx;
2133 struct inode *inode;
2134 struct unix_file_info *uf_info;
2135 ssize_t written;
2136 int try_free_space;
2137 int to_write = PAGE_CACHE_SIZE * WRITE_GRANULARITY;
2138 size_t left;
2139 ssize_t (*write_op)(struct file *, const char __user *, size_t,
2140 loff_t *pos);
2141 int ea;
2142 loff_t new_size;
2144 inode = file->f_dentry->d_inode;
2145 ctx = reiser4_init_context(inode->i_sb);
2146 if (IS_ERR(ctx))
2147 return PTR_ERR(ctx);
2149 mutex_lock(&inode->i_mutex);
2151 assert("vs-947", !reiser4_inode_get_flag(inode, REISER4_NO_SD));
2152 assert("vs-9471", (!reiser4_inode_get_flag(inode, REISER4_PART_MIXED)));
2154 /* check amount of bytes to write and writing position */
2155 result = generic_write_checks(file, pos, &count, 0);
2156 if (result) {
2157 mutex_unlock(&inode->i_mutex);
2158 context_set_commit_async(ctx);
2159 reiser4_exit_context(ctx);
2160 return result;
2163 result = remove_suid(file->f_dentry);
2164 if (result) {
2165 mutex_unlock(&inode->i_mutex);
2166 context_set_commit_async(ctx);
2167 reiser4_exit_context(ctx);
2168 return result;
2170 /* remove_suid might create a transaction */
2171 reiser4_txn_restart(ctx);
2173 uf_info = unix_file_inode_data(inode);
2175 current->backing_dev_info = inode->i_mapping->backing_dev_info;
2176 written = 0;
2177 try_free_space = 0;
2178 left = count;
2179 ea = NEITHER_OBTAINED;
2181 new_size = i_size_read(inode);
2182 if (*pos + count > new_size)
2183 new_size = *pos + count;
2185 while (left) {
2186 if (left < to_write)
2187 to_write = left;
2189 if (uf_info->container == UF_CONTAINER_EMPTY) {
2190 get_exclusive_access(uf_info);
2191 ea = EA_OBTAINED;
2192 if (uf_info->container != UF_CONTAINER_EMPTY) {
2193 /* file is made not empty by another process */
2194 drop_exclusive_access(uf_info);
2195 ea = NEITHER_OBTAINED;
2196 continue;
2198 } else if (uf_info->container == UF_CONTAINER_UNKNOWN) {
2200 * get exclusive access directly just to not have to
2201 * re-obtain it if file will appear empty
2203 get_exclusive_access(uf_info);
2204 ea = EA_OBTAINED;
2205 result = find_file_state(inode, uf_info);
2206 if (result) {
2207 drop_exclusive_access(uf_info);
2208 ea = NEITHER_OBTAINED;
2209 break;
2211 } else {
2212 get_nonexclusive_access(uf_info);
2213 ea = NEA_OBTAINED;
2216 /* either EA or NEA is obtained. Choose item write method */
2217 if (uf_info->container == UF_CONTAINER_EXTENTS) {
2218 /* file is built of extent items */
2219 write_op = reiser4_write_extent;
2220 } else if (uf_info->container == UF_CONTAINER_EMPTY) {
2221 /* file is empty */
2222 if (should_have_notail(uf_info, new_size))
2223 write_op = reiser4_write_extent;
2224 else
2225 write_op = reiser4_write_tail;
2226 } else {
2227 /* file is built of tail items */
2228 if (should_have_notail(uf_info, new_size)) {
2229 if (ea == NEA_OBTAINED) {
2230 drop_nonexclusive_access(uf_info);
2231 get_exclusive_access(uf_info);
2232 ea = EA_OBTAINED;
2234 if (uf_info->container == UF_CONTAINER_TAILS) {
2236 * if file is being convered by another
2237 * process - wait until it completes
2239 while (1) {
2240 if (reiser4_inode_get_flag(inode,
2241 REISER4_PART_IN_CONV)) {
2242 drop_exclusive_access(uf_info);
2243 schedule();
2244 get_exclusive_access(uf_info);
2245 continue;
2247 break;
2249 if (uf_info->container == UF_CONTAINER_TAILS) {
2250 result = tail2extent(uf_info);
2251 if (result)
2252 break;
2255 drop_exclusive_access(uf_info);
2256 ea = NEITHER_OBTAINED;
2257 continue;
2259 write_op = reiser4_write_tail;
2262 written = write_op(file, buf, to_write, pos);
2263 if (written == -ENOSPC && try_free_space) {
2264 drop_access(uf_info);
2265 txnmgr_force_commit_all(inode->i_sb, 0);
2266 try_free_space = 0;
2267 continue;
2269 if (written < 0) {
2270 drop_access(uf_info);
2271 result = written;
2272 break;
2274 /* something is written. */
2275 if (uf_info->container == UF_CONTAINER_EMPTY) {
2276 assert("", ea == EA_OBTAINED);
2277 uf_info->container =
2278 (write_op == reiser4_write_extent) ?
2279 UF_CONTAINER_EXTENTS : UF_CONTAINER_TAILS;
2280 } else {
2281 assert("", ergo(uf_info->container == UF_CONTAINER_EXTENTS,
2282 write_op == reiser4_write_extent));
2283 assert("", ergo(uf_info->container == UF_CONTAINER_TAILS,
2284 write_op == reiser4_write_tail));
2286 if (*pos + written > inode->i_size)
2287 INODE_SET_FIELD(inode, i_size, *pos + written);
2288 file_update_time(file);
2289 result = reiser4_update_sd(inode);
2290 if (result) {
2291 mutex_unlock(&inode->i_mutex);
2292 current->backing_dev_info = NULL;
2293 drop_access(uf_info);
2294 context_set_commit_async(ctx);
2295 reiser4_exit_context(ctx);
2296 return result;
2298 drop_access(uf_info);
2299 ea = NEITHER_OBTAINED;
2300 reiser4_txn_restart(ctx);
2301 current->journal_info = NULL;
2303 * tell VM how many pages were dirtied. Maybe number of pages
2304 * which were dirty already should not be counted
2306 balance_dirty_pages_ratelimited_nr(inode->i_mapping,
2307 (written + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE);
2308 current->journal_info = ctx;
2310 left -= written;
2311 buf += written;
2312 *pos += written;
2315 mutex_unlock(&inode->i_mutex);
2317 if (result == 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2318 reiser4_txn_restart_current();
2319 grab_space_enable();
2320 result = sync_unix_file(file, file->f_dentry,
2321 0 /* data and stat data */ );
2322 if (result)
2323 warning("reiser4-7", "failed to sync file %llu",
2324 (unsigned long long)get_inode_oid(inode));
2327 current->backing_dev_info = NULL;
2329 reiser4_exit_context(ctx);
2332 * return number of written bytes or error code if nothing is
2333 * written. Note, that it does not work correctly in case when
2334 * sync_unix_file returns error
2336 return (count - left) ? (count - left) : result;
2340 * release_unix_file - release of struct file_operations
2341 * @inode: inode of released file
2342 * @file: file to release
2344 * Implementation of release method of struct file_operations for unix file
2345 * plugin. If last reference to indode is released - convert all extent items
2346 * into tail items if necessary. Frees reiser4 specific file data.
2348 int release_unix_file(struct inode *inode, struct file *file)
2350 reiser4_context *ctx;
2351 struct unix_file_info *uf_info;
2352 int result;
2353 int in_reiser4;
2355 in_reiser4 = is_in_reiser4_context();
2357 ctx = reiser4_init_context(inode->i_sb);
2358 if (IS_ERR(ctx))
2359 return PTR_ERR(ctx);
2361 result = 0;
2362 if (in_reiser4 == 0) {
2363 uf_info = unix_file_inode_data(inode);
2365 get_exclusive_access_careful(uf_info, inode);
2366 if (atomic_read(&file->f_dentry->d_count) == 1 &&
2367 uf_info->container == UF_CONTAINER_EXTENTS &&
2368 !should_have_notail(uf_info, inode->i_size) &&
2369 !rofs_inode(inode)) {
2370 result = extent2tail(file, uf_info);
2371 if (result != 0) {
2372 warning("nikita-3233",
2373 "Failed (%d) to convert in %s (%llu)",
2374 result, __FUNCTION__,
2375 (unsigned long long)
2376 get_inode_oid(inode));
2379 drop_exclusive_access(uf_info);
2380 } else {
2382 we are within reiser4 context already. How latter is
2383 possible? Simple:
2385 (gdb) bt
2386 #0 get_exclusive_access ()
2387 #2 0xc01e56d3 in release_unix_file ()
2388 #3 0xc01c3643 in reiser4_release ()
2389 #4 0xc014cae0 in __fput ()
2390 #5 0xc013ffc3 in remove_vm_struct ()
2391 #6 0xc0141786 in exit_mmap ()
2392 #7 0xc0118480 in mmput ()
2393 #8 0xc0133205 in oom_kill ()
2394 #9 0xc01332d1 in out_of_memory ()
2395 #10 0xc013bc1d in try_to_free_pages ()
2396 #11 0xc013427b in __alloc_pages ()
2397 #12 0xc013f058 in do_anonymous_page ()
2398 #13 0xc013f19d in do_no_page ()
2399 #14 0xc013f60e in handle_mm_fault ()
2400 #15 0xc01131e5 in do_page_fault ()
2401 #16 0xc0104935 in error_code ()
2402 #17 0xc025c0c6 in __copy_to_user_ll ()
2403 #18 0xc01d496f in reiser4_read_tail ()
2404 #19 0xc01e4def in read_unix_file ()
2405 #20 0xc01c3504 in reiser4_read ()
2406 #21 0xc014bd4f in vfs_read ()
2407 #22 0xc014bf66 in sys_read ()
2409 warning("vs-44", "out of memory?");
2412 reiser4_free_file_fsdata(file);
2414 reiser4_exit_context(ctx);
2415 return result;
2418 static void set_file_notail(struct inode *inode)
2420 reiser4_inode *state;
2421 formatting_plugin *tplug;
2423 state = reiser4_inode_data(inode);
2424 tplug = formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID);
2425 force_plugin_pset(inode, PSET_FORMATTING, (reiser4_plugin *)tplug);
2428 /* if file is built of tails - convert it to extents */
2429 static int unpack(struct file *filp, struct inode *inode, int forever)
2431 int result = 0;
2432 struct unix_file_info *uf_info;
2434 uf_info = unix_file_inode_data(inode);
2435 assert("vs-1628", ea_obtained(uf_info));
2437 result = find_file_state(inode, uf_info);
2438 if (result)
2439 return result;
2440 assert("vs-1074", uf_info->container != UF_CONTAINER_UNKNOWN);
2442 if (uf_info->container == UF_CONTAINER_TAILS) {
2444 * if file is being convered by another process - wait until it
2445 * completes
2447 while (1) {
2448 if (reiser4_inode_get_flag(inode,
2449 REISER4_PART_IN_CONV)) {
2450 drop_exclusive_access(uf_info);
2451 schedule();
2452 get_exclusive_access(uf_info);
2453 continue;
2455 break;
2457 if (uf_info->container == UF_CONTAINER_TAILS) {
2458 result = tail2extent(uf_info);
2459 if (result)
2460 return result;
2463 if (forever) {
2464 /* safe new formatting plugin in stat data */
2465 __u64 tograb;
2467 set_file_notail(inode);
2469 grab_space_enable();
2470 tograb = inode_file_plugin(inode)->estimate.update(inode);
2471 result = reiser4_grab_space(tograb, BA_CAN_COMMIT);
2472 result = reiser4_update_sd(inode);
2475 return result;
2478 /* implentation of vfs' ioctl method of struct file_operations for unix file
2479 plugin
2482 ioctl_unix_file(struct inode *inode, struct file *filp,
2483 unsigned int cmd, unsigned long arg UNUSED_ARG)
2485 reiser4_context *ctx;
2486 int result;
2488 ctx = reiser4_init_context(inode->i_sb);
2489 if (IS_ERR(ctx))
2490 return PTR_ERR(ctx);
2492 switch (cmd) {
2493 case REISER4_IOC_UNPACK:
2494 get_exclusive_access(unix_file_inode_data(inode));
2495 result = unpack(filp, inode, 1 /* forever */ );
2496 drop_exclusive_access(unix_file_inode_data(inode));
2497 break;
2499 default:
2500 result = RETERR(-ENOSYS);
2501 break;
2503 reiser4_exit_context(ctx);
2504 return result;
2507 /* implentation of vfs' bmap method of struct address_space_operations for unix
2508 file plugin
2510 sector_t bmap_unix_file(struct address_space * mapping, sector_t lblock)
2512 reiser4_context *ctx;
2513 sector_t result;
2514 reiser4_key key;
2515 coord_t coord;
2516 lock_handle lh;
2517 struct inode *inode;
2518 item_plugin *iplug;
2519 sector_t block;
2521 inode = mapping->host;
2523 ctx = reiser4_init_context(inode->i_sb);
2524 if (IS_ERR(ctx))
2525 return PTR_ERR(ctx);
2526 key_by_inode_and_offset_common(inode,
2527 (loff_t) lblock * current_blocksize,
2528 &key);
2530 init_lh(&lh);
2531 result =
2532 find_file_item_nohint(&coord, &lh, &key, ZNODE_READ_LOCK, inode);
2533 if (cbk_errored(result)) {
2534 done_lh(&lh);
2535 reiser4_exit_context(ctx);
2536 return result;
2539 result = zload(coord.node);
2540 if (result) {
2541 done_lh(&lh);
2542 reiser4_exit_context(ctx);
2543 return result;
2546 iplug = item_plugin_by_coord(&coord);
2547 if (iplug->s.file.get_block) {
2548 result = iplug->s.file.get_block(&coord, lblock, &block);
2549 if (result == 0)
2550 result = block;
2551 } else
2552 result = RETERR(-EINVAL);
2554 zrelse(coord.node);
2555 done_lh(&lh);
2556 reiser4_exit_context(ctx);
2557 return result;
2561 * flow_by_inode_unix_file - initizlize structure flow
2562 * @inode: inode of file for which read or write is abou
2563 * @buf: buffer to perform read to or write from
2564 * @user: flag showing whether @buf is user space or kernel space
2565 * @size: size of buffer @buf
2566 * @off: start offset fro read or write
2567 * @op: READ or WRITE
2568 * @flow:
2570 * Initializes fields of @flow: key, size of data, i/o mode (read or write).
2572 int flow_by_inode_unix_file(struct inode *inode,
2573 const char __user *buf, int user,
2574 loff_t size, loff_t off,
2575 rw_op op, flow_t *flow)
2577 assert("nikita-1100", inode != NULL);
2579 flow->length = size;
2580 memcpy(&flow->data, &buf, sizeof(buf));
2581 flow->user = user;
2582 flow->op = op;
2583 assert("nikita-1931", inode_file_plugin(inode) != NULL);
2584 assert("nikita-1932",
2585 inode_file_plugin(inode)->key_by_inode ==
2586 key_by_inode_and_offset_common);
2587 /* calculate key of write position and insert it into flow->key */
2588 return key_by_inode_and_offset_common(inode, off, &flow->key);
/* plugin->u.file.set_plug_in_sd = NULL
   plugin->u.file.set_plug_in_inode = NULL
   plugin->u.file.create_blank_sd = NULL */
/* plugin->u.file.delete */
/*
   plugin->u.file.add_link = reiser4_add_link_common
   plugin->u.file.rem_link = NULL */
2599 /* plugin->u.file.owns_item
2600 this is common_file_owns_item with assertion */
2601 /* Audited by: green(2002.06.15) */
2603 owns_item_unix_file(const struct inode *inode /* object to check against */ ,
2604 const coord_t * coord /* coord to check */ )
2606 int result;
2608 result = owns_item_common(inode, coord);
2609 if (!result)
2610 return 0;
2611 if (!plugin_of_group(item_plugin_by_coord(coord),
2612 UNIX_FILE_METADATA_ITEM_TYPE))
2613 return 0;
2614 assert("vs-547",
2615 item_id_by_coord(coord) == EXTENT_POINTER_ID ||
2616 item_id_by_coord(coord) == FORMATTING_ID);
2617 return 1;
2620 static int setattr_truncate(struct inode *inode, struct iattr *attr)
2622 int result;
2623 int s_result;
2624 loff_t old_size;
2625 reiser4_tree *tree;
2627 inode_check_scale(inode, inode->i_size, attr->ia_size);
2629 old_size = inode->i_size;
2630 tree = reiser4_tree_by_inode(inode);
2632 result = safe_link_grab(tree, BA_CAN_COMMIT);
2633 if (result == 0)
2634 result = safe_link_add(inode, SAFE_TRUNCATE);
2635 if (result == 0)
2636 result = truncate_file_body(inode, attr);
2637 if (result)
2638 warning("vs-1588", "truncate_file failed: oid %lli, "
2639 "old size %lld, new size %lld, retval %d",
2640 (unsigned long long)get_inode_oid(inode),
2641 old_size, attr->ia_size, result);
2643 s_result = safe_link_grab(tree, BA_CAN_COMMIT);
2644 if (s_result == 0)
2645 s_result =
2646 safe_link_del(tree, get_inode_oid(inode), SAFE_TRUNCATE);
2647 if (s_result != 0) {
2648 warning("nikita-3417", "Cannot kill safelink %lli: %i",
2649 (unsigned long long)get_inode_oid(inode), s_result);
2651 safe_link_release(tree);
2652 return result;
2655 /* plugin->u.file.setattr method */
2656 /* This calls inode_setattr and if truncate is in effect it also takes
2657 exclusive inode access to avoid races */
2658 int setattr_unix_file(struct dentry *dentry, /* Object to change attributes */
2659 struct iattr *attr /* change description */ )
2661 int result;
2663 if (attr->ia_valid & ATTR_SIZE) {
2664 reiser4_context *ctx;
2665 struct unix_file_info *uf_info;
2667 /* truncate does reservation itself and requires exclusive
2668 access obtained */
2669 ctx = reiser4_init_context(dentry->d_inode->i_sb);
2670 if (IS_ERR(ctx))
2671 return PTR_ERR(ctx);
2673 uf_info = unix_file_inode_data(dentry->d_inode);
2674 get_exclusive_access_careful(uf_info, dentry->d_inode);
2675 result = setattr_truncate(dentry->d_inode, attr);
2676 drop_exclusive_access(uf_info);
2677 context_set_commit_async(ctx);
2678 reiser4_exit_context(ctx);
2679 } else
2680 result = reiser4_setattr_common(dentry, attr);
2682 return result;
2685 /* plugin->u.file.init_inode_data */
2686 void
2687 init_inode_data_unix_file(struct inode *inode,
2688 reiser4_object_create_data * crd, int create)
2690 struct unix_file_info *data;
2692 data = unix_file_inode_data(inode);
2693 data->container = create ? UF_CONTAINER_EMPTY : UF_CONTAINER_UNKNOWN;
2694 init_rwsem(&data->latch);
2695 data->tplug = inode_formatting_plugin(inode);
2696 data->exclusive_use = 0;
2698 #if REISER4_DEBUG
2699 data->ea_owner = NULL;
2700 atomic_set(&data->nr_neas, 0);
2701 #endif
2702 init_inode_ordering(inode, crd, create);
2706 * delete_object_unix_file - delete_object of file_plugin
2707 * @inode: inode to be deleted
2709 * Truncates file to length 0, removes stat data and safe link.
2711 int delete_object_unix_file(struct inode *inode)
2713 struct unix_file_info *uf_info;
2714 int result;
2716 if (reiser4_inode_get_flag(inode, REISER4_NO_SD))
2717 return 0;
2719 /* truncate file bogy first */
2720 uf_info = unix_file_inode_data(inode);
2721 get_exclusive_access(uf_info);
2722 result = shorten_file(inode, 0 /* size */ );
2723 drop_exclusive_access(uf_info);
2725 if (result)
2726 warning("", "failed to truncate file (%llu) on removal: %d",
2727 get_inode_oid(inode), result);
2729 /* remove stat data and safe link */
2730 return reiser4_delete_object_common(inode);
2734 prepare_write_unix_file(struct file *file, struct page *page,
2735 unsigned from, unsigned to)
2737 reiser4_context *ctx;
2738 struct unix_file_info *uf_info;
2739 int ret;
2741 ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb);
2742 if (IS_ERR(ctx))
2743 return PTR_ERR(ctx);
2745 uf_info = unix_file_inode_data(file->f_dentry->d_inode);
2746 get_exclusive_access(uf_info);
2747 ret = find_file_state(file->f_dentry->d_inode, uf_info);
2748 if (ret == 0) {
2749 if (uf_info->container == UF_CONTAINER_TAILS)
2750 ret = -EINVAL;
2751 else
2752 ret = do_prepare_write(file, page, from, to);
2754 drop_exclusive_access(uf_info);
2756 /* don't commit transaction under inode semaphore */
2757 context_set_commit_async(ctx);
2758 reiser4_exit_context(ctx);
2759 return ret;
2763 * Local variables:
2764 * c-indentation-style: "K&R"
2765 * mode-name: "LC"
2766 * c-basic-offset: 8
2767 * tab-width: 8
2768 * fill-column: 79
2769 * scroll-step: 1
2770 * End: