1 /* Copyright 2001, 2002, 2003, 2004 by Hans Reiser, licensing governed by
5 * this file contains implementations of inode/file/address_space/file plugin
6 * operations specific for "unix file plugin" (plugin id is
7 * UNIX_FILE_PLUGIN_ID). "Unix file" is either built of tail items only
8 * (FORMATTING_ID) or of extent items only (EXTENT_POINTER_ID) or empty (have
9 * no items but stat data)
12 #include "../../inode.h"
13 #include "../../super.h"
14 #include "../../tree_walk.h"
15 #include "../../carry.h"
16 #include "../../page_cache.h"
17 #include "../../ioctl.h"
18 #include "../object.h"
19 #include "../cluster.h"
20 #include "../../safe_link.h"
22 #include <linux/writeback.h>
23 #include <linux/pagevec.h>
24 #include <linux/syscalls.h>
27 static int unpack(struct file
*file
, struct inode
*inode
, int forever
);
28 static void drop_access(struct unix_file_info
*);
29 static int hint_validate(hint_t
* hint
, const reiser4_key
* key
, int check_key
,
30 znode_lock_mode lock_mode
);
32 /* Get exclusive access and make sure that file is not partially
33 * converted (It may happen that another process is doing tail
34 * conversion. If so, wait until it completes)
36 static inline void get_exclusive_access_careful(struct unix_file_info
* uf_info
,
40 get_exclusive_access(uf_info
);
41 if (!reiser4_inode_get_flag(inode
, REISER4_PART_IN_CONV
))
43 drop_exclusive_access(uf_info
);
48 /* get unix file plugin specific portion of inode */
49 struct unix_file_info
*unix_file_inode_data(const struct inode
*inode
)
51 return &reiser4_inode_data(inode
)->file_plugin_data
.unix_file_info
;
55 * equal_to_rdk - compare key and znode's right delimiting key
56 * @node: node whose right delimiting key to compare with @key
57 * @key: key to compare with @node's right delimiting key
59 * Returns true if @key is equal to right delimiting key of @node.
61 int equal_to_rdk(znode
*node
, const reiser4_key
*key
)
65 read_lock_dk(znode_get_tree(node
));
66 result
= keyeq(key
, znode_get_rd_key(node
));
67 read_unlock_dk(znode_get_tree(node
));
74 * equal_to_ldk - compare key and znode's left delimiting key
75 * @node: node whose left delimiting key to compare with @key
76 * @key: key to compare with @node's left delimiting key
78 * Returns true if @key is equal to left delimiting key of @node.
80 int equal_to_ldk(znode
*node
, const reiser4_key
*key
)
84 read_lock_dk(znode_get_tree(node
));
85 result
= keyeq(key
, znode_get_ld_key(node
));
86 read_unlock_dk(znode_get_tree(node
));
91 * check_coord - check whether coord corresponds to key
92 * @coord: coord to check
93 * @key: key @coord has to correspond to
95 * Returns true if @coord is set as if it was set as result of lookup with @key
98 static int check_coord(const coord_t
*coord
, const reiser4_key
*key
)
102 node_plugin_by_node(coord
->node
)->lookup(coord
->node
, key
,
103 FIND_MAX_NOT_MORE_THAN
, &twin
);
104 return coords_equal(coord
, &twin
);
107 #endif /* REISER4_DEBUG */
110 * init_uf_coord - initialize extended coord
116 void init_uf_coord(uf_coord_t
*uf_coord
, lock_handle
*lh
)
118 coord_init_zero(&uf_coord
->coord
);
119 coord_clear_iplug(&uf_coord
->coord
);
122 memset(&uf_coord
->extension
, 0, sizeof(uf_coord
->extension
));
126 static void validate_extended_coord(uf_coord_t
*uf_coord
, loff_t offset
)
128 assert("vs-1333", uf_coord
->valid
== 0);
130 if (coord_is_between_items(&uf_coord
->coord
))
134 item_plugin_by_coord(&uf_coord
->coord
)->s
.file
.
135 init_coord_extension
);
137 item_body_by_coord(&uf_coord
->coord
);
138 item_plugin_by_coord(&uf_coord
->coord
)->s
.file
.
139 init_coord_extension(uf_coord
, offset
);
143 * goto_right_neighbor - lock right neighbor, drop current node lock
147 * Obtain lock on right neighbor and drop lock on current node.
149 int goto_right_neighbor(coord_t
*coord
, lock_handle
*lh
)
152 lock_handle lh_right
;
154 assert("vs-1100", znode_is_locked(coord
->node
));
157 result
= reiser4_get_right_neighbor(&lh_right
, coord
->node
,
158 znode_is_wlocked(coord
->node
) ?
159 ZNODE_WRITE_LOCK
: ZNODE_READ_LOCK
,
160 GN_CAN_USE_UPPER_LEVELS
);
167 * we hold two longterm locks on neighboring nodes. Unlock left of
172 coord_init_first_unit_nocheck(coord
, lh_right
.node
);
173 move_lh(lh
, &lh_right
);
185 * This is to be used by find_file_item and in find_file_state to
186 * determine real state of file
188 static void set_file_state(struct unix_file_info
*uf_info
, int cbk_result
,
191 if (cbk_errored(cbk_result
))
192 /* error happened in find_file_item */
195 assert("vs-1164", level
== LEAF_LEVEL
|| level
== TWIG_LEVEL
);
197 if (uf_info
->container
== UF_CONTAINER_UNKNOWN
) {
198 if (cbk_result
== CBK_COORD_NOTFOUND
)
199 uf_info
->container
= UF_CONTAINER_EMPTY
;
200 else if (level
== LEAF_LEVEL
)
201 uf_info
->container
= UF_CONTAINER_TAILS
;
203 uf_info
->container
= UF_CONTAINER_EXTENTS
;
206 * file state is known, check whether it is set correctly if
207 * file is not being tail converted
209 if (!reiser4_inode_get_flag(unix_file_info_to_inode(uf_info
),
210 REISER4_PART_IN_CONV
)) {
212 ergo(level
== LEAF_LEVEL
&&
213 cbk_result
== CBK_COORD_FOUND
,
214 uf_info
->container
== UF_CONTAINER_TAILS
));
216 ergo(level
== TWIG_LEVEL
&&
217 cbk_result
== CBK_COORD_FOUND
,
218 uf_info
->container
== UF_CONTAINER_EXTENTS
));
223 int find_file_item_nohint(coord_t
*coord
, lock_handle
*lh
,
224 const reiser4_key
*key
, znode_lock_mode lock_mode
,
227 return reiser4_object_lookup(inode
, key
, coord
, lh
, lock_mode
,
228 FIND_MAX_NOT_MORE_THAN
,
229 TWIG_LEVEL
, LEAF_LEVEL
,
230 (lock_mode
== ZNODE_READ_LOCK
) ? CBK_UNIQUE
:
231 (CBK_UNIQUE
| CBK_FOR_INSERT
),
232 NULL
/* ra_info */ );
236 * find_file_item - look for file item in the tree
237 * @hint: provides coordinate, lock handle, seal
238 * @key: key for search
239 * @mode: mode of lock to put on returned node
243 * This finds position in the tree corresponding to @key. It first tries to use
244 * @hint's seal if it is set.
246 int find_file_item(hint_t
*hint
, const reiser4_key
*key
,
247 znode_lock_mode lock_mode
,
254 assert("nikita-3030", reiser4_schedulable());
255 assert("vs-1707", hint
!= NULL
);
256 assert("vs-47", inode
!= NULL
);
258 coord
= &hint
->ext_coord
.coord
;
259 lh
= hint
->ext_coord
.lh
;
262 result
= hint_validate(hint
, key
, 1 /* check key */, lock_mode
);
264 if (coord
->between
== AFTER_UNIT
&&
265 equal_to_rdk(coord
->node
, key
)) {
266 result
= goto_right_neighbor(coord
, lh
);
267 if (result
== -E_NO_NEIGHBOR
)
271 assert("vs-1152", equal_to_ldk(coord
->node
, key
));
273 * we moved to different node. Invalidate coord
274 * extension, zload is necessary to init it again
276 hint
->ext_coord
.valid
= 0;
279 set_file_state(unix_file_inode_data(inode
), CBK_COORD_FOUND
,
280 znode_get_level(coord
->node
));
282 return CBK_COORD_FOUND
;
285 coord_init_zero(coord
);
286 result
= find_file_item_nohint(coord
, lh
, key
, lock_mode
, inode
);
287 set_file_state(unix_file_inode_data(inode
), result
,
288 znode_get_level(coord
->node
));
290 /* FIXME: we might already have coord extension initialized */
291 hint
->ext_coord
.valid
= 0;
295 /* plugin->u.file.write_flow = NULL
296 plugin->u.file.read_flow = NULL */
298 void hint_init_zero(hint_t
* hint
)
300 memset(hint
, 0, sizeof(*hint
));
302 hint
->ext_coord
.lh
= &hint
->lh
;
305 static int find_file_state(struct inode
*inode
, struct unix_file_info
*uf_info
)
312 assert("vs-1628", ea_obtained(uf_info
));
314 if (uf_info
->container
== UF_CONTAINER_UNKNOWN
) {
315 key_by_inode_and_offset_common(inode
, 0, &key
);
317 result
= find_file_item_nohint(&coord
, &lh
, &key
,
318 ZNODE_READ_LOCK
, inode
);
319 set_file_state(uf_info
, result
, znode_get_level(coord
.node
));
321 if (!cbk_errored(result
))
326 ergo(result
== 0, uf_info
->container
!= UF_CONTAINER_UNKNOWN
));
327 reiser4_txn_restart_current();
331 /* estimate and reserve space needed to truncate page which gets partially truncated: one block for page itself, stat
332 data update (estimate_one_insert_into_item) and one item insertion (estimate_one_insert_into_item) which may happen
333 if page corresponds to hole extent and unallocated one will have to be created */
334 static int reserve_partial_page(reiser4_tree
* tree
)
337 return reiser4_grab_reserved(reiser4_get_current_sb(),
339 2 * estimate_one_insert_into_item(tree
),
343 /* estimate and reserve space needed to cut one item and update one stat data */
344 static int reserve_cut_iteration(reiser4_tree
* tree
)
346 __u64 estimate
= estimate_one_item_removal(tree
)
347 + estimate_one_insert_into_item(tree
);
349 assert("nikita-3172", lock_stack_isclean(get_current_lock_stack()));
352 /* We need to double our estimate now that we can delete more than one
354 return reiser4_grab_reserved(reiser4_get_current_sb(), estimate
* 2,
358 int reiser4_update_file_size(struct inode
*inode
, reiser4_key
* key
,
363 INODE_SET_SIZE(inode
, get_key_offset(key
));
365 inode
->i_ctime
= inode
->i_mtime
= CURRENT_TIME
;
366 result
= reiser4_update_sd(inode
);
371 /* cut file items one by one starting from the last one until new file size (inode->i_size) is reached. Reserve space
372 and update file stat data on every single cut from the tree */
374 cut_file_items(struct inode
*inode
, loff_t new_size
, int update_sd
,
375 loff_t cur_size
, int (*update_actor
) (struct inode
*,
378 reiser4_key from_key
, to_key
;
379 reiser4_key smallest_removed
;
380 file_plugin
*fplug
= inode_file_plugin(inode
);
385 fplug
== file_plugin_by_id(UNIX_FILE_PLUGIN_ID
) ||
386 fplug
== file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID
));
388 fplug
->key_by_inode(inode
, new_size
, &from_key
);
390 set_key_offset(&to_key
, cur_size
- 1 /*get_key_offset(reiser4_max_key()) */ );
391 /* this loop normally runs just once */
393 result
= reserve_cut_iteration(reiser4_tree_by_inode(inode
));
397 result
= reiser4_cut_tree_object(current_tree
, &from_key
, &to_key
,
398 &smallest_removed
, inode
, 1,
400 if (result
== -E_REPEAT
) {
401 /* -E_REPEAT is a signal to interrupt a long file truncation process */
404 update_actor(inode
, &smallest_removed
,
410 /* the below does up(sbinfo->delete_mutex). Do not get fooled */
411 reiser4_release_reserved(inode
->i_sb
);
413 /* reiser4_cut_tree_object() was interrupted probably because
414 * current atom requires commit, we have to release
415 * transaction handle to allow atom commit. */
416 reiser4_txn_restart_current();
420 && !(result
== CBK_COORD_NOTFOUND
&& new_size
== 0
421 && inode
->i_size
== 0))
424 set_key_offset(&smallest_removed
, new_size
);
425 /* Final sd update after the file gets its correct size */
426 result
= update_actor(inode
, &smallest_removed
, update_sd
);
430 /* the below does up(sbinfo->delete_mutex). Do not get fooled */
431 reiser4_release_reserved(inode
->i_sb
);
436 int find_or_create_extent(struct page
*page
);
438 /* part of truncate_file_body: it is called when truncate is used to make file
440 static int shorten_file(struct inode
*inode
, loff_t new_size
)
446 struct unix_file_info
*uf_info
;
449 * all items of ordinary reiser4 file are grouped together. That is why
450 * we can use reiser4_cut_tree. Plan B files (for instance) can not be
451 * truncated that simply
453 result
= cut_file_items(inode
, new_size
, 1 /*update_sd */ ,
454 get_key_offset(reiser4_max_key()),
455 reiser4_update_file_size
);
459 uf_info
= unix_file_inode_data(inode
);
460 assert("vs-1105", new_size
== inode
->i_size
);
462 uf_info
->container
= UF_CONTAINER_EMPTY
;
466 result
= find_file_state(inode
, uf_info
);
469 if (uf_info
->container
== UF_CONTAINER_TAILS
)
471 * No need to worry about zeroing last page after new file
476 padd_from
= inode
->i_size
& (PAGE_CACHE_SIZE
- 1);
478 /* file is truncated to page boundary */
481 result
= reserve_partial_page(reiser4_tree_by_inode(inode
));
483 reiser4_release_reserved(inode
->i_sb
);
487 /* last page is partially truncated - zero its content */
488 index
= (inode
->i_size
>> PAGE_CACHE_SHIFT
);
489 page
= read_mapping_page(inode
->i_mapping
, index
, NULL
);
492 * the below does up(sbinfo->delete_mutex). Do not get
495 reiser4_release_reserved(inode
->i_sb
);
496 if (likely(PTR_ERR(page
) == -EINVAL
)) {
497 /* looks like file is built of tail items */
500 return PTR_ERR(page
);
502 wait_on_page_locked(page
);
503 if (!PageUptodate(page
)) {
504 page_cache_release(page
);
506 * the below does up(sbinfo->delete_mutex). Do not get
509 reiser4_release_reserved(inode
->i_sb
);
514 * if page corresponds to hole extent unit - unallocated one will be
515 * created here. This is not necessary
517 result
= find_or_create_extent(page
);
520 * FIXME: cut_file_items has already updated inode. Probably it would
521 * be better to update it here when file is really truncated
524 page_cache_release(page
);
526 * the below does up(sbinfo->delete_mutex). Do not get
529 reiser4_release_reserved(inode
->i_sb
);
534 assert("vs-1066", PageLocked(page
));
535 zero_user_segment(page
, padd_from
, PAGE_CACHE_SIZE
);
537 page_cache_release(page
);
538 /* the below does up(sbinfo->delete_mutex). Do not get confused */
539 reiser4_release_reserved(inode
->i_sb
);
548 * Calls formatting plugin to see whether file of size @new_size has to be
549 * stored in unformatted nodes or in tail items. 0 is returned for the latter case.
551 static int should_have_notail(const struct unix_file_info
*uf_info
, loff_t new_size
)
555 return !uf_info
->tplug
->have_tail(unix_file_info_to_inode(uf_info
),
561 * truncate_file_body - change length of file
562 * @inode: inode of file
563 * @new_size: new file length
565 * Adjusts items file @inode is built of to match @new_size. It may either cut
566 * items or add them to represent a hole at the end of file. The caller has to
567 * obtain exclusive access to the file.
569 static int truncate_file_body(struct inode
*inode
, struct iattr
*attr
)
572 loff_t new_size
= attr
->ia_size
;
574 if (inode
->i_size
< new_size
) {
575 /* expanding truncate */
576 struct file
* file
= attr
->ia_file
;
577 struct unix_file_info
*uf_info
= unix_file_inode_data(inode
);
579 assert("edward-1532", attr
->ia_valid
& ATTR_FILE
);
581 result
= find_file_state(inode
, uf_info
);
585 if (should_have_notail(uf_info
, new_size
)) {
587 * file of size @new_size has to be built of
588 * extents. If it is built of tails - convert to
591 if (uf_info
->container
== UF_CONTAINER_TAILS
) {
593 * if file is being converted by another process
594 * - wait until it completes
597 if (reiser4_inode_get_flag(inode
,
598 REISER4_PART_IN_CONV
)) {
599 drop_exclusive_access(uf_info
);
601 get_exclusive_access(uf_info
);
607 if (uf_info
->container
== UF_CONTAINER_TAILS
) {
608 result
= tail2extent(uf_info
);
613 result
= reiser4_write_extent(file
, NULL
, 0,
617 uf_info
->container
= UF_CONTAINER_EXTENTS
;
619 if (uf_info
->container
== UF_CONTAINER_EXTENTS
) {
620 result
= reiser4_write_extent(file
, NULL
, 0,
625 result
= reiser4_write_tail(file
, NULL
, 0,
629 uf_info
->container
= UF_CONTAINER_TAILS
;
633 INODE_SET_FIELD(inode
, i_size
, new_size
);
634 file_update_time(file
);
635 result
= reiser4_update_sd(inode
);
637 reiser4_free_file_fsdata(file
);
639 result
= shorten_file(inode
, new_size
);
643 /* plugin->u.write_sd_by_inode = write_sd_by_inode_common */
646 * load_file_hint - copy hint from struct file to local variable
647 * @file: file to get hint from
648 * @hint: structure to fill
650 * Reiser4 specific portion of struct file may contain information (hint)
651 * stored on exiting from previous read or write. That information includes
652 * seal of znode and coord within that znode where previous read or write
653 * stopped. This function copies that information to @hint if it was stored or
654 * initializes @hint by 0s otherwise.
656 int load_file_hint(struct file
*file
, hint_t
*hint
)
658 reiser4_file_fsdata
*fsdata
;
661 fsdata
= reiser4_get_file_fsdata(file
);
663 return PTR_ERR(fsdata
);
665 spin_lock_inode(file
->f_dentry
->d_inode
);
666 if (reiser4_seal_is_set(&fsdata
->reg
.hint
.seal
)) {
667 *hint
= fsdata
->reg
.hint
;
669 hint
->ext_coord
.lh
= &hint
->lh
;
670 spin_unlock_inode(file
->f_dentry
->d_inode
);
672 * force re-validation of the coord on the first
673 * iteration of the read/write loop.
675 hint
->ext_coord
.valid
= 0;
676 assert("nikita-19892", coords_equal(&hint
->seal
.coord1
,
681 memset(&fsdata
->reg
.hint
, 0, sizeof(hint_t
));
682 spin_unlock_inode(file
->f_dentry
->d_inode
);
684 hint_init_zero(hint
);
689 * save_file_hint - copy hint to reiser4 private struct file's part
690 * @file: file to save hint in
691 * @hint: hint to save
693 * This copies @hint to reiser4 private part of struct file. It can help
694 * speedup future accesses to the file.
696 void save_file_hint(struct file
*file
, const hint_t
*hint
)
698 reiser4_file_fsdata
*fsdata
;
700 assert("edward-1337", hint
!= NULL
);
702 if (!file
|| !reiser4_seal_is_set(&hint
->seal
))
704 fsdata
= reiser4_get_file_fsdata(file
);
705 assert("vs-965", !IS_ERR(fsdata
));
706 assert("nikita-19891",
707 coords_equal(&hint
->seal
.coord1
, &hint
->ext_coord
.coord
));
708 assert("vs-30", hint
->lh
.owner
== NULL
);
709 spin_lock_inode(file
->f_dentry
->d_inode
);
710 fsdata
->reg
.hint
= *hint
;
711 spin_unlock_inode(file
->f_dentry
->d_inode
);
715 void reiser4_unset_hint(hint_t
* hint
)
717 assert("vs-1315", hint
);
718 hint
->ext_coord
.valid
= 0;
719 reiser4_seal_done(&hint
->seal
);
723 /* coord must be set properly. So, that reiser4_set_hint
725 void reiser4_set_hint(hint_t
* hint
, const reiser4_key
* key
,
726 znode_lock_mode mode
)
728 ON_DEBUG(coord_t
* coord
= &hint
->ext_coord
.coord
);
729 assert("vs-1207", WITH_DATA(coord
->node
, check_coord(coord
, key
)));
731 reiser4_seal_init(&hint
->seal
, &hint
->ext_coord
.coord
, key
);
732 hint
->offset
= get_key_offset(key
);
737 int hint_is_set(const hint_t
* hint
)
739 return reiser4_seal_is_set(&hint
->seal
);
743 static int all_but_offset_key_eq(const reiser4_key
* k1
, const reiser4_key
* k2
)
745 return (get_key_locality(k1
) == get_key_locality(k2
) &&
746 get_key_type(k1
) == get_key_type(k2
) &&
747 get_key_band(k1
) == get_key_band(k2
) &&
748 get_key_ordering(k1
) == get_key_ordering(k2
) &&
749 get_key_objectid(k1
) == get_key_objectid(k2
));
754 hint_validate(hint_t
* hint
, const reiser4_key
* key
, int check_key
,
755 znode_lock_mode lock_mode
)
757 if (!hint
|| !hint_is_set(hint
) || hint
->mode
!= lock_mode
)
758 /* hint either not set or set by different operation */
759 return RETERR(-E_REPEAT
);
761 assert("vs-1277", all_but_offset_key_eq(key
, &hint
->seal
.key
));
763 if (check_key
&& get_key_offset(key
) != hint
->offset
)
764 /* hint is set for different key */
765 return RETERR(-E_REPEAT
);
767 assert("vs-31", hint
->ext_coord
.lh
== &hint
->lh
);
768 return reiser4_seal_validate(&hint
->seal
, &hint
->ext_coord
.coord
, key
,
769 hint
->ext_coord
.lh
, lock_mode
,
774 * find_or_create_extent -
779 /* look for place at twig level for extent corresponding to page, call extent's writepage method to create
780 unallocated extent if it does not exist yet, initialize jnode, capture page */
781 int find_or_create_extent(struct page
*page
)
789 assert("vs-1065", page
->mapping
&& page
->mapping
->host
);
790 inode
= page
->mapping
->host
;
793 node
= jnode_of_page(page
);
796 return PTR_ERR(node
);
798 JF_SET(node
, JNODE_WRITE_PREPARED
);
801 if (node
->blocknr
== 0) {
803 result
= reiser4_update_extent(inode
, node
, page_offset(page
),
806 JF_CLR(node
, JNODE_WRITE_PREPARED
);
808 warning("", "reiser4_update_extent failed: %d", result
);
812 reiser4_update_sd(inode
);
814 spin_lock_jnode(node
);
815 result
= reiser4_try_capture(node
, ZNODE_WRITE_LOCK
, 0);
817 jnode_make_dirty_locked(node
);
818 spin_unlock_jnode(node
);
821 BUG_ON(node
->atom
== NULL
);
822 JF_CLR(node
, JNODE_WRITE_PREPARED
);
825 if (get_current_context()->entd
) {
826 entd_context
*ent
= get_entd_context(node
->tree
->super
);
828 if (ent
->cur_request
->page
== page
)
829 ent
->cur_request
->node
= node
;
835 * has_anonymous_pages - check whether inode has pages dirtied via mmap
836 * @inode: inode to check
838 * Returns true if inode's mapping has dirty pages which do not belong to any
839 * atom. Those are either tagged PAGECACHE_TAG_REISER4_MOVED in mapping's page
840 * tree or were eflushed and can be found via jnodes tagged
841 * EFLUSH_TAG_ANONYMOUS in radix tree of jnodes.
843 static int has_anonymous_pages(struct inode
*inode
)
847 read_lock_irq(&inode
->i_mapping
->tree_lock
);
848 result
= radix_tree_tagged(&inode
->i_mapping
->page_tree
, PAGECACHE_TAG_REISER4_MOVED
);
849 read_unlock_irq(&inode
->i_mapping
->tree_lock
);
854 * capture_page_and_create_extent -
855 * @page: page to be captured
857 * Grabs space for extent creation and stat data update and calls function to
860 static int capture_page_and_create_extent(struct page
*page
)
865 assert("vs-1084", page
->mapping
&& page
->mapping
->host
);
866 inode
= page
->mapping
->host
;
868 unix_file_inode_data(inode
)->container
== UF_CONTAINER_EXTENTS
);
869 /* page belongs to file */
871 inode
->i_size
> page_offset(page
));
873 /* page capture may require extent creation (if it does not exist yet)
874 and stat data's update (number of blocks changes on extent
877 result
= reiser4_grab_space(2 * estimate_one_insert_into_item
878 (reiser4_tree_by_inode(inode
)),
881 result
= find_or_create_extent(page
);
888 /* this is implementation of method commit_write of struct
889 address_space_operations for unix file plugin */
891 commit_write_unix_file(struct file
*file
, struct page
*page
,
892 unsigned from
, unsigned to
)
894 reiser4_context
*ctx
;
898 assert("umka-3101", file
!= NULL
);
899 assert("umka-3102", page
!= NULL
);
900 assert("umka-3093", PageLocked(page
));
902 SetPageUptodate(page
);
904 inode
= page
->mapping
->host
;
905 ctx
= reiser4_init_context(page
->mapping
->host
->i_sb
);
908 page_cache_get(page
);
910 result
= capture_page_and_create_extent(page
);
912 page_cache_release(page
);
914 /* don't commit transaction under inode semaphore */
915 context_set_commit_async(ctx
);
916 reiser4_exit_context(ctx
);
921 * Support for "anonymous" pages and jnodes.
923 * When file is write-accessed through mmap pages can be dirtied from the user
924 * level. In this case kernel is not notified until one of following happens:
928 * (2) truncate() (either explicit or through unlink)
930 * (3) VM scanner starts reclaiming mapped pages, dirtying them before
931 * starting write-back.
933 * As a result of (3) ->writepage may be called on a dirty page without
934 * jnode. Such page is called "anonymous" in reiser4. Certain work-loads
935 * (iozone) generate huge number of anonymous pages. Emergency flush handles
936 * this situation by creating jnode for anonymous page, starting IO on the
937 * page, and marking jnode with JNODE_KEEPME bit so that it's not thrown out of
938 * memory. Such jnode is also called anonymous.
940 * reiser4_sync_sb() method tries to insert anonymous pages and jnodes into
941 * tree. This is done by capture_anonymous_*() functions below.
945 * capture_anonymous_page - involve page into transaction
946 * @page: page to deal with
948 * Takes care that @page has corresponding metadata in the tree, creates jnode
949 * for @page and captures it. On success 1 is returned.
951 static int capture_anonymous_page(struct page
*page
)
955 if (PageWriteback(page
))
956 /* FIXME: do nothing? */
959 result
= capture_page_and_create_extent(page
);
963 warning("nikita-3329",
964 "Cannot capture anon page: %i", result
);
970 * capture_anonymous_pages - find and capture pages dirtied via mmap
971 * @mapping: address space where to look for pages
972 * @index: start index
973 * @to_capture: maximum number of pages to capture
975 * Looks for pages tagged REISER4_MOVED starting from the *@index-th page,
976 * captures (involves into atom) them, returns number of captured pages,
977 * updates @index to next page after the last captured one.
980 capture_anonymous_pages(struct address_space
*mapping
, pgoff_t
*index
,
981 unsigned int to_capture
)
985 unsigned int i
, count
;
988 pagevec_init(&pvec
, 0);
989 count
= min(pagevec_space(&pvec
), to_capture
);
992 /* find pages tagged MOVED */
993 write_lock_irq(&mapping
->tree_lock
);
994 pvec
.nr
= radix_tree_gang_lookup_tag(&mapping
->page_tree
,
995 (void **)pvec
.pages
, *index
, count
,
996 PAGECACHE_TAG_REISER4_MOVED
);
997 if (pagevec_count(&pvec
) == 0) {
999 * there are no pages tagged MOVED in mapping->page_tree
1000 * starting from *index
1002 write_unlock_irq(&mapping
->tree_lock
);
1003 *index
= (pgoff_t
)-1;
1007 /* clear MOVED tag for all found pages */
1008 for (i
= 0; i
< pagevec_count(&pvec
); i
++) {
1011 page_cache_get(pvec
.pages
[i
]);
1012 p
= radix_tree_tag_clear(&mapping
->page_tree
, pvec
.pages
[i
]->index
,
1013 PAGECACHE_TAG_REISER4_MOVED
);
1014 assert("vs-49", p
== pvec
.pages
[i
]);
1016 write_unlock_irq(&mapping
->tree_lock
);
1019 *index
= pvec
.pages
[i
- 1]->index
+ 1;
1021 for (i
= 0; i
< pagevec_count(&pvec
); i
++) {
1023 * tag PAGECACHE_TAG_REISER4_MOVED will be cleared by
1024 * reiser4_set_page_dirty_internal which is called when jnode is
1027 result
= capture_anonymous_page(pvec
.pages
[i
]);
1033 "failed to capture page: "
1034 "result=%d, captured=%d)\n",
1038 * set MOVED tag to all pages which left not
1041 write_lock_irq(&mapping
->tree_lock
);
1042 for (; i
< pagevec_count(&pvec
); i
++) {
1043 radix_tree_tag_set(&mapping
->page_tree
,
1044 pvec
.pages
[i
]->index
,
1045 PAGECACHE_TAG_REISER4_MOVED
);
1047 write_unlock_irq(&mapping
->tree_lock
);
1049 pagevec_release(&pvec
);
1053 * result == 0. capture_anonymous_page returns
1054 * 0 for Writeback-ed page. Set MOVED tag on
1057 write_lock_irq(&mapping
->tree_lock
);
1058 radix_tree_tag_set(&mapping
->page_tree
,
1059 pvec
.pages
[i
]->index
,
1060 PAGECACHE_TAG_REISER4_MOVED
);
1061 write_unlock_irq(&mapping
->tree_lock
);
1063 *index
= pvec
.pages
[0]->index
;
1065 *index
= pvec
.pages
[i
- 1]->index
+ 1;
1069 pagevec_release(&pvec
);
1074 * capture_anonymous_jnodes - find and capture anonymous jnodes
1075 * @mapping: address space where to look for jnodes
1076 * @from: start index
1078 * @to_capture: maximum number of jnodes to capture
1080 * Looks for jnodes tagged EFLUSH_TAG_ANONYMOUS in inode's tree of jnodes in
1081 * the range of indexes @from-@to and captures them, returns number of captured
1082 * jnodes, updates @from to next jnode after the last captured one.
1085 capture_anonymous_jnodes(struct address_space
*mapping
,
1086 pgoff_t
*from
, pgoff_t to
, int to_capture
)
1093 * Commit atom of the jnode of a page.
1095 static int sync_page(struct page
*page
)
1103 node
= jprivate(page
);
1105 spin_lock_jnode(node
);
1106 atom
= jnode_get_atom(node
);
1107 spin_unlock_jnode(node
);
1111 result
= reiser4_sync_atom(atom
);
1112 } while (result
== -E_REPEAT
);
1114 * ZAM-FIXME-HANS: document the logic of this loop, is it just to
1115 * handle the case where more pages get added to the atom while we are
1118 assert("nikita-3485", ergo(result
== 0,
1119 get_current_context()->trans
->atom
== NULL
));
1124 * Commit atoms of pages on @pages list.
1125 * call sync_page for each page from mapping's page tree
1127 static int sync_page_list(struct inode
*inode
)
1130 struct address_space
*mapping
;
1131 unsigned long from
; /* start index for radix_tree_gang_lookup */
1132 unsigned int found
; /* return value for radix_tree_gang_lookup */
1134 mapping
= inode
->i_mapping
;
1137 read_lock_irq(&mapping
->tree_lock
);
1138 while (result
== 0) {
1142 radix_tree_gang_lookup(&mapping
->page_tree
, (void **)&page
,
1144 assert("", found
< 2);
1148 /* page may not leave radix tree because it is protected from truncating by inode->i_mutex locked by
1150 page_cache_get(page
);
1151 read_unlock_irq(&mapping
->tree_lock
);
1153 from
= page
->index
+ 1;
1155 result
= sync_page(page
);
1157 page_cache_release(page
);
1158 read_lock_irq(&mapping
->tree_lock
);
1161 read_unlock_irq(&mapping
->tree_lock
);
1165 static int commit_file_atoms(struct inode
*inode
)
1168 struct unix_file_info
*uf_info
;
1170 uf_info
= unix_file_inode_data(inode
);
1172 get_exclusive_access(uf_info
);
1174 * find what items file is made from
1176 result
= find_file_state(inode
, uf_info
);
1177 drop_exclusive_access(uf_info
);
1182 * file state cannot change because we are under ->i_mutex
1184 switch (uf_info
->container
) {
1185 case UF_CONTAINER_EXTENTS
:
1186 /* find_file_state might open or join an atom */
1187 reiser4_txn_restart_current();
1190 * when we are called by
1191 * filemap_fdatawrite->
1193 * reiser4_writepages()
1195 * inode->i_mapping->dirty_pages are spliced into
1196 * ->io_pages, leaving ->dirty_pages dirty.
1198 * When we are called from
1199 * reiser4_fsync()->sync_unix_file(), we have to
1200 * commit atoms of all pages on the ->dirty_list.
1202 * So for simplicity we just commit ->io_pages and
1205 sync_page_list(inode
);
1207 case UF_CONTAINER_TAILS
:
1209 * NOTE-NIKITA probably we can be smarter for tails. For now
1210 * just commit all existing atoms.
1212 result
= txnmgr_force_commit_all(inode
->i_sb
, 0);
1214 case UF_CONTAINER_EMPTY
:
1217 case UF_CONTAINER_UNKNOWN
:
1224 * commit current transaction: there can be captured nodes from
1225 * find_file_state() and finish_conversion().
1227 reiser4_txn_restart_current();
1232 * writepages_unix_file - writepages of struct address_space_operations
1236 * This captures anonymous pages and anonymous jnodes. Anonymous pages are
1237 * pages which are dirtied via mmapping. Anonymous jnodes are ones which were
1238 * created by reiser4_writepage.
1240 int writepages_unix_file(struct address_space
*mapping
,
1241 struct writeback_control
*wbc
)
1244 struct unix_file_info
*uf_info
;
1245 pgoff_t pindex
, jindex
, nr_pages
;
1247 struct inode
*inode
;
1249 inode
= mapping
->host
;
1250 if (!has_anonymous_pages(inode
)) {
1254 jindex
= pindex
= wbc
->range_start
>> PAGE_CACHE_SHIFT
;
1256 nr_pages
= size_in_pages(i_size_read(inode
));
1258 uf_info
= unix_file_inode_data(inode
);
1261 reiser4_context
*ctx
;
1263 if (wbc
->sync_mode
!= WB_SYNC_ALL
)
1264 to_capture
= min(wbc
->nr_to_write
, CAPTURE_APAGE_BURST
);
1266 to_capture
= CAPTURE_APAGE_BURST
;
1268 ctx
= reiser4_init_context(inode
->i_sb
);
1270 result
= PTR_ERR(ctx
);
1273 /* avoid recursive calls to ->sync_inodes */
1275 assert("zam-760", lock_stack_isclean(get_current_lock_stack()));
1276 assert("", LOCK_CNT_NIL(inode_sem_w
));
1277 assert("", LOCK_CNT_NIL(inode_sem_r
));
1279 reiser4_txn_restart_current();
1281 /* we have to get nonexclusive access to the file */
1282 if (get_current_context()->entd
) {
1284 * use nonblocking version of nonexclusive_access to
1285 * avoid deadlock which might look like the following:
1286 * process P1 holds NEA on file F1 and called entd to
1287 * reclaim some memory. Entd works for P1 and is going
1288 * to capture pages of file F2. To do that entd has to
1289 * get NEA to F2. F2 is held by process P2 which also
1290 * called entd. But entd is serving P1 at the moment
1291 * and P2 has to wait. Process P3 trying to get EA to
1292 * file F2. Existence of pending EA request to file F2
1293 * makes impossible for entd to get NEA to file
1294 * F2. Neither of these process can continue. Using
1295 * nonblocking version of getting NEA is supposed to
1296 * avoid this deadlock.
1298 if (try_to_get_nonexclusive_access(uf_info
) == 0) {
1299 result
= RETERR(-EBUSY
);
1300 reiser4_exit_context(ctx
);
1304 get_nonexclusive_access(uf_info
);
1306 while (to_capture
> 0) {
1309 assert("vs-1727", jindex
<= pindex
);
1310 if (pindex
== jindex
) {
1313 capture_anonymous_pages(inode
->i_mapping
,
1318 to_capture
-= result
;
1319 wbc
->nr_to_write
-= result
;
1320 if (start
+ result
== pindex
) {
1324 if (to_capture
<= 0)
1327 /* deal with anonymous jnodes between jindex and pindex */
1329 capture_anonymous_jnodes(inode
->i_mapping
, &jindex
,
1330 pindex
, to_capture
);
1333 to_capture
-= result
;
1334 get_current_context()->nr_captured
+= result
;
1336 if (jindex
== (pgoff_t
) - 1) {
1337 assert("vs-1728", pindex
== (pgoff_t
) - 1);
1341 if (to_capture
<= 0)
1342 /* there may be left more pages */
1343 __mark_inode_dirty(inode
, I_DIRTY_PAGES
);
1345 drop_nonexclusive_access(uf_info
);
1347 /* error happened */
1348 reiser4_exit_context(ctx
);
1351 if (wbc
->sync_mode
!= WB_SYNC_ALL
) {
1352 reiser4_exit_context(ctx
);
1355 result
= commit_file_atoms(inode
);
1356 reiser4_exit_context(ctx
);
1357 if (pindex
>= nr_pages
&& jindex
== pindex
)
1362 if (is_in_reiser4_context()) {
1363 if (get_current_context()->nr_captured
>= CAPTURE_APAGE_BURST
) {
1365 * there are already pages to flush, flush them out, do
1366 * not delay until end of reiser4_sync_inodes
1368 reiser4_writeout(inode
->i_sb
, wbc
);
1369 get_current_context()->nr_captured
= 0;
1376 * ->sync() method for unix file.
1378 * We are trying to be smart here. Instead of committing all atoms (original
1379 * solution), we scan dirty pages of this file and commit all atoms they are
1382 * Situation is complicated by anonymous pages: i.e., extent-less pages
1383 * dirtied through mmap. Fortunately sys_fsync() first calls
1384 * filemap_fdatawrite() that will ultimately call reiser4_writepages(), insert
1385 * all missing extents and capture anonymous pages.
1387 int sync_unix_file(struct file
*file
, struct dentry
*dentry
, int datasync
)
1389 reiser4_context
*ctx
;
1391 reiser4_block_nr reserve
;
1393 ctx
= reiser4_init_context(dentry
->d_inode
->i_sb
);
1395 return PTR_ERR(ctx
);
1397 reserve
= estimate_update_common(dentry
->d_inode
);
1398 if (reiser4_grab_space(reserve
, BA_CAN_COMMIT
)) {
1399 reiser4_exit_context(ctx
);
1400 return RETERR(-ENOSPC
);
1402 write_sd_by_inode_common(dentry
->d_inode
);
1404 atom
= get_current_atom_locked();
1405 spin_lock_txnh(ctx
->trans
);
1406 force_commit_atom(ctx
->trans
);
1407 reiser4_exit_context(ctx
);
1412 * readpage_unix_file - readpage of struct address_space_operations
1416 * Compose a key and search for item containing information about @page
1417 * data. If item is found - its readpage method is called.
1419 int readpage_unix_file(struct file
*file
, struct page
*page
)
1421 reiser4_context
*ctx
;
1423 struct inode
*inode
;
1430 assert("vs-1062", PageLocked(page
));
1431 assert("vs-976", !PageUptodate(page
));
1432 assert("vs-1061", page
->mapping
&& page
->mapping
->host
);
1434 if (page
->mapping
->host
->i_size
<= page_offset(page
)) {
1435 /* page is out of file */
1436 zero_user(page
, 0, PAGE_CACHE_SIZE
);
1437 SetPageUptodate(page
);
1442 inode
= page
->mapping
->host
;
1443 ctx
= reiser4_init_context(inode
->i_sb
);
1446 return PTR_ERR(ctx
);
1449 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
1452 reiser4_exit_context(ctx
);
1453 return RETERR(-ENOMEM
);
1456 result
= load_file_hint(file
, hint
);
1460 reiser4_exit_context(ctx
);
1465 /* get key of first byte of the page */
1466 key_by_inode_and_offset_common(inode
, page_offset(page
), &key
);
1468 /* look for file metadata corresponding to first byte of page */
1469 page_cache_get(page
);
1471 result
= find_file_item(hint
, &key
, ZNODE_READ_LOCK
, inode
);
1473 page_cache_release(page
);
1475 if (page
->mapping
== NULL
) {
1477 * readpage allows truncate to run concurrently. Page was
1478 * truncated while it was not locked
1483 reiser4_txn_restart(ctx
);
1484 reiser4_exit_context(ctx
);
1488 if (result
!= CBK_COORD_FOUND
|| hint
->ext_coord
.coord
.between
!= AT_UNIT
) {
1489 if (result
== CBK_COORD_FOUND
&&
1490 hint
->ext_coord
.coord
.between
!= AT_UNIT
)
1491 /* file is truncated */
1496 reiser4_txn_restart(ctx
);
1497 reiser4_exit_context(ctx
);
1502 * item corresponding to page is found. It can not be removed because
1503 * znode lock is held
1505 if (PageUptodate(page
)) {
1509 reiser4_txn_restart(ctx
);
1510 reiser4_exit_context(ctx
);
1514 coord
= &hint
->ext_coord
.coord
;
1515 result
= zload(coord
->node
);
1520 reiser4_txn_restart(ctx
);
1521 reiser4_exit_context(ctx
);
1525 validate_extended_coord(&hint
->ext_coord
, page_offset(page
));
1527 if (!coord_is_existing_unit(coord
)) {
1528 /* this indicates corruption */
1530 "Looking for page %lu of file %llu (size %lli). "
1531 "No file items found (%d). File is corrupted?\n",
1532 page
->index
, (unsigned long long)get_inode_oid(inode
),
1533 inode
->i_size
, result
);
1534 zrelse(coord
->node
);
1538 reiser4_txn_restart(ctx
);
1539 reiser4_exit_context(ctx
);
1540 return RETERR(-EIO
);
1544 * get plugin of found item or use plugin if extent if there are no
1547 iplug
= item_plugin_by_coord(coord
);
1548 if (iplug
->s
.file
.readpage
)
1549 result
= iplug
->s
.file
.readpage(coord
, page
);
1551 result
= RETERR(-EINVAL
);
1554 set_key_offset(&key
,
1555 (loff_t
) (page
->index
+ 1) << PAGE_CACHE_SHIFT
);
1556 /* FIXME should call reiser4_set_hint() */
1557 reiser4_unset_hint(hint
);
1560 reiser4_unset_hint(hint
);
1563 ergo(result
== 0, (PageLocked(page
) || PageUptodate(page
))));
1564 assert("vs-9791", ergo(result
!= 0, !PageLocked(page
)));
1566 zrelse(coord
->node
);
1569 save_file_hint(file
, hint
);
1573 * FIXME: explain why it is needed. HINT: page allocation in write can
1574 * not be done when atom is not NULL because reiser4_writepage can not
1575 * kick entd and have to eflush
1577 reiser4_txn_restart(ctx
);
1578 reiser4_exit_context(ctx
);
1582 struct uf_readpages_context
{
1587 /* A callback function for readpages_unix_file/read_cache_pages.
1588 * If the file is build of tails, then return error (-ENOENT).
1590 * @data -- a pointer to uf_readpages_context object,
1591 * to save the twig lock and the coord between
1592 * read_cache_page iterations.
1593 * @page -- page to start read.
1595 static int uf_readpages_filler(void * data
, struct page
* page
)
1597 struct uf_readpages_context
*rc
= data
;
1600 reiser4_extent
*ext
;
1603 struct address_space
* mapping
= page
->mapping
;
1605 if (PageUptodate(page
)) {
1609 page_cache_get(page
);
1611 if (rc
->lh
.node
== 0) {
1612 /* no twig lock - have to do tree search. */
1616 key_by_inode_and_offset_common(
1617 mapping
->host
, page_offset(page
), &key
);
1619 &get_super_private(mapping
->host
->i_sb
)->tree
,
1620 &key
, &rc
->coord
, &rc
->lh
,
1621 ZNODE_READ_LOCK
, FIND_EXACT
,
1622 TWIG_LEVEL
, TWIG_LEVEL
, CBK_UNIQUE
, NULL
);
1626 if (PageUptodate(page
))
1630 ret
= zload(rc
->coord
.node
);
1633 if (!coord_is_existing_item(&rc
->coord
) ||
1634 !item_is_extent(&rc
->coord
)) {
1635 zrelse(rc
->coord
.node
);
1639 ext
= extent_by_coord(&rc
->coord
);
1640 ext_index
= extent_unit_index(&rc
->coord
);
1641 if (page
->index
< ext_index
||
1642 page
->index
>= ext_index
+ extent_get_width(ext
)) {
1643 /* the page index doesn't belong to the extent unit
1644 which the coord points to - release the lock and
1645 repeat with tree search. */
1646 zrelse(rc
->coord
.node
);
1648 /* we can be here after a CBK call only in case of
1649 corruption of the tree or the tree lookup algorithm bug. */
1650 if (unlikely(cbk_done
)) {
1656 node
= jnode_of_page(page
);
1657 if (unlikely(IS_ERR(node
))) {
1658 zrelse(rc
->coord
.node
);
1659 ret
= PTR_ERR(node
);
1662 ret
= reiser4_do_readpage_extent(ext
, page
->index
- ext_index
, page
);
1664 zrelse(rc
->coord
.node
);
1670 page_cache_release(page
);
1675 * readpages_unix_file - called by the readahead code, starts reading for each
1676 * page of given list of pages
1678 int readpages_unix_file(
1679 struct file
*file
, struct address_space
*mapping
,
1680 struct list_head
*pages
, unsigned nr_pages
)
1682 reiser4_context
*ctx
;
1683 struct uf_readpages_context rc
;
1686 ctx
= reiser4_init_context(mapping
->host
->i_sb
);
1688 put_pages_list(pages
);
1689 return PTR_ERR(ctx
);
1692 ret
= read_cache_pages(mapping
, pages
, uf_readpages_filler
, &rc
);
1694 context_set_commit_async(ctx
);
1695 /* close the transaction to protect further page allocation from deadlocks */
1696 reiser4_txn_restart(ctx
);
1697 reiser4_exit_context(ctx
);
1701 static reiser4_block_nr
unix_file_estimate_read(struct inode
*inode
,
1702 loff_t count UNUSED_ARG
)
1704 /* We should reserve one block, because of updating of the stat data
1707 inode_file_plugin(inode
)->estimate
.update
==
1708 estimate_update_common
);
1709 return estimate_update_common(inode
);
1712 /* this is called with nonexclusive access obtained, file's container can not change */
1713 static ssize_t
read_file(hint_t
*hint
, struct file
*file
, /* file to read from to */
1714 char __user
*buf
, /* address of user-space buffer */
1715 size_t count
, /* number of bytes to read */
1719 struct inode
*inode
;
1721 int (*read_f
) (struct file
*, flow_t
*, hint_t
*);
1725 inode
= file
->f_dentry
->d_inode
;
1729 inode_file_plugin(inode
)->flow_by_inode
==
1730 flow_by_inode_unix_file
);
1732 flow_by_inode_unix_file(inode
, buf
, 1 /* user space */ , count
,
1733 *off
, READ_OP
, &flow
);
1734 if (unlikely(result
))
1737 /* get seal and coord sealed with it from reiser4 private data
1738 of struct file. The coord will tell us where our last read
1739 of this file finished, and the seal will help to determine
1740 if that location is still valid.
1742 coord
= &hint
->ext_coord
.coord
;
1743 while (flow
.length
&& result
== 0) {
1745 find_file_item(hint
, &flow
.key
, ZNODE_READ_LOCK
, inode
);
1746 if (cbk_errored(result
))
1747 /* error happened */
1750 if (coord
->between
!= AT_UNIT
) {
1751 /* there were no items corresponding to given offset */
1752 done_lh(hint
->ext_coord
.lh
);
1756 loaded
= coord
->node
;
1757 result
= zload(loaded
);
1758 if (unlikely(result
)) {
1759 done_lh(hint
->ext_coord
.lh
);
1763 if (hint
->ext_coord
.valid
== 0)
1764 validate_extended_coord(&hint
->ext_coord
,
1765 get_key_offset(&flow
.key
));
1767 assert("vs-4", hint
->ext_coord
.valid
== 1);
1768 assert("vs-33", hint
->ext_coord
.lh
== &hint
->lh
);
1769 /* call item's read method */
1770 read_f
= item_plugin_by_coord(coord
)->s
.file
.read
;
1771 result
= read_f(file
, &flow
, hint
);
1773 done_lh(hint
->ext_coord
.lh
);
1776 return (count
- flow
.length
) ? (count
- flow
.length
) : result
;
1779 static ssize_t
read_unix_file_container_tails(struct file
*, char __user
*, size_t, loff_t
*);
1782 * read_unix_file - read of struct file_operations
1783 * @file: file to read from
1784 * @buf: address of user-space buffer
1785 * @read_amount: number of bytes to read
1786 * @off: position in file to read from
1788 * This is implementation of vfs's read method of struct file_operations for
1791 ssize_t
read_unix_file(struct file
*file
, char __user
*buf
, size_t read_amount
,
1794 reiser4_context
*ctx
;
1796 struct inode
*inode
;
1797 struct unix_file_info
*uf_info
;
1799 if (unlikely(read_amount
== 0))
1802 assert("umka-072", file
!= NULL
);
1803 assert("umka-074", off
!= NULL
);
1804 inode
= file
->f_dentry
->d_inode
;
1805 assert("vs-972", !reiser4_inode_get_flag(inode
, REISER4_NO_SD
));
1807 ctx
= reiser4_init_context(inode
->i_sb
);
1809 return PTR_ERR(ctx
);
1810 uf_info
= unix_file_inode_data(inode
);
1811 if (uf_info
->container
== UF_CONTAINER_UNKNOWN
) {
1812 get_exclusive_access(uf_info
);
1813 result
= find_file_state(inode
, uf_info
);
1814 if (unlikely(result
!= 0))
1817 get_nonexclusive_access(uf_info
);
1818 result
= reiser4_grab_space_force(unix_file_estimate_read(inode
, read_amount
),
1820 if (unlikely(result
!= 0))
1822 if (uf_info
->container
== UF_CONTAINER_EXTENTS
){
1823 result
= do_sync_read(file
, buf
, read_amount
, off
);
1824 } else if (uf_info
->container
== UF_CONTAINER_TAILS
||
1825 reiser4_inode_get_flag(inode
, REISER4_PART_IN_CONV
) ||
1826 reiser4_inode_get_flag(inode
, REISER4_PART_MIXED
)) {
1827 result
= read_unix_file_container_tails(file
, buf
, read_amount
, off
);
1829 assert("zam-1085", uf_info
->container
== UF_CONTAINER_EMPTY
);
1833 drop_access(uf_info
);
1834 context_set_commit_async(ctx
);
1835 reiser4_exit_context(ctx
);
1839 static ssize_t
read_unix_file_container_tails(
1840 struct file
*file
, char __user
*buf
, size_t read_amount
, loff_t
*off
)
1843 struct inode
*inode
;
1845 struct unix_file_info
*uf_info
;
1846 size_t count
, read
, left
;
1849 assert("umka-072", file
!= NULL
);
1850 assert("umka-074", off
!= NULL
);
1851 inode
= file
->f_dentry
->d_inode
;
1852 assert("vs-972", !reiser4_inode_get_flag(inode
, REISER4_NO_SD
));
1854 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
1856 return RETERR(-ENOMEM
);
1858 result
= load_file_hint(file
, hint
);
1866 uf_info
= unix_file_inode_data(inode
);
1868 reiser4_txn_restart_current();
1869 size
= i_size_read(inode
);
1871 /* position to read from is past the end of file */
1873 if (*off
+ left
> size
)
1875 /* faultin user page */
1876 result
= fault_in_pages_writeable(buf
, left
> PAGE_CACHE_SIZE
? PAGE_CACHE_SIZE
: left
);
1878 return RETERR(-EFAULT
);
1880 read
= read_file(hint
, file
, buf
,
1881 left
> PAGE_CACHE_SIZE
? PAGE_CACHE_SIZE
: left
,
1890 /* update position in a file */
1892 /* total number of read bytes */
1896 save_file_hint(file
, hint
);
1899 file_accessed(file
);
1900 /* return number of read bytes or error code if nothing is read */
1901 return count
? count
: result
;
1904 /* This function takes care about @file's pages. First of all it checks if
1905 filesystems readonly and if so gets out. Otherwise, it throws out all
1906 pages of file if it was mapped for read and going to be mapped for write
1907 and consists of tails. This is done in order to not manage few copies
1908 of the data (first in page cache and second one in tails them selves)
1909 for the case of mapping files consisting tails.
1911 Here also tail2extent conversion is performed if it is allowed and file
1912 is going to be written or mapped for write. This functions may be called
1913 from write_unix_file() or mmap_unix_file(). */
1914 static int check_pages_unix_file(struct file
*file
, struct inode
*inode
)
1916 reiser4_invalidate_pages(inode
->i_mapping
, 0,
1917 (inode
->i_size
+ PAGE_CACHE_SIZE
-
1918 1) >> PAGE_CACHE_SHIFT
, 0);
1919 return unpack(file
, inode
, 0 /* not forever */ );
1923 * mmap_unix_file - mmap of struct file_operations
1924 * @file: file to mmap
1927 * This is implementation of vfs's mmap method of struct file_operations for
1928 * unix file plugin. It converts file to extent if necessary. Sets
1929 * reiser4_inode's flag - REISER4_HAS_MMAP.
1931 int mmap_unix_file(struct file
*file
, struct vm_area_struct
*vma
)
1933 reiser4_context
*ctx
;
1935 struct inode
*inode
;
1936 struct unix_file_info
*uf_info
;
1937 reiser4_block_nr needed
;
1939 inode
= file
->f_dentry
->d_inode
;
1940 ctx
= reiser4_init_context(inode
->i_sb
);
1942 return PTR_ERR(ctx
);
1944 uf_info
= unix_file_inode_data(inode
);
1946 get_exclusive_access_careful(uf_info
, inode
);
1948 if (!IS_RDONLY(inode
) && (vma
->vm_flags
& (VM_MAYWRITE
| VM_SHARED
))) {
1950 * we need file built of extent items. If it is still built of
1951 * tail items we have to convert it. Find what items the file
1954 result
= find_file_state(inode
, uf_info
);
1956 drop_exclusive_access(uf_info
);
1957 reiser4_exit_context(ctx
);
1961 assert("vs-1648", (uf_info
->container
== UF_CONTAINER_TAILS
||
1962 uf_info
->container
== UF_CONTAINER_EXTENTS
||
1963 uf_info
->container
== UF_CONTAINER_EMPTY
));
1964 if (uf_info
->container
== UF_CONTAINER_TAILS
) {
1966 * invalidate all pages and convert file from tails to
1969 result
= check_pages_unix_file(file
, inode
);
1971 drop_exclusive_access(uf_info
);
1972 reiser4_exit_context(ctx
);
1979 * generic_file_mmap will do update_atime. Grab space for stat data
1982 needed
= inode_file_plugin(inode
)->estimate
.update(inode
);
1983 result
= reiser4_grab_space_force(needed
, BA_CAN_COMMIT
);
1985 drop_exclusive_access(uf_info
);
1986 reiser4_exit_context(ctx
);
1990 result
= generic_file_mmap(file
, vma
);
1992 /* mark file as having mapping. */
1993 reiser4_inode_set_flag(inode
, REISER4_HAS_MMAP
);
1996 drop_exclusive_access(uf_info
);
1997 reiser4_exit_context(ctx
);
2005 * Finds file item which is responsible for first byte in the file.
2007 static int find_first_item(struct inode
*inode
)
2014 coord_init_zero(&coord
);
2016 inode_file_plugin(inode
)->key_by_inode(inode
, 0, &key
);
2017 result
= find_file_item_nohint(&coord
, &lh
, &key
, ZNODE_READ_LOCK
,
2019 if (result
== CBK_COORD_FOUND
) {
2020 if (coord
.between
== AT_UNIT
) {
2021 result
= zload(coord
.node
);
2023 result
= item_id_by_coord(&coord
);
2025 if (result
!= EXTENT_POINTER_ID
&&
2026 result
!= FORMATTING_ID
)
2027 result
= RETERR(-EIO
);
2030 result
= RETERR(-EIO
);
2041 * If filesystem is not readonly - complete uncompleted tail conversion if
2044 int open_unix_file(struct inode
*inode
, struct file
*file
)
2047 reiser4_context
*ctx
;
2048 struct unix_file_info
*uf_info
;
2050 if (IS_RDONLY(inode
))
2053 if (!reiser4_inode_get_flag(inode
, REISER4_PART_MIXED
))
2056 ctx
= reiser4_init_context(inode
->i_sb
);
2058 return PTR_ERR(ctx
);
2060 uf_info
= unix_file_inode_data(inode
);
2062 get_exclusive_access_careful(uf_info
, inode
);
2064 if (!reiser4_inode_get_flag(inode
, REISER4_PART_MIXED
)) {
2066 * other process completed the conversion
2068 drop_exclusive_access(uf_info
);
2069 reiser4_exit_context(ctx
);
2074 * file left in semi converted state after unclean shutdown or another
2075 * thread is doing conversion and dropped exclusive access while doing
2076 * balance dirty pages. Complete the conversion
2078 result
= find_first_item(inode
);
2079 if (result
== EXTENT_POINTER_ID
)
2081 * first item is extent, therefore there was incomplete
2082 * tail2extent conversion. Complete it
2084 result
= tail2extent(unix_file_inode_data(inode
));
2085 else if (result
== FORMATTING_ID
)
2087 * first item is formatting item, therefore there was
2088 * incomplete extent2tail conversion. Complete it
2090 result
= extent2tail(file
, unix_file_inode_data(inode
));
2096 (!reiser4_inode_get_flag(inode
, REISER4_PART_MIXED
) &&
2097 !reiser4_inode_get_flag(inode
, REISER4_PART_IN_CONV
))));
2098 drop_exclusive_access(uf_info
);
2099 reiser4_exit_context(ctx
);
2103 #define NEITHER_OBTAINED 0
2104 #define EA_OBTAINED 1
2105 #define NEA_OBTAINED 2
2107 static void drop_access(struct unix_file_info
*uf_info
)
2109 if (uf_info
->exclusive_use
)
2110 drop_exclusive_access(uf_info
);
2112 drop_nonexclusive_access(uf_info
);
2115 #define debug_wuf(format, ...) printk("%s: %d: %s: " format "\n", \
2116 __FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
2119 * write_unix_file - write of struct file_operations
2120 * @file: file to write to
2121 * @buf: address of user-space buffer
2122 * @write_amount: number of bytes to write
2123 * @off: position in file to write to
2125 * This is implementation of vfs's write method of struct file_operations for
2128 ssize_t
write_unix_file(struct file
*file
, const char __user
*buf
,
2129 size_t count
, loff_t
*pos
)
2132 reiser4_context
*ctx
;
2133 struct inode
*inode
;
2134 struct unix_file_info
*uf_info
;
2137 int to_write
= PAGE_CACHE_SIZE
* WRITE_GRANULARITY
;
2139 ssize_t (*write_op
)(struct file
*, const char __user
*, size_t,
2144 inode
= file
->f_dentry
->d_inode
;
2145 ctx
= reiser4_init_context(inode
->i_sb
);
2147 return PTR_ERR(ctx
);
2149 mutex_lock(&inode
->i_mutex
);
2151 assert("vs-947", !reiser4_inode_get_flag(inode
, REISER4_NO_SD
));
2152 assert("vs-9471", (!reiser4_inode_get_flag(inode
, REISER4_PART_MIXED
)));
2154 /* check amount of bytes to write and writing position */
2155 result
= generic_write_checks(file
, pos
, &count
, 0);
2157 mutex_unlock(&inode
->i_mutex
);
2158 context_set_commit_async(ctx
);
2159 reiser4_exit_context(ctx
);
2163 result
= remove_suid(file
->f_dentry
);
2165 mutex_unlock(&inode
->i_mutex
);
2166 context_set_commit_async(ctx
);
2167 reiser4_exit_context(ctx
);
2170 /* remove_suid might create a transaction */
2171 reiser4_txn_restart(ctx
);
2173 uf_info
= unix_file_inode_data(inode
);
2175 current
->backing_dev_info
= inode
->i_mapping
->backing_dev_info
;
2179 ea
= NEITHER_OBTAINED
;
2181 new_size
= i_size_read(inode
);
2182 if (*pos
+ count
> new_size
)
2183 new_size
= *pos
+ count
;
2186 if (left
< to_write
)
2189 if (uf_info
->container
== UF_CONTAINER_EMPTY
) {
2190 get_exclusive_access(uf_info
);
2192 if (uf_info
->container
!= UF_CONTAINER_EMPTY
) {
2193 /* file is made not empty by another process */
2194 drop_exclusive_access(uf_info
);
2195 ea
= NEITHER_OBTAINED
;
2198 } else if (uf_info
->container
== UF_CONTAINER_UNKNOWN
) {
2200 * get exclusive access directly just to not have to
2201 * re-obtain it if file will appear empty
2203 get_exclusive_access(uf_info
);
2205 result
= find_file_state(inode
, uf_info
);
2207 drop_exclusive_access(uf_info
);
2208 ea
= NEITHER_OBTAINED
;
2212 get_nonexclusive_access(uf_info
);
2216 /* either EA or NEA is obtained. Choose item write method */
2217 if (uf_info
->container
== UF_CONTAINER_EXTENTS
) {
2218 /* file is built of extent items */
2219 write_op
= reiser4_write_extent
;
2220 } else if (uf_info
->container
== UF_CONTAINER_EMPTY
) {
2222 if (should_have_notail(uf_info
, new_size
))
2223 write_op
= reiser4_write_extent
;
2225 write_op
= reiser4_write_tail
;
2227 /* file is built of tail items */
2228 if (should_have_notail(uf_info
, new_size
)) {
2229 if (ea
== NEA_OBTAINED
) {
2230 drop_nonexclusive_access(uf_info
);
2231 get_exclusive_access(uf_info
);
2234 if (uf_info
->container
== UF_CONTAINER_TAILS
) {
2236 * if file is being converted by another
2237 * process - wait until it completes
2240 if (reiser4_inode_get_flag(inode
,
2241 REISER4_PART_IN_CONV
)) {
2242 drop_exclusive_access(uf_info
);
2244 get_exclusive_access(uf_info
);
2249 if (uf_info
->container
== UF_CONTAINER_TAILS
) {
2250 result
= tail2extent(uf_info
);
2255 drop_exclusive_access(uf_info
);
2256 ea
= NEITHER_OBTAINED
;
2259 write_op
= reiser4_write_tail
;
2262 written
= write_op(file
, buf
, to_write
, pos
);
2263 if (written
== -ENOSPC
&& try_free_space
) {
2264 drop_access(uf_info
);
2265 txnmgr_force_commit_all(inode
->i_sb
, 0);
2270 drop_access(uf_info
);
2274 /* something is written. */
2275 if (uf_info
->container
== UF_CONTAINER_EMPTY
) {
2276 assert("", ea
== EA_OBTAINED
);
2277 uf_info
->container
=
2278 (write_op
== reiser4_write_extent
) ?
2279 UF_CONTAINER_EXTENTS
: UF_CONTAINER_TAILS
;
2281 assert("", ergo(uf_info
->container
== UF_CONTAINER_EXTENTS
,
2282 write_op
== reiser4_write_extent
));
2283 assert("", ergo(uf_info
->container
== UF_CONTAINER_TAILS
,
2284 write_op
== reiser4_write_tail
));
2286 if (*pos
+ written
> inode
->i_size
)
2287 INODE_SET_FIELD(inode
, i_size
, *pos
+ written
);
2288 file_update_time(file
);
2289 result
= reiser4_update_sd(inode
);
2291 mutex_unlock(&inode
->i_mutex
);
2292 current
->backing_dev_info
= NULL
;
2293 drop_access(uf_info
);
2294 context_set_commit_async(ctx
);
2295 reiser4_exit_context(ctx
);
2298 drop_access(uf_info
);
2299 ea
= NEITHER_OBTAINED
;
2300 reiser4_txn_restart(ctx
);
2301 current
->journal_info
= NULL
;
2303 * tell VM how many pages were dirtied. Maybe number of pages
2304 * which were dirty already should not be counted
2306 balance_dirty_pages_ratelimited_nr(inode
->i_mapping
,
2307 (written
+ PAGE_CACHE_SIZE
- 1) / PAGE_CACHE_SIZE
);
2308 current
->journal_info
= ctx
;
2315 mutex_unlock(&inode
->i_mutex
);
2317 if (result
== 0 && ((file
->f_flags
& O_SYNC
) || IS_SYNC(inode
))) {
2318 reiser4_txn_restart_current();
2319 grab_space_enable();
2320 result
= sync_unix_file(file
, file
->f_dentry
,
2321 0 /* data and stat data */ );
2323 warning("reiser4-7", "failed to sync file %llu",
2324 (unsigned long long)get_inode_oid(inode
));
2327 current
->backing_dev_info
= NULL
;
2329 reiser4_exit_context(ctx
);
2332 * return number of written bytes or error code if nothing is
2333 * written. Note, that it does not work correctly in case when
2334 * sync_unix_file returns error
2336 return (count
- left
) ? (count
- left
) : result
;
2340 * release_unix_file - release of struct file_operations
2341 * @inode: inode of released file
2342 * @file: file to release
2344 * Implementation of release method of struct file_operations for unix file
2345 * plugin. If last reference to inode is released - convert all extent items
2346 * into tail items if necessary. Frees reiser4 specific file data.
2348 int release_unix_file(struct inode
*inode
, struct file
*file
)
2350 reiser4_context
*ctx
;
2351 struct unix_file_info
*uf_info
;
2355 in_reiser4
= is_in_reiser4_context();
2357 ctx
= reiser4_init_context(inode
->i_sb
);
2359 return PTR_ERR(ctx
);
2362 if (in_reiser4
== 0) {
2363 uf_info
= unix_file_inode_data(inode
);
2365 get_exclusive_access_careful(uf_info
, inode
);
2366 if (atomic_read(&file
->f_dentry
->d_count
) == 1 &&
2367 uf_info
->container
== UF_CONTAINER_EXTENTS
&&
2368 !should_have_notail(uf_info
, inode
->i_size
) &&
2369 !rofs_inode(inode
)) {
2370 result
= extent2tail(file
, uf_info
);
2372 warning("nikita-3233",
2373 "Failed (%d) to convert in %s (%llu)",
2374 result
, __FUNCTION__
,
2375 (unsigned long long)
2376 get_inode_oid(inode
));
2379 drop_exclusive_access(uf_info
);
2382 we are within reiser4 context already. How latter is
2386 #0 get_exclusive_access ()
2387 #2 0xc01e56d3 in release_unix_file ()
2388 #3 0xc01c3643 in reiser4_release ()
2389 #4 0xc014cae0 in __fput ()
2390 #5 0xc013ffc3 in remove_vm_struct ()
2391 #6 0xc0141786 in exit_mmap ()
2392 #7 0xc0118480 in mmput ()
2393 #8 0xc0133205 in oom_kill ()
2394 #9 0xc01332d1 in out_of_memory ()
2395 #10 0xc013bc1d in try_to_free_pages ()
2396 #11 0xc013427b in __alloc_pages ()
2397 #12 0xc013f058 in do_anonymous_page ()
2398 #13 0xc013f19d in do_no_page ()
2399 #14 0xc013f60e in handle_mm_fault ()
2400 #15 0xc01131e5 in do_page_fault ()
2401 #16 0xc0104935 in error_code ()
2402 #17 0xc025c0c6 in __copy_to_user_ll ()
2403 #18 0xc01d496f in reiser4_read_tail ()
2404 #19 0xc01e4def in read_unix_file ()
2405 #20 0xc01c3504 in reiser4_read ()
2406 #21 0xc014bd4f in vfs_read ()
2407 #22 0xc014bf66 in sys_read ()
2409 warning("vs-44", "out of memory?");
2412 reiser4_free_file_fsdata(file
);
2414 reiser4_exit_context(ctx
);
2418 static void set_file_notail(struct inode
*inode
)
2420 reiser4_inode
*state
;
2421 formatting_plugin
*tplug
;
2423 state
= reiser4_inode_data(inode
);
2424 tplug
= formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID
);
2425 force_plugin_pset(inode
, PSET_FORMATTING
, (reiser4_plugin
*)tplug
);
2428 /* if file is built of tails - convert it to extents */
2429 static int unpack(struct file
*filp
, struct inode
*inode
, int forever
)
2432 struct unix_file_info
*uf_info
;
2434 uf_info
= unix_file_inode_data(inode
);
2435 assert("vs-1628", ea_obtained(uf_info
));
2437 result
= find_file_state(inode
, uf_info
);
2440 assert("vs-1074", uf_info
->container
!= UF_CONTAINER_UNKNOWN
);
2442 if (uf_info
->container
== UF_CONTAINER_TAILS
) {
2444 * if file is being converted by another process - wait until it
2448 if (reiser4_inode_get_flag(inode
,
2449 REISER4_PART_IN_CONV
)) {
2450 drop_exclusive_access(uf_info
);
2452 get_exclusive_access(uf_info
);
2457 if (uf_info
->container
== UF_CONTAINER_TAILS
) {
2458 result
= tail2extent(uf_info
);
2464 /* save new formatting plugin in stat data */
2467 set_file_notail(inode
);
2469 grab_space_enable();
2470 tograb
= inode_file_plugin(inode
)->estimate
.update(inode
);
2471 result
= reiser4_grab_space(tograb
, BA_CAN_COMMIT
);
2472 result
= reiser4_update_sd(inode
);
2478 /* implentation of vfs' ioctl method of struct file_operations for unix file
2482 ioctl_unix_file(struct inode
*inode
, struct file
*filp
,
2483 unsigned int cmd
, unsigned long arg UNUSED_ARG
)
2485 reiser4_context
*ctx
;
2488 ctx
= reiser4_init_context(inode
->i_sb
);
2490 return PTR_ERR(ctx
);
2493 case REISER4_IOC_UNPACK
:
2494 get_exclusive_access(unix_file_inode_data(inode
));
2495 result
= unpack(filp
, inode
, 1 /* forever */ );
2496 drop_exclusive_access(unix_file_inode_data(inode
));
2500 result
= RETERR(-ENOSYS
);
2503 reiser4_exit_context(ctx
);
2507 /* implementation of vfs' bmap method of struct address_space_operations for unix
2510 sector_t
bmap_unix_file(struct address_space
* mapping
, sector_t lblock
)
2512 reiser4_context
*ctx
;
2517 struct inode
*inode
;
2521 inode
= mapping
->host
;
2523 ctx
= reiser4_init_context(inode
->i_sb
);
2525 return PTR_ERR(ctx
);
2526 key_by_inode_and_offset_common(inode
,
2527 (loff_t
) lblock
* current_blocksize
,
2532 find_file_item_nohint(&coord
, &lh
, &key
, ZNODE_READ_LOCK
, inode
);
2533 if (cbk_errored(result
)) {
2535 reiser4_exit_context(ctx
);
2539 result
= zload(coord
.node
);
2542 reiser4_exit_context(ctx
);
2546 iplug
= item_plugin_by_coord(&coord
);
2547 if (iplug
->s
.file
.get_block
) {
2548 result
= iplug
->s
.file
.get_block(&coord
, lblock
, &block
);
2552 result
= RETERR(-EINVAL
);
2556 reiser4_exit_context(ctx
);
2561 * flow_by_inode_unix_file - initialize structure flow
2562 * @inode: inode of file for which read or write is about to be performed
2563 * @buf: buffer to perform read to or write from
2564 * @user: flag showing whether @buf is user space or kernel space
2565 * @size: size of buffer @buf
2566 * @off: start offset for read or write
2567 * @op: READ or WRITE
2570 * Initializes fields of @flow: key, size of data, i/o mode (read or write).
2572 int flow_by_inode_unix_file(struct inode
*inode
,
2573 const char __user
*buf
, int user
,
2574 loff_t size
, loff_t off
,
2575 rw_op op
, flow_t
*flow
)
2577 assert("nikita-1100", inode
!= NULL
);
2579 flow
->length
= size
;
2580 memcpy(&flow
->data
, &buf
, sizeof(buf
));
2583 assert("nikita-1931", inode_file_plugin(inode
) != NULL
);
2584 assert("nikita-1932",
2585 inode_file_plugin(inode
)->key_by_inode
==
2586 key_by_inode_and_offset_common
);
2587 /* calculate key of write position and insert it into flow->key */
2588 return key_by_inode_and_offset_common(inode
, off
, &flow
->key
);
2591 /* plugin->u.file.set_plug_in_sd = NULL
2592 plugin->u.file.set_plug_in_inode = NULL
2593 plugin->u.file.create_blank_sd = NULL */
2594 /* plugin->u.file.delete */
2596 plugin->u.file.add_link = reiser4_add_link_common
2597 plugin->u.file.rem_link = NULL */
2599 /* plugin->u.file.owns_item
2600 this is common_file_owns_item with assertion */
2601 /* Audited by: green(2002.06.15) */
2603 owns_item_unix_file(const struct inode
*inode
/* object to check against */ ,
2604 const coord_t
* coord
/* coord to check */ )
2608 result
= owns_item_common(inode
, coord
);
2611 if (!plugin_of_group(item_plugin_by_coord(coord
),
2612 UNIX_FILE_METADATA_ITEM_TYPE
))
2615 item_id_by_coord(coord
) == EXTENT_POINTER_ID
||
2616 item_id_by_coord(coord
) == FORMATTING_ID
);
2620 static int setattr_truncate(struct inode
*inode
, struct iattr
*attr
)
2627 inode_check_scale(inode
, inode
->i_size
, attr
->ia_size
);
2629 old_size
= inode
->i_size
;
2630 tree
= reiser4_tree_by_inode(inode
);
2632 result
= safe_link_grab(tree
, BA_CAN_COMMIT
);
2634 result
= safe_link_add(inode
, SAFE_TRUNCATE
);
2636 result
= truncate_file_body(inode
, attr
);
2638 warning("vs-1588", "truncate_file failed: oid %lli, "
2639 "old size %lld, new size %lld, retval %d",
2640 (unsigned long long)get_inode_oid(inode
),
2641 old_size
, attr
->ia_size
, result
);
2643 s_result
= safe_link_grab(tree
, BA_CAN_COMMIT
);
2646 safe_link_del(tree
, get_inode_oid(inode
), SAFE_TRUNCATE
);
2647 if (s_result
!= 0) {
2648 warning("nikita-3417", "Cannot kill safelink %lli: %i",
2649 (unsigned long long)get_inode_oid(inode
), s_result
);
2651 safe_link_release(tree
);
2655 /* plugin->u.file.setattr method */
2656 /* This calls inode_setattr and if truncate is in effect it also takes
2657 exclusive inode access to avoid races */
2658 int setattr_unix_file(struct dentry
*dentry
, /* Object to change attributes */
2659 struct iattr
*attr
/* change description */ )
2663 if (attr
->ia_valid
& ATTR_SIZE
) {
2664 reiser4_context
*ctx
;
2665 struct unix_file_info
*uf_info
;
2667 /* truncate does reservation itself and requires exclusive
2669 ctx
= reiser4_init_context(dentry
->d_inode
->i_sb
);
2671 return PTR_ERR(ctx
);
2673 uf_info
= unix_file_inode_data(dentry
->d_inode
);
2674 get_exclusive_access_careful(uf_info
, dentry
->d_inode
);
2675 result
= setattr_truncate(dentry
->d_inode
, attr
);
2676 drop_exclusive_access(uf_info
);
2677 context_set_commit_async(ctx
);
2678 reiser4_exit_context(ctx
);
2680 result
= reiser4_setattr_common(dentry
, attr
);
2685 /* plugin->u.file.init_inode_data */
2687 init_inode_data_unix_file(struct inode
*inode
,
2688 reiser4_object_create_data
* crd
, int create
)
2690 struct unix_file_info
*data
;
2692 data
= unix_file_inode_data(inode
);
2693 data
->container
= create
? UF_CONTAINER_EMPTY
: UF_CONTAINER_UNKNOWN
;
2694 init_rwsem(&data
->latch
);
2695 data
->tplug
= inode_formatting_plugin(inode
);
2696 data
->exclusive_use
= 0;
2699 data
->ea_owner
= NULL
;
2700 atomic_set(&data
->nr_neas
, 0);
2702 init_inode_ordering(inode
, crd
, create
);
2706 * delete_object_unix_file - delete_object of file_plugin
2707 * @inode: inode to be deleted
2709 * Truncates file to length 0, removes stat data and safe link.
2711 int delete_object_unix_file(struct inode
*inode
)
2713 struct unix_file_info
*uf_info
;
2716 if (reiser4_inode_get_flag(inode
, REISER4_NO_SD
))
2719 /* truncate file bogy first */
2720 uf_info
= unix_file_inode_data(inode
);
2721 get_exclusive_access(uf_info
);
2722 result
= shorten_file(inode
, 0 /* size */ );
2723 drop_exclusive_access(uf_info
);
2726 warning("", "failed to truncate file (%llu) on removal: %d",
2727 get_inode_oid(inode
), result
);
2729 /* remove stat data and safe link */
2730 return reiser4_delete_object_common(inode
);
2734 prepare_write_unix_file(struct file
*file
, struct page
*page
,
2735 unsigned from
, unsigned to
)
2737 reiser4_context
*ctx
;
2738 struct unix_file_info
*uf_info
;
2741 ctx
= reiser4_init_context(file
->f_dentry
->d_inode
->i_sb
);
2743 return PTR_ERR(ctx
);
2745 uf_info
= unix_file_inode_data(file
->f_dentry
->d_inode
);
2746 get_exclusive_access(uf_info
);
2747 ret
= find_file_state(file
->f_dentry
->d_inode
, uf_info
);
2749 if (uf_info
->container
== UF_CONTAINER_TAILS
)
2752 ret
= do_prepare_write(file
, page
, from
, to
);
2754 drop_exclusive_access(uf_info
);
2756 /* don't commit transaction under inode semaphore */
2757 context_set_commit_async(ctx
);
2758 reiser4_exit_context(ctx
);
2764 * c-indentation-style: "K&R"