1 /* Copyright 2003, 2004 by Hans Reiser, licensing governed by
7 * Safe-links are used to maintain file system consistency during operations
8 * that spawn multiple transactions. For example:
10 * 1. Unlink. UNIX supports "open-but-unlinked" files, that is files
11 * without user-visible names in the file system, but still opened by some
12 * active process. What happens here is that unlink proper (i.e., removal
13 * of the last file name) and file deletion (truncate of file body to zero
14 * and deletion of stat-data, that happens when last file descriptor is
15 * closed), may belong to different transactions T1 and T2. If a crash
16 * happens after T1 commit, but before T2 commit, on-disk file system has
17 * a file without name, that is, disk space leak.
19 * 2. Truncate. Truncate of a large file may spawn multiple transactions. If
20 * the system crashes while truncate is in progress, the file is left partially
21 * truncated, which violates the "atomicity guarantees" of reiser4, viz. that
22 * every system call is atomic.
24 * Safe-links address both of the above cases. Basically, a safe-link is a way
25 * to post some operation to be executed during commit of some transaction
26 * other than the current one. (Another way to look at the safe-link is to
27 * interpret it as a form of logical logging.)
29 * Specifically, at the beginning of unlink a safe-link is inserted in the
30 * tree. This safe-link is normally removed by file deletion code (during
31 * transaction T2 in the above terms). Truncate also inserts safe-link that is
32 * normally removed when truncate operation is finished.
34 * This means, that in the case of "clean umount" there are no safe-links in
35 * the tree. If safe-links are observed during mount, it means that (a) system
36 * was terminated abnormally, and (b) the safe-links correspond to the "pending"
37 * (i.e., not finished) operations that were in progress during system
38 * termination. Each safe-link records enough information to complete the
39 * corresponding operation, and mount simply "replays" them (hence, the
40 * analogy with the logical logging).
42 * Safe-links are implemented as blackbox items (see
43 * plugin/item/blackbox.[ch]).
45 * For reference: ext3 also has a similar mechanism; it is called "an orphan
46 * list" there.
49 #include "safe_link.h"
53 #include "plugin/item/blackbox.h"
58 * On-disk format of safe-link.
60 typedef struct safelink
{
61 reiser4_key sdkey
; /* key of stat-data for the file safe-link is
63 d64 size
; /* size to which file should be truncated */
67 * locality where safe-link items are stored. Next to the objectid of root
70 static oid_t
safe_link_locality(reiser4_tree
* tree
)
72 return get_key_objectid(get_super_private(tree
->super
)->df_plug
->
73 root_dir_key(tree
->super
)) + 1;
77 Construct a key for the safe-link. Key has the following format:
79 | 60 | 4 | 64 | 4 | 60 | 64 |
80 +---------------+---+------------------+---+---------------+------------------+
81 | locality | 0 | 0 | 0 | objectid | link type |
82 +---------------+---+------------------+---+---------------+------------------+
84 | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
86 This is in large keys format. In small keys format second 8 byte chunk is
87 out. Locality is a constant returned by safe_link_locality(). objectid is
88 an oid of a file on which operation protected by this safe-link is
89 performed. link-type is used to distinguish safe-links for different
93 static reiser4_key
*build_link_key(reiser4_tree
* tree
, oid_t oid
,
94 reiser4_safe_link_t link
, reiser4_key
* key
)
96 reiser4_key_init(key
);
97 set_key_locality(key
, safe_link_locality(tree
));
98 set_key_objectid(key
, oid
);
99 set_key_offset(key
, link
);
104 * how much disk space is necessary to insert and remove (in the
105 * error-handling path) safe-link.
107 static __u64
safe_link_tograb(reiser4_tree
* tree
)
110 /* insert safe link */
111 estimate_one_insert_item(tree
) +
112 /* remove safe link */
113 estimate_one_item_removal(tree
) +
114 /* drill to the leaf level during insertion */
115 1 + estimate_one_insert_item(tree
) +
117 * possible update of existing safe-link. Actually, if
118 * safe-link existed already (we failed to remove it), then no
119 * insertion is necessary, so this term is already "covered",
120 * but for simplicity let's left it.
126 * grab enough disk space to insert and remove (in the error-handling path)
129 int safe_link_grab(reiser4_tree
* tree
, reiser4_ba_flags_t flags
)
134 /* The sbinfo->delete_mutex can be taken here.
135 * safe_link_release() should be called before leaving reiser4
138 reiser4_grab_reserved(tree
->super
, safe_link_tograb(tree
), flags
);
144 * release unused disk space reserved by safe_link_grab().
146 void safe_link_release(reiser4_tree
* tree
)
148 reiser4_release_reserved(tree
->super
);
152 * insert into tree safe-link for operation @link on inode @inode.
154 int safe_link_add(struct inode
*inode
, reiser4_safe_link_t link
)
162 build_sd_key(inode
, &sl
.sdkey
);
163 length
= sizeof sl
.sdkey
;
165 if (link
== SAFE_TRUNCATE
) {
167 * for truncate we have to store final file length also,
170 length
+= sizeof(sl
.size
);
171 put_unaligned(cpu_to_le64(inode
->i_size
), &sl
.size
);
173 tree
= reiser4_tree_by_inode(inode
);
174 build_link_key(tree
, get_inode_oid(inode
), link
, &key
);
176 result
= store_black_box(tree
, &key
, &sl
, length
);
177 if (result
== -EEXIST
)
178 result
= update_black_box(tree
, &key
, &sl
, length
);
183 * remove safe-link corresponding to the operation @link on inode @inode from
186 int safe_link_del(reiser4_tree
* tree
, oid_t oid
, reiser4_safe_link_t link
)
190 return kill_black_box(tree
, build_link_key(tree
, oid
, link
, &key
));
194 * in-memory structure to keep information extracted from safe-link. This is
195 * used to iterate over all safe-links.
197 struct safe_link_context
{
198 reiser4_tree
*tree
; /* internal tree */
199 reiser4_key key
; /* safe-link key */
200 reiser4_key sdkey
; /* key of object stat-data */
201 reiser4_safe_link_t link
; /* safe-link type */
202 oid_t oid
; /* object oid */
203 __u64 size
; /* final size for truncate */
207 * start iterating over all safe-links.
209 static void safe_link_iter_begin(reiser4_tree
* tree
,
210 struct safe_link_context
* ctx
)
213 reiser4_key_init(&ctx
->key
);
214 set_key_locality(&ctx
->key
, safe_link_locality(tree
));
215 set_key_objectid(&ctx
->key
, get_key_objectid(reiser4_max_key()));
216 set_key_offset(&ctx
->key
, get_key_offset(reiser4_max_key()));
220 * return next safe-link.
222 static int safe_link_iter_next(struct safe_link_context
* ctx
)
227 result
= load_black_box(ctx
->tree
, &ctx
->key
, &sl
, sizeof sl
, 0);
229 ctx
->oid
= get_key_objectid(&ctx
->key
);
230 ctx
->link
= get_key_offset(&ctx
->key
);
231 ctx
->sdkey
= sl
.sdkey
;
232 if (ctx
->link
== SAFE_TRUNCATE
)
233 ctx
->size
= le64_to_cpu(get_unaligned(&sl
.size
));
239 * check are there any more safe-links left in the tree.
241 static int safe_link_iter_finished(struct safe_link_context
* ctx
)
243 return get_key_locality(&ctx
->key
) != safe_link_locality(ctx
->tree
);
/*
 * Finish safe-link iteration.
 */
static void safe_link_iter_end(struct safe_link_context *ctx)
{
	/* nothing special */
}
255 * process single safe-link.
257 static int process_safelink(struct super_block
*super
, reiser4_safe_link_t link
,
258 reiser4_key
* sdkey
, oid_t oid
, __u64 size
)
264 * obtain object inode by reiser4_iget(), then call object plugin
265 * ->safelink() method to do actual work, then delete safe-link on
268 inode
= reiser4_iget(super
, sdkey
, 1);
269 if (!IS_ERR(inode
)) {
272 fplug
= inode_file_plugin(inode
);
273 assert("nikita-3428", fplug
!= NULL
);
274 assert("", oid
== get_inode_oid(inode
));
275 if (fplug
->safelink
!= NULL
) {
276 /* reiser4_txn_restart_current is not necessary because
277 * mounting is signle thread. However, without it
278 * deadlock detection code will complain (see
280 reiser4_txn_restart_current();
281 result
= fplug
->safelink(inode
, link
, size
);
283 warning("nikita-3430",
284 "Cannot handle safelink for %lli",
285 (unsigned long long)oid
);
286 reiser4_print_key("key", sdkey
);
290 warning("nikita-3431",
291 "Error processing safelink for %lli: %i",
292 (unsigned long long)oid
, result
);
294 reiser4_iget_complete(inode
);
297 result
= safe_link_grab(reiser4_get_tree(super
), BA_CAN_COMMIT
);
300 safe_link_del(reiser4_get_tree(super
), oid
, link
);
301 safe_link_release(reiser4_get_tree(super
));
303 * restart transaction: if there was large number of
304 * safe-links, their processing may fail to fit into
305 * single transaction.
308 reiser4_txn_restart_current();
311 result
= PTR_ERR(inode
);
316 * iterate over all safe-links in the file-system processing them one by one.
318 int process_safelinks(struct super_block
*super
)
320 struct safe_link_context ctx
;
323 if (rofs_super(super
))
324 /* do nothing on the read-only file system */
326 safe_link_iter_begin(&get_super_private(super
)->tree
, &ctx
);
329 result
= safe_link_iter_next(&ctx
);
330 if (safe_link_iter_finished(&ctx
) || result
== -ENOENT
) {
335 result
= process_safelink(super
, ctx
.link
,
338 } while (result
== 0);
339 safe_link_iter_end(&ctx
);
345 c-indentation-style: "K&R"