On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / safe_link.c
blobd59f6f0f129e70d7485790c876a7d318d11f1328
1 /* Copyright 2003, 2004 by Hans Reiser, licensing governed by
2 * reiser4/README */
4 /* Safe-links. */
/*
 * Safe-links are used to maintain file system consistency during operations
 * that span multiple transactions. For example:
 *
 *     1. Unlink. UNIX supports "open-but-unlinked" files, that is, files
 *     without user-visible names in the file system, but still opened by some
 *     active process. What happens here is that unlink proper (i.e., removal
 *     of the last file name) and file deletion (truncate of file body to zero
 *     and deletion of stat-data, that happens when the last file descriptor is
 *     closed), may belong to different transactions T1 and T2. If a crash
 *     happens after T1 commit, but before T2 commit, the on-disk file system
 *     has a file without a name, that is, a disk space leak.
 *
 *     2. Truncate. Truncate of a large file may span multiple transactions. If
 *     the system crashes while truncate was in progress, the file is left
 *     partially truncated, which violates the "atomicity guarantees" of
 *     reiser4, viz. that every system call is atomic.
 *
 * Safe-links address both of the above cases. Basically, a safe-link is a way
 * to post some operation to be executed during commit of some transaction
 * other than the current one. (Another way to look at the safe-link is to
 * interpret it as logical logging.)
 *
 * Specifically, at the beginning of unlink a safe-link is inserted in the
 * tree. This safe-link is normally removed by the file deletion code (during
 * transaction T2 in the above terms). Truncate also inserts a safe-link that
 * is normally removed when the truncate operation is finished.
 *
 * This means that in the case of a "clean umount" there are no safe-links in
 * the tree. If safe-links are observed during mount, it means that (a) the
 * system was terminated abnormally, and (b) the safe-links correspond to the
 * "pending" (i.e., not finished) operations that were in progress during
 * system termination. Each safe-link records enough information to complete
 * the corresponding operation, and mount simply "replays" them (hence the
 * analogy with logical logging).
 *
 * Safe-links are implemented as blackbox items (see
 * plugin/item/blackbox.[ch]).
 *
 * For reference: ext3 also has a similar mechanism; it is called "an orphan
 * list" there.
 */
49 #include "safe_link.h"
50 #include "debug.h"
51 #include "inode.h"
53 #include "plugin/item/blackbox.h"
55 #include <linux/fs.h>
58 * On-disk format of safe-link.
60 typedef struct safelink {
61 reiser4_key sdkey; /* key of stat-data for the file safe-link is
62 * for */
63 d64 size; /* size to which file should be truncated */
64 } safelink_t;
67 * locality where safe-link items are stored. Next to the objectid of root
68 * directory.
70 static oid_t safe_link_locality(reiser4_tree * tree)
72 return get_key_objectid(get_super_private(tree->super)->df_plug->
73 root_dir_key(tree->super)) + 1;
77 Construct a key for the safe-link. Key has the following format:
79 | 60 | 4 | 64 | 4 | 60 | 64 |
80 +---------------+---+------------------+---+---------------+------------------+
81 | locality | 0 | 0 | 0 | objectid | link type |
82 +---------------+---+------------------+---+---------------+------------------+
83 | | | | |
84 | 8 bytes | 8 bytes | 8 bytes | 8 bytes |
86 This is in large keys format. In small keys format second 8 byte chunk is
87 out. Locality is a constant returned by safe_link_locality(). objectid is
88 an oid of a file on which operation protected by this safe-link is
89 performed. link-type is used to distinguish safe-links for different
90 operations.
93 static reiser4_key *build_link_key(reiser4_tree * tree, oid_t oid,
94 reiser4_safe_link_t link, reiser4_key * key)
96 reiser4_key_init(key);
97 set_key_locality(key, safe_link_locality(tree));
98 set_key_objectid(key, oid);
99 set_key_offset(key, link);
100 return key;
104 * how much disk space is necessary to insert and remove (in the
105 * error-handling path) safe-link.
107 static __u64 safe_link_tograb(reiser4_tree * tree)
109 return
110 /* insert safe link */
111 estimate_one_insert_item(tree) +
112 /* remove safe link */
113 estimate_one_item_removal(tree) +
114 /* drill to the leaf level during insertion */
115 1 + estimate_one_insert_item(tree) +
117 * possible update of existing safe-link. Actually, if
118 * safe-link existed already (we failed to remove it), then no
119 * insertion is necessary, so this term is already "covered",
120 * but for simplicity let's left it.
126 * grab enough disk space to insert and remove (in the error-handling path)
127 * safe-link.
129 int safe_link_grab(reiser4_tree * tree, reiser4_ba_flags_t flags)
131 int result;
133 grab_space_enable();
134 /* The sbinfo->delete_mutex can be taken here.
135 * safe_link_release() should be called before leaving reiser4
136 * context. */
137 result =
138 reiser4_grab_reserved(tree->super, safe_link_tograb(tree), flags);
139 grab_space_enable();
140 return result;
144 * release unused disk space reserved by safe_link_grab().
146 void safe_link_release(reiser4_tree * tree)
148 reiser4_release_reserved(tree->super);
152 * insert into tree safe-link for operation @link on inode @inode.
154 int safe_link_add(struct inode *inode, reiser4_safe_link_t link)
156 reiser4_key key;
157 safelink_t sl;
158 int length;
159 int result;
160 reiser4_tree *tree;
162 build_sd_key(inode, &sl.sdkey);
163 length = sizeof sl.sdkey;
165 if (link == SAFE_TRUNCATE) {
167 * for truncate we have to store final file length also,
168 * expand item.
170 length += sizeof(sl.size);
171 put_unaligned(cpu_to_le64(inode->i_size), &sl.size);
173 tree = reiser4_tree_by_inode(inode);
174 build_link_key(tree, get_inode_oid(inode), link, &key);
176 result = store_black_box(tree, &key, &sl, length);
177 if (result == -EEXIST)
178 result = update_black_box(tree, &key, &sl, length);
179 return result;
183 * remove safe-link corresponding to the operation @link on inode @inode from
184 * the tree.
186 int safe_link_del(reiser4_tree * tree, oid_t oid, reiser4_safe_link_t link)
188 reiser4_key key;
190 return kill_black_box(tree, build_link_key(tree, oid, link, &key));
194 * in-memory structure to keep information extracted from safe-link. This is
195 * used to iterate over all safe-links.
197 struct safe_link_context {
198 reiser4_tree *tree; /* internal tree */
199 reiser4_key key; /* safe-link key */
200 reiser4_key sdkey; /* key of object stat-data */
201 reiser4_safe_link_t link; /* safe-link type */
202 oid_t oid; /* object oid */
203 __u64 size; /* final size for truncate */
207 * start iterating over all safe-links.
209 static void safe_link_iter_begin(reiser4_tree * tree,
210 struct safe_link_context *ctx)
212 ctx->tree = tree;
213 reiser4_key_init(&ctx->key);
214 set_key_locality(&ctx->key, safe_link_locality(tree));
215 set_key_objectid(&ctx->key, get_key_objectid(reiser4_max_key()));
216 set_key_offset(&ctx->key, get_key_offset(reiser4_max_key()));
220 * return next safe-link.
222 static int safe_link_iter_next(struct safe_link_context *ctx)
224 int result;
225 safelink_t sl;
227 result = load_black_box(ctx->tree, &ctx->key, &sl, sizeof sl, 0);
228 if (result == 0) {
229 ctx->oid = get_key_objectid(&ctx->key);
230 ctx->link = get_key_offset(&ctx->key);
231 ctx->sdkey = sl.sdkey;
232 if (ctx->link == SAFE_TRUNCATE)
233 ctx->size = le64_to_cpu(get_unaligned(&sl.size));
235 return result;
239 * check are there any more safe-links left in the tree.
241 static int safe_link_iter_finished(struct safe_link_context *ctx)
243 return get_key_locality(&ctx->key) != safe_link_locality(ctx->tree);
/*
 * Finish safe-link iteration. The iteration context owns no resources, so
 * there is nothing to do.
 */
static void safe_link_iter_end(struct safe_link_context *ctx)
{
	/* nothing to release */
}
255 * process single safe-link.
257 static int process_safelink(struct super_block *super, reiser4_safe_link_t link,
258 reiser4_key * sdkey, oid_t oid, __u64 size)
260 struct inode *inode;
261 int result;
264 * obtain object inode by reiser4_iget(), then call object plugin
265 * ->safelink() method to do actual work, then delete safe-link on
266 * success.
268 inode = reiser4_iget(super, sdkey, 1);
269 if (!IS_ERR(inode)) {
270 file_plugin *fplug;
272 fplug = inode_file_plugin(inode);
273 assert("nikita-3428", fplug != NULL);
274 assert("", oid == get_inode_oid(inode));
275 if (fplug->safelink != NULL) {
276 /* reiser4_txn_restart_current is not necessary because
277 * mounting is signle thread. However, without it
278 * deadlock detection code will complain (see
279 * nikita-3361). */
280 reiser4_txn_restart_current();
281 result = fplug->safelink(inode, link, size);
282 } else {
283 warning("nikita-3430",
284 "Cannot handle safelink for %lli",
285 (unsigned long long)oid);
286 reiser4_print_key("key", sdkey);
287 result = 0;
289 if (result != 0) {
290 warning("nikita-3431",
291 "Error processing safelink for %lli: %i",
292 (unsigned long long)oid, result);
294 reiser4_iget_complete(inode);
295 iput(inode);
296 if (result == 0) {
297 result = safe_link_grab(reiser4_get_tree(super),
298 BA_CAN_COMMIT);
299 if (result == 0)
300 result =
301 safe_link_del(reiser4_get_tree(super), oid,
302 link);
303 safe_link_release(reiser4_get_tree(super));
305 * restart transaction: if there was large number of
306 * safe-links, their processing may fail to fit into
307 * single transaction.
309 if (result == 0)
310 reiser4_txn_restart_current();
312 } else
313 result = PTR_ERR(inode);
314 return result;
318 * iterate over all safe-links in the file-system processing them one by one.
320 int process_safelinks(struct super_block *super)
322 struct safe_link_context ctx;
323 int result;
325 if (rofs_super(super))
326 /* do nothing on the read-only file system */
327 return 0;
328 safe_link_iter_begin(&get_super_private(super)->tree, &ctx);
329 result = 0;
330 do {
331 result = safe_link_iter_next(&ctx);
332 if (safe_link_iter_finished(&ctx) || result == -ENOENT) {
333 result = 0;
334 break;
336 if (result == 0)
337 result = process_safelink(super, ctx.link,
338 &ctx.sdkey, ctx.oid,
339 ctx.size);
340 } while (result == 0);
341 safe_link_iter_end(&ctx);
342 return result;
345 /* Make Linus happy.
346 Local variables:
347 c-indentation-style: "K&R"
348 mode-name: "LC"
349 c-basic-offset: 8
350 tab-width: 8
351 fill-column: 120
352 scroll-step: 1
353 End: