On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / reiser4 / plugin / file / file_conversion.c
blob0c26ffe0906e61f1bdff1183050789bfcceca1db
/* Copyright 2001, 2002, 2003 by Hans Reiser,
   licensing governed by reiser4/README */

/**
 * This file contains dispatching hooks and conversion methods which
 * implement transitions in the FILE interface.
 *
 * A dispatching hook makes a decision (at a dispatching point) about the
 * most reasonable plugin. Such a decision is made in accordance with some
 * O(1)-heuristic.
 *
 * We implement a transition CRYPTCOMPRESS -> UNIX_FILE for files with
 * incompressible data. The current heuristic to estimate compressibility is
 * very simple: if the first complete logical cluster (64K by default) of a
 * file is incompressible, then we decide that the whole file is
 * incompressible.
 *
 * To enable dispatching we install a special "magic" compression mode
 * plugin CONVX_COMPRESSION_MODE_ID at file creation time.
 *
 * Note that we don't perform back conversion (UNIX_FILE -> CRYPTCOMPRESS)
 * because of compatibility reasons.
 *
 * During conversion we protect CS, the conversion set (file's (meta)data
 * and plugin table (pset)), via a special per-inode rw-semaphore (conv_sem).
 * The methods which implement conversion are CS writers. The methods of the
 * FS interface (file_operations, inode_operations, address_space_operations)
 * are CS readers.
 */
31 #include "../../inode.h"
32 #include "../cluster.h"
33 #include "file.h"
/*
 * Non-zero when plugin conversion is still enabled for @inode, i.e. when
 * the inode's compression mode plugin is the one identified by
 * CONVX_COMPRESSION_MODE_ID.
 */
#define conversion_enabled(inode)					\
	(compression_mode_plugin_by_id(CONVX_COMPRESSION_MODE_ID) ==	\
	 inode_compression_mode_plugin(inode))
/**
 * Sections that read or write @pset are not permanently critical: a
 * cryptcompress file can be converted only while conversion is enabled
 * (see the conversion_enabled() macro), and back conversion is never
 * performed. This helper is a sanity check that tells whether protection
 * is needed at all (locks are always additional overhead).
 *
 * Non-zero when @inode is managed by the cryptcompress file plugin and
 * conversion is still enabled for it.
 */
#define should_protect(inode)						\
	(file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID) ==		\
	 inode_file_plugin(inode) &&					\
	 conversion_enabled(inode))
/**
 * To avoid confusion with read/write file operations, we'll speak about
 * "passive" protection for FCS readers and "active" protection for FCS
 * writers. All methods with active or passive protection have the suffix
 * "careful".
 */
/**
 * Macros for passive protection.
 *
 * Construct an invariant operation to be supplied to VFS.
 * The macro accepts the following lexemes:
 * @type - type of the value represented by the compound statement;
 * @method - name of an operation to be supplied to VFS (the reiser4 file
 *           plugin should also contain a method with such a name);
 * @args - parenthesized argument list handed to @method.
 *
 * A variable named "inode" must be in scope where the macro is expanded.
 *
 * conv_sem is taken for reading only when the inode still needs protection
 * once the semaphore is held. Whether it is held is remembered in a local
 * flag, so the release always pairs with the acquisition even if
 * should_protect() changes its answer while @method is running (re-checking
 * the predicate afterwards could skip the up_read() and leak the semaphore).
 */
#define PROT_PASSIVE(type, method, args)				\
({									\
	type _result;							\
	int _locked = 0;						\
	struct rw_semaphore *guard =					\
		&reiser4_inode_data(inode)->conv_sem;			\
									\
	if (should_protect(inode)) {					\
		down_read(guard);					\
		if (should_protect(inode))				\
			_locked = 1;					\
		else							\
			up_read(guard);					\
	}								\
	_result = inode_file_plugin(inode)->method args;		\
	if (_locked)							\
		up_read(guard);						\
	_result;							\
})
/*
 * Passive protection for operations that return nothing.
 *
 * @method - name of the file plugin operation to call;
 * @args - parenthesized argument list handed to @method.
 *
 * A variable named "inode" must be in scope where the macro is expanded.
 *
 * conv_sem is taken for reading only when the inode still needs protection
 * once the semaphore is held. A local flag records whether it is held, so
 * the release always pairs with the acquisition even if should_protect()
 * changes its answer while @method is running.
 */
#define PROT_PASSIVE_VOID(method, args)					\
({									\
	int _locked = 0;						\
	struct rw_semaphore *guard =					\
		&reiser4_inode_data(inode)->conv_sem;			\
									\
	if (should_protect(inode)) {					\
		down_read(guard);					\
		if (should_protect(inode))				\
			_locked = 1;					\
		else							\
			up_read(guard);					\
	}								\
	inode_file_plugin(inode)->method args;				\
									\
	if (_locked)							\
		up_read(guard);						\
})
98 /* Pass management to the unix-file plugin with "notail" policy */
99 static int __cryptcompress2unixfile(struct file *file, struct inode * inode)
101 int result;
102 reiser4_inode *info;
103 struct unix_file_info * uf;
104 info = reiser4_inode_data(inode);
106 result = aset_set_unsafe(&info->pset,
107 PSET_FILE,
108 (reiser4_plugin *)
109 file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
110 if (result)
111 return result;
112 result = aset_set_unsafe(&info->pset,
113 PSET_FORMATTING,
114 (reiser4_plugin *)
115 formatting_plugin_by_id(NEVER_TAILS_FORMATTING_ID));
116 if (result)
117 return result;
118 /* get rid of non-standard plugins */
119 info->plugin_mask &= ~cryptcompress_mask;
120 /* get rid of plugin stat-data extension */
121 info->extmask &= ~(1 << PLUGIN_STAT);
123 reiser4_inode_clr_flag(inode, REISER4_SDLEN_KNOWN);
125 /* FIXME use init_inode_data_unix_file() instead,
126 but aviod init_inode_ordering() */
127 /* Init unix-file specific part of inode */
128 uf = unix_file_inode_data(inode);
129 uf->container = UF_CONTAINER_UNKNOWN;
130 init_rwsem(&uf->latch);
131 uf->tplug = inode_formatting_plugin(inode);
132 uf->exclusive_use = 0;
133 #if REISER4_DEBUG
134 uf->ea_owner = NULL;
135 atomic_set(&uf->nr_neas, 0);
136 #endif
138 * we was carefull for file_ops, inode_ops and as_ops
139 * to be invariant for plugin conversion, so there is
140 * no need to update ones already installed in the
141 * vfs's residence.
143 return 0;
#if REISER4_DEBUG
/*
 * Debug-only sanity check used after conversion has been disabled.
 *
 * Returns 1 when the inode's stat-data extension mask has the
 * LIGHT_WEIGHT_STAT, UNIX_STAT, LARGE_TIMES_STAT and PLUGIN_STAT bits set
 * and its plugin mask has the PSET_COMPRESSION_MODE bit set; 0 otherwise.
 */
static int disabled_conversion_inode_ok(struct inode *inode)
{
	__u64 ext_bits = reiser4_inode_data(inode)->extmask;
	__u16 plug_bits = reiser4_inode_data(inode)->plugin_mask;

	if (!(ext_bits & (1 << LIGHT_WEIGHT_STAT)))
		return 0;
	if (!(ext_bits & (1 << UNIX_STAT)))
		return 0;
	if (!(ext_bits & (1 << LARGE_TIMES_STAT)))
		return 0;
	if (!(ext_bits & (1 << PLUGIN_STAT)))
		return 0;
	return (plug_bits & (1 << PSET_COMPRESSION_MODE)) != 0;
}
#endif
161 * Disable future attempts to schedule/convert file plugin.
162 * This function is called by plugin schedule hooks.
164 * To disable conversion we assign any compression mode plugin id
165 * different from CONVX_COMPRESSION_MODE_ID.
167 static int disable_conversion(struct inode * inode)
169 int result;
170 result =
171 force_plugin_pset(inode,
172 PSET_COMPRESSION_MODE,
173 (reiser4_plugin *)compression_mode_plugin_by_id
174 (LATTD_COMPRESSION_MODE_ID));
175 assert("edward-1500",
176 ergo(!result, disabled_conversion_inode_ok(inode)));
177 return result;
181 * Check if we really have achieved plugin scheduling point
183 static int check_dispatch_point(struct inode * inode,
184 loff_t pos /* position in the
185 file to write from */,
186 struct cluster_handle * clust,
187 struct dispatch_context * cont)
189 assert("edward-1505", conversion_enabled(inode));
191 * if file size is more then cluster size, then compressible
192 * status must be figured out (i.e. compression was disabled,
193 * or file plugin was converted to unix_file)
195 assert("edward-1506", inode->i_size <= inode_cluster_size(inode));
197 if (pos > inode->i_size)
198 /* first logical cluster will contain a (partial) hole */
199 return disable_conversion(inode);
200 if (pos < inode_cluster_size(inode))
201 /* writing to the first logical cluster */
202 return 0;
204 * here we have:
205 * cluster_size <= pos <= i_size <= cluster_size,
206 * and, hence, pos == i_size == cluster_size
208 assert("edward-1498",
209 pos == inode->i_size &&
210 pos == inode_cluster_size(inode));
211 assert("edward-1539", cont != NULL);
212 assert("edward-1540", cont->state == DISPATCH_INVAL_STATE);
214 cont->state = DISPATCH_POINT;
215 return 0;
218 static void start_check_compressibility(struct inode * inode,
219 struct cluster_handle * clust,
220 hint_t * hint)
222 assert("edward-1507", clust->index == 1);
223 assert("edward-1508", !tfm_cluster_is_uptodate(&clust->tc));
224 assert("edward-1509", cluster_get_tfm_act(&clust->tc) == TFMA_READ);
226 hint_init_zero(hint);
227 clust->hint = hint;
228 clust->index --;
229 clust->nr_pages = size_in_pages(lbytes(clust->index, inode));
231 /* first logical cluster (of index #0) must be complete */
232 assert("edward-1510", lbytes(clust->index, inode) ==
233 inode_cluster_size(inode));
236 static void finish_check_compressibility(struct inode * inode,
237 struct cluster_handle * clust,
238 hint_t * hint)
240 reiser4_unset_hint(clust->hint);
241 clust->hint = hint;
242 clust->index ++;
#if REISER4_DEBUG
/*
 * Debug-only check of a hint that points into a prepped disk cluster.
 *
 * Returns non-zero when the item at the hint's coord is a CTAIL item that
 * is not an unprepped ctail, and the item's key offset plus its number of
 * units equals the disk cluster size recorded in the hint.
 */
static int prepped_dclust_ok(hint_t *hint)
{
	reiser4_key key;
	coord_t *coord = &hint->ext_coord.coord;

	item_key_by_coord(coord, &key);

	if (item_id_by_coord(coord) != CTAIL_ID)
		return 0;
	if (coord_is_unprepped_ctail(coord))
		return 0;
	return (get_key_offset(&key) + nr_units_ctail(coord) ==
		dclust_get_extension_dsize(hint));
}
#endif
/* Half of @size (rounded down). The argument is parenthesized so that
   expressions with low-precedence operators expand correctly. */
#define fifty_persent(size) ((size) >> 1)
/*
 * Evaluation of data compressibility: data is considered compressible
 * when the output size @osize is less than half of the input size @isize.
 */
#define data_is_compressible(osize, isize)	\
	((osize) < fifty_persent(isize))
265 * A simple O(1)-heuristic for compressibility.
266 * This is called not more then one time per file's life.
267 * Read first logical cluster (of index #0) and estimate its compressibility.
268 * Save estimation result in @cont.
270 static int read_check_compressibility(struct inode * inode,
271 struct cluster_handle * clust,
272 struct dispatch_context * cont)
274 int i;
275 int result;
276 size_t dst_len;
277 hint_t tmp_hint;
278 hint_t * cur_hint = clust->hint;
279 assert("edward-1541", cont->state == DISPATCH_POINT);
281 start_check_compressibility(inode, clust, &tmp_hint);
283 reset_cluster_pgset(clust, cluster_nrpages(inode));
284 result = grab_page_cluster(inode, clust, READ_OP);
285 if (result)
286 return result;
287 /* Read page cluster here */
288 for (i = 0; i < clust->nr_pages; i++) {
289 struct page *page = clust->pages[i];
290 lock_page(page);
291 result = do_readpage_ctail(inode, clust, page,
292 ZNODE_READ_LOCK);
293 unlock_page(page);
294 if (result)
295 goto error;
297 tfm_cluster_clr_uptodate(&clust->tc);
299 cluster_set_tfm_act(&clust->tc, TFMA_WRITE);
301 if (hint_is_valid(&tmp_hint) && !hint_is_unprepped_dclust(&tmp_hint)) {
302 /* lenght of compressed data is known, no need to compress */
303 assert("edward-1511",
304 znode_is_any_locked(tmp_hint.lh.node));
305 assert("edward-1512",
306 WITH_DATA(tmp_hint.ext_coord.coord.node,
307 prepped_dclust_ok(&tmp_hint)));
308 dst_len = dclust_get_extension_dsize(&tmp_hint);
310 else {
311 struct tfm_cluster * tc = &clust->tc;
312 compression_plugin * cplug = inode_compression_plugin(inode);
313 result = grab_tfm_stream(inode, tc, INPUT_STREAM);
314 if (result)
315 goto error;
316 for (i = 0; i < clust->nr_pages; i++) {
317 char *data;
318 lock_page(clust->pages[i]);
319 BUG_ON(!PageUptodate(clust->pages[i]));
320 data = kmap(clust->pages[i]);
321 memcpy(tfm_stream_data(tc, INPUT_STREAM) + pg_to_off(i),
322 data, PAGE_CACHE_SIZE);
323 kunmap(clust->pages[i]);
324 unlock_page(clust->pages[i]);
326 result = grab_tfm_stream(inode, tc, OUTPUT_STREAM);
327 if (result)
328 goto error;
329 result = grab_coa(tc, cplug);
330 if (result)
331 goto error;
332 tc->len = tc->lsize = lbytes(clust->index, inode);
333 assert("edward-1513", tc->len == inode_cluster_size(inode));
334 dst_len = tfm_stream_size(tc, OUTPUT_STREAM);
335 cplug->compress(get_coa(tc, cplug->h.id, tc->act),
336 tfm_input_data(clust), tc->len,
337 tfm_output_data(clust), &dst_len);
338 assert("edward-1514",
339 dst_len <= tfm_stream_size(tc, OUTPUT_STREAM));
341 finish_check_compressibility(inode, clust, cur_hint);
342 cont->state =
343 (data_is_compressible(dst_len, inode_cluster_size(inode)) ?
344 DISPATCH_REMAINS_OLD :
345 DISPATCH_ASSIGNED_NEW);
346 return 0;
347 error:
348 put_page_cluster(clust, inode, READ_OP);
349 return result;
352 /* Cut disk cluster of index @idx */
353 static int cut_disk_cluster(struct inode * inode, cloff_t idx)
355 reiser4_key from, to;
356 assert("edward-1515", inode_file_plugin(inode) ==
357 file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID));
358 key_by_inode_cryptcompress(inode, clust_to_off(idx, inode), &from);
359 to = from;
360 set_key_offset(&to,
361 get_key_offset(&from) + inode_cluster_size(inode) - 1);
362 return reiser4_cut_tree(reiser4_tree_by_inode(inode),
363 &from, &to, inode, 0);
366 static int reserve_cryptcompress2unixfile(struct inode *inode)
368 reiser4_block_nr unformatted_nodes;
369 reiser4_tree *tree;
371 tree = reiser4_tree_by_inode(inode);
373 /* number of unformatted nodes which will be created */
374 unformatted_nodes = cluster_nrpages(inode); /* N */
377 * space required for one iteration of extent->tail conversion:
379 * 1. kill ctail items
381 * 2. insert N unformatted nodes
383 * 3. insert N (worst-case single-block
384 * extents) extent units.
386 * 4. drilling to the leaf level by coord_by_key()
388 * 5. possible update of stat-data
391 grab_space_enable();
392 return reiser4_grab_space
393 (2 * tree->height +
394 unformatted_nodes +
395 unformatted_nodes * estimate_one_insert_into_item(tree) +
396 1 + estimate_one_insert_item(tree) +
397 inode_file_plugin(inode)->estimate.update(inode),
398 BA_CAN_COMMIT);
402 * Convert cryptcompress file plugin to unix_file plugin.
404 static int cryptcompress2unixfile(struct file *file, struct inode *inode,
405 struct dispatch_context *cont)
407 int i;
408 int result = 0;
409 struct cryptcompress_info *cr_info;
410 struct unix_file_info *uf_info;
411 assert("edward-1516", cont->pages[0]->index == 0);
413 /* release all cryptcompress-specific resources */
414 cr_info = cryptcompress_inode_data(inode);
415 result = reserve_cryptcompress2unixfile(inode);
416 if (result)
417 goto out;
418 /* tell kill_hook to not truncate pages */
419 reiser4_inode_set_flag(inode, REISER4_FILE_CONV_IN_PROGRESS);
420 result = cut_disk_cluster(inode, 0);
421 if (result)
422 goto out;
423 /* captured jnode of cluster and assotiated resources (pages,
424 reserved disk space) were released by ->kill_hook() method
425 of the item plugin */
427 result = __cryptcompress2unixfile(file, inode);
428 if (result)
429 goto out;
430 /* At this point file is managed by unix file plugin */
432 uf_info = unix_file_inode_data(inode);
434 assert("edward-1518",
435 ergo(jprivate(cont->pages[0]),
436 !jnode_is_cluster_page(jprivate(cont->pages[0]))));
437 for(i = 0; i < cont->nr_pages; i++) {
438 assert("edward-1519", cont->pages[i]);
439 assert("edward-1520", PageUptodate(cont->pages[i]));
441 result = find_or_create_extent(cont->pages[i]);
442 if (result)
443 break;
445 if (unlikely(result))
446 goto out;
447 uf_info->container = UF_CONTAINER_EXTENTS;
448 result = reiser4_update_sd(inode);
449 out:
450 all_grabbed2free();
451 return result;
454 #define convert_file_plugin cryptcompress2unixfile
457 * This is called by ->write() method of a cryptcompress file plugin.
458 * Make a decision about the most reasonable file plugin id to manage
459 * the file.
461 int write_dispatch_hook(struct file *file, struct inode *inode,
462 loff_t pos, struct cluster_handle *clust,
463 struct dispatch_context *cont)
465 int result;
466 if (!conversion_enabled(inode))
467 return 0;
468 result = check_dispatch_point(inode, pos, clust, cont);
469 if (result || cont->state != DISPATCH_POINT)
470 return result;
471 result = read_check_compressibility(inode, clust, cont);
472 if (result)
473 return result;
474 if (cont->state == DISPATCH_REMAINS_OLD) {
475 put_page_cluster(clust, inode, READ_OP);
476 return disable_conversion(inode);
478 assert("edward-1543", cont->state == DISPATCH_ASSIGNED_NEW);
480 * page cluster is grabbed and uptodate. It will be
481 * released with a pgset after plugin conversion is
482 * finished, see put_dispatch_context().
484 reiser4_unset_hint(clust->hint);
485 move_cluster_pgset(clust, &cont->pages, &cont->nr_pages);
486 return 0;
/*
 * This is called by ->setattr() method of cryptcompress file plugin.
 *
 * Disables conversion if it is still enabled for @inode and returns the
 * result of disable_conversion(); returns 0 when there is nothing to do.
 */
int setattr_dispatch_hook(struct inode *inode)
{
	if (!conversion_enabled(inode))
		return 0;
	return disable_conversion(inode);
}
499 static inline void init_dispatch_context(struct dispatch_context * cont)
501 memset(cont, 0, sizeof(*cont));
504 static inline void done_dispatch_context(struct dispatch_context * cont,
505 struct inode * inode)
507 if (cont->pages) {
508 __put_page_cluster(0, cont->nr_pages, cont->pages, inode);
509 kfree(cont->pages);
/*
 * Here are wrappers with "protection", aka Reiser4 "careful" methods.
 * They are used by vfs (as methods of file_ops, inode_ops or as_ops),
 * which is not aware of plugin conversion performed by Reiser4.
 */

/*
 * Wrappers with active protection for:
 *
 * ->write();
 */
525 * ->write() file operation supplied to VFS.
526 * Write a file in 3 steps (some of them can be optional).
528 ssize_t reiser4_write_careful(struct file *file, const char __user *buf,
529 size_t count, loff_t *off)
531 int result;
532 reiser4_context *ctx;
533 ssize_t written_old = 0; /* bytes written with initial plugin */
534 ssize_t written_new = 0; /* bytes written with new plugin */
535 struct dispatch_context cont;
536 struct inode * inode = file->f_dentry->d_inode;
538 ctx = reiser4_init_context(inode->i_sb);
539 if (IS_ERR(ctx))
540 return PTR_ERR(ctx);
541 init_dispatch_context(&cont);
542 mutex_lock(&inode->i_mutex);
544 * First step.
545 * Start write with initial file plugin.
546 * Keep a plugin schedule status at @cont (if any).
548 written_old = inode_file_plugin(inode)->write(file,
549 buf,
550 count,
551 off,
552 &cont);
553 if (cont.state != DISPATCH_ASSIGNED_NEW || written_old < 0)
554 goto exit;
556 * Second step.
557 * New file plugin has been scheduled.
558 * Perform conversion to the new plugin.
560 down_read(&reiser4_inode_data(inode)->conv_sem);
561 result = convert_file_plugin(file, inode, &cont);
562 up_read(&reiser4_inode_data(inode)->conv_sem);
563 if (result) {
564 warning("edward-1544",
565 "Inode %llu: file plugin conversion failed (%d)",
566 (unsigned long long)get_inode_oid(inode),
567 result);
568 context_set_commit_async(ctx);
569 goto exit;
571 reiser4_txn_restart(ctx);
573 * Third step:
574 * Finish write with the new file plugin.
576 assert("edward-1536",
577 inode_file_plugin(inode) ==
578 file_plugin_by_id(UNIX_FILE_PLUGIN_ID));
580 written_new = inode_file_plugin(inode)->write(file,
581 buf + written_old,
582 count - written_old,
583 off,
584 NULL);
585 exit:
586 mutex_unlock(&inode->i_mutex);
587 done_dispatch_context(&cont, inode);
588 reiser4_exit_context(ctx);
590 return written_old + (written_new < 0 ? 0 : written_new);
/* Wrappers with passive protection for:
 *
 * ->open();
 * ->read();
 * ->ioctl();
 * ->mmap();
 * ->release();
 * ->bmap().
 */
603 int reiser4_open_careful(struct inode *inode, struct file *file)
605 return PROT_PASSIVE(int, open, (inode, file));
608 ssize_t reiser4_read_careful(struct file * file, char __user * buf,
609 size_t size, loff_t * off)
611 struct inode * inode = file->f_dentry->d_inode;
612 return PROT_PASSIVE(ssize_t, read, (file, buf, size, off));
615 int reiser4_ioctl_careful(struct inode *inode, struct file *filp,
616 unsigned int cmd, unsigned long arg)
618 return PROT_PASSIVE(int, ioctl, (inode, filp, cmd, arg));
621 int reiser4_mmap_careful(struct file *file, struct vm_area_struct *vma)
623 struct inode *inode = file->f_dentry->d_inode;
624 return PROT_PASSIVE(int, mmap, (file, vma));
627 int reiser4_release_careful(struct inode *inode, struct file *file)
629 return PROT_PASSIVE(int, release, (inode, file));
632 sector_t reiser4_bmap_careful(struct address_space * mapping, sector_t lblock)
634 struct inode *inode = mapping->host;
635 return PROT_PASSIVE(sector_t, bmap, (mapping, lblock));
638 int reiser4_write_begin_careful(struct file *file,
639 struct address_space *mapping,
640 loff_t pos,
641 unsigned len,
642 unsigned flags,
643 struct page **pagep,
644 void **fsdata)
646 int ret = 0;
647 unsigned start, end;
648 struct page *page;
649 pgoff_t index;
650 reiser4_context *ctx;
651 struct inode * inode = file->f_dentry->d_inode;
654 * reiser4_write_end() can not cope with
655 * short writes for now
657 BUG_ON(!(flags & AOP_FLAG_UNINTERRUPTIBLE));
659 index = pos >> PAGE_CACHE_SHIFT;
660 start = pos & (PAGE_CACHE_SIZE - 1);
661 end = start + len;
663 page = grab_cache_page_write_begin(mapping, index,
664 flags & AOP_FLAG_NOFS);
665 *pagep = page;
666 if (!page)
667 return -ENOMEM;
669 ctx = reiser4_init_context(file->f_dentry->d_inode->i_sb);
670 if (IS_ERR(ctx)) {
671 ret = PTR_ERR(ctx);
672 goto out;
674 ret = PROT_PASSIVE(int, write_begin, (file, page, start, end));
676 /* don't commit transaction under inode semaphore */
677 context_set_commit_async(ctx);
678 reiser4_exit_context(ctx);
679 out:
680 if (unlikely(ret)) {
681 unlock_page(page);
682 page_cache_release(page);
684 return ret;
687 int reiser4_write_end_careful(struct file *file,
688 struct address_space *mapping,
689 loff_t pos,
690 unsigned len,
691 unsigned copied,
692 struct page *page,
693 void *fsdata)
695 int ret;
696 reiser4_context *ctx;
697 unsigned start, end;
698 struct inode *inode = page->mapping->host;
700 assert("umka-3101", file != NULL);
701 assert("umka-3102", page != NULL);
702 assert("umka-3093", PageLocked(page));
704 start = pos & (PAGE_CACHE_SIZE - 1);
705 end = start + len;
707 flush_dcache_page(page);
708 SetPageUptodate(page);
710 ctx = reiser4_init_context(page->mapping->host->i_sb);
711 if (IS_ERR(ctx)){
712 unlock_page(page);
713 ret = PTR_ERR(ctx);
714 goto out;
716 ret = PROT_PASSIVE(int, write_end, (file, page, start, end));
718 /* don't commit transaction under inode semaphore */
719 context_set_commit_async(ctx);
720 reiser4_exit_context(ctx);
721 out:
722 page_cache_release(page);
723 if (!ret)
724 ret = copied;
725 return ret;
729 * Wrappers without protection for:
731 * ->setattr()
733 int reiser4_setattr(struct dentry *dentry, struct iattr *attr)
735 return inode_file_plugin(dentry->d_inode)->setattr(dentry, attr);
/*
  Local variables:
  c-indentation-style: "K&R"
  mode-name: "LC"
  c-basic-offset: 8
  tab-width: 8
  fill-column: 80
  scroll-step: 1
  End:
*/