1 /* Copyright 2001, 2002, 2003 by Hans Reiser, licensing governed by
4 * Written by Edward Shishkin.
6 * Implementations of inode/file/address_space operations
7 * specific for cryptcompress file plugin which manages
8 * regular files built of compressed and(or) encrypted bodies.
9 * See http://dev.namesys.com/CryptcompressPlugin for details.
12 #include "../../inode.h"
13 #include "../cluster.h"
14 #include "../object.h"
15 #include "../../tree_walk.h"
16 #include "cryptcompress.h"
18 #include <linux/pagevec.h>
19 #include <asm/uaccess.h>
20 #include <linux/swap.h>
21 #include <linux/writeback.h>
22 #include <linux/random.h>
23 #include <linux/scatterlist.h>
26 Managing primary and secondary caches by Reiser4
27 cryptcompress file plugin. Synchronization scheme.
31 +------------------->| tfm stream |
32 | | (compressed data)|
33 flush | +------------------+
36 --+ writepages() | | +-***-+ reiser4 +---+
37 | | +--+ | *** | storage tree | |
38 | | | +-***-+ (primary cache)| |
39 u | write() (secondary| cache) V / | \ | |
40 s | ----> +----+ +----+ +----+ +----+ +-***** ******* **----+ ----> | d |
41 e | | | |page cluster | | | **disk cluster** | | i |
42 r | <---- +----+ +----+ +----+ +----+ +-***** **********----+ <---- | s |
44 | | (->)longterm lock| | page_io()| |
46 --+ readpages() | | +---+
48 | +------------------+
49 +--------------------| tfm stream |
54 /* get cryptcompress specific portion of inode */
55 struct cryptcompress_info
*cryptcompress_inode_data(const struct inode
*inode
)
57 return &reiser4_inode_data(inode
)->file_plugin_data
.cryptcompress_info
;
60 /* plugin->u.file.init_inode_data */
61 void init_inode_data_cryptcompress(struct inode
*inode
,
62 reiser4_object_create_data
* crd
,
65 struct cryptcompress_info
*data
;
67 data
= cryptcompress_inode_data(inode
);
68 assert("edward-685", data
!= NULL
);
70 memset(data
, 0, sizeof(*data
));
72 mutex_init(&data
->checkin_mutex
);
73 data
->trunc_index
= ULONG_MAX
;
74 turn_on_compression(data
);
75 set_lattice_factor(data
, MIN_LATTICE_FACTOR
);
76 init_inode_ordering(inode
, crd
, create
);
79 /* The following is a part of reiser4 cipher key manager
80 which is called when opening/creating a cryptcompress file */
82 /* get/set cipher key info */
83 struct reiser4_crypto_info
* inode_crypto_info (struct inode
* inode
)
85 assert("edward-90", inode
!= NULL
);
86 assert("edward-91", reiser4_inode_data(inode
) != NULL
);
87 return cryptcompress_inode_data(inode
)->crypt
;
90 static void set_inode_crypto_info (struct inode
* inode
,
91 struct reiser4_crypto_info
* info
)
93 cryptcompress_inode_data(inode
)->crypt
= info
;
96 /* allocate a cipher key info */
97 struct reiser4_crypto_info
* reiser4_alloc_crypto_info (struct inode
* inode
)
99 struct reiser4_crypto_info
*info
;
102 info
= kzalloc(sizeof(*info
), reiser4_ctx_gfp_mask_get());
104 return ERR_PTR(-ENOMEM
);
106 fipsize
= inode_digest_plugin(inode
)->fipsize
;
107 info
->keyid
= kmalloc(fipsize
, reiser4_ctx_gfp_mask_get());
110 return ERR_PTR(-ENOMEM
);
117 /* allocate/free low-level info for cipher and digest
119 static int alloc_crypto_tfms(struct reiser4_crypto_info
* info
)
121 struct crypto_blkcipher
* ctfm
= NULL
;
122 struct crypto_hash
* dtfm
= NULL
;
123 cipher_plugin
* cplug
= inode_cipher_plugin(info
->host
);
124 digest_plugin
* dplug
= inode_digest_plugin(info
->host
);
127 ctfm
= cplug
->alloc();
129 warning("edward-1364",
130 "Can not allocate info for %s\n",
132 return RETERR(PTR_ERR(ctfm
));
135 info_set_cipher(info
, ctfm
);
137 dtfm
= dplug
->alloc();
139 warning("edward-1365",
140 "Can not allocate info for %s\n",
142 goto unhappy_with_digest
;
145 info_set_digest(info
, dtfm
);
150 info_set_cipher(info
, NULL
);
152 return RETERR(PTR_ERR(dtfm
));
157 free_crypto_tfms(struct reiser4_crypto_info
* info
)
159 assert("edward-1366", info
!= NULL
);
160 if (!info_get_cipher(info
)) {
161 assert("edward-1601", !info_get_digest(info
));
164 inode_cipher_plugin(info
->host
)->free(info_get_cipher(info
));
165 info_set_cipher(info
, NULL
);
166 inode_digest_plugin(info
->host
)->free(info_get_digest(info
));
167 info_set_digest(info
, NULL
);
172 /* create a key fingerprint for disk stat-data */
173 static int create_keyid (struct reiser4_crypto_info
* info
,
174 struct reiser4_crypto_data
* data
)
180 struct hash_desc ddesc
;
181 struct blkcipher_desc cdesc
;
182 struct scatterlist sg
;
184 assert("edward-1367", info
!= NULL
);
185 assert("edward-1368", info
->keyid
!= NULL
);
187 ddesc
.tfm
= info_get_digest(info
);
189 cdesc
.tfm
= info_get_cipher(info
);
192 dmem
= kmalloc((size_t)crypto_hash_digestsize(ddesc
.tfm
),
193 reiser4_ctx_gfp_mask_get());
197 blk
= crypto_blkcipher_blocksize(cdesc
.tfm
);
199 pad
= data
->keyid_size
% blk
;
200 pad
= (pad
? blk
- pad
: 0);
202 cmem
= kmalloc((size_t)data
->keyid_size
+ pad
,
203 reiser4_ctx_gfp_mask_get());
206 memcpy(cmem
, data
->keyid
, data
->keyid_size
);
207 memset(cmem
+ data
->keyid_size
, 0, pad
);
209 sg_init_one(&sg
, cmem
, data
->keyid_size
+ pad
);
211 ret
= crypto_blkcipher_encrypt(&cdesc
, &sg
, &sg
,
212 data
->keyid_size
+ pad
);
214 warning("edward-1369",
215 "encryption failed flags=%x\n", cdesc
.flags
);
218 ret
= crypto_hash_digest(&ddesc
, &sg
, sg
.length
, dmem
);
220 warning("edward-1602",
221 "digest failed flags=%x\n", ddesc
.flags
);
224 memcpy(info
->keyid
, dmem
, inode_digest_plugin(info
->host
)->fipsize
);
234 static void destroy_keyid(struct reiser4_crypto_info
* info
)
236 assert("edward-1370", info
!= NULL
);
237 assert("edward-1371", info
->keyid
!= NULL
);
242 static void __free_crypto_info (struct inode
* inode
)
244 struct reiser4_crypto_info
* info
= inode_crypto_info(inode
);
245 assert("edward-1372", info
!= NULL
);
247 free_crypto_tfms(info
);
253 static void instantiate_crypto_info(struct reiser4_crypto_info
* info
)
255 assert("edward-1373", info
!= NULL
);
256 assert("edward-1374", info
->inst
== 0);
261 static void uninstantiate_crypto_info(struct reiser4_crypto_info
* info
)
263 assert("edward-1375", info
!= NULL
);
268 static int is_crypto_info_instantiated(struct reiser4_crypto_info
* info
)
273 static int inode_has_cipher_key(struct inode
* inode
)
275 assert("edward-1376", inode
!= NULL
);
276 return inode_crypto_info(inode
) &&
277 is_crypto_info_instantiated(inode_crypto_info(inode
));
/* uninstantiate the key and release the crypto info of @inode */
static void free_crypto_info (struct inode * inode)
{
	uninstantiate_crypto_info(inode_crypto_info(inode));
	__free_crypto_info(inode);
}
287 static int need_cipher(struct inode
* inode
)
289 return inode_cipher_plugin(inode
) !=
290 cipher_plugin_by_id(NONE_CIPHER_ID
);
293 /* Parse @data which contains a (uninstantiated) cipher key imported
294 from user space, create a low-level cipher info and attach it to
295 the @object. If success, then info contains an instantiated key */
297 struct reiser4_crypto_info
* create_crypto_info(struct inode
* object
,
298 struct reiser4_crypto_data
* data
)
301 struct reiser4_crypto_info
* info
;
303 assert("edward-1377", data
!= NULL
);
304 assert("edward-1378", need_cipher(object
));
306 if (inode_file_plugin(object
) !=
307 file_plugin_by_id(DIRECTORY_FILE_PLUGIN_ID
))
308 return ERR_PTR(-EINVAL
);
310 info
= reiser4_alloc_crypto_info(object
);
313 ret
= alloc_crypto_tfms(info
);
316 /* instantiating a key */
317 ret
= crypto_blkcipher_setkey(info_get_cipher(info
),
321 warning("edward-1379",
322 "setkey failed flags=%x",
323 crypto_blkcipher_get_flags(info_get_cipher(info
)));
326 info
->keysize
= data
->keysize
;
327 ret
= create_keyid(info
, data
);
330 instantiate_crypto_info(info
);
333 __free_crypto_info(object
);
338 /* increment/decrement a load counter when
339 attaching/detaching the crypto-stat to any object */
340 static void load_crypto_info(struct reiser4_crypto_info
* info
)
342 assert("edward-1380", info
!= NULL
);
343 inc_keyload_count(info
);
346 static void unload_crypto_info(struct inode
* inode
)
348 struct reiser4_crypto_info
* info
= inode_crypto_info(inode
);
349 assert("edward-1381", info
->keyload_count
> 0);
351 dec_keyload_count(inode_crypto_info(inode
));
352 if (info
->keyload_count
== 0)
354 free_crypto_info(inode
);
357 /* attach/detach an existing crypto-stat */
358 void reiser4_attach_crypto_info(struct inode
* inode
,
359 struct reiser4_crypto_info
* info
)
361 assert("edward-1382", inode
!= NULL
);
362 assert("edward-1383", info
!= NULL
);
363 assert("edward-1384", inode_crypto_info(inode
) == NULL
);
365 set_inode_crypto_info(inode
, info
);
366 load_crypto_info(info
);
369 /* returns true, if crypto stat can be attached to the @host */
371 static int host_allows_crypto_info(struct inode
* host
)
374 file_plugin
* fplug
= inode_file_plugin(host
);
376 switch (fplug
->h
.id
) {
377 case CRYPTCOMPRESS_FILE_PLUGIN_ID
:
385 #endif /* REISER4_DEBUG */
387 static void reiser4_detach_crypto_info(struct inode
* inode
)
389 assert("edward-1385", inode
!= NULL
);
390 assert("edward-1386", host_allows_crypto_info(inode
));
392 if (inode_crypto_info(inode
))
393 unload_crypto_info(inode
);
394 set_inode_crypto_info(inode
, NULL
);
399 /* compare fingerprints of @child and @parent */
400 static int keyid_eq(struct reiser4_crypto_info
* child
,
401 struct reiser4_crypto_info
* parent
)
403 return !memcmp(child
->keyid
,
405 info_digest_plugin(parent
)->fipsize
);
408 /* check if a crypto-stat (which is bound to @parent) can be inherited */
409 int can_inherit_crypto_cryptcompress(struct inode
*child
, struct inode
*parent
)
411 if (!need_cipher(child
))
413 /* the child is created */
414 if (!inode_crypto_info(child
))
416 /* the child is looked up */
417 if (!inode_crypto_info(parent
))
419 return (inode_cipher_plugin(child
) == inode_cipher_plugin(parent
) &&
420 inode_digest_plugin(child
) == inode_digest_plugin(parent
) &&
421 inode_crypto_info(child
)->keysize
==
422 inode_crypto_info(parent
)->keysize
&&
423 keyid_eq(inode_crypto_info(child
), inode_crypto_info(parent
)));
427 /* helper functions for ->create() method of the cryptcompress plugin */
428 static int inode_set_crypto(struct inode
* object
)
430 reiser4_inode
* info
;
431 if (!inode_crypto_info(object
)) {
432 if (need_cipher(object
))
433 return RETERR(-EINVAL
);
434 /* the file is not to be encrypted */
437 info
= reiser4_inode_data(object
);
438 info
->extmask
|= (1 << CRYPTO_STAT
);
442 static int inode_init_compression(struct inode
* object
)
445 assert("edward-1461", object
!= NULL
);
446 if (inode_compression_plugin(object
)->init
)
447 result
= inode_compression_plugin(object
)->init();
451 static int inode_check_cluster(struct inode
* object
)
453 assert("edward-696", object
!= NULL
);
455 if (unlikely(inode_cluster_size(object
) < PAGE_CACHE_SIZE
)) {
456 warning("edward-1320", "Can not support '%s' "
457 "logical clusters (less then page size)",
458 inode_cluster_plugin(object
)->h
.label
);
459 return RETERR(-EINVAL
);
461 if (unlikely(inode_cluster_shift(object
)) >= BITS_PER_BYTE
*sizeof(int)){
462 warning("edward-1463", "Can not support '%s' "
463 "logical clusters (too big for transform)",
464 inode_cluster_plugin(object
)->h
.label
);
465 return RETERR(-EINVAL
);
/* plugin->destroy_inode() */
void destroy_inode_cryptcompress(struct inode * inode)
{
	assert("edward-1464", INODE_PGCOUNT(inode) == 0);
	reiser4_detach_crypto_info(inode);
	return;
}
478 /* plugin->create_object():
480 . attach crypto info if specified
481 . attach compression info if specified
482 . attach cluster info
484 int create_object_cryptcompress(struct inode
*object
, struct inode
*parent
,
485 reiser4_object_create_data
* data
)
490 assert("edward-23", object
!= NULL
);
491 assert("edward-24", parent
!= NULL
);
492 assert("edward-30", data
!= NULL
);
493 assert("edward-26", reiser4_inode_get_flag(object
, REISER4_NO_SD
));
494 assert("edward-27", data
->id
== CRYPTCOMPRESS_FILE_PLUGIN_ID
);
496 info
= reiser4_inode_data(object
);
498 assert("edward-29", info
!= NULL
);
501 info
->plugin_mask
|= (1 << PSET_FILE
);
504 result
= inode_set_crypto(object
);
507 /* set compression */
508 result
= inode_init_compression(object
);
512 result
= inode_check_cluster(object
);
516 /* save everything in disk stat-data */
517 result
= write_sd_by_inode_common(object
);
521 reiser4_detach_crypto_info(object
);
526 int open_cryptcompress(struct inode
* inode
, struct file
* file
)
531 /* returns a blocksize, the attribute of a cipher algorithm */
533 cipher_blocksize(struct inode
* inode
)
535 assert("edward-758", need_cipher(inode
));
536 assert("edward-1400", inode_crypto_info(inode
) != NULL
);
537 return crypto_blkcipher_blocksize
538 (info_get_cipher(inode_crypto_info(inode
)));
541 /* returns offset translated by scale factor of the crypto-algorithm */
542 static loff_t
inode_scaled_offset (struct inode
* inode
,
543 const loff_t src_off
/* input offset */)
545 assert("edward-97", inode
!= NULL
);
547 if (!need_cipher(inode
) ||
548 src_off
== get_key_offset(reiser4_min_key()) ||
549 src_off
== get_key_offset(reiser4_max_key()))
552 return inode_cipher_plugin(inode
)->scale(inode
,
553 cipher_blocksize(inode
),
557 /* returns disk cluster size */
558 size_t inode_scaled_cluster_size(struct inode
* inode
)
560 assert("edward-110", inode
!= NULL
);
562 return inode_scaled_offset(inode
, inode_cluster_size(inode
));
565 /* set number of cluster pages */
566 static void set_cluster_nrpages(struct cluster_handle
* clust
,
569 struct reiser4_slide
* win
;
571 assert("edward-180", clust
!= NULL
);
572 assert("edward-1040", inode
!= NULL
);
574 clust
->old_nrpages
= size_in_pages(lbytes(clust
->index
, inode
));
577 clust
->nr_pages
= size_in_pages(lbytes(clust
->index
, inode
));
580 assert("edward-1176", clust
->op
!= LC_INVAL
);
581 assert("edward-1064", win
->off
+ win
->count
+ win
->delta
!= 0);
583 if (win
->stat
== HOLE_WINDOW
&&
584 win
->off
== 0 && win
->count
== inode_cluster_size(inode
)) {
585 /* special case: writing a "fake" logical cluster */
589 clust
->nr_pages
= size_in_pages(max(win
->off
+ win
->count
+ win
->delta
,
590 lbytes(clust
->index
, inode
)));
594 /* plugin->key_by_inode()
595 build key of a disk cluster */
596 int key_by_inode_cryptcompress(struct inode
*inode
, loff_t off
,
599 assert("edward-64", inode
!= 0);
601 if (likely(off
!= get_key_offset(reiser4_max_key())))
602 off
= off_to_clust_to_off(off
, inode
);
603 if (inode_crypto_info(inode
))
604 off
= inode_scaled_offset(inode
, off
);
606 key_by_inode_and_offset_common(inode
, 0, key
);
607 set_key_offset(key
, (__u64
)off
);
611 /* plugin->flow_by_inode() */
612 /* flow is used to read/write disk clusters */
613 int flow_by_inode_cryptcompress(struct inode
*inode
, const char __user
* buf
,
614 int user
, /* 1: @buf is of user space,
616 loff_t size
, /* @buf size */
617 loff_t off
, /* offset to start io from */
618 rw_op op
, /* READ or WRITE */
619 flow_t
* f
/* resulting flow */)
621 assert("edward-436", f
!= NULL
);
622 assert("edward-149", inode
!= NULL
);
623 assert("edward-150", inode_file_plugin(inode
) != NULL
);
624 assert("edward-1465", user
== 0); /* we use flow to read/write
625 disk clusters located in
628 memcpy(&f
->data
, &buf
, sizeof(buf
));
632 return key_by_inode_cryptcompress(inode
, off
, &f
->key
);
636 cryptcompress_hint_validate(hint_t
* hint
, const reiser4_key
* key
,
637 znode_lock_mode lock_mode
)
641 assert("edward-704", hint
!= NULL
);
642 assert("edward-1089", !hint_is_valid(hint
));
643 assert("edward-706", hint
->lh
.owner
== NULL
);
645 coord
= &hint
->ext_coord
.coord
;
647 if (!hint
|| !hint_is_set(hint
) || hint
->mode
!= lock_mode
)
648 /* hint either not set or set by different operation */
649 return RETERR(-E_REPEAT
);
651 if (get_key_offset(key
) != hint
->offset
)
652 /* hint is set for different key */
653 return RETERR(-E_REPEAT
);
655 assert("edward-707", reiser4_schedulable());
657 return reiser4_seal_validate(&hint
->seal
, &hint
->ext_coord
.coord
,
658 key
, &hint
->lh
, lock_mode
,
662 /* reserve disk space when writing a logical cluster */
663 static int reserve4cluster(struct inode
*inode
, struct cluster_handle
*clust
)
667 assert("edward-965", reiser4_schedulable());
668 assert("edward-439", inode
!= NULL
);
669 assert("edward-440", clust
!= NULL
);
670 assert("edward-441", clust
->pages
!= NULL
);
672 if (clust
->nr_pages
== 0) {
673 assert("edward-1152", clust
->win
!= NULL
);
674 assert("edward-1153", clust
->win
->stat
== HOLE_WINDOW
);
675 /* don't reserve disk space for fake logical cluster */
678 assert("edward-442", jprivate(clust
->pages
[0]) != NULL
);
680 result
= reiser4_grab_space_force(estimate_insert_cluster(inode
) +
681 estimate_update_cluster(inode
),
686 grabbed2cluster_reserved(estimate_insert_cluster(inode
) +
687 estimate_update_cluster(inode
));
689 clust
->reserved_prepped
= estimate_update_cluster(inode
);
690 clust
->reserved_unprepped
= estimate_insert_cluster(inode
);
692 /* there can be space grabbed by txnmgr_force_commit_all */
696 /* free reserved disk space if writing a logical cluster fails */
697 static void free_reserved4cluster(struct inode
*inode
,
698 struct cluster_handle
*ch
, int count
)
700 assert("edward-967", ch
->reserved
== 1);
702 cluster_reserved2free(count
);
706 /* The core search procedure of the cryptcompress plugin.
707 If returned value is not cbk_errored, then current znode is locked */
708 static int find_cluster_item(hint_t
* hint
,
709 const reiser4_key
* key
, /* key of the item we are
711 znode_lock_mode lock_mode
/* which lock */ ,
712 ra_info_t
* ra_info
, lookup_bias bias
, __u32 flags
)
717 coord_t
*coord
= &hint
->ext_coord
.coord
;
718 coord_t orig
= *coord
;
720 assert("edward-152", hint
!= NULL
);
722 if (!hint_is_valid(hint
)) {
723 result
= cryptcompress_hint_validate(hint
, key
, lock_mode
);
724 if (result
== -E_REPEAT
)
727 assert("edward-1216", 0);
730 hint_set_valid(hint
);
732 assert("edward-709", znode_is_any_locked(coord
->node
));
734 /* In-place lookup is going here, it means we just need to
735 check if next item of the @coord match to the @keyhint) */
737 if (equal_to_rdk(coord
->node
, key
)) {
738 result
= goto_right_neighbor(coord
, &hint
->lh
);
739 if (result
== -E_NO_NEIGHBOR
) {
740 assert("edward-1217", 0);
745 assert("edward-1218", equal_to_ldk(coord
->node
, key
));
750 coord
->between
= AT_UNIT
;
752 result
= zload(coord
->node
);
755 assert("edward-1219", !node_is_empty(coord
->node
));
757 if (!coord_is_existing_item(coord
)) {
761 item_key_by_coord(coord
, &ikey
);
763 if (!keyeq(key
, &ikey
))
765 /* Ok, item is found, update node counts */
767 dclust_inc_extension_ncount(hint
);
768 return CBK_COORD_FOUND
;
771 assert("edward-1220", coord
->item_pos
> 0);
775 ON_DEBUG(coord_update_v(coord
));
776 return CBK_COORD_NOTFOUND
;
779 assert("edward-713", hint
->lh
.owner
== NULL
);
780 assert("edward-714", reiser4_schedulable());
782 reiser4_unset_hint(hint
);
783 dclust_init_extension(hint
);
784 coord_init_zero(coord
);
785 result
= coord_by_key(current_tree
, key
, coord
, &hint
->lh
,
786 lock_mode
, bias
, LEAF_LEVEL
, LEAF_LEVEL
,
787 CBK_UNIQUE
| flags
, ra_info
);
788 if (cbk_errored(result
))
790 if(result
== CBK_COORD_FOUND
)
791 dclust_inc_extension_ncount(hint
);
792 hint_set_valid(hint
);
796 /* This function is called by deflate[inflate] manager when
797 creating a transformed/plain stream to check if we should
798 create/cut some overhead. If this returns true, then @oh
799 contains the size of this overhead.
801 static int need_cut_or_align(struct inode
* inode
,
802 struct cluster_handle
* ch
, rw_op rw
, int * oh
)
804 struct tfm_cluster
* tc
= &ch
->tc
;
806 case WRITE_OP
: /* estimate align */
807 *oh
= tc
->len
% cipher_blocksize(inode
);
811 case READ_OP
: /* estimate cut */
812 *oh
= *(tfm_output_data(ch
) + tc
->len
- 1);
815 impossible("edward-1401", "bad option");
817 return (tc
->len
!= tc
->lsize
);
820 /* create/cut an overhead of transformed/plain stream */
821 static void align_or_cut_overhead(struct inode
* inode
,
822 struct cluster_handle
* ch
, rw_op rw
)
825 cipher_plugin
* cplug
= inode_cipher_plugin(inode
);
827 assert("edward-1402", need_cipher(inode
));
829 if (!need_cut_or_align(inode
, ch
, rw
, &oh
))
832 case WRITE_OP
: /* do align */
834 cplug
->align_stream(tfm_input_data(ch
) +
835 ch
->tc
.len
, ch
->tc
.len
,
836 cipher_blocksize(inode
));
837 *(tfm_input_data(ch
) + ch
->tc
.len
- 1) =
838 cipher_blocksize(inode
) - oh
;
840 case READ_OP
: /* do cut */
841 assert("edward-1403", oh
<= cipher_blocksize(inode
));
845 impossible("edward-1404", "bad option");
850 static unsigned max_cipher_overhead(struct inode
* inode
)
852 if (!need_cipher(inode
) || !inode_cipher_plugin(inode
)->align_stream
)
854 return cipher_blocksize(inode
);
857 static int deflate_overhead(struct inode
*inode
)
859 return (inode_compression_plugin(inode
)->
860 checksum
? DC_CHECKSUM_SIZE
: 0);
/* worst-case expansion of an @ilen-byte input by the compressor */
static unsigned deflate_overrun(struct inode * inode, int ilen)
{
	return coa_overrun(inode_compression_plugin(inode), ilen);
}
868 /* Estimating compressibility of a logical cluster by various
869 policies represented by compression mode plugin.
870 If this returns false, then compressor won't be called for
871 the cluster of index @index.
873 static int should_compress(struct tfm_cluster
* tc
, cloff_t index
,
876 compression_plugin
*cplug
= inode_compression_plugin(inode
);
877 compression_mode_plugin
*mplug
= inode_compression_mode_plugin(inode
);
879 assert("edward-1321", tc
->len
!= 0);
880 assert("edward-1322", cplug
!= NULL
);
881 assert("edward-1323", mplug
!= NULL
);
883 return /* estimate by size */
884 (cplug
->min_size_deflate
?
885 tc
->len
>= cplug
->min_size_deflate() :
887 /* estimate by compression mode plugin */
888 (mplug
->should_deflate
?
889 mplug
->should_deflate(inode
, index
) :
/* Evaluating results of compression transform.
   Returns true, if we need to accept this results */
static int save_compressed(int size_before, int size_after, struct inode *inode)
{
	return (size_after + deflate_overhead(inode) +
		max_cipher_overhead(inode) < size_before);
}
901 /* Guess result of the evaluation above */
902 static int need_inflate(struct cluster_handle
* ch
, struct inode
* inode
,
903 int encrypted
/* is cluster encrypted */ )
905 struct tfm_cluster
* tc
= &ch
->tc
;
907 assert("edward-142", tc
!= 0);
908 assert("edward-143", inode
!= NULL
);
912 inode_scaled_offset(inode
, tc
->lsize
) :
916 /* If results of compression were accepted, then we add
917 a checksum to catch possible disk cluster corruption.
918 The following is a format of the data stored in disk clusters:
920 data This is (transformed) logical cluster.
921 cipher_overhead This is created by ->align() method
922 of cipher plugin. May be absent.
923 checksum (4) This is created by ->checksum method
924 of compression plugin to check
925 integrity. May be absent.
927 Crypto overhead format:
930 control_byte (1) contains aligned overhead size:
931 1 <= overhead <= cipher_blksize
933 /* Append a checksum at the end of a transformed stream */
934 static void dc_set_checksum(compression_plugin
* cplug
, struct tfm_cluster
* tc
)
938 assert("edward-1309", tc
!= NULL
);
939 assert("edward-1310", tc
->len
> 0);
940 assert("edward-1311", cplug
->checksum
!= NULL
);
942 checksum
= cplug
->checksum(tfm_stream_data(tc
, OUTPUT_STREAM
), tc
->len
);
943 put_unaligned(cpu_to_le32(checksum
),
944 (d32
*)(tfm_stream_data(tc
, OUTPUT_STREAM
) + tc
->len
));
945 tc
->len
+= (int)DC_CHECKSUM_SIZE
;
948 /* Check a disk cluster checksum.
949 Returns 0 if checksum is correct, otherwise returns 1 */
950 static int dc_check_checksum(compression_plugin
* cplug
, struct tfm_cluster
* tc
)
952 assert("edward-1312", tc
!= NULL
);
953 assert("edward-1313", tc
->len
> (int)DC_CHECKSUM_SIZE
);
954 assert("edward-1314", cplug
->checksum
!= NULL
);
956 if (cplug
->checksum(tfm_stream_data(tc
, INPUT_STREAM
),
957 tc
->len
- (int)DC_CHECKSUM_SIZE
) !=
958 le32_to_cpu(get_unaligned((d32
*)
959 (tfm_stream_data(tc
, INPUT_STREAM
)
960 + tc
->len
- (int)DC_CHECKSUM_SIZE
)))) {
961 warning("edward-156",
962 "Bad disk cluster checksum %d, (should be %d) Fsck?\n",
964 (get_unaligned((d32
*)
965 (tfm_stream_data(tc
, INPUT_STREAM
) +
966 tc
->len
- (int)DC_CHECKSUM_SIZE
))),
968 (tfm_stream_data(tc
, INPUT_STREAM
),
969 tc
->len
- (int)DC_CHECKSUM_SIZE
));
972 tc
->len
-= (int)DC_CHECKSUM_SIZE
;
976 /* get input/output stream for some transform action */
977 int grab_tfm_stream(struct inode
* inode
, struct tfm_cluster
* tc
,
980 size_t size
= inode_scaled_cluster_size(inode
);
982 assert("edward-901", tc
!= NULL
);
983 assert("edward-1027", inode_compression_plugin(inode
) != NULL
);
985 if (cluster_get_tfm_act(tc
) == TFMA_WRITE
)
986 size
+= deflate_overrun(inode
, inode_cluster_size(inode
));
988 if (!get_tfm_stream(tc
, id
) && id
== INPUT_STREAM
)
989 alternate_streams(tc
);
990 if (!get_tfm_stream(tc
, id
))
991 return alloc_tfm_stream(tc
, size
, id
);
993 assert("edward-902", tfm_stream_is_set(tc
, id
));
995 if (tfm_stream_size(tc
, id
) < size
)
996 return realloc_tfm_stream(tc
, size
, id
);
1000 /* Common deflate manager */
1001 int reiser4_deflate_cluster(struct cluster_handle
* clust
, struct inode
* inode
)
1006 struct tfm_cluster
* tc
= &clust
->tc
;
1007 compression_plugin
* coplug
;
1009 assert("edward-401", inode
!= NULL
);
1010 assert("edward-903", tfm_stream_is_set(tc
, INPUT_STREAM
));
1011 assert("edward-1348", cluster_get_tfm_act(tc
) == TFMA_WRITE
);
1012 assert("edward-498", !tfm_cluster_is_uptodate(tc
));
1014 coplug
= inode_compression_plugin(inode
);
1015 if (should_compress(tc
, clust
->index
, inode
)) {
1016 /* try to compress, discard bad results */
1018 compression_mode_plugin
* mplug
=
1019 inode_compression_mode_plugin(inode
);
1020 assert("edward-602", coplug
!= NULL
);
1021 assert("edward-1423", coplug
->compress
!= NULL
);
1023 result
= grab_coa(tc
, coplug
);
1025 warning("edward-1424",
1026 "alloc_coa failed with ret=%d, skipped compression",
1030 result
= grab_tfm_stream(inode
, tc
, OUTPUT_STREAM
);
1032 warning("edward-1425",
1033 "alloc stream failed with ret=%d, skipped compression",
1037 dst_len
= tfm_stream_size(tc
, OUTPUT_STREAM
);
1038 coplug
->compress(get_coa(tc
, coplug
->h
.id
, tc
->act
),
1039 tfm_input_data(clust
), tc
->len
,
1040 tfm_output_data(clust
), &dst_len
);
1041 /* make sure we didn't overwrite extra bytes */
1042 assert("edward-603",
1043 dst_len
<= tfm_stream_size(tc
, OUTPUT_STREAM
));
1045 /* evaluate results of compression transform */
1046 if (save_compressed(tc
->len
, dst_len
, inode
)) {
1047 /* good result, accept */
1049 if (mplug
->accept_hook
!= NULL
) {
1050 result
= mplug
->accept_hook(inode
, clust
->index
);
1052 warning("edward-1426",
1053 "accept_hook failed with ret=%d",
1059 /* bad result, discard */
1061 if (cluster_is_complete(clust
, inode
))
1062 warning("edward-1496",
1063 "incompressible cluster %lu (inode %llu)",
1065 (unsigned long long)get_inode_oid(inode
));
1067 if (mplug
->discard_hook
!= NULL
&&
1068 cluster_is_complete(clust
, inode
)) {
1069 result
= mplug
->discard_hook(inode
,
1072 warning("edward-1427",
1073 "discard_hook failed with ret=%d",
1079 if (need_cipher(inode
)) {
1080 cipher_plugin
* ciplug
;
1081 struct blkcipher_desc desc
;
1082 struct scatterlist src
;
1083 struct scatterlist dst
;
1085 ciplug
= inode_cipher_plugin(inode
);
1086 desc
.tfm
= info_get_cipher(inode_crypto_info(inode
));
1089 alternate_streams(tc
);
1090 result
= grab_tfm_stream(inode
, tc
, OUTPUT_STREAM
);
1094 align_or_cut_overhead(inode
, clust
, WRITE_OP
);
1095 sg_init_one(&src
, tfm_input_data(clust
), tc
->len
);
1096 sg_init_one(&dst
, tfm_output_data(clust
), tc
->len
);
1098 result
= crypto_blkcipher_encrypt(&desc
, &dst
, &src
, tc
->len
);
1100 warning("edward-1405",
1101 "encryption failed flags=%x\n", desc
.flags
);
1106 if (compressed
&& coplug
->checksum
!= NULL
)
1107 dc_set_checksum(coplug
, tc
);
1108 if (!compressed
&& !encrypted
)
1109 alternate_streams(tc
);
1113 /* Common inflate manager. */
1114 int reiser4_inflate_cluster(struct cluster_handle
* clust
, struct inode
* inode
)
1117 int transformed
= 0;
1118 struct tfm_cluster
* tc
= &clust
->tc
;
1119 compression_plugin
* coplug
;
1121 assert("edward-905", inode
!= NULL
);
1122 assert("edward-1178", clust
->dstat
== PREP_DISK_CLUSTER
);
1123 assert("edward-906", tfm_stream_is_set(&clust
->tc
, INPUT_STREAM
));
1124 assert("edward-1349", tc
->act
== TFMA_READ
);
1125 assert("edward-907", !tfm_cluster_is_uptodate(tc
));
1127 /* Handle a checksum (if any) */
1128 coplug
= inode_compression_plugin(inode
);
1129 if (need_inflate(clust
, inode
, need_cipher(inode
)) &&
1130 coplug
->checksum
!= NULL
) {
1131 result
= dc_check_checksum(coplug
, tc
);
1132 if (unlikely(result
)) {
1133 warning("edward-1460",
1134 "Inode %llu: disk cluster %lu looks corrupted",
1135 (unsigned long long)get_inode_oid(inode
),
1137 return RETERR(-EIO
);
1140 if (need_cipher(inode
)) {
1141 cipher_plugin
* ciplug
;
1142 struct blkcipher_desc desc
;
1143 struct scatterlist src
;
1144 struct scatterlist dst
;
1146 ciplug
= inode_cipher_plugin(inode
);
1147 desc
.tfm
= info_get_cipher(inode_crypto_info(inode
));
1149 result
= grab_tfm_stream(inode
, tc
, OUTPUT_STREAM
);
1152 assert("edward-909", tfm_cluster_is_set(tc
));
1154 sg_init_one(&src
, tfm_input_data(clust
), tc
->len
);
1155 sg_init_one(&dst
, tfm_output_data(clust
), tc
->len
);
1157 result
= crypto_blkcipher_decrypt(&desc
, &dst
, &src
, tc
->len
);
1159 warning("edward-1600", "decrypt failed flags=%x\n",
1163 align_or_cut_overhead(inode
, clust
, READ_OP
);
1166 if (need_inflate(clust
, inode
, 0)) {
1167 size_t dst_len
= inode_cluster_size(inode
);
1169 alternate_streams(tc
);
1171 result
= grab_tfm_stream(inode
, tc
, OUTPUT_STREAM
);
1174 assert("edward-1305", coplug
->decompress
!= NULL
);
1175 assert("edward-910", tfm_cluster_is_set(tc
));
1177 coplug
->decompress(get_coa(tc
, coplug
->h
.id
, tc
->act
),
1178 tfm_input_data(clust
), tc
->len
,
1179 tfm_output_data(clust
), &dst_len
);
1182 assert("edward-157", dst_len
== tc
->lsize
);
1186 alternate_streams(tc
);
1190 /* This is implementation of readpage method of struct
1191 address_space_operations for cryptcompress plugin. */
1192 int readpage_cryptcompress(struct file
*file
, struct page
*page
)
1194 reiser4_context
*ctx
;
1195 struct cluster_handle clust
;
1199 assert("edward-88", PageLocked(page
));
1200 assert("vs-976", !PageUptodate(page
));
1201 assert("edward-89", page
->mapping
&& page
->mapping
->host
);
1203 ctx
= reiser4_init_context(page
->mapping
->host
->i_sb
);
1206 return PTR_ERR(ctx
);
1208 assert("edward-113",
1210 page
->mapping
== file
->f_dentry
->d_inode
->i_mapping
));
1212 if (PageUptodate(page
)) {
1213 warning("edward-1338", "page is already uptodate\n");
1215 reiser4_exit_context(ctx
);
1218 cluster_init_read(&clust
, NULL
);
1220 iplug
= item_plugin_by_id(CTAIL_ID
);
1221 if (!iplug
->s
.file
.readpage
) {
1223 put_cluster_handle(&clust
);
1224 reiser4_exit_context(ctx
);
1227 result
= iplug
->s
.file
.readpage(&clust
, page
);
1229 put_cluster_handle(&clust
);
1230 reiser4_txn_restart(ctx
);
1231 reiser4_exit_context(ctx
);
1235 /* number of pages to check in */
1236 static int get_new_nrpages(struct cluster_handle
* clust
)
1238 switch (clust
->op
) {
1240 return clust
->nr_pages
;
1242 assert("edward-1179", clust
->win
!= NULL
);
1243 return size_in_pages(clust
->win
->off
+ clust
->win
->count
);
1245 impossible("edward-1180", "bad page cluster option");
1250 static void set_cluster_pages_dirty(struct cluster_handle
* clust
,
1251 struct inode
* inode
)
1255 int nrpages
= get_new_nrpages(clust
);
1257 for (i
= 0; i
< nrpages
; i
++) {
1259 pg
= clust
->pages
[i
];
1260 assert("edward-968", pg
!= NULL
);
1262 assert("edward-1065", PageUptodate(pg
));
1263 set_page_dirty_notag(pg
);
1265 mark_page_accessed(pg
);
1269 /* Grab a page cluster for read/write operations.
1270 Attach a jnode for write operations (when preparing for modifications, which
1271 are supposed to be committed).
1273 We allocate only one jnode per page cluster; this jnode is binded to the
1274 first page of this cluster, so we have an extra-reference that will be put
1275 as soon as jnode is evicted from memory), other references will be cleaned
1276 up in flush time (assume that check in page cluster was successful).
1278 int grab_page_cluster(struct inode
* inode
,
1279 struct cluster_handle
* clust
, rw_op rw
)
1285 assert("edward-182", clust
!= NULL
);
1286 assert("edward-183", clust
->pages
!= NULL
);
1287 assert("edward-1466", clust
->node
== NULL
);
1288 assert("edward-1428", inode
!= NULL
);
1289 assert("edward-1429", inode
->i_mapping
!= NULL
);
1290 assert("edward-184", clust
->nr_pages
<= cluster_nrpages(inode
));
1292 if (clust
->nr_pages
== 0)
1295 for (i
= 0; i
< clust
->nr_pages
; i
++) {
1297 assert("edward-1044", clust
->pages
[i
] == NULL
);
1300 find_or_create_page(inode
->i_mapping
,
1301 clust_to_pg(clust
->index
, inode
) + i
,
1302 reiser4_ctx_gfp_mask_get());
1303 if (!clust
->pages
[i
]) {
1304 result
= RETERR(-ENOMEM
);
1307 if (i
== 0 && rw
== WRITE_OP
) {
1308 node
= jnode_of_page(clust
->pages
[i
]);
1310 result
= PTR_ERR(node
);
1311 unlock_page(clust
->pages
[i
]);
1314 JF_SET(node
, JNODE_CLUSTER_PAGE
);
1315 assert("edward-920", jprivate(clust
->pages
[0]));
1317 INODE_PGCOUNT_INC(inode
);
1318 unlock_page(clust
->pages
[i
]);
1320 if (unlikely(result
)) {
1322 put_cluster_page(clust
->pages
[--i
]);
1323 INODE_PGCOUNT_DEC(inode
);
1325 if (node
&& !IS_ERR(node
))
1333 static void truncate_page_cluster_range(struct inode
* inode
,
1334 struct page
** pages
,
1336 int from
, int count
,
1339 assert("edward-1467", count
> 0);
1340 reiser4_invalidate_pages(inode
->i_mapping
,
1341 clust_to_pg(index
, inode
) + from
,
1345 /* Put @count pages starting from @from offset */
1346 void __put_page_cluster(int from
, int count
,
1347 struct page
** pages
, struct inode
* inode
)
1350 assert("edward-1468", pages
!= NULL
);
1351 assert("edward-1469", inode
!= NULL
);
1352 assert("edward-1470", from
>= 0 && count
>= 0);
1354 for (i
= 0; i
< count
; i
++) {
1355 assert("edward-1471", pages
[from
+ i
] != NULL
);
1356 assert("edward-1472",
1357 pages
[from
+ i
]->index
== pages
[from
]->index
+ i
);
1359 put_cluster_page(pages
[from
+ i
]);
1360 INODE_PGCOUNT_DEC(inode
);
1365 * This is dual to grab_page_cluster,
1366 * however if @rw == WRITE_OP, then we call this function
1367 * only if something is failed before checkin page cluster.
1369 void put_page_cluster(struct cluster_handle
* clust
,
1370 struct inode
* inode
, rw_op rw
)
1372 assert("edward-445", clust
!= NULL
);
1373 assert("edward-922", clust
->pages
!= NULL
);
1374 assert("edward-446",
1375 ergo(clust
->nr_pages
!= 0, clust
->pages
[0] != NULL
));
1377 __put_page_cluster(0, clust
->nr_pages
, clust
->pages
, inode
);
1378 if (rw
== WRITE_OP
) {
1379 if (unlikely(clust
->node
)) {
1380 assert("edward-447",
1381 clust
->node
== jprivate(clust
->pages
[0]));
1389 int cryptcompress_inode_ok(struct inode
*inode
)
1391 if (!(reiser4_inode_data(inode
)->plugin_mask
& (1 << PSET_FILE
)))
1393 if (!cluster_shift_ok(inode_cluster_shift(inode
)))
1398 static int window_ok(struct reiser4_slide
* win
, struct inode
*inode
)
1400 assert("edward-1115", win
!= NULL
);
1401 assert("edward-1116", ergo(win
->delta
, win
->stat
== HOLE_WINDOW
));
1403 return (win
->off
!= inode_cluster_size(inode
)) &&
1404 (win
->off
+ win
->count
+ win
->delta
<= inode_cluster_size(inode
));
1407 static int cluster_ok(struct cluster_handle
* clust
, struct inode
*inode
)
1409 assert("edward-279", clust
!= NULL
);
1413 return (clust
->win
? window_ok(clust
->win
, inode
) : 1);
1416 static int pages_truncate_ok(struct inode
*inode
, pgoff_t start
)
1421 found
= find_get_pages(inode
->i_mapping
, start
, 1, &page
);
1423 put_cluster_page(page
);
1427 #define pages_truncate_ok(inode, start) 1
1430 static int jnode_truncate_ok(struct inode
*inode
, cloff_t index
)
1433 node
= jlookup(current_tree
, get_inode_oid(inode
),
1434 clust_to_pg(index
, inode
));
1441 static int find_fake_appended(struct inode
*inode
, cloff_t
* index
);
1443 static int body_truncate_ok(struct inode
*inode
, cloff_t aidx
)
1448 result
= find_fake_appended(inode
, &raidx
);
1449 return !result
&& (aidx
== raidx
);
1453 /* guess next window stat */
1454 static inline window_stat
next_window_stat(struct reiser4_slide
* win
)
1456 assert("edward-1130", win
!= NULL
);
1457 return ((win
->stat
== HOLE_WINDOW
&& win
->delta
== 0) ?
1458 HOLE_WINDOW
: DATA_WINDOW
);
1461 /* guess and set next cluster index and window params */
1462 static void move_update_window(struct inode
* inode
,
1463 struct cluster_handle
* clust
,
1464 loff_t file_off
, loff_t to_file
)
1466 struct reiser4_slide
* win
;
1468 assert("edward-185", clust
!= NULL
);
1469 assert("edward-438", clust
->pages
!= NULL
);
1470 assert("edward-281", cluster_ok(clust
, inode
));
1476 switch (win
->stat
) {
1480 win
->stat
= DATA_WINDOW
;
1482 win
->count
= min((loff_t
)inode_cluster_size(inode
), to_file
);
1485 switch (next_window_stat(win
)) {
1488 clust
->index
= off_to_clust(file_off
, inode
);
1489 win
->stat
= HOLE_WINDOW
;
1491 win
->count
= off_to_cloff(file_off
, inode
);
1492 win
->delta
= min((loff_t
)(inode_cluster_size(inode
) -
1493 win
->count
), to_file
);
1497 win
->stat
= DATA_WINDOW
;
1498 /* off+count+delta=inv */
1499 win
->off
= win
->off
+ win
->count
;
1500 win
->count
= win
->delta
;
1504 impossible("edward-282", "wrong next window state");
1508 impossible("edward-283", "wrong current window state");
1510 assert("edward-1068", cluster_ok(clust
, inode
));
1513 static int update_sd_cryptcompress(struct inode
*inode
)
1517 assert("edward-978", reiser4_schedulable());
1519 result
= reiser4_grab_space_force(/* one for stat data update */
1520 estimate_update_common(inode
),
1524 inode
->i_ctime
= inode
->i_mtime
= CURRENT_TIME
;
1525 result
= reiser4_update_sd(inode
);
1530 static void uncapture_cluster_jnode(jnode
* node
)
1534 assert_spin_locked(&(node
->guard
));
1536 atom
= jnode_get_atom(node
);
1538 assert("jmacd-7111", !JF_ISSET(node
, JNODE_DIRTY
));
1539 spin_unlock_jnode(node
);
1542 reiser4_uncapture_block(node
);
1543 spin_unlock_atom(atom
);
1547 static void put_found_pages(struct page
**pages
, int nr
)
1550 for (i
= 0; i
< nr
; i
++) {
1551 assert("edward-1045", pages
[i
] != NULL
);
1552 put_cluster_page(pages
[i
]);
1556 /* Lifecycle of a logical cluster in the system.
1559 * Logical cluster of a cryptcompress file is represented in the system by
1560 * . page cluster (in memory, primary cache, contains plain text);
1561 * . disk cluster (in memory, secondary cache, contains transformed text).
1562 * Primary cache is to reduce number of transform operations (compression,
1563 * encryption), i.e. to implement transform-caching strategy.
1564 * Secondary cache is to reduce number of I/O operations, i.e. for usual
1565 * write-caching strategy. Page cluster is a set of pages, i.e. mapping of
1566 * a logical cluster to the primary cache. Disk cluster is a set of items
1567 * of the same type defined by some reiser4 item plugin id.
1569 * 1. Performing modifications
1571 * Every modification of a cryptcompress file is considered as a set of
1572 * operations performed on file's logical clusters. Every such "atomic"
1573 * modification is truncate, append and(or) overwrite some bytes of a
1574 * logical cluster performed in the primary cache with the following
1575 * synchronization with the secondary cache (in flush time). Disk clusters,
1576 * which live in the secondary cache, are supposed to be synchronized with
1577 * disk. The mechanism of synchronization of primary and secondary caches
1578 * includes so-called checkin/checkout technique described below.
1580 * 2. Submitting modifications
1582 * Each page cluster has associated jnode (a special in-memory header to
1583 * keep a track of transactions in reiser4), which is attached to its first
1584 * page when grabbing page cluster for modifications (see grab_page_cluster).
1585 * Submitting modifications (see checkin_logical_cluster) is going per logical
1586 * cluster and includes:
1587 * . checkin_cluster_size;
1588 * . checkin_page_cluster.
1589 * checkin_cluster_size() is resolved to file size update (which completely
1590 * defines new size of logical cluster (number of file's bytes in a logical
1592 * checkin_page_cluster() captures jnode of a page cluster and installs
1593 * jnode's dirty flag (if needed) to indicate that modifications are
1594 * successfully checked in.
1596 * 3. Checking out modifications
1598 * Is going per logical cluster in flush time (see checkout_logical_cluster).
1599 * This is the time of synchronizing primary and secondary caches.
1600 * checkout_logical_cluster() includes:
1601 * . checkout_page_cluster (retrieving checked in pages).
1602 * . uncapture jnode (including clear dirty flag and unlock)
1604 * 4. Committing modifications
1606 * Proceeding a synchronization of primary and secondary caches. When checking
1607 * out page cluster (the phase above) pages are locked/flushed/unlocked
1608 * one-by-one in ascending order of their indexes to contiguous stream, which
1609 * is supposed to be transformed (compressed, encrypted), chopped up into items
1610 * and committed to disk as a disk cluster.
1612 * 5. Managing page references
1614 * Every checked in page have a special additional "control" reference,
1615 * which is dropped at checkout. We need this to avoid unexpected evicting
1616 * pages from memory before checkout. Control references are managed so
1617 * they are not accumulated with every checkin:
1628 * Every page cluster has its own unique "cluster lock". Update/drop
1629 * references are serialized via this lock. Number of checked in cluster
1630 * pages is calculated by i_size under cluster lock. File size is updated
1631 * at every checkin action also under cluster lock (except cases of
1632 * appending/truncating fake logical clusters).
1634 * Proof of correctness:
1636 * Since we update file size under cluster lock, in the case of non-fake
1637 * logical cluster with its lock held we do have expected number of checked
1638 * in pages. On the other hand, append/truncate of fake logical clusters
1639 * doesn't change number of checked in pages of any cluster.
1641 * NOTE-EDWARD: As cluster lock we use guard (spinlock_t) of its jnode.
1642 * Currently, I don't see any reason to create a special lock for those
1646 static inline void lock_cluster(jnode
* node
)
1648 spin_lock_jnode(node
);
1651 static inline void unlock_cluster(jnode
* node
)
1653 spin_unlock_jnode(node
);
1656 static inline void unlock_cluster_uncapture(jnode
* node
)
1658 uncapture_cluster_jnode(node
);
1661 /* Set new file size by window. Cluster lock is required. */
1662 static void checkin_file_size(struct cluster_handle
* clust
,
1663 struct inode
* inode
)
1666 struct reiser4_slide
* win
;
1668 assert("edward-1181", clust
!= NULL
);
1669 assert("edward-1182", inode
!= NULL
);
1670 assert("edward-1473", clust
->pages
!= NULL
);
1671 assert("edward-1474", clust
->pages
[0] != NULL
);
1672 assert("edward-1475", jprivate(clust
->pages
[0]) != NULL
);
1673 assert_spin_locked(&(jprivate(clust
->pages
[0])->guard
));
1677 assert("edward-1183", win
!= NULL
);
1679 new_size
= clust_to_off(clust
->index
, inode
) + win
->off
;
1681 switch (clust
->op
) {
1683 if (new_size
+ win
->count
<= i_size_read(inode
))
1684 /* overwrite only */
1686 new_size
+= win
->count
;
1691 impossible("edward-1184", "bad page cluster option");
1694 inode_check_scale_nolock(inode
, i_size_read(inode
), new_size
);
1695 i_size_write(inode
, new_size
);
1699 static inline void checkin_cluster_size(struct cluster_handle
* clust
,
1700 struct inode
* inode
)
1703 checkin_file_size(clust
, inode
);
1706 static int checkin_page_cluster(struct cluster_handle
* clust
,
1707 struct inode
* inode
)
1711 int old_nrpages
= clust
->old_nrpages
;
1712 int new_nrpages
= get_new_nrpages(clust
);
1716 assert("edward-221", node
!= NULL
);
1717 assert("edward-971", clust
->reserved
== 1);
1718 assert("edward-1263",
1719 clust
->reserved_prepped
== estimate_update_cluster(inode
));
1720 assert("edward-1264", clust
->reserved_unprepped
== 0);
1722 if (JF_ISSET(node
, JNODE_DIRTY
)) {
1724 * page cluster was checked in, but not yet
1725 * checked out, so release related resources
1727 free_reserved4cluster(inode
, clust
,
1728 estimate_update_cluster(inode
));
1729 __put_page_cluster(0, clust
->old_nrpages
,
1730 clust
->pages
, inode
);
1732 result
= capture_cluster_jnode(node
);
1733 if (unlikely(result
)) {
1734 unlock_cluster(node
);
1737 jnode_make_dirty_locked(node
);
1738 clust
->reserved
= 0;
1740 unlock_cluster(node
);
1742 if (new_nrpages
< old_nrpages
) {
1743 /* truncate >= 1 complete pages */
1744 __put_page_cluster(new_nrpages
,
1745 old_nrpages
- new_nrpages
,
1746 clust
->pages
, inode
);
1747 truncate_page_cluster_range(inode
,
1748 clust
->pages
, clust
->index
,
1750 old_nrpages
- new_nrpages
,
1754 clust
->reserved_prepped
-= estimate_update_cluster(inode
);
1759 /* Submit modifications of a logical cluster */
1760 static int checkin_logical_cluster(struct cluster_handle
* clust
,
1761 struct inode
*inode
)
1768 assert("edward-1035", node
!= NULL
);
1769 assert("edward-1029", clust
!= NULL
);
1770 assert("edward-1030", clust
->reserved
== 1);
1771 assert("edward-1031", clust
->nr_pages
!= 0);
1772 assert("edward-1032", clust
->pages
!= NULL
);
1773 assert("edward-1033", clust
->pages
[0] != NULL
);
1774 assert("edward-1446", jnode_is_cluster_page(node
));
1775 assert("edward-1476", node
== jprivate(clust
->pages
[0]));
1778 checkin_cluster_size(clust
, inode
);
1779 /* this will unlock cluster */
1780 result
= checkin_page_cluster(clust
, inode
);
1787 * Retrieve size of logical cluster that was checked in at
1788 * the latest modifying session (cluster lock is required)
1790 static inline void checkout_cluster_size(struct cluster_handle
* clust
,
1791 struct inode
* inode
)
1793 struct tfm_cluster
*tc
= &clust
->tc
;
1795 tc
->len
= lbytes(clust
->index
, inode
);
1796 assert("edward-1478", tc
->len
!= 0);
1800 * Retrieve a page cluster with the latest submitted modifications
1801 * and flush its pages to previously allocated contiguous stream.
1803 static void checkout_page_cluster(struct cluster_handle
* clust
,
1804 jnode
* node
, struct inode
* inode
)
1809 struct tfm_cluster
*tc
= &clust
->tc
;
1811 /* find and put checked in pages: cluster is locked,
1812 * so we must get expected number (to_put) of pages
1814 to_put
= size_in_pages(lbytes(clust
->index
, inode
));
1815 found
= find_get_pages(inode
->i_mapping
,
1816 clust_to_pg(clust
->index
, inode
),
1817 to_put
, clust
->pages
);
1818 BUG_ON(found
!= to_put
);
1820 __put_page_cluster(0, to_put
, clust
->pages
, inode
);
1821 unlock_cluster_uncapture(node
);
1823 /* Flush found pages.
1825 * Note, that we don't disable modifications while flushing,
1826 * moreover, some found pages can be truncated, as we have
1827 * released cluster lock.
1829 for (i
= 0; i
< found
; i
++) {
1832 assert("edward-1479",
1833 clust
->pages
[i
]->index
== clust
->pages
[0]->index
+ i
);
1835 lock_page(clust
->pages
[i
]);
1836 if (!PageUptodate(clust
->pages
[i
])) {
1837 /* page was truncated */
1838 assert("edward-1480",
1839 i_size_read(inode
) <= page_offset(clust
->pages
[i
]));
1840 assert("edward-1481",
1841 clust
->pages
[i
]->mapping
!= inode
->i_mapping
);
1842 unlock_page(clust
->pages
[i
]);
1845 /* Update the number of bytes in the logical cluster,
1846 * as it could be partially truncated. Note, that only
1847 * partial truncate is possible (complete truncate can
1848 * not go here, as it is performed via ->kill_hook()
1849 * called by cut_file_items(), and the last one must
1850 * wait for znode locked with parent coord).
1852 checkout_cluster_size(clust
, inode
);
1854 /* this can be zero, as new file size is
1855 checked in before truncating pages */
1856 in_page
= __mbp(tc
->len
, i
);
1858 data
= kmap(clust
->pages
[i
]);
1859 memcpy(tfm_stream_data(tc
, INPUT_STREAM
) + pg_to_off(i
),
1861 kunmap(clust
->pages
[i
]);
1863 if (PageDirty(clust
->pages
[i
]))
1864 cancel_dirty_page(clust
->pages
[i
], PAGE_CACHE_SIZE
);
1866 unlock_page(clust
->pages
[i
]);
1868 if (in_page
< PAGE_CACHE_SIZE
)
1869 /* end of the file */
1872 put_found_pages(clust
->pages
, found
); /* find_get_pages */
1873 tc
->lsize
= tc
->len
;
1877 /* Check out modifications of a logical cluster */
1878 int checkout_logical_cluster(struct cluster_handle
* clust
,
1879 jnode
* node
, struct inode
*inode
)
1882 struct tfm_cluster
*tc
= &clust
->tc
;
1884 assert("edward-980", node
!= NULL
);
1885 assert("edward-236", inode
!= NULL
);
1886 assert("edward-237", clust
!= NULL
);
1887 assert("edward-240", !clust
->win
);
1888 assert("edward-241", reiser4_schedulable());
1889 assert("edward-718", cryptcompress_inode_ok(inode
));
1891 result
= grab_tfm_stream(inode
, tc
, INPUT_STREAM
);
1893 warning("edward-1430", "alloc stream failed with ret=%d",
1895 return RETERR(-E_REPEAT
);
1899 if (unlikely(!JF_ISSET(node
, JNODE_DIRTY
))) {
1900 /* race with another flush */
1901 warning("edward-982",
1902 "checking out logical cluster %lu of inode %llu: "
1903 "jnode is not dirty", clust
->index
,
1904 (unsigned long long)get_inode_oid(inode
));
1905 unlock_cluster(node
);
1906 return RETERR(-E_REPEAT
);
1908 cluster_reserved2grabbed(estimate_update_cluster(inode
));
1910 /* this will unlock cluster */
1911 checkout_page_cluster(clust
, node
, inode
);
1915 /* set hint for the cluster of the index @index */
1916 static void set_hint_cluster(struct inode
*inode
, hint_t
* hint
,
1917 cloff_t index
, znode_lock_mode mode
)
1920 assert("edward-722", cryptcompress_inode_ok(inode
));
1921 assert("edward-723",
1922 inode_file_plugin(inode
) ==
1923 file_plugin_by_id(CRYPTCOMPRESS_FILE_PLUGIN_ID
));
1925 inode_file_plugin(inode
)->key_by_inode(inode
,
1926 clust_to_off(index
, inode
),
1929 reiser4_seal_init(&hint
->seal
, &hint
->ext_coord
.coord
, &key
);
1930 hint
->offset
= get_key_offset(&key
);
1934 void invalidate_hint_cluster(struct cluster_handle
* clust
)
1936 assert("edward-1291", clust
!= NULL
);
1937 assert("edward-1292", clust
->hint
!= NULL
);
1939 done_lh(&clust
->hint
->lh
);
1940 hint_clr_valid(clust
->hint
);
1943 static void put_hint_cluster(struct cluster_handle
* clust
,
1944 struct inode
*inode
, znode_lock_mode mode
)
1946 assert("edward-1286", clust
!= NULL
);
1947 assert("edward-1287", clust
->hint
!= NULL
);
1949 set_hint_cluster(inode
, clust
->hint
, clust
->index
+ 1, mode
);
1950 invalidate_hint_cluster(clust
);
1953 static int balance_dirty_page_cluster(struct cluster_handle
* clust
,
1954 struct inode
*inode
, loff_t off
,
1959 struct cryptcompress_info
* info
;
1961 assert("edward-724", inode
!= NULL
);
1962 assert("edward-725", cryptcompress_inode_ok(inode
));
1963 assert("edward-1547",
1964 nr_dirtied
!= 0 && nr_dirtied
<= cluster_nrpages(inode
));
1966 /* set next window params */
1967 move_update_window(inode
, clust
, off
, to_file
);
1969 result
= update_sd_cryptcompress(inode
);
1972 assert("edward-726", clust
->hint
->lh
.owner
== NULL
);
1973 info
= cryptcompress_inode_data(inode
);
1975 mutex_unlock(&info
->checkin_mutex
);
1976 reiser4_throttle_write(inode
, nr_dirtied
);
1977 mutex_lock(&info
->checkin_mutex
);
1981 /* set zeroes to the page cluster, proceed it, and maybe, try to capture
1983 static int write_hole(struct inode
*inode
, struct cluster_handle
* clust
,
1984 loff_t file_off
, loff_t to_file
)
1987 unsigned cl_off
, cl_count
= 0;
1988 unsigned to_pg
, pg_off
;
1989 struct reiser4_slide
* win
;
1991 assert("edward-190", clust
!= NULL
);
1992 assert("edward-1069", clust
->win
!= NULL
);
1993 assert("edward-191", inode
!= NULL
);
1994 assert("edward-727", cryptcompress_inode_ok(inode
));
1995 assert("edward-1171", clust
->dstat
!= INVAL_DISK_CLUSTER
);
1996 assert("edward-1154",
1997 ergo(clust
->dstat
!= FAKE_DISK_CLUSTER
, clust
->reserved
== 1));
2001 assert("edward-1070", win
!= NULL
);
2002 assert("edward-201", win
->stat
== HOLE_WINDOW
);
2003 assert("edward-192", cluster_ok(clust
, inode
));
2005 if (win
->off
== 0 && win
->count
== inode_cluster_size(inode
)) {
2006 /* This part of the hole will be represented by "fake"
2007 * logical cluster, i.e. which doesn't have appropriate
2008 * disk cluster until someone modify this logical cluster
2009 * and make it dirty.
2010 * So go forward here..
2012 move_update_window(inode
, clust
, file_off
, to_file
);
2015 cl_count
= win
->count
; /* number of zeroes to write */
2017 pg_off
= off_to_pgoff(win
->off
);
2021 page
= clust
->pages
[off_to_pg(cl_off
)];
2023 assert("edward-284", page
!= NULL
);
2025 to_pg
= min((typeof(pg_off
))PAGE_CACHE_SIZE
- pg_off
, cl_count
);
2027 zero_user(page
, pg_off
, to_pg
);
2028 SetPageUptodate(page
);
2029 set_page_dirty_notag(page
);
2030 mark_page_accessed(page
);
2038 /* only zeroes in this window, try to capture
2040 result
= checkin_logical_cluster(clust
, inode
);
2043 put_hint_cluster(clust
, inode
, ZNODE_WRITE_LOCK
);
2044 result
= balance_dirty_page_cluster(clust
,
2045 inode
, file_off
, to_file
,
2046 win_count_to_nrpages(win
));
2048 move_update_window(inode
, clust
, file_off
, to_file
);
2053 The main disk search procedure for cryptcompress plugin, which
2054 . scans all items of disk cluster with the lock mode @mode
2055 . maybe reads each one (if @read)
2056 . maybe makes its znode dirty (if write lock mode was specified)
2058 NOTE-EDWARD: Callers should handle the case when disk cluster
2059 is incomplete (-EIO)
2061 int find_disk_cluster(struct cluster_handle
* clust
,
2062 struct inode
*inode
, int read
, znode_lock_mode mode
)
2071 struct tfm_cluster
*tc
;
2072 struct cryptcompress_info
* info
;
2074 assert("edward-138", clust
!= NULL
);
2075 assert("edward-728", clust
->hint
!= NULL
);
2076 assert("edward-226", reiser4_schedulable());
2077 assert("edward-137", inode
!= NULL
);
2078 assert("edward-729", cryptcompress_inode_ok(inode
));
2081 fplug
= inode_file_plugin(inode
);
2082 was_grabbed
= get_current_context()->grabbed_blocks
;
2083 info
= cryptcompress_inode_data(inode
);
2086 assert("edward-462", !tfm_cluster_is_uptodate(tc
));
2087 assert("edward-461", ergo(read
, tfm_stream_is_set(tc
, INPUT_STREAM
)));
2089 dclust_init_extension(hint
);
2091 /* set key of the first disk cluster item */
2092 fplug
->flow_by_inode(inode
,
2093 (read
? (char __user
*)tfm_stream_data(tc
, INPUT_STREAM
) : NULL
),
2094 0 /* kernel space */ ,
2095 inode_scaled_cluster_size(inode
),
2096 clust_to_off(clust
->index
, inode
), READ_OP
, &f
);
2097 if (mode
== ZNODE_WRITE_LOCK
) {
2098 /* reserve for flush to make dirty all the leaf nodes
2099 which contain disk cluster */
2101 reiser4_grab_space_force(estimate_dirty_cluster(inode
),
2107 ra_info
.key_to_stop
= f
.key
;
2108 set_key_offset(&ra_info
.key_to_stop
, get_key_offset(reiser4_max_key()));
2111 result
= find_cluster_item(hint
, &f
.key
, mode
,
2113 (mode
== ZNODE_WRITE_LOCK
?
2114 CBK_FOR_INSERT
: 0));
2116 case CBK_COORD_NOTFOUND
:
2118 if (inode_scaled_offset
2119 (inode
, clust_to_off(clust
->index
, inode
)) ==
2120 get_key_offset(&f
.key
)) {
2121 /* first item not found, this is treated
2122 as disk cluster is absent */
2123 clust
->dstat
= FAKE_DISK_CLUSTER
;
2126 /* we are outside the cluster, stop search here */
2127 assert("edward-146",
2128 f
.length
!= inode_scaled_cluster_size(inode
));
2130 case CBK_COORD_FOUND
:
2131 assert("edward-148",
2132 hint
->ext_coord
.coord
.between
== AT_UNIT
);
2133 assert("edward-460",
2134 hint
->ext_coord
.coord
.unit_pos
== 0);
2136 coord_clear_iplug(&hint
->ext_coord
.coord
);
2137 result
= zload_ra(hint
->ext_coord
.coord
.node
, &ra_info
);
2138 if (unlikely(result
))
2140 iplug
= item_plugin_by_coord(&hint
->ext_coord
.coord
);
2141 assert("edward-147",
2142 item_id_by_coord(&hint
->ext_coord
.coord
) ==
2145 result
= iplug
->s
.file
.read(NULL
, &f
, hint
);
2147 zrelse(hint
->ext_coord
.coord
.node
);
2150 if (mode
== ZNODE_WRITE_LOCK
) {
2151 /* Don't make dirty more nodes then it was
2152 estimated (see comments before
2153 estimate_dirty_cluster). Missed nodes will be
2154 read up in flush time if they are evicted from
2156 if (dclust_get_extension_ncount(hint
) <=
2157 estimate_dirty_cluster(inode
))
2158 znode_make_dirty(hint
->ext_coord
.coord
.node
);
2160 znode_set_convertible(hint
->ext_coord
.coord
.
2163 zrelse(hint
->ext_coord
.coord
.node
);
2170 /* at least one item was found */
2171 /* NOTE-EDWARD: Callers should handle the case
2172 when disk cluster is incomplete (-EIO) */
2173 tc
->len
= inode_scaled_cluster_size(inode
) - f
.length
;
2174 tc
->lsize
= lbytes(clust
->index
, inode
);
2175 assert("edward-1196", tc
->len
> 0);
2176 assert("edward-1406", tc
->lsize
> 0);
2178 if (hint_is_unprepped_dclust(clust
->hint
)) {
2179 clust
->dstat
= UNPR_DISK_CLUSTER
;
2180 } else if (clust
->index
== info
->trunc_index
) {
2181 clust
->dstat
= TRNC_DISK_CLUSTER
;
2183 clust
->dstat
= PREP_DISK_CLUSTER
;
2184 dclust_set_extension_dsize(clust
->hint
, tc
->len
);
2187 assert("edward-1339",
2188 get_current_context()->grabbed_blocks
>= was_grabbed
);
2189 grabbed2free(get_current_context(),
2190 get_current_super_private(),
2191 get_current_context()->grabbed_blocks
- was_grabbed
);
2195 int get_disk_cluster_locked(struct cluster_handle
* clust
, struct inode
*inode
,
2196 znode_lock_mode lock_mode
)
2201 assert("edward-730", reiser4_schedulable());
2202 assert("edward-731", clust
!= NULL
);
2203 assert("edward-732", inode
!= NULL
);
2205 if (hint_is_valid(clust
->hint
)) {
2206 assert("edward-1293", clust
->dstat
!= INVAL_DISK_CLUSTER
);
2207 assert("edward-1294",
2208 znode_is_write_locked(clust
->hint
->lh
.node
));
2209 /* already have a valid locked position */
2210 return (clust
->dstat
==
2211 FAKE_DISK_CLUSTER
? CBK_COORD_NOTFOUND
:
2214 key_by_inode_cryptcompress(inode
, clust_to_off(clust
->index
, inode
),
2216 ra_info
.key_to_stop
= key
;
2217 set_key_offset(&ra_info
.key_to_stop
, get_key_offset(reiser4_max_key()));
2219 return find_cluster_item(clust
->hint
, &key
, lock_mode
, NULL
, FIND_EXACT
,
2223 /* Read needed cluster pages before modifying.
2224 If success, @clust->hint contains locked position in the tree.
2226 . find and set disk cluster state
2227 . make disk cluster dirty if its state is not FAKE_DISK_CLUSTER.
2229 static int read_some_cluster_pages(struct inode
* inode
,
2230 struct cluster_handle
* clust
)
2235 struct reiser4_slide
* win
= clust
->win
;
2236 znode_lock_mode mode
= ZNODE_WRITE_LOCK
;
2238 iplug
= item_plugin_by_id(CTAIL_ID
);
2240 assert("edward-924", !tfm_cluster_is_uptodate(&clust
->tc
));
2243 if (clust
->nr_pages
== 0) {
2244 /* start write hole from fake disk cluster */
2245 assert("edward-1117", win
!= NULL
);
2246 assert("edward-1118", win
->stat
== HOLE_WINDOW
);
2247 assert("edward-1119", new_logical_cluster(clust
, inode
));
2250 if (new_logical_cluster(clust
, inode
)) {
2252 new page cluster is about to be written, nothing to read,
2254 assert("edward-734", reiser4_schedulable());
2255 assert("edward-735", clust
->hint
->lh
.owner
== NULL
);
2257 if (clust
->nr_pages
) {
2260 assert("edward-1419", clust
->pages
!= NULL
);
2261 pg
= clust
->pages
[clust
->nr_pages
- 1];
2262 assert("edward-1420", pg
!= NULL
);
2263 off
= off_to_pgoff(win
->off
+win
->count
+win
->delta
);
2266 zero_user_segment(pg
, off
, PAGE_CACHE_SIZE
);
2270 clust
->dstat
= FAKE_DISK_CLUSTER
;
2274 Here we should search for disk cluster to figure out its real state.
2275 Also there is one more important reason to do disk search: we need
2276 to make disk cluster _dirty_ if it exists
2279 /* if windows is specified, read the only pages
2280 that will be modified partially */
2282 for (i
= 0; i
< clust
->nr_pages
; i
++) {
2283 struct page
*pg
= clust
->pages
[i
];
2286 if (PageUptodate(pg
)) {
2293 i
>= size_in_pages(win
->off
) &&
2294 i
< off_to_pg(win
->off
+ win
->count
+ win
->delta
))
2295 /* page will be completely overwritten */
2298 if (win
&& (i
== clust
->nr_pages
- 1) &&
2302 (size_in_pages(i_size_read(inode
)) <= pg
->index
)) {
2304 so set zeroes to the rest */
2307 assert("edward-1260",
2308 size_in_pages(win
->off
+ win
->count
+
2309 win
->delta
) - 1 == i
);
2312 off_to_pgoff(win
->off
+ win
->count
+ win
->delta
);
2313 zero_user_segment(pg
, offset
, PAGE_CACHE_SIZE
);
2315 /* still not uptodate */
2319 result
= do_readpage_ctail(inode
, clust
, pg
, mode
);
2321 assert("edward-1526", ergo(!result
, PageUptodate(pg
)));
2324 warning("edward-219", "do_readpage_ctail failed");
2328 if (!tfm_cluster_is_uptodate(&clust
->tc
)) {
2329 /* disk cluster unclaimed, but we need to make its znodes dirty
2330 * to make flush update convert its content
2332 result
= find_disk_cluster(clust
, inode
,
2333 0 /* do not read items */,
2337 tfm_cluster_clr_uptodate(&clust
->tc
);
2341 static int should_create_unprepped_cluster(struct cluster_handle
* clust
,
2342 struct inode
* inode
)
2344 assert("edward-737", clust
!= NULL
);
2346 switch (clust
->dstat
) {
2347 case PREP_DISK_CLUSTER
:
2348 case UNPR_DISK_CLUSTER
:
2350 case FAKE_DISK_CLUSTER
:
2352 clust
->win
->stat
== HOLE_WINDOW
&& clust
->nr_pages
== 0) {
2353 assert("edward-1172",
2354 new_logical_cluster(clust
, inode
));
2359 impossible("edward-1173", "bad disk cluster state");
2364 static int cryptcompress_make_unprepped_cluster(struct cluster_handle
* clust
,
2365 struct inode
*inode
)
2369 assert("edward-1123", reiser4_schedulable());
2370 assert("edward-737", clust
!= NULL
);
2371 assert("edward-738", inode
!= NULL
);
2372 assert("edward-739", cryptcompress_inode_ok(inode
));
2373 assert("edward-1053", clust
->hint
!= NULL
);
2375 if (!should_create_unprepped_cluster(clust
, inode
)) {
2376 if (clust
->reserved
) {
2377 cluster_reserved2free(estimate_insert_cluster(inode
));
2379 assert("edward-1267",
2380 clust
->reserved_unprepped
==
2381 estimate_insert_cluster(inode
));
2382 clust
->reserved_unprepped
-=
2383 estimate_insert_cluster(inode
);
2388 assert("edward-1268", clust
->reserved
);
2389 cluster_reserved2grabbed(estimate_insert_cluster(inode
));
2391 assert("edward-1441",
2392 clust
->reserved_unprepped
== estimate_insert_cluster(inode
));
2393 clust
->reserved_unprepped
-= estimate_insert_cluster(inode
);
2395 result
= ctail_insert_unprepped_cluster(clust
, inode
);
2399 inode_add_bytes(inode
, inode_cluster_size(inode
));
2401 assert("edward-743", cryptcompress_inode_ok(inode
));
2402 assert("edward-744", znode_is_write_locked(clust
->hint
->lh
.node
));
2404 clust
->dstat
= UNPR_DISK_CLUSTER
;
2408 /* . Grab page cluster for read, write, setattr, etc. operations;
2409 * . Truncate its complete pages, if needed;
2411 int prepare_page_cluster(struct inode
* inode
, struct cluster_handle
* clust
,
2414 assert("edward-177", inode
!= NULL
);
2415 assert("edward-741", cryptcompress_inode_ok(inode
));
2416 assert("edward-740", clust
->pages
!= NULL
);
2418 set_cluster_nrpages(clust
, inode
);
2419 reset_cluster_pgset(clust
, cluster_nrpages(inode
));
2420 return grab_page_cluster(inode
, clust
, rw
);
2423 /* Truncate complete page cluster of index @index.
2424 * This is called by ->kill_hook() method of item
2425 * plugin when deleting a disk cluster of such index.
2427 void truncate_complete_page_cluster(struct inode
*inode
, cloff_t index
,
2433 struct page
*pages
[MAX_CLUSTER_NRPAGES
];
2435 node
= jlookup(current_tree
, get_inode_oid(inode
),
2436 clust_to_pg(index
, inode
));
2437 nr_pages
= size_in_pages(lbytes(index
, inode
));
2438 assert("edward-1483", nr_pages
!= 0);
2441 found
= find_get_pages(inode
->i_mapping
,
2442 clust_to_pg(index
, inode
),
2443 cluster_nrpages(inode
), pages
);
2445 assert("edward-1484", jnode_truncate_ok(inode
, index
));
2450 if (reiser4_inode_get_flag(inode
, REISER4_FILE_CONV_IN_PROGRESS
)
2452 /* converting to unix_file is in progress */
2453 JF_CLR(node
, JNODE_CLUSTER_PAGE
);
2454 if (JF_ISSET(node
, JNODE_DIRTY
)) {
2456 * @nr_pages were checked in, but not yet checked out -
2457 * we need to release them. (also there can be pages
2458 * attached to page cache by read(), etc. - don't take
2459 * them into account).
2461 assert("edward-1198", found
>= nr_pages
);
2463 /* free disk space grabbed for disk cluster converting */
2464 cluster_reserved2grabbed(estimate_update_cluster(inode
));
2465 grabbed2free(get_current_context(),
2466 get_current_super_private(),
2467 estimate_update_cluster(inode
));
2468 __put_page_cluster(0, nr_pages
, pages
, inode
);
2470 /* This will clear dirty bit, uncapture and unlock jnode */
2471 unlock_cluster_uncapture(node
);
2473 unlock_cluster(node
);
2474 jput(node
); /* jlookup */
2475 put_found_pages(pages
, found
); /* find_get_pages */
2477 if (reiser4_inode_get_flag(inode
, REISER4_FILE_CONV_IN_PROGRESS
) &&
2480 truncate_page_cluster_range(inode
, pages
, index
, 0,
2481 cluster_nrpages(inode
),
2483 assert("edward-1201",
2484 ergo(!reiser4_inode_get_flag(inode
,
2485 REISER4_FILE_CONV_IN_PROGRESS
),
2486 jnode_truncate_ok(inode
, index
)));
2491 * Set cluster handle @clust of a logical cluster before
2492 * modifications which are supposed to be committed.
2494 * . grab cluster pages;
2495 * . reserve disk space;
2496 * . maybe read pages from disk and set the disk cluster dirty;
2497 * . maybe write hole and check in (partially zeroed) logical cluster;
2498 * . create 'unprepped' disk cluster for new or fake logical one.
2500 static int prepare_logical_cluster(struct inode
*inode
,
2501 loff_t file_off
, /* write position
2503 loff_t to_file
, /* bytes of users data
2504 to write to the file */
2505 struct cluster_handle
* clust
,
2506 logical_cluster_op op
)
2509 struct reiser4_slide
* win
= clust
->win
;
2511 reset_cluster_params(clust
);
2512 cluster_set_tfm_act(&clust
->tc
, TFMA_READ
);
2514 clust
->ctx
= get_current_context();
2516 assert("edward-1190", op
!= LC_INVAL
);
2520 result
= prepare_page_cluster(inode
, clust
, WRITE_OP
);
2523 assert("edward-1447",
2524 ergo(clust
->nr_pages
!= 0, jprivate(clust
->pages
[0])));
2525 assert("edward-1448",
2526 ergo(clust
->nr_pages
!= 0,
2527 jnode_is_cluster_page(jprivate(clust
->pages
[0]))));
2529 result
= reserve4cluster(inode
, clust
);
2532 result
= read_some_cluster_pages(inode
, clust
);
2534 free_reserved4cluster(inode
,
2536 estimate_update_cluster(inode
) +
2537 estimate_insert_cluster(inode
));
2540 assert("edward-1124", clust
->dstat
!= INVAL_DISK_CLUSTER
);
2542 result
= cryptcompress_make_unprepped_cluster(clust
, inode
);
2545 if (win
&& win
->stat
== HOLE_WINDOW
) {
2546 result
= write_hole(inode
, clust
, file_off
, to_file
);
2552 free_reserved4cluster(inode
, clust
,
2553 estimate_update_cluster(inode
));
2555 put_page_cluster(clust
, inode
, WRITE_OP
);
2556 assert("edward-1125", result
== -ENOSPC
);
2560 /* set window by two offsets */
2561 static void set_window(struct cluster_handle
* clust
,
2562 struct reiser4_slide
* win
, struct inode
*inode
,
2563 loff_t o1
, loff_t o2
)
2565 assert("edward-295", clust
!= NULL
);
2566 assert("edward-296", inode
!= NULL
);
2567 assert("edward-1071", win
!= NULL
);
2568 assert("edward-297", o1
<= o2
);
2570 clust
->index
= off_to_clust(o1
, inode
);
2572 win
->off
= off_to_cloff(o1
, inode
);
2573 win
->count
= min((loff_t
)(inode_cluster_size(inode
) - win
->off
),
2580 static int set_cluster_by_window(struct inode
*inode
,
2581 struct cluster_handle
* clust
,
2582 struct reiser4_slide
* win
, size_t length
,
2587 assert("edward-197", clust
!= NULL
);
2588 assert("edward-1072", win
!= NULL
);
2589 assert("edward-198", inode
!= NULL
);
2591 result
= alloc_cluster_pgset(clust
, cluster_nrpages(inode
));
2595 if (file_off
> i_size_read(inode
)) {
2596 /* Uhmm, hole in cryptcompress file... */
2598 hole_size
= file_off
- inode
->i_size
;
2600 set_window(clust
, win
, inode
, inode
->i_size
, file_off
);
2601 win
->stat
= HOLE_WINDOW
;
2602 if (win
->off
+ hole_size
< inode_cluster_size(inode
))
2603 /* there is also user's data to append to the hole */
2604 win
->delta
= min(inode_cluster_size(inode
) -
2605 (win
->off
+ win
->count
), length
);
2608 set_window(clust
, win
, inode
, file_off
, file_off
+ length
);
2609 win
->stat
= DATA_WINDOW
;
2613 int set_cluster_by_page(struct cluster_handle
* clust
, struct page
* page
,
2617 int (*setting_actor
)(struct cluster_handle
* clust
, int count
);
2619 assert("edward-1358", clust
!= NULL
);
2620 assert("edward-1359", page
!= NULL
);
2621 assert("edward-1360", page
->mapping
!= NULL
);
2622 assert("edward-1361", page
->mapping
->host
!= NULL
);
2625 (clust
->pages
? reset_cluster_pgset
: alloc_cluster_pgset
);
2626 result
= setting_actor(clust
, count
);
2627 clust
->index
= pg_to_clust(page
->index
, page
->mapping
->host
);
2631 /* reset all the params that not get updated */
2632 void reset_cluster_params(struct cluster_handle
* clust
)
2634 assert("edward-197", clust
!= NULL
);
2636 clust
->dstat
= INVAL_DISK_CLUSTER
;
2637 clust
->tc
.uptodate
= 0;
2641 /* the heart of write_cryptcompress */
2642 static loff_t
do_write_cryptcompress(struct file
*file
, struct inode
*inode
,
2643 const char __user
*buf
, size_t to_write
,
2644 loff_t pos
, struct dispatch_context
*cont
)
2650 struct reiser4_slide win
;
2651 struct cluster_handle clust
;
2652 struct cryptcompress_info
* info
;
2654 assert("edward-154", buf
!= NULL
);
2655 assert("edward-161", reiser4_schedulable());
2656 assert("edward-748", cryptcompress_inode_ok(inode
));
2657 assert("edward-159", current_blocksize
== PAGE_CACHE_SIZE
);
2658 assert("edward-1274", get_current_context()->grabbed_blocks
== 0);
2660 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
2662 return RETERR(-ENOMEM
);
2664 result
= load_file_hint(file
, hint
);
2671 reiser4_slide_init(&win
);
2672 cluster_init_read(&clust
, &win
);
2674 info
= cryptcompress_inode_data(inode
);
2676 mutex_lock(&info
->checkin_mutex
);
2678 result
= set_cluster_by_window(inode
, &clust
, &win
, to_write
, pos
);
2682 if (next_window_stat(&win
) == HOLE_WINDOW
) {
2683 /* write hole in this iteration
2684 separated from the loop below */
2685 result
= write_dispatch_hook(file
, inode
,
2689 result
= prepare_logical_cluster(inode
, pos
, count
, &clust
,
2695 const char __user
* src
;
2696 unsigned page_off
, to_page
;
2698 assert("edward-750", reiser4_schedulable());
2700 result
= write_dispatch_hook(file
, inode
,
2701 pos
+ to_write
- count
,
2705 if (cont
->state
== DISPATCH_ASSIGNED_NEW
)
2706 /* done_lh was called in write_dispatch_hook */
2707 goto out_no_longterm_lock
;
2709 result
= prepare_logical_cluster(inode
, pos
, count
, &clust
,
2714 assert("edward-751", cryptcompress_inode_ok(inode
));
2715 assert("edward-204", win
.stat
== DATA_WINDOW
);
2716 assert("edward-1288", hint_is_valid(clust
.hint
));
2717 assert("edward-752",
2718 znode_is_write_locked(hint
->ext_coord
.coord
.node
));
2719 put_hint_cluster(&clust
, inode
, ZNODE_WRITE_LOCK
);
2721 /* set write position in page */
2722 page_off
= off_to_pgoff(win
.off
);
2724 /* copy user's data to cluster pages */
2725 for (i
= off_to_pg(win
.off
), src
= buf
;
2726 i
< size_in_pages(win
.off
+ win
.count
);
2727 i
++, src
+= to_page
) {
2728 to_page
= __mbp(win
.off
+ win
.count
, i
) - page_off
;
2729 assert("edward-1039",
2730 page_off
+ to_page
<= PAGE_CACHE_SIZE
);
2731 assert("edward-287", clust
.pages
[i
] != NULL
);
2733 fault_in_pages_readable(src
, to_page
);
2735 lock_page(clust
.pages
[i
]);
2737 __copy_from_user((char *)kmap(clust
.pages
[i
]) +
2738 page_off
, src
, to_page
);
2739 kunmap(clust
.pages
[i
]);
2740 if (unlikely(result
)) {
2741 unlock_page(clust
.pages
[i
]);
2745 SetPageUptodate(clust
.pages
[i
]);
2746 set_page_dirty_notag(clust
.pages
[i
]);
2747 flush_dcache_page(clust
.pages
[i
]);
2748 mark_page_accessed(clust
.pages
[i
]);
2749 unlock_page(clust
.pages
[i
]);
2752 assert("edward-753", cryptcompress_inode_ok(inode
));
2754 result
= checkin_logical_cluster(&clust
, inode
);
2761 result
= balance_dirty_page_cluster(&clust
, inode
, 0, count
,
2762 win_count_to_nrpages(&win
));
2765 assert("edward-755", hint
->lh
.owner
== NULL
);
2766 reset_cluster_params(&clust
);
2769 put_page_cluster(&clust
, inode
, WRITE_OP
);
2772 free_reserved4cluster(inode
,
2774 estimate_update_cluster(inode
));
2779 save_file_hint(file
, hint
);
2780 out_no_longterm_lock
:
2781 mutex_unlock(&info
->checkin_mutex
);
2783 put_cluster_handle(&clust
);
2784 assert("edward-195",
2785 ergo((to_write
== count
),
2786 (result
< 0 || cont
->state
== DISPATCH_ASSIGNED_NEW
)));
2787 return (to_write
- count
) ? (to_write
- count
) : result
;
2792 * @file: file to write to
2793 * @buf: address of user-space buffer
2794 * @read_amount: number of bytes to write
2795 * @off: position in file to write to
2797 ssize_t
write_cryptcompress(struct file
*file
, const char __user
*buf
,
2798 size_t count
, loff_t
*off
,
2799 struct dispatch_context
*cont
)
2802 struct inode
*inode
;
2803 reiser4_context
*ctx
;
2805 struct cryptcompress_info
*info
;
2807 assert("edward-1449", cont
->state
== DISPATCH_INVAL_STATE
);
2809 inode
= file
->f_dentry
->d_inode
;
2810 assert("edward-196", cryptcompress_inode_ok(inode
));
2812 info
= cryptcompress_inode_data(inode
);
2813 ctx
= get_current_context();
2815 result
= generic_write_checks(file
, &pos
, &count
, 0);
2816 if (unlikely(result
!= 0)) {
2817 context_set_commit_async(ctx
);
2820 if (unlikely(count
== 0))
2822 result
= file_remove_suid(file
);
2823 if (unlikely(result
!= 0)) {
2824 context_set_commit_async(ctx
);
2827 /* remove_suid might create a transaction */
2828 reiser4_txn_restart(ctx
);
2830 result
= do_write_cryptcompress(file
, inode
, buf
, count
, pos
, cont
);
2832 if (unlikely(result
< 0)) {
2833 context_set_commit_async(ctx
);
2836 /* update position in a file */
2837 *off
= pos
+ result
;
2841 /* plugin->readpages */
2842 int readpages_cryptcompress(struct file
*file
, struct address_space
*mapping
,
2843 struct list_head
*pages
, unsigned nr_pages
)
2845 reiser4_context
* ctx
;
2848 ctx
= reiser4_init_context(mapping
->host
->i_sb
);
2853 /* cryptcompress file can be built of ctail items only */
2854 ret
= readpages_ctail(file
, mapping
, pages
);
2855 reiser4_txn_restart(ctx
);
2856 reiser4_exit_context(ctx
);
2859 put_pages_list(pages
);
2864 static reiser4_block_nr
cryptcompress_estimate_read(struct inode
*inode
)
2866 /* reserve one block to update stat data item */
2867 assert("edward-1193",
2868 inode_file_plugin(inode
)->estimate
.update
==
2869 estimate_update_common
);
2870 return estimate_update_common(inode
);
2875 * @file: file to read from
2876 * @buf: address of user-space buffer
2877 * @read_amount: number of bytes to read
2878 * @off: position in file to read from
2880 ssize_t
read_cryptcompress(struct file
* file
, char __user
*buf
, size_t size
,
2884 struct inode
*inode
;
2885 reiser4_context
*ctx
;
2886 struct cryptcompress_info
*info
;
2887 reiser4_block_nr needed
;
2889 inode
= file
->f_dentry
->d_inode
;
2890 assert("edward-1194", !reiser4_inode_get_flag(inode
, REISER4_NO_SD
));
2892 ctx
= reiser4_init_context(inode
->i_sb
);
2894 return PTR_ERR(ctx
);
2896 info
= cryptcompress_inode_data(inode
);
2897 needed
= cryptcompress_estimate_read(inode
);
2899 result
= reiser4_grab_space(needed
, BA_CAN_COMMIT
);
2901 reiser4_exit_context(ctx
);
2904 result
= do_sync_read(file
, buf
, size
, off
);
2906 context_set_commit_async(ctx
);
2907 reiser4_exit_context(ctx
);
2912 /* Look for a disk cluster and keep lookup result in @found.
2913 * If @index > 0, then find disk cluster of the index (@index - 1);
2914 * If @index == 0, then find the rightmost disk cluster.
2915 * Keep incremented index of the found disk cluster in @found.
2916 * @found == 0 means that disk cluster was not found (in the last
2917 * case (@index == 0) it means that file doesn't have disk clusters).
2919 static int lookup_disk_cluster(struct inode
*inode
, cloff_t
* found
,
2931 assert("edward-1131", inode
!= NULL
);
2932 assert("edward-95", cryptcompress_inode_ok(inode
));
2934 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
2936 return RETERR(-ENOMEM
);
2937 hint_init_zero(hint
);
2940 bias
= (index
? FIND_EXACT
: FIND_MAX_NOT_MORE_THAN
);
2942 (index
? clust_to_off(index
, inode
) -
2943 1 : get_key_offset(reiser4_max_key()));
2945 key_by_inode_cryptcompress(inode
, offset
, &key
);
2947 /* find the last item of this object */
2949 find_cluster_item(hint
, &key
, ZNODE_READ_LOCK
, NULL
/* ra_info */,
2951 if (cbk_errored(result
)) {
2956 if (result
== CBK_COORD_NOTFOUND
) {
2957 /* no real disk clusters */
2963 /* disk cluster is found */
2964 coord
= &hint
->ext_coord
.coord
;
2965 coord_clear_iplug(coord
);
2966 result
= zload(coord
->node
);
2967 if (unlikely(result
)) {
2972 iplug
= item_plugin_by_coord(coord
);
2973 assert("edward-277", iplug
== item_plugin_by_id(CTAIL_ID
));
2974 assert("edward-1202", ctail_ok(coord
));
2976 item_key_by_coord(coord
, &key
);
2977 *found
= off_to_clust(get_key_offset(&key
), inode
) + 1;
2979 assert("edward-1132", ergo(index
, index
== *found
));
2981 zrelse(coord
->node
);
2987 static int find_fake_appended(struct inode
*inode
, cloff_t
* index
)
2989 return lookup_disk_cluster(inode
, index
,
2990 0 /* find last real one */ );
2993 /* Set left coord when unit is not found after node_lookup()
2994 This takes into account that there can be holes in a sequence
2997 static void adjust_left_coord(coord_t
* left_coord
)
2999 switch (left_coord
->between
) {
3001 left_coord
->between
= AFTER_ITEM
;
3006 impossible("edward-1204", "bad left coord to cut");
3011 #define CRC_CUT_TREE_MIN_ITERATIONS 64
3013 /* plugin->cut_tree_worker */
3014 int cut_tree_worker_cryptcompress(tap_t
* tap
, const reiser4_key
* from_key
,
3015 const reiser4_key
* to_key
,
3016 reiser4_key
* smallest_removed
,
3017 struct inode
*object
, int truncate
,
3020 lock_handle next_node_lock
;
3024 assert("edward-1158", tap
->coord
->node
!= NULL
);
3025 assert("edward-1159", znode_is_write_locked(tap
->coord
->node
));
3026 assert("edward-1160", znode_get_level(tap
->coord
->node
) == LEAF_LEVEL
);
3029 init_lh(&next_node_lock
);
3032 znode
*node
; /* node from which items are cut */
3033 node_plugin
*nplug
; /* node plugin for @node */
3035 node
= tap
->coord
->node
;
3037 /* Move next_node_lock to the next node on the left. */
3039 reiser4_get_left_neighbor(&next_node_lock
, node
,
3041 GN_CAN_USE_UPPER_LEVELS
);
3042 if (result
!= 0 && result
!= -E_NO_NEIGHBOR
)
3044 /* FIXME-EDWARD: Check can we delete the node as a whole. */
3045 result
= reiser4_tap_load(tap
);
3049 /* Prepare the second (right) point for cut_node() */
3051 coord_init_last_unit(tap
->coord
, node
);
3053 else if (item_plugin_by_coord(tap
->coord
)->b
.lookup
== NULL
)
3054 /* set rightmost unit for the items without lookup method */
3055 tap
->coord
->unit_pos
= coord_last_unit_pos(tap
->coord
);
3057 nplug
= node
->nplug
;
3059 assert("edward-1161", nplug
);
3060 assert("edward-1162", nplug
->lookup
);
3062 /* left_coord is leftmost unit cut from @node */
3063 result
= nplug
->lookup(node
, from_key
, FIND_EXACT
, &left_coord
);
3065 if (IS_CBKERR(result
))
3068 if (result
== CBK_COORD_NOTFOUND
)
3069 adjust_left_coord(&left_coord
);
3071 /* adjust coordinates so that they are set to existing units */
3072 if (coord_set_to_right(&left_coord
)
3073 || coord_set_to_left(tap
->coord
)) {
3078 if (coord_compare(&left_coord
, tap
->coord
) ==
3079 COORD_CMP_ON_RIGHT
) {
3080 /* keys from @from_key to @to_key are not in the tree */
3085 /* cut data from one node */
3086 *smallest_removed
= *reiser4_min_key();
3087 result
= kill_node_content(&left_coord
,
3092 next_node_lock
.node
,
3094 reiser4_tap_relse(tap
);
3101 /* Check whether all items with keys >= from_key were removed
3103 if (keyle(smallest_removed
, from_key
))
3107 if (next_node_lock
.node
== NULL
)
3110 result
= reiser4_tap_move(tap
, &next_node_lock
);
3111 done_lh(&next_node_lock
);
3115 /* Break long cut_tree operation (deletion of a large file) if
3116 * atom requires commit. */
3117 if (*progress
> CRC_CUT_TREE_MIN_ITERATIONS
3118 && current_atom_should_commit()) {
3123 done_lh(&next_node_lock
);
3127 /* Append or expand hole in two steps:
3128 * 1) set zeroes to the rightmost page of the rightmost non-fake
3130 * 2) expand hole via fake logical clusters (just increase i_size)
3132 static int cryptcompress_append_hole(struct inode
*inode
/* with old size */,
3140 struct reiser4_slide win
;
3141 struct cluster_handle clust
;
3143 assert("edward-1133", inode
->i_size
< new_size
);
3144 assert("edward-1134", reiser4_schedulable());
3145 assert("edward-1135", cryptcompress_inode_ok(inode
));
3146 assert("edward-1136", current_blocksize
== PAGE_CACHE_SIZE
);
3147 assert("edward-1333", off_to_cloff(inode
->i_size
, inode
) != 0);
3149 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
3151 return RETERR(-ENOMEM
);
3152 hint_init_zero(hint
);
3155 reiser4_slide_init(&win
);
3156 cluster_init_read(&clust
, &win
);
3159 result
= alloc_cluster_pgset(&clust
, cluster_nrpages(inode
));
3162 if (off_to_cloff(inode
->i_size
, inode
) == 0)
3164 hole_size
= new_size
- inode
->i_size
;
3166 inode_cluster_size(inode
) - off_to_cloff(inode
->i_size
, inode
);
3167 if (hole_size
< nr_zeroes
)
3168 nr_zeroes
= hole_size
;
3169 set_window(&clust
, &win
, inode
, inode
->i_size
,
3170 inode
->i_size
+ nr_zeroes
);
3171 win
.stat
= HOLE_WINDOW
;
3173 assert("edward-1137",
3174 clust
.index
== off_to_clust(inode
->i_size
, inode
));
3176 result
= prepare_logical_cluster(inode
, 0, 0, &clust
, LC_APPOV
);
3178 assert("edward-1271", !result
|| result
== -ENOSPC
);
3181 assert("edward-1139",
3182 clust
.dstat
== PREP_DISK_CLUSTER
||
3183 clust
.dstat
== UNPR_DISK_CLUSTER
);
3185 assert("edward-1431", hole_size
>= nr_zeroes
);
3186 if (hole_size
== nr_zeroes
)
3187 /* nothing to append anymore */
3190 INODE_SET_SIZE(inode
, new_size
);
3194 put_cluster_handle(&clust
);
3198 static int update_cryptcompress_size(struct inode
*inode
, loff_t new_size
,
3201 return (new_size
& ((loff_t
) (inode_cluster_size(inode
)) - 1)
3202 ? 0 : reiser4_update_file_size(inode
, new_size
, update_sd
));
3205 /* Prune cryptcompress file in two steps:
3206 * 1) cut all nominated logical clusters except the leftmost one which
3207 * is to be partially truncated. Note, that there can be "holes"
3208 * represented by fake logical clusters.
3209 * 2) set zeroes and capture leftmost partially truncated logical
3210 * cluster, if it is not fake; otherwise prune fake logical cluster
3211 * (just decrease i_size).
3213 static int prune_cryptcompress(struct inode
*inode
, loff_t new_size
,
3214 int update_sd
, cloff_t aidx
)
3224 struct reiser4_slide win
;
3225 struct cluster_handle clust
;
3227 assert("edward-1140", inode
->i_size
>= new_size
);
3228 assert("edward-1141", reiser4_schedulable());
3229 assert("edward-1142", cryptcompress_inode_ok(inode
));
3230 assert("edward-1143", current_blocksize
== PAGE_CACHE_SIZE
);
3232 old_size
= inode
->i_size
;
3234 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
3236 return RETERR(-ENOMEM
);
3237 hint_init_zero(hint
);
3240 reiser4_slide_init(&win
);
3241 cluster_init_read(&clust
, &win
);
3244 /* calculate index of the rightmost logical cluster
3245 that will be completely truncated */
3246 ridx
= size_in_lc(new_size
, inode
);
3248 /* truncate all disk clusters starting from @ridx */
3249 assert("edward-1174", ridx
<= aidx
);
3250 old_size
= inode
->i_size
;
3252 struct cryptcompress_info
* info
;
3253 info
= cryptcompress_inode_data(inode
);
3254 result
= cut_file_items(inode
,
3255 clust_to_off(ridx
, inode
),
3257 clust_to_off(aidx
, inode
),
3258 update_cryptcompress_size
);
3259 info
->trunc_index
= ULONG_MAX
;
3264 * there can be pages of fake logical clusters, truncate them
3266 truncate_inode_pages(inode
->i_mapping
, clust_to_off(ridx
, inode
));
3267 assert("edward-1524",
3268 pages_truncate_ok(inode
, clust_to_pg(ridx
, inode
)));
3270 * now perform partial truncate of last logical cluster
3272 if (!off_to_cloff(new_size
, inode
)) {
3273 /* no partial truncate is needed */
3274 assert("edward-1145", inode
->i_size
== new_size
);
3277 assert("edward-1146", new_size
< inode
->i_size
);
3279 to_prune
= inode
->i_size
- new_size
;
3281 /* check if the last logical cluster is fake */
3282 result
= lookup_disk_cluster(inode
, &aidx
, ridx
);
3286 /* yup, this is fake one */
3289 assert("edward-1148", aidx
== ridx
);
3291 /* do partial truncate of the last page cluster,
3292 and try to capture this one */
3293 result
= alloc_cluster_pgset(&clust
, cluster_nrpages(inode
));
3296 nr_zeroes
= (off_to_pgoff(new_size
) ?
3297 PAGE_CACHE_SIZE
- off_to_pgoff(new_size
) : 0);
3298 set_window(&clust
, &win
, inode
, new_size
, new_size
+ nr_zeroes
);
3299 win
.stat
= HOLE_WINDOW
;
3301 assert("edward-1149", clust
.index
== ridx
- 1);
3303 result
= prepare_logical_cluster(inode
, 0, 0, &clust
, LC_TRUNC
);
3306 assert("edward-1151",
3307 clust
.dstat
== PREP_DISK_CLUSTER
||
3308 clust
.dstat
== UNPR_DISK_CLUSTER
);
3310 assert("edward-1191", inode
->i_size
== new_size
);
3311 assert("edward-1206", body_truncate_ok(inode
, ridx
));
3313 /* drop all the pages that don't have jnodes (i.e. pages
3314 which can not be truncated by cut_file_items() because
3315 of holes represented by fake disk clusters) including
3316 the pages of partially truncated cluster which was
3317 released by prepare_logical_cluster() */
3318 INODE_SET_SIZE(inode
, new_size
);
3319 truncate_inode_pages(inode
->i_mapping
, new_size
);
3321 assert("edward-1334", !result
|| result
== -ENOSPC
);
3322 assert("edward-1497",
3323 pages_truncate_ok(inode
, size_in_pages(new_size
)));
3327 put_cluster_handle(&clust
);
3331 /* Prepare cryptcompress file for truncate:
3332 * prune or append rightmost fake logical clusters (if any)
3334 static int start_truncate_fake(struct inode
*inode
, cloff_t aidx
,
3335 loff_t new_size
, int update_sd
)
3340 if (new_size
> inode
->i_size
) {
3342 if (inode
->i_size
< clust_to_off(aidx
, inode
))
3345 bytes
= new_size
- inode
->i_size
;
3346 INODE_SET_SIZE(inode
, inode
->i_size
+ bytes
);
3349 if (inode
->i_size
<= clust_to_off(aidx
, inode
))
3352 bytes
= inode
->i_size
-
3353 max(new_size
, clust_to_off(aidx
, inode
));
3356 INODE_SET_SIZE(inode
, inode
->i_size
- bytes
);
3357 /* In the case of fake prune we need to drop page cluster.
3358 There are only 2 cases for partially truncated page:
3359 1. If is is dirty, therefore it is anonymous
3360 (was dirtied via mmap), and will be captured
3361 later via ->capture().
3362 2. If is clean, therefore it is filled by zeroes.
3363 In both cases we don't need to make it dirty and
3366 truncate_inode_pages(inode
->i_mapping
, inode
->i_size
);
3369 result
= update_sd_cryptcompress(inode
);
3374 * This is called in setattr_cryptcompress when it is used to truncate,
3375 * and in delete_object_cryptcompress
3377 static int cryptcompress_truncate(struct inode
*inode
, /* old size */
3378 loff_t new_size
, /* new size */
3384 result
= find_fake_appended(inode
, &aidx
);
3387 assert("edward-1208",
3388 ergo(aidx
> 0, inode
->i_size
> clust_to_off(aidx
- 1, inode
)));
3390 result
= start_truncate_fake(inode
, aidx
, new_size
, update_sd
);
3393 if (inode
->i_size
== new_size
)
3394 /* nothing to truncate anymore */
3396 result
= (inode
->i_size
< new_size
?
3397 cryptcompress_append_hole(inode
, new_size
) :
3398 prune_cryptcompress(inode
, new_size
, update_sd
, aidx
));
3399 if (!result
&& update_sd
)
3400 result
= update_sd_cryptcompress(inode
);
3405 * Capture a pager cluster.
3406 * @clust must be set up by a caller.
3408 static int capture_page_cluster(struct cluster_handle
* clust
,
3409 struct inode
* inode
)
3413 assert("edward-1073", clust
!= NULL
);
3414 assert("edward-1074", inode
!= NULL
);
3415 assert("edward-1075", clust
->dstat
== INVAL_DISK_CLUSTER
);
3417 result
= prepare_logical_cluster(inode
, 0, 0, clust
, LC_APPOV
);
3421 set_cluster_pages_dirty(clust
, inode
);
3422 result
= checkin_logical_cluster(clust
, inode
);
3423 put_hint_cluster(clust
, inode
, ZNODE_WRITE_LOCK
);
3424 if (unlikely(result
))
3425 put_page_cluster(clust
, inode
, WRITE_OP
);
3429 /* Starting from @index find tagged pages of the same page cluster.
3430 * Clear the tag for each of them. Return number of found pages.
3432 static int find_anon_page_cluster(struct address_space
* mapping
,
3433 pgoff_t
* index
, struct page
** pages
)
3437 spin_lock_irq(&mapping
->tree_lock
);
3439 /* looking for one page */
3440 found
= radix_tree_gang_lookup_tag(&mapping
->page_tree
,
3443 PAGECACHE_TAG_REISER4_MOVED
);
3446 if (!same_page_cluster(pages
[0], pages
[i
]))
3450 page_cache_get(pages
[i
]);
3451 *index
= pages
[i
]->index
+ 1;
3453 radix_tree_tag_clear(&mapping
->page_tree
,
3455 PAGECACHE_TAG_REISER4_MOVED
);
3456 if (last_page_in_cluster(pages
[i
++]))
3459 spin_unlock_irq(&mapping
->tree_lock
);
3463 #define MAX_PAGES_TO_CAPTURE (1024)
3465 /* Capture anonymous page clusters */
3466 static int capture_anon_pages(struct address_space
* mapping
, pgoff_t
* index
,
3474 struct inode
* inode
;
3475 struct cluster_handle clust
;
3476 struct page
* pages
[MAX_CLUSTER_NRPAGES
];
3478 assert("edward-1127", mapping
!= NULL
);
3479 assert("edward-1128", mapping
->host
!= NULL
);
3480 assert("edward-1440", mapping
->host
->i_mapping
== mapping
);
3482 inode
= mapping
->host
;
3483 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
3485 return RETERR(-ENOMEM
);
3486 hint_init_zero(hint
);
3489 cluster_init_read(&clust
, NULL
);
3492 result
= alloc_cluster_pgset(&clust
, cluster_nrpages(inode
));
3496 while (to_capture
> 0) {
3497 found
= find_anon_page_cluster(mapping
, index
, pages
);
3499 *index
= (pgoff_t
) - 1;
3502 move_cluster_forward(&clust
, inode
, pages
[0]->index
);
3503 result
= capture_page_cluster(&clust
, inode
);
3505 put_found_pages(pages
, found
); /* find_anon_page_cluster */
3508 to_capture
-= clust
.nr_pages
;
3509 count
+= clust
.nr_pages
;
3512 warning("edward-1077",
3513 "Capture failed (inode %llu, result=%i, captured=%d)\n",
3514 (unsigned long long)get_inode_oid(inode
), result
, count
);
3516 assert("edward-1078", ergo(found
> 0, count
> 0));
3517 if (to_capture
<= 0)
3518 /* there may be left more pages */
3519 __mark_inode_dirty(inode
, I_DIRTY_PAGES
);
3525 put_cluster_handle(&clust
);
3529 /* Returns true if inode's mapping has dirty pages
3530 which do not belong to any atom */
3531 static int cryptcompress_inode_has_anon_pages(struct inode
*inode
)
3534 spin_lock_irq(&inode
->i_mapping
->tree_lock
);
3535 result
= radix_tree_tagged(&inode
->i_mapping
->page_tree
,
3536 PAGECACHE_TAG_REISER4_MOVED
);
3537 spin_unlock_irq(&inode
->i_mapping
->tree_lock
);
3541 /* plugin->writepages */
3542 int writepages_cryptcompress(struct address_space
*mapping
,
3543 struct writeback_control
*wbc
)
3549 struct inode
*inode
;
3550 struct cryptcompress_info
*info
;
3552 inode
= mapping
->host
;
3553 if (!cryptcompress_inode_has_anon_pages(inode
))
3555 info
= cryptcompress_inode_data(inode
);
3556 nrpages
= size_in_pages(i_size_read(inode
));
3558 if (wbc
->sync_mode
!= WB_SYNC_ALL
)
3559 to_capture
= min(wbc
->nr_to_write
, (long)MAX_PAGES_TO_CAPTURE
);
3561 to_capture
= MAX_PAGES_TO_CAPTURE
;
3563 reiser4_context
*ctx
;
3565 ctx
= reiser4_init_context(inode
->i_sb
);
3567 result
= PTR_ERR(ctx
);
3570 /* avoid recursive calls to ->sync_inodes */
3573 assert("edward-1079",
3574 lock_stack_isclean(get_current_lock_stack()));
3576 reiser4_txn_restart_current();
3578 if (get_current_context()->entd
) {
3579 if (mutex_trylock(&info
->checkin_mutex
) == 0) {
3580 /* the mutex might be occupied by
3582 result
= RETERR(-EBUSY
);
3583 reiser4_exit_context(ctx
);
3587 mutex_lock(&info
->checkin_mutex
);
3589 result
= capture_anon_pages(inode
->i_mapping
, &index
,
3591 mutex_unlock(&info
->checkin_mutex
);
3594 reiser4_exit_context(ctx
);
3597 wbc
->nr_to_write
-= result
;
3598 if (wbc
->sync_mode
!= WB_SYNC_ALL
) {
3599 reiser4_exit_context(ctx
);
3602 result
= txnmgr_force_commit_all(inode
->i_sb
, 0);
3603 reiser4_exit_context(ctx
);
3604 } while (result
>= 0 && index
< nrpages
);
3607 if (is_in_reiser4_context()) {
3608 if (get_current_context()->nr_captured
>= CAPTURE_APAGE_BURST
) {
3609 /* there are already pages to flush, flush them out,
3610 do not delay until end of reiser4_sync_inodes */
3611 reiser4_writeout(inode
->i_sb
, wbc
);
3612 get_current_context()->nr_captured
= 0;
3619 int ioctl_cryptcompress(struct inode
*inode
, struct file
*filp
,
3620 unsigned int cmd
, unsigned long arg
)
3622 return RETERR(-ENOSYS
);
3626 int mmap_cryptcompress(struct file
*file
, struct vm_area_struct
*vma
)
3629 struct inode
*inode
;
3630 reiser4_context
*ctx
;
3632 inode
= file
->f_dentry
->d_inode
;
3633 ctx
= reiser4_init_context(inode
->i_sb
);
3635 return PTR_ERR(ctx
);
3637 * generic_file_mmap will do update_atime. Grab space for stat data
3640 result
= reiser4_grab_space_force
3641 (inode_file_plugin(inode
)->estimate
.update(inode
),
3644 reiser4_exit_context(ctx
);
3647 result
= generic_file_mmap(file
, vma
);
3648 reiser4_exit_context(ctx
);
3652 /* plugin->delete_object */
3653 int delete_object_cryptcompress(struct inode
*inode
)
3656 struct cryptcompress_info
* info
;
3658 assert("edward-429", inode
->i_nlink
== 0);
3660 reiser4_txn_restart_current();
3661 info
= cryptcompress_inode_data(inode
);
3663 mutex_lock(&info
->checkin_mutex
);
3664 result
= cryptcompress_truncate(inode
, 0, 0);
3665 mutex_unlock(&info
->checkin_mutex
);
3668 warning("edward-430",
3669 "cannot truncate cryptcompress file %lli: %i",
3670 (unsigned long long)get_inode_oid(inode
),
3673 truncate_inode_pages(inode
->i_mapping
, 0);
3674 assert("edward-1487", pages_truncate_ok(inode
, 0));
3675 /* and remove stat data */
3676 return reiser4_delete_object_common(inode
);
3681 * This implements actual truncate (see comments in reiser4/page_cache.c)
3683 int setattr_cryptcompress(struct dentry
*dentry
, struct iattr
*attr
)
3686 struct inode
*inode
;
3687 struct cryptcompress_info
* info
;
3689 inode
= dentry
->d_inode
;
3690 info
= cryptcompress_inode_data(inode
);
3692 if (attr
->ia_valid
& ATTR_SIZE
) {
3693 if (i_size_read(inode
) != attr
->ia_size
) {
3694 reiser4_context
*ctx
;
3697 ctx
= reiser4_init_context(dentry
->d_inode
->i_sb
);
3699 return PTR_ERR(ctx
);
3700 result
= setattr_dispatch_hook(inode
);
3702 context_set_commit_async(ctx
);
3703 reiser4_exit_context(ctx
);
3706 old_size
= i_size_read(inode
);
3707 inode_check_scale(inode
, old_size
, attr
->ia_size
);
3709 mutex_lock(&info
->checkin_mutex
);
3710 result
= cryptcompress_truncate(inode
,
3713 mutex_unlock(&info
->checkin_mutex
);
3715 warning("edward-1192",
3716 "truncate_cryptcompress failed: oid %lli, "
3717 "old size %lld, new size %lld, retval %d",
3718 (unsigned long long)
3719 get_inode_oid(inode
), old_size
,
3720 attr
->ia_size
, result
);
3722 context_set_commit_async(ctx
);
3723 reiser4_exit_context(ctx
);
3727 result
= reiser4_setattr_common(dentry
, attr
);
3731 /* plugin->release */
3732 int release_cryptcompress(struct inode
*inode
, struct file
*file
)
3734 reiser4_context
*ctx
= reiser4_init_context(inode
->i_sb
);
3737 return PTR_ERR(ctx
);
3738 reiser4_free_file_fsdata(file
);
3739 reiser4_exit_context(ctx
);
3743 /* plugin->prepare_write */
3744 int write_begin_cryptcompress(struct file
*file
, struct page
*page
,
3745 unsigned from
, unsigned to
)
3747 return do_prepare_write(file
, page
, from
, to
);
3750 /* plugin->commit_write */
3751 int write_end_cryptcompress(struct file
*file
, struct page
*page
,
3752 unsigned from
, unsigned to
)
3757 struct inode
* inode
;
3758 struct cluster_handle clust
;
3762 inode
= page
->mapping
->host
;
3763 hint
= kmalloc(sizeof(*hint
), reiser4_ctx_gfp_mask_get());
3765 return RETERR(-ENOMEM
);
3766 hint_init_zero(hint
);
3769 cluster_init_read(&clust
, NULL
);
3772 ret
= alloc_cluster_pgset(&clust
, cluster_nrpages(inode
));
3775 clust
.index
= pg_to_clust(page
->index
, inode
);
3776 ret
= capture_page_cluster(&clust
, inode
);
3778 warning("edward-1557",
3779 "Capture failed (inode %llu, result=%i)",
3780 (unsigned long long)get_inode_oid(inode
), ret
);
3784 put_cluster_handle(&clust
);
3789 sector_t
bmap_cryptcompress(struct address_space
*mapping
, sector_t lblock
)
3796 c-indentation-style: "K&R"