On Tue, Nov 06, 2007 at 02:33:53AM -0800, akpm@linux-foundation.org wrote:
[mmotm.git] / fs / ocfs2 / xattr.c
blobfe3419068df2efa5ed33ff7cdc424327d04431f0
1 /* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
4 * xattr.c
6 * Copyright (C) 2004, 2008 Oracle. All rights reserved.
8 * CREDITS:
9 * Lots of code in this file is copied from linux/fs/ext3/xattr.c.
10 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public
14 * License version 2 as published by the Free Software Foundation.
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
61 struct ocfs2_xattr_def_value_root {
62 struct ocfs2_xattr_value_root xv;
63 struct ocfs2_extent_rec er;
66 struct ocfs2_xattr_bucket {
67 /* The inode these xattrs are associated with */
68 struct inode *bu_inode;
70 /* The actual buffers that make up the bucket */
71 struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
73 /* How many blocks make up one bucket for this filesystem */
74 int bu_blocks;
77 struct ocfs2_xattr_set_ctxt {
78 handle_t *handle;
79 struct ocfs2_alloc_context *meta_ac;
80 struct ocfs2_alloc_context *data_ac;
81 struct ocfs2_cached_dealloc_ctxt dealloc;
/* Space taken by a value root plus its single extent record. */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/* Values larger than this are not stored next to the name. */
#define OCFS2_XATTR_INLINE_SIZE	80
#define OCFS2_XATTR_HEADER_GAP	4
/* Bytes usable for entries in the minimum-size inline xattr area. */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
/* Bytes usable for entries in an xattr block; ptr is a struct inode *. */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
95 static struct ocfs2_xattr_def_value_root def_xv = {
96 .xv.xr_list.l_count = cpu_to_le16(1),
99 struct xattr_handler *ocfs2_xattr_handlers[] = {
100 &ocfs2_xattr_user_handler,
101 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
102 &ocfs2_xattr_acl_access_handler,
103 &ocfs2_xattr_acl_default_handler,
104 #endif
105 &ocfs2_xattr_trusted_handler,
106 &ocfs2_xattr_security_handler,
107 NULL
110 static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
111 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
112 #ifdef CONFIG_OCFS2_FS_POSIX_ACL
113 [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
114 = &ocfs2_xattr_acl_access_handler,
115 [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
116 = &ocfs2_xattr_acl_default_handler,
117 #endif
118 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
119 [OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
122 struct ocfs2_xattr_info {
123 int name_index;
124 const char *name;
125 const void *value;
126 size_t value_len;
/* Cursor describing where an xattr lookup is taking place. */
struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the buffer head of the block that holds the
	 * extended attribute.  When the attribute lives in the inode,
	 * xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket *bucket;
	void *base;
	void *end;
	struct ocfs2_xattr_entry *here;
	int not_found;
};
144 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
145 struct ocfs2_xattr_header *xh,
146 int index,
147 int *block_off,
148 int *new_offset);
150 static int ocfs2_xattr_block_find(struct inode *inode,
151 int name_index,
152 const char *name,
153 struct ocfs2_xattr_search *xs);
154 static int ocfs2_xattr_index_block_find(struct inode *inode,
155 struct buffer_head *root_bh,
156 int name_index,
157 const char *name,
158 struct ocfs2_xattr_search *xs);
160 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
161 struct buffer_head *blk_bh,
162 char *buffer,
163 size_t buffer_size);
165 static int ocfs2_xattr_create_index_block(struct inode *inode,
166 struct ocfs2_xattr_search *xs,
167 struct ocfs2_xattr_set_ctxt *ctxt);
169 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
170 struct ocfs2_xattr_info *xi,
171 struct ocfs2_xattr_search *xs,
172 struct ocfs2_xattr_set_ctxt *ctxt);
174 typedef int (xattr_tree_rec_func)(struct inode *inode,
175 struct buffer_head *root_bh,
176 u64 blkno, u32 cpos, u32 len, void *para);
177 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
178 struct buffer_head *root_bh,
179 xattr_tree_rec_func *rec_func,
180 void *para);
181 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
182 struct ocfs2_xattr_bucket *bucket,
183 void *para);
184 static int ocfs2_rm_xattr_cluster(struct inode *inode,
185 struct buffer_head *root_bh,
186 u64 blkno,
187 u32 cpos,
188 u32 len,
189 void *para);
191 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
192 u64 src_blk, u64 last_blk, u64 to_blk,
193 unsigned int start_bucket,
194 u32 *first_hash);
195 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
196 struct ocfs2_dinode *di,
197 struct ocfs2_xattr_info *xi,
198 struct ocfs2_xattr_search *xis,
199 struct ocfs2_xattr_search *xbs,
200 struct ocfs2_refcount_tree **ref_tree,
201 int *meta_need,
202 int *credits);
203 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
204 struct ocfs2_xattr_bucket *bucket,
205 int offset,
206 struct ocfs2_xattr_value_root **xv,
207 struct buffer_head **bh);
208 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
209 const void *value, size_t size, int flags);
211 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
213 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
216 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
218 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
221 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
223 u16 len = sb->s_blocksize -
224 offsetof(struct ocfs2_xattr_header, xh_entries);
226 return len / sizeof(struct ocfs2_xattr_entry);
/* Block number of the first block of bucket _b. */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data of block _n of bucket _b. */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header, which sits at the start of the bucket's first block. */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
233 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
235 struct ocfs2_xattr_bucket *bucket;
236 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
238 BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
240 bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
241 if (bucket) {
242 bucket->bu_inode = inode;
243 bucket->bu_blocks = blks;
246 return bucket;
249 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
251 int i;
253 for (i = 0; i < bucket->bu_blocks; i++) {
254 brelse(bucket->bu_bhs[i]);
255 bucket->bu_bhs[i] = NULL;
259 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
261 if (bucket) {
262 ocfs2_xattr_bucket_relse(bucket);
263 bucket->bu_inode = NULL;
264 kfree(bucket);
269 * A bucket that has never been written to disk doesn't need to be
270 * read. We just need the buffer_heads. Don't call this for
271 * buckets that are already on disk. ocfs2_read_xattr_bucket() initializes
272 * them fully.
274 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
275 u64 xb_blkno)
277 int i, rc = 0;
279 for (i = 0; i < bucket->bu_blocks; i++) {
280 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
281 xb_blkno + i);
282 if (!bucket->bu_bhs[i]) {
283 rc = -EIO;
284 mlog_errno(rc);
285 break;
288 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
289 bucket->bu_bhs[i]))
290 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
291 bucket->bu_bhs[i]);
294 if (rc)
295 ocfs2_xattr_bucket_relse(bucket);
296 return rc;
299 /* Read the xattr bucket at xb_blkno */
300 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
301 u64 xb_blkno)
303 int rc;
305 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
306 bucket->bu_blocks, bucket->bu_bhs, 0,
307 NULL);
308 if (!rc) {
309 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
310 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
311 bucket->bu_bhs,
312 bucket->bu_blocks,
313 &bucket_xh(bucket)->xh_check);
314 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
315 if (rc)
316 mlog_errno(rc);
319 if (rc)
320 ocfs2_xattr_bucket_relse(bucket);
321 return rc;
324 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
325 struct ocfs2_xattr_bucket *bucket,
326 int type)
328 int i, rc = 0;
330 for (i = 0; i < bucket->bu_blocks; i++) {
331 rc = ocfs2_journal_access(handle,
332 INODE_CACHE(bucket->bu_inode),
333 bucket->bu_bhs[i], type);
334 if (rc) {
335 mlog_errno(rc);
336 break;
340 return rc;
343 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
344 struct ocfs2_xattr_bucket *bucket)
346 int i;
348 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
349 ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
350 bucket->bu_bhs, bucket->bu_blocks,
351 &bucket_xh(bucket)->xh_check);
352 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
354 for (i = 0; i < bucket->bu_blocks; i++)
355 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
358 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
359 struct ocfs2_xattr_bucket *src)
361 int i;
362 int blocksize = src->bu_inode->i_sb->s_blocksize;
364 BUG_ON(dest->bu_blocks != src->bu_blocks);
365 BUG_ON(dest->bu_inode != src->bu_inode);
367 for (i = 0; i < src->bu_blocks; i++) {
368 memcpy(bucket_block(dest, i), bucket_block(src, i),
369 blocksize);
373 static int ocfs2_validate_xattr_block(struct super_block *sb,
374 struct buffer_head *bh)
376 int rc;
377 struct ocfs2_xattr_block *xb =
378 (struct ocfs2_xattr_block *)bh->b_data;
380 mlog(0, "Validating xattr block %llu\n",
381 (unsigned long long)bh->b_blocknr);
383 BUG_ON(!buffer_uptodate(bh));
386 * If the ecc fails, we return the error but otherwise
387 * leave the filesystem running. We know any error is
388 * local to this block.
390 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
391 if (rc)
392 return rc;
395 * Errors after here are fatal
398 if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
399 ocfs2_error(sb,
400 "Extended attribute block #%llu has bad "
401 "signature %.*s",
402 (unsigned long long)bh->b_blocknr, 7,
403 xb->xb_signature);
404 return -EINVAL;
407 if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
408 ocfs2_error(sb,
409 "Extended attribute block #%llu has an "
410 "invalid xb_blkno of %llu",
411 (unsigned long long)bh->b_blocknr,
412 (unsigned long long)le64_to_cpu(xb->xb_blkno));
413 return -EINVAL;
416 if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
417 ocfs2_error(sb,
418 "Extended attribute block #%llu has an invalid "
419 "xb_fs_generation of #%u",
420 (unsigned long long)bh->b_blocknr,
421 le32_to_cpu(xb->xb_fs_generation));
422 return -EINVAL;
425 return 0;
428 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
429 struct buffer_head **bh)
431 int rc;
432 struct buffer_head *tmp = *bh;
434 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
435 ocfs2_validate_xattr_block);
437 /* If ocfs2_read_block() got us a new bh, pass it up. */
438 if (!rc && !*bh)
439 *bh = tmp;
441 return rc;
444 static inline const char *ocfs2_xattr_prefix(int name_index)
446 struct xattr_handler *handler = NULL;
448 if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
449 handler = ocfs2_xattr_handler_map[name_index];
451 return handler ? handler->prefix : NULL;
454 static u32 ocfs2_xattr_name_hash(struct inode *inode,
455 const char *name,
456 int name_len)
458 /* Get hash value of uuid from super block */
459 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
460 int i;
462 /* hash extended attribute name */
463 for (i = 0; i < name_len; i++) {
464 hash = (hash << OCFS2_HASH_SHIFT) ^
465 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
466 *name++;
469 return hash;
473 * ocfs2_xattr_hash_entry()
475 * Compute the hash of an extended attribute.
477 static void ocfs2_xattr_hash_entry(struct inode *inode,
478 struct ocfs2_xattr_header *header,
479 struct ocfs2_xattr_entry *entry)
481 u32 hash = 0;
482 char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
484 hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
485 entry->xe_name_hash = cpu_to_le32(hash);
487 return;
490 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
492 int size = 0;
494 if (value_len <= OCFS2_XATTR_INLINE_SIZE)
495 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
496 else
497 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
498 size += sizeof(struct ocfs2_xattr_entry);
500 return size;
503 int ocfs2_calc_security_init(struct inode *dir,
504 struct ocfs2_security_xattr_info *si,
505 int *want_clusters,
506 int *xattr_credits,
507 struct ocfs2_alloc_context **xattr_ac)
509 int ret = 0;
510 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
511 int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
512 si->value_len);
515 * The max space of security xattr taken inline is
516 * 256(name) + 80(value) + 16(entry) = 352 bytes,
517 * So reserve one metadata block for it is ok.
519 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
520 s_size > OCFS2_XATTR_FREE_IN_IBODY) {
521 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
522 if (ret) {
523 mlog_errno(ret);
524 return ret;
526 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
529 /* reserve clusters for xattr value which will be set in B tree*/
530 if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
531 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
532 si->value_len);
534 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
535 new_clusters);
536 *want_clusters += new_clusters;
538 return ret;
541 int ocfs2_calc_xattr_init(struct inode *dir,
542 struct buffer_head *dir_bh,
543 int mode,
544 struct ocfs2_security_xattr_info *si,
545 int *want_clusters,
546 int *xattr_credits,
547 int *want_meta)
549 int ret = 0;
550 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
551 int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
553 if (si->enable)
554 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
555 si->value_len);
557 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
558 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
559 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
560 "", NULL, 0);
561 if (acl_len > 0) {
562 a_size = ocfs2_xattr_entry_real_size(0, acl_len);
563 if (S_ISDIR(mode))
564 a_size <<= 1;
565 } else if (acl_len != 0 && acl_len != -ENODATA) {
566 mlog_errno(ret);
567 return ret;
571 if (!(s_size + a_size))
572 return ret;
575 * The max space of security xattr taken inline is
576 * 256(name) + 80(value) + 16(entry) = 352 bytes,
577 * The max space of acl xattr taken inline is
578 * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
579 * when blocksize = 512, may reserve one more cluser for
580 * xattr bucket, otherwise reserve one metadata block
581 * for them is ok.
582 * If this is a new directory with inline data,
583 * we choose to reserve the entire inline area for
584 * directory contents and force an external xattr block.
586 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
587 (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
588 (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
589 *want_meta = *want_meta + 1;
590 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
593 if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
594 (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
595 *want_clusters += 1;
596 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
600 * reserve credits and clusters for xattrs which has large value
601 * and have to be set outside
603 if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
604 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
605 si->value_len);
606 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
607 new_clusters);
608 *want_clusters += new_clusters;
610 if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
611 acl_len > OCFS2_XATTR_INLINE_SIZE) {
612 /* for directory, it has DEFAULT and ACCESS two types of acls */
613 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
614 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
615 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
616 new_clusters);
617 *want_clusters += new_clusters;
620 return ret;
623 static int ocfs2_xattr_extend_allocation(struct inode *inode,
624 u32 clusters_to_add,
625 struct ocfs2_xattr_value_buf *vb,
626 struct ocfs2_xattr_set_ctxt *ctxt)
628 int status = 0;
629 handle_t *handle = ctxt->handle;
630 enum ocfs2_alloc_restarted why;
631 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
632 struct ocfs2_extent_tree et;
634 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
636 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
638 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
639 OCFS2_JOURNAL_ACCESS_WRITE);
640 if (status < 0) {
641 mlog_errno(status);
642 goto leave;
645 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
646 status = ocfs2_add_clusters_in_btree(handle,
647 &et,
648 &logical_start,
649 clusters_to_add,
651 ctxt->data_ac,
652 ctxt->meta_ac,
653 &why);
654 if (status < 0) {
655 mlog_errno(status);
656 goto leave;
659 status = ocfs2_journal_dirty(handle, vb->vb_bh);
660 if (status < 0) {
661 mlog_errno(status);
662 goto leave;
665 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
668 * We should have already allocated enough space before the transaction,
669 * so no need to restart.
671 BUG_ON(why != RESTART_NONE || clusters_to_add);
673 leave:
675 return status;
678 static int __ocfs2_remove_xattr_range(struct inode *inode,
679 struct ocfs2_xattr_value_buf *vb,
680 u32 cpos, u32 phys_cpos, u32 len,
681 unsigned int ext_flags,
682 struct ocfs2_xattr_set_ctxt *ctxt)
684 int ret;
685 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
686 handle_t *handle = ctxt->handle;
687 struct ocfs2_extent_tree et;
689 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
691 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
692 OCFS2_JOURNAL_ACCESS_WRITE);
693 if (ret) {
694 mlog_errno(ret);
695 goto out;
698 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
699 &ctxt->dealloc);
700 if (ret) {
701 mlog_errno(ret);
702 goto out;
705 le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
707 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
708 if (ret) {
709 mlog_errno(ret);
710 goto out;
713 if (ext_flags & OCFS2_EXT_REFCOUNTED)
714 ret = ocfs2_decrease_refcount(inode, handle,
715 ocfs2_blocks_to_clusters(inode->i_sb,
716 phys_blkno),
717 len, ctxt->meta_ac, &ctxt->dealloc, 1);
718 else
719 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
720 phys_blkno, len);
721 if (ret)
722 mlog_errno(ret);
724 out:
725 return ret;
728 static int ocfs2_xattr_shrink_size(struct inode *inode,
729 u32 old_clusters,
730 u32 new_clusters,
731 struct ocfs2_xattr_value_buf *vb,
732 struct ocfs2_xattr_set_ctxt *ctxt)
734 int ret = 0;
735 unsigned int ext_flags;
736 u32 trunc_len, cpos, phys_cpos, alloc_size;
737 u64 block;
739 if (old_clusters <= new_clusters)
740 return 0;
742 cpos = new_clusters;
743 trunc_len = old_clusters - new_clusters;
744 while (trunc_len) {
745 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
746 &alloc_size,
747 &vb->vb_xv->xr_list, &ext_flags);
748 if (ret) {
749 mlog_errno(ret);
750 goto out;
753 if (alloc_size > trunc_len)
754 alloc_size = trunc_len;
756 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
757 phys_cpos, alloc_size,
758 ext_flags, ctxt);
759 if (ret) {
760 mlog_errno(ret);
761 goto out;
764 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
765 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
766 block, alloc_size);
767 cpos += alloc_size;
768 trunc_len -= alloc_size;
771 out:
772 return ret;
775 static int ocfs2_xattr_value_truncate(struct inode *inode,
776 struct ocfs2_xattr_value_buf *vb,
777 int len,
778 struct ocfs2_xattr_set_ctxt *ctxt)
780 int ret;
781 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
782 u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
784 if (new_clusters == old_clusters)
785 return 0;
787 if (new_clusters > old_clusters)
788 ret = ocfs2_xattr_extend_allocation(inode,
789 new_clusters - old_clusters,
790 vb, ctxt);
791 else
792 ret = ocfs2_xattr_shrink_size(inode,
793 old_clusters, new_clusters,
794 vb, ctxt);
796 return ret;
/*
 * Append "<prefix><name>\0" to the listing being built in @buffer.
 *
 * @buffer:   output buffer; may be NULL when @size is 0
 * @size:     capacity of @buffer, or 0 to only compute the space needed
 * @result:   in: bytes already used; out: bytes used including this entry
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name (not necessarily NUL-terminated)
 * @name_len: length of @name in bytes
 *
 * *@result is advanced even when nothing is written.  Returns 0 on
 * success (or when only sizing), -ERANGE if the entry does not fit.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;
	char *p;

	*result += total_len;

	/* we are just looking for how big our buffer needs to be */
	if (!size)
		return 0;

	if (*result > size)
		return -ERANGE;

	/*
	 * Form the destination pointer only now: on a size probe @buffer
	 * may be NULL and must not take part in pointer arithmetic.
	 */
	p = buffer + start;
	memcpy(p, prefix, prefix_len);
	memcpy(p + prefix_len, name, name_len);
	p[prefix_len + name_len] = '\0';

	return 0;
}
823 static int ocfs2_xattr_list_entries(struct inode *inode,
824 struct ocfs2_xattr_header *header,
825 char *buffer, size_t buffer_size)
827 size_t result = 0;
828 int i, type, ret;
829 const char *prefix, *name;
831 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
832 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
833 type = ocfs2_xattr_get_type(entry);
834 prefix = ocfs2_xattr_prefix(type);
836 if (prefix) {
837 name = (const char *)header +
838 le16_to_cpu(entry->xe_name_offset);
840 ret = ocfs2_xattr_list_entry(buffer, buffer_size,
841 &result, prefix, name,
842 entry->xe_name_len);
843 if (ret)
844 return ret;
848 return result;
851 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
852 struct ocfs2_dinode *di)
854 struct ocfs2_xattr_header *xh;
855 int i;
857 xh = (struct ocfs2_xattr_header *)
858 ((void *)di + inode->i_sb->s_blocksize -
859 le16_to_cpu(di->i_xattr_inline_size));
861 for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
862 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
863 return 1;
865 return 0;
868 static int ocfs2_xattr_ibody_list(struct inode *inode,
869 struct ocfs2_dinode *di,
870 char *buffer,
871 size_t buffer_size)
873 struct ocfs2_xattr_header *header = NULL;
874 struct ocfs2_inode_info *oi = OCFS2_I(inode);
875 int ret = 0;
877 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
878 return ret;
880 header = (struct ocfs2_xattr_header *)
881 ((void *)di + inode->i_sb->s_blocksize -
882 le16_to_cpu(di->i_xattr_inline_size));
884 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
886 return ret;
889 static int ocfs2_xattr_block_list(struct inode *inode,
890 struct ocfs2_dinode *di,
891 char *buffer,
892 size_t buffer_size)
894 struct buffer_head *blk_bh = NULL;
895 struct ocfs2_xattr_block *xb;
896 int ret = 0;
898 if (!di->i_xattr_loc)
899 return ret;
901 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
902 &blk_bh);
903 if (ret < 0) {
904 mlog_errno(ret);
905 return ret;
908 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
909 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
910 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
911 ret = ocfs2_xattr_list_entries(inode, header,
912 buffer, buffer_size);
913 } else
914 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
915 buffer, buffer_size);
917 brelse(blk_bh);
919 return ret;
922 ssize_t ocfs2_listxattr(struct dentry *dentry,
923 char *buffer,
924 size_t size)
926 int ret = 0, i_ret = 0, b_ret = 0;
927 struct buffer_head *di_bh = NULL;
928 struct ocfs2_dinode *di = NULL;
929 struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
931 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
932 return -EOPNOTSUPP;
934 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
935 return ret;
937 ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
938 if (ret < 0) {
939 mlog_errno(ret);
940 return ret;
943 di = (struct ocfs2_dinode *)di_bh->b_data;
945 down_read(&oi->ip_xattr_sem);
946 i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
947 if (i_ret < 0)
948 b_ret = 0;
949 else {
950 if (buffer) {
951 buffer += i_ret;
952 size -= i_ret;
954 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
955 buffer, size);
956 if (b_ret < 0)
957 i_ret = 0;
959 up_read(&oi->ip_xattr_sem);
960 ocfs2_inode_unlock(dentry->d_inode, 0);
962 brelse(di_bh);
964 return i_ret + b_ret;
967 static int ocfs2_xattr_find_entry(int name_index,
968 const char *name,
969 struct ocfs2_xattr_search *xs)
971 struct ocfs2_xattr_entry *entry;
972 size_t name_len;
973 int i, cmp = 1;
975 if (name == NULL)
976 return -EINVAL;
978 name_len = strlen(name);
979 entry = xs->here;
980 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
981 cmp = name_index - ocfs2_xattr_get_type(entry);
982 if (!cmp)
983 cmp = name_len - entry->xe_name_len;
984 if (!cmp)
985 cmp = memcmp(name, (xs->base +
986 le16_to_cpu(entry->xe_name_offset)),
987 name_len);
988 if (cmp == 0)
989 break;
990 entry += 1;
992 xs->here = entry;
994 return cmp ? -ENODATA : 0;
997 static int ocfs2_xattr_get_value_outside(struct inode *inode,
998 struct ocfs2_xattr_value_root *xv,
999 void *buffer,
1000 size_t len)
1002 u32 cpos, p_cluster, num_clusters, bpc, clusters;
1003 u64 blkno;
1004 int i, ret = 0;
1005 size_t cplen, blocksize;
1006 struct buffer_head *bh = NULL;
1007 struct ocfs2_extent_list *el;
1009 el = &xv->xr_list;
1010 clusters = le32_to_cpu(xv->xr_clusters);
1011 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1012 blocksize = inode->i_sb->s_blocksize;
1014 cpos = 0;
1015 while (cpos < clusters) {
1016 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1017 &num_clusters, el, NULL);
1018 if (ret) {
1019 mlog_errno(ret);
1020 goto out;
1023 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1024 /* Copy ocfs2_xattr_value */
1025 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1026 ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1027 &bh, NULL);
1028 if (ret) {
1029 mlog_errno(ret);
1030 goto out;
1033 cplen = len >= blocksize ? blocksize : len;
1034 memcpy(buffer, bh->b_data, cplen);
1035 len -= cplen;
1036 buffer += cplen;
1038 brelse(bh);
1039 bh = NULL;
1040 if (len == 0)
1041 break;
1043 cpos += num_clusters;
1045 out:
1046 return ret;
1049 static int ocfs2_xattr_ibody_get(struct inode *inode,
1050 int name_index,
1051 const char *name,
1052 void *buffer,
1053 size_t buffer_size,
1054 struct ocfs2_xattr_search *xs)
1056 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1057 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1058 struct ocfs2_xattr_value_root *xv;
1059 size_t size;
1060 int ret = 0;
1062 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1063 return -ENODATA;
1065 xs->end = (void *)di + inode->i_sb->s_blocksize;
1066 xs->header = (struct ocfs2_xattr_header *)
1067 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1068 xs->base = (void *)xs->header;
1069 xs->here = xs->header->xh_entries;
1071 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1072 if (ret)
1073 return ret;
1074 size = le64_to_cpu(xs->here->xe_value_size);
1075 if (buffer) {
1076 if (size > buffer_size)
1077 return -ERANGE;
1078 if (ocfs2_xattr_is_local(xs->here)) {
1079 memcpy(buffer, (void *)xs->base +
1080 le16_to_cpu(xs->here->xe_name_offset) +
1081 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1082 } else {
1083 xv = (struct ocfs2_xattr_value_root *)
1084 (xs->base + le16_to_cpu(
1085 xs->here->xe_name_offset) +
1086 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1087 ret = ocfs2_xattr_get_value_outside(inode, xv,
1088 buffer, size);
1089 if (ret < 0) {
1090 mlog_errno(ret);
1091 return ret;
1096 return size;
1099 static int ocfs2_xattr_block_get(struct inode *inode,
1100 int name_index,
1101 const char *name,
1102 void *buffer,
1103 size_t buffer_size,
1104 struct ocfs2_xattr_search *xs)
1106 struct ocfs2_xattr_block *xb;
1107 struct ocfs2_xattr_value_root *xv;
1108 size_t size;
1109 int ret = -ENODATA, name_offset, name_len, i;
1110 int uninitialized_var(block_off);
1112 xs->bucket = ocfs2_xattr_bucket_new(inode);
1113 if (!xs->bucket) {
1114 ret = -ENOMEM;
1115 mlog_errno(ret);
1116 goto cleanup;
1119 ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1120 if (ret) {
1121 mlog_errno(ret);
1122 goto cleanup;
1125 if (xs->not_found) {
1126 ret = -ENODATA;
1127 goto cleanup;
1130 xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1131 size = le64_to_cpu(xs->here->xe_value_size);
1132 if (buffer) {
1133 ret = -ERANGE;
1134 if (size > buffer_size)
1135 goto cleanup;
1137 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1138 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1139 i = xs->here - xs->header->xh_entries;
1141 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1142 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1143 bucket_xh(xs->bucket),
1145 &block_off,
1146 &name_offset);
1147 xs->base = bucket_block(xs->bucket, block_off);
1149 if (ocfs2_xattr_is_local(xs->here)) {
1150 memcpy(buffer, (void *)xs->base +
1151 name_offset + name_len, size);
1152 } else {
1153 xv = (struct ocfs2_xattr_value_root *)
1154 (xs->base + name_offset + name_len);
1155 ret = ocfs2_xattr_get_value_outside(inode, xv,
1156 buffer, size);
1157 if (ret < 0) {
1158 mlog_errno(ret);
1159 goto cleanup;
1163 ret = size;
1164 cleanup:
1165 ocfs2_xattr_bucket_free(xs->bucket);
1167 brelse(xs->xattr_bh);
1168 xs->xattr_bh = NULL;
1169 return ret;
1172 int ocfs2_xattr_get_nolock(struct inode *inode,
1173 struct buffer_head *di_bh,
1174 int name_index,
1175 const char *name,
1176 void *buffer,
1177 size_t buffer_size)
1179 int ret;
1180 struct ocfs2_dinode *di = NULL;
1181 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1182 struct ocfs2_xattr_search xis = {
1183 .not_found = -ENODATA,
1185 struct ocfs2_xattr_search xbs = {
1186 .not_found = -ENODATA,
1189 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1190 return -EOPNOTSUPP;
1192 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1193 ret = -ENODATA;
1195 xis.inode_bh = xbs.inode_bh = di_bh;
1196 di = (struct ocfs2_dinode *)di_bh->b_data;
1198 down_read(&oi->ip_xattr_sem);
1199 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1200 buffer_size, &xis);
1201 if (ret == -ENODATA && di->i_xattr_loc)
1202 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1203 buffer_size, &xbs);
1204 up_read(&oi->ip_xattr_sem);
1206 return ret;
1209 /* ocfs2_xattr_get()
1211 * Copy an extended attribute into the buffer provided.
1212 * Buffer is NULL to compute the size of buffer required.
1214 static int ocfs2_xattr_get(struct inode *inode,
1215 int name_index,
1216 const char *name,
1217 void *buffer,
1218 size_t buffer_size)
1220 int ret;
1221 struct buffer_head *di_bh = NULL;
1223 ret = ocfs2_inode_lock(inode, &di_bh, 0);
1224 if (ret < 0) {
1225 mlog_errno(ret);
1226 return ret;
1228 ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1229 name, buffer, buffer_size);
1231 ocfs2_inode_unlock(inode, 0);
1233 brelse(di_bh);
1235 return ret;
1238 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1239 handle_t *handle,
1240 struct ocfs2_xattr_value_buf *vb,
1241 const void *value,
1242 int value_len)
1244 int ret = 0, i, cp_len;
1245 u16 blocksize = inode->i_sb->s_blocksize;
1246 u32 p_cluster, num_clusters;
1247 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1248 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1249 u64 blkno;
1250 struct buffer_head *bh = NULL;
1251 unsigned int ext_flags;
1252 struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1254 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1256 while (cpos < clusters) {
1257 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1258 &num_clusters, &xv->xr_list,
1259 &ext_flags);
1260 if (ret) {
1261 mlog_errno(ret);
1262 goto out;
1265 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1267 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1269 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1270 ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1271 &bh, NULL);
1272 if (ret) {
1273 mlog_errno(ret);
1274 goto out;
1277 ret = ocfs2_journal_access(handle,
1278 INODE_CACHE(inode),
1280 OCFS2_JOURNAL_ACCESS_WRITE);
1281 if (ret < 0) {
1282 mlog_errno(ret);
1283 goto out;
1286 cp_len = value_len > blocksize ? blocksize : value_len;
1287 memcpy(bh->b_data, value, cp_len);
1288 value_len -= cp_len;
1289 value += cp_len;
1290 if (cp_len < blocksize)
1291 memset(bh->b_data + cp_len, 0,
1292 blocksize - cp_len);
1294 ret = ocfs2_journal_dirty(handle, bh);
1295 if (ret < 0) {
1296 mlog_errno(ret);
1297 goto out;
1299 brelse(bh);
1300 bh = NULL;
1303 * XXX: do we need to empty all the following
1304 * blocks in this cluster?
1306 if (!value_len)
1307 break;
1309 cpos += num_clusters;
1311 out:
1312 brelse(bh);
1314 return ret;
1317 static int ocfs2_xattr_cleanup(struct inode *inode,
1318 handle_t *handle,
1319 struct ocfs2_xattr_info *xi,
1320 struct ocfs2_xattr_search *xs,
1321 struct ocfs2_xattr_value_buf *vb,
1322 size_t offs)
1324 int ret = 0;
1325 size_t name_len = strlen(xi->name);
1326 void *val = xs->base + offs;
1327 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1329 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1330 OCFS2_JOURNAL_ACCESS_WRITE);
1331 if (ret) {
1332 mlog_errno(ret);
1333 goto out;
1335 /* Decrease xattr count */
1336 le16_add_cpu(&xs->header->xh_count, -1);
1337 /* Remove the xattr entry and tree root which has already be set*/
1338 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1339 memset(val, 0, size);
1341 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1342 if (ret < 0)
1343 mlog_errno(ret);
1344 out:
1345 return ret;
1348 static int ocfs2_xattr_update_entry(struct inode *inode,
1349 handle_t *handle,
1350 struct ocfs2_xattr_info *xi,
1351 struct ocfs2_xattr_search *xs,
1352 struct ocfs2_xattr_value_buf *vb,
1353 size_t offs)
1355 int ret;
1357 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1358 OCFS2_JOURNAL_ACCESS_WRITE);
1359 if (ret) {
1360 mlog_errno(ret);
1361 goto out;
1364 xs->here->xe_name_offset = cpu_to_le16(offs);
1365 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1366 if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1367 ocfs2_xattr_set_local(xs->here, 1);
1368 else
1369 ocfs2_xattr_set_local(xs->here, 0);
1370 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1372 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1373 if (ret < 0)
1374 mlog_errno(ret);
1375 out:
1376 return ret;
1380 * ocfs2_xattr_set_value_outside()
1382 * Set large size value in B tree.
1384 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1385 struct ocfs2_xattr_info *xi,
1386 struct ocfs2_xattr_search *xs,
1387 struct ocfs2_xattr_set_ctxt *ctxt,
1388 struct ocfs2_xattr_value_buf *vb,
1389 size_t offs)
1391 size_t name_len = strlen(xi->name);
1392 void *val = xs->base + offs;
1393 struct ocfs2_xattr_value_root *xv = NULL;
1394 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1395 int ret = 0;
1397 memset(val, 0, size);
1398 memcpy(val, xi->name, name_len);
1399 xv = (struct ocfs2_xattr_value_root *)
1400 (val + OCFS2_XATTR_SIZE(name_len));
1401 xv->xr_clusters = 0;
1402 xv->xr_last_eb_blk = 0;
1403 xv->xr_list.l_tree_depth = 0;
1404 xv->xr_list.l_count = cpu_to_le16(1);
1405 xv->xr_list.l_next_free_rec = 0;
1406 vb->vb_xv = xv;
1408 ret = ocfs2_xattr_value_truncate(inode, vb, xi->value_len, ctxt);
1409 if (ret < 0) {
1410 mlog_errno(ret);
1411 return ret;
1413 ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1414 if (ret < 0) {
1415 mlog_errno(ret);
1416 return ret;
1418 ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1419 xi->value, xi->value_len);
1420 if (ret < 0)
1421 mlog_errno(ret);
1423 return ret;
1427 * ocfs2_xattr_set_entry_local()
1429 * Set, replace or remove extended attribute in local.
/*
 * Apply the change described by @xi to the name/value area and entry table
 * that @xs points at.  Nothing is journalled here and nothing is returned.
 *
 * - New attribute (xi->value set and xs->not_found non-zero): bump
 *   xh_count and initialise type, local flag and name length on @last.
 * - Existing attribute: if the new name+value occupies exactly as many
 *   bytes as the old one, overwrite the value in place and return.
 *   Otherwise slide the bytes between @min_offs and the old name+value up
 *   by the old size, zero the freed gap, add that size to xe_name_offset
 *   of every entry whose offset was below the removed one and, when
 *   xi->value is NULL, drop the entry from the table as well.
 * - Finally, when xi->value is set, write name+value just below the
 *   (possibly advanced) @min_offs and fill in offset, size, local flag and
 *   hash of xs->here.
 *
 * NOTE(review): in the insert case the type/name length are set through
 * @last while offset/size/hash are set through xs->here; this presumably
 * relies on the caller passing last == xs->here -- confirm.
 */
1431 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1432 struct ocfs2_xattr_info *xi,
1433 struct ocfs2_xattr_search *xs,
1434 struct ocfs2_xattr_entry *last,
1435 size_t min_offs)
1437 size_t name_len = strlen(xi->name);
1438 int i;
1440 if (xi->value && xs->not_found) {
1441 /* Insert the new xattr entry. */
1442 le16_add_cpu(&xs->header->xh_count, 1);
1443 ocfs2_xattr_set_type(last, xi->name_index);
1444 ocfs2_xattr_set_local(last, 1);
1445 last->xe_name_len = name_len;
1446 } else {
1447 void *first_val;
1448 void *val;
1449 size_t offs, size;
/* first_val: lowest used byte of the name/value area; val: old name+value. */
1451 first_val = xs->base + min_offs;
1452 offs = le16_to_cpu(xs->here->xe_name_offset);
1453 val = xs->base + offs;
/* Bytes held by the old name+value: padded name plus tree root or padded inline value. */
1455 if (le64_to_cpu(xs->here->xe_value_size) >
1456 OCFS2_XATTR_INLINE_SIZE)
1457 size = OCFS2_XATTR_SIZE(name_len) +
1458 OCFS2_XATTR_ROOT_SIZE;
1459 else
1460 size = OCFS2_XATTR_SIZE(name_len) +
1461 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1463 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1464 OCFS2_XATTR_SIZE(xi->value_len)) {
1465 /* The old and the new value have the
1466 same size. Just replace the value. */
1467 ocfs2_xattr_set_local(xs->here, 1);
1468 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1469 /* Clear value bytes. */
1470 memset(val + OCFS2_XATTR_SIZE(name_len),
1472 OCFS2_XATTR_SIZE(xi->value_len));
1473 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1474 xi->value,
1475 xi->value_len);
1476 return;
1478 /* Remove the old name+value. */
1479 memmove(first_val + size, first_val, val - first_val);
1480 memset(first_val, 0, size);
1481 xs->here->xe_name_hash = 0;
1482 xs->here->xe_name_offset = 0;
1483 ocfs2_xattr_set_local(xs->here, 1);
1484 xs->here->xe_value_size = 0;
/* The used area now starts 'size' bytes higher. */
1486 min_offs += size;
1488 /* Adjust all value offsets. */
1489 last = xs->header->xh_entries;
1490 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1491 size_t o = le16_to_cpu(last->xe_name_offset);
1493 if (o < offs)
1494 last->xe_name_offset = cpu_to_le16(o + size);
1495 last += 1;
1498 if (!xi->value) {
1499 /* Remove the old entry. */
1500 last -= 1;
1501 memmove(xs->here, xs->here + 1,
1502 (void *)last - (void *)xs->here);
1503 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1504 le16_add_cpu(&xs->header->xh_count, -1);
1507 if (xi->value) {
1508 /* Insert the new name+value. */
1509 size_t size = OCFS2_XATTR_SIZE(name_len) +
1510 OCFS2_XATTR_SIZE(xi->value_len);
1511 void *val = xs->base + min_offs - size;
1513 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1514 memset(val, 0, size);
1515 memcpy(val, xi->name, name_len);
1516 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1517 xi->value,
1518 xi->value_len);
1519 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1520 ocfs2_xattr_set_local(xs->here, 1);
1521 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1524 return;
1528 * ocfs2_xattr_set_entry()
1530 * Set extended attribute entry into inode or block.
1532 * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1533 * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1534 * then set value in B tree with set_value_outside().
/*
 * Set, replace or remove one extended attribute inside the storage area
 * described by @xs: the inode body when @flag contains
 * OCFS2_INLINE_XATTR_FL, otherwise a separate xattr block (the BUG_ONs
 * below enforce that xs->xattr_bh matches that choice).
 *
 * Outline, as implemented below:
 *   1. Scan the entry table for the lowest name offset and compute the
 *      free space; return -EIO if it comes out negative.
 *   2. Credit back the space held by the existing entry, if any, and
 *      return -ENOSPC when the new entry (padded inline value, or a tree
 *      root for values larger than OCFS2_XATTR_INLINE_SIZE) does not fit.
 *   3. For an existing entry, handle the cases that only involve the
 *      value tree (rewrite in place / truncate) and leave early where the
 *      code says so.
 *   4. Otherwise journal the buffers, update the local entry through
 *      ocfs2_xattr_set_entry_local(), reserve the inline xattr area in the
 *      dinode the first time it is used, and publish @flag in the inode's
 *      dynamic features.
 *   5. For large values, write the data with
 *      ocfs2_xattr_set_value_outside(); if that fails the tree root that
 *      was just inserted is wiped via ocfs2_xattr_cleanup().
 *
 * Returns 0 on success or a negative errno.
 */
1536 static int ocfs2_xattr_set_entry(struct inode *inode,
1537 struct ocfs2_xattr_info *xi,
1538 struct ocfs2_xattr_search *xs,
1539 struct ocfs2_xattr_set_ctxt *ctxt,
1540 int flag)
1542 struct ocfs2_xattr_entry *last;
1543 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1544 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1545 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1546 size_t size_l = 0;
1547 handle_t *handle = ctxt->handle;
1548 int free, i, ret;
/* xi_l: copy of xi whose value may be swapped for a tree root further down. */
1549 struct ocfs2_xattr_info xi_l = {
1550 .name_index = xi->name_index,
1551 .name = xi->name,
1552 .value = xi->value,
1553 .value_len = xi->value_len,
1555 struct ocfs2_xattr_value_buf vb = {
1556 .vb_bh = xs->xattr_bh,
1557 .vb_access = ocfs2_journal_access_di,
1560 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1561 BUG_ON(xs->xattr_bh == xs->inode_bh);
1562 vb.vb_access = ocfs2_journal_access_xb;
1563 } else
1564 BUG_ON(xs->xattr_bh != xs->inode_bh);
1566 /* Compute min_offs, last and free space. */
1567 last = xs->header->xh_entries;
1569 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1570 size_t offs = le16_to_cpu(last->xe_name_offset);
1571 if (offs < min_offs)
1572 min_offs = offs;
1573 last += 1;
1576 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1577 if (free < 0)
1578 return -EIO;
/* An entry being replaced gives its own space back to the budget. */
1580 if (!xs->not_found) {
1581 size_t size = 0;
1582 if (ocfs2_xattr_is_local(xs->here))
1583 size = OCFS2_XATTR_SIZE(name_len) +
1584 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1585 else
1586 size = OCFS2_XATTR_SIZE(name_len) +
1587 OCFS2_XATTR_ROOT_SIZE;
1588 free += (size + sizeof(struct ocfs2_xattr_entry));
1590 /* Check free space in inode or block */
1591 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1592 if (free < sizeof(struct ocfs2_xattr_entry) +
1593 OCFS2_XATTR_SIZE(name_len) +
1594 OCFS2_XATTR_ROOT_SIZE) {
1595 ret = -ENOSPC;
1596 goto out;
1598 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1599 xi_l.value = (void *)&def_xv;
1600 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1601 } else if (xi->value) {
1602 if (free < sizeof(struct ocfs2_xattr_entry) +
1603 OCFS2_XATTR_SIZE(name_len) +
1604 OCFS2_XATTR_SIZE(xi->value_len)) {
1605 ret = -ENOSPC;
1606 goto out;
1610 if (!xs->not_found) {
1611 /* For existing extended attribute */
1612 size_t size = OCFS2_XATTR_SIZE(name_len) +
1613 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1614 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1615 void *val = xs->base + offs;
1617 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1618 /* Replace existing local xattr with tree root */
1619 ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1620 ctxt, &vb, offs);
1621 if (ret < 0)
1622 mlog_errno(ret);
1623 goto out;
1624 } else if (!ocfs2_xattr_is_local(xs->here)) {
1625 /* For existing xattr which has value outside */
1626 vb.vb_xv = (struct ocfs2_xattr_value_root *)
1627 (val + OCFS2_XATTR_SIZE(name_len));
1629 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1631 * If new value need set outside also,
1632 * first truncate old value to new value,
1633 * then set new value with set_value_outside().
1635 ret = ocfs2_xattr_value_truncate(inode,
1636 &vb,
1637 xi->value_len,
1638 ctxt);
1639 if (ret < 0) {
1640 mlog_errno(ret);
1641 goto out;
1644 ret = ocfs2_xattr_update_entry(inode,
1645 handle,
1648 &vb,
1649 offs);
1650 if (ret < 0) {
1651 mlog_errno(ret);
1652 goto out;
1655 ret = __ocfs2_xattr_set_value_outside(inode,
1656 handle,
1657 &vb,
1658 xi->value,
1659 xi->value_len);
1660 if (ret < 0)
1661 mlog_errno(ret);
1662 goto out;
1663 } else {
1665 * If new value need set in local,
1666 * just trucate old value to zero.
1668 ret = ocfs2_xattr_value_truncate(inode,
1669 &vb,
1671 ctxt);
1672 if (ret < 0)
1673 mlog_errno(ret);
/* NOTE(review): a truncate failure just above is only logged; execution continues and ret is overwritten below -- confirm this is intended. */
1678 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1679 OCFS2_JOURNAL_ACCESS_WRITE);
1680 if (ret) {
1681 mlog_errno(ret);
1682 goto out;
1685 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1686 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1687 OCFS2_JOURNAL_ACCESS_WRITE);
1688 if (ret) {
1689 mlog_errno(ret);
1690 goto out;
1695 * Set value in local, include set tree root in local.
1696 * This is the first step for value size >INLINE_SIZE.
1698 ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1700 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1701 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1702 if (ret < 0) {
1703 mlog_errno(ret);
1704 goto out;
/* First inline xattr on this inode: carve the xattr area out of the dinode. */
1708 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1709 (flag & OCFS2_INLINE_XATTR_FL)) {
1710 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1711 unsigned int xattrsize = osb->s_xattr_inline_size;
1714 * Adjust extent record count or inline data size
1715 * to reserve space for extended attribute.
1717 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1718 struct ocfs2_inline_data *idata = &di->id2.i_data;
1719 le16_add_cpu(&idata->id_count, -xattrsize);
1720 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1721 struct ocfs2_extent_list *el = &di->id2.i_list;
1722 le16_add_cpu(&el->l_count, -(xattrsize /
1723 sizeof(struct ocfs2_extent_rec)));
1725 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1727 /* Update xattr flag */
1728 spin_lock(&oi->ip_lock);
1729 oi->ip_dyn_features |= flag;
1730 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1731 spin_unlock(&oi->ip_lock);
1733 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1734 if (ret < 0)
1735 mlog_errno(ret);
1737 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1739 * Set value outside in B tree.
1740 * This is the second step for value size > INLINE_SIZE.
1742 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1743 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1744 &vb, offs);
1745 if (ret < 0) {
1746 int ret2;
1748 mlog_errno(ret);
1750 * If set value outside failed, we have to clean
1751 * the junk tree root we have already set in local.
1753 ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1754 xi, xs, &vb, offs);
1755 if (ret2 < 0)
1756 mlog_errno(ret2);
1759 out:
1760 return ret;
1764 * In xattr remove, if it is stored outside and refcounted, we may have
1765 * the chance to split the refcount tree. So need the allocators.
1767 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
1768 struct ocfs2_xattr_value_root *xv,
1769 struct ocfs2_caching_info *ref_ci,
1770 struct buffer_head *ref_root_bh,
1771 struct ocfs2_alloc_context **meta_ac,
1772 int *ref_credits)
1774 int ret, meta_add = 0;
1775 u32 p_cluster, num_clusters;
1776 unsigned int ext_flags;
1778 *ref_credits = 0;
1779 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
1780 &num_clusters,
1781 &xv->xr_list,
1782 &ext_flags);
1783 if (ret) {
1784 mlog_errno(ret);
1785 goto out;
1788 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
1789 goto out;
1791 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
1792 ref_root_bh, xv,
1793 &meta_add, ref_credits);
1794 if (ret) {
1795 mlog_errno(ret);
1796 goto out;
1799 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
1800 meta_add, meta_ac);
1801 if (ret)
1802 mlog_errno(ret);
1804 out:
1805 return ret;
1808 static int ocfs2_remove_value_outside(struct inode*inode,
1809 struct ocfs2_xattr_value_buf *vb,
1810 struct ocfs2_xattr_header *header,
1811 struct ocfs2_caching_info *ref_ci,
1812 struct buffer_head *ref_root_bh)
1814 int ret = 0, i, ref_credits;
1815 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1816 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1817 void *val;
1819 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1821 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1822 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1824 if (ocfs2_xattr_is_local(entry))
1825 continue;
1827 val = (void *)header +
1828 le16_to_cpu(entry->xe_name_offset);
1829 vb->vb_xv = (struct ocfs2_xattr_value_root *)
1830 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1832 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
1833 ref_ci, ref_root_bh,
1834 &ctxt.meta_ac,
1835 &ref_credits);
1837 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
1838 ocfs2_remove_extent_credits(osb->sb));
1839 if (IS_ERR(ctxt.handle)) {
1840 ret = PTR_ERR(ctxt.handle);
1841 mlog_errno(ret);
1842 break;
1845 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1846 if (ret < 0) {
1847 mlog_errno(ret);
1848 break;
1851 ocfs2_commit_trans(osb, ctxt.handle);
1852 if (ctxt.meta_ac) {
1853 ocfs2_free_alloc_context(ctxt.meta_ac);
1854 ctxt.meta_ac = NULL;
1858 if (ctxt.meta_ac)
1859 ocfs2_free_alloc_context(ctxt.meta_ac);
1860 ocfs2_schedule_truncate_log_flush(osb, 1);
1861 ocfs2_run_deallocs(osb, &ctxt.dealloc);
1862 return ret;
1865 static int ocfs2_xattr_ibody_remove(struct inode *inode,
1866 struct buffer_head *di_bh,
1867 struct ocfs2_caching_info *ref_ci,
1868 struct buffer_head *ref_root_bh)
1871 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1872 struct ocfs2_xattr_header *header;
1873 int ret;
1874 struct ocfs2_xattr_value_buf vb = {
1875 .vb_bh = di_bh,
1876 .vb_access = ocfs2_journal_access_di,
1879 header = (struct ocfs2_xattr_header *)
1880 ((void *)di + inode->i_sb->s_blocksize -
1881 le16_to_cpu(di->i_xattr_inline_size));
1883 ret = ocfs2_remove_value_outside(inode, &vb, header,
1884 ref_ci, ref_root_bh);
1886 return ret;
/*
 * Argument bundle for removing an indexed xattr block:
 * ocfs2_xattr_block_remove() fills it from its own ref_ci / ref_root_bh
 * parameters and passes it as the opaque argument of
 * ocfs2_iterate_xattr_index_block() together with ocfs2_rm_xattr_cluster.
 */
1889 struct ocfs2_rm_xattr_bucket_para {
1890 struct ocfs2_caching_info *ref_ci;
1891 struct buffer_head *ref_root_bh;
1894 static int ocfs2_xattr_block_remove(struct inode *inode,
1895 struct buffer_head *blk_bh,
1896 struct ocfs2_caching_info *ref_ci,
1897 struct buffer_head *ref_root_bh)
1899 struct ocfs2_xattr_block *xb;
1900 int ret = 0;
1901 struct ocfs2_xattr_value_buf vb = {
1902 .vb_bh = blk_bh,
1903 .vb_access = ocfs2_journal_access_xb,
1905 struct ocfs2_rm_xattr_bucket_para args = {
1906 .ref_ci = ref_ci,
1907 .ref_root_bh = ref_root_bh,
1910 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1911 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1912 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1913 ret = ocfs2_remove_value_outside(inode, &vb, header,
1914 ref_ci, ref_root_bh);
1915 } else
1916 ret = ocfs2_iterate_xattr_index_block(inode,
1917 blk_bh,
1918 ocfs2_rm_xattr_cluster,
1919 &args);
1921 return ret;
1924 static int ocfs2_xattr_free_block(struct inode *inode,
1925 u64 block,
1926 struct ocfs2_caching_info *ref_ci,
1927 struct buffer_head *ref_root_bh)
1929 struct inode *xb_alloc_inode;
1930 struct buffer_head *xb_alloc_bh = NULL;
1931 struct buffer_head *blk_bh = NULL;
1932 struct ocfs2_xattr_block *xb;
1933 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1934 handle_t *handle;
1935 int ret = 0;
1936 u64 blk, bg_blkno;
1937 u16 bit;
1939 ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
1940 if (ret < 0) {
1941 mlog_errno(ret);
1942 goto out;
1945 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
1946 if (ret < 0) {
1947 mlog_errno(ret);
1948 goto out;
1951 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1952 blk = le64_to_cpu(xb->xb_blkno);
1953 bit = le16_to_cpu(xb->xb_suballoc_bit);
1954 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1956 xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1957 EXTENT_ALLOC_SYSTEM_INODE,
1958 le16_to_cpu(xb->xb_suballoc_slot));
1959 if (!xb_alloc_inode) {
1960 ret = -ENOMEM;
1961 mlog_errno(ret);
1962 goto out;
1964 mutex_lock(&xb_alloc_inode->i_mutex);
1966 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1967 if (ret < 0) {
1968 mlog_errno(ret);
1969 goto out_mutex;
1972 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1973 if (IS_ERR(handle)) {
1974 ret = PTR_ERR(handle);
1975 mlog_errno(ret);
1976 goto out_unlock;
1979 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1980 bit, bg_blkno, 1);
1981 if (ret < 0)
1982 mlog_errno(ret);
1984 ocfs2_commit_trans(osb, handle);
1985 out_unlock:
1986 ocfs2_inode_unlock(xb_alloc_inode, 1);
1987 brelse(xb_alloc_bh);
1988 out_mutex:
1989 mutex_unlock(&xb_alloc_inode->i_mutex);
1990 iput(xb_alloc_inode);
1991 out:
1992 brelse(blk_bh);
1993 return ret;
1997 * ocfs2_xattr_remove()
1999 * Free extended attribute resources associated with this inode.
2001 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2003 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2004 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2005 struct ocfs2_refcount_tree *ref_tree = NULL;
2006 struct buffer_head *ref_root_bh = NULL;
2007 struct ocfs2_caching_info *ref_ci = NULL;
2008 handle_t *handle;
2009 int ret;
2011 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2012 return 0;
2014 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2015 return 0;
2017 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2018 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2019 le64_to_cpu(di->i_refcount_loc),
2020 1, &ref_tree, &ref_root_bh);
2021 if (ret) {
2022 mlog_errno(ret);
2023 goto out;
2025 ref_ci = &ref_tree->rf_ci;
2029 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2030 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2031 ref_ci, ref_root_bh);
2032 if (ret < 0) {
2033 mlog_errno(ret);
2034 goto out;
2038 if (di->i_xattr_loc) {
2039 ret = ocfs2_xattr_free_block(inode,
2040 le64_to_cpu(di->i_xattr_loc),
2041 ref_ci, ref_root_bh);
2042 if (ret < 0) {
2043 mlog_errno(ret);
2044 goto out;
2048 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2049 OCFS2_INODE_UPDATE_CREDITS);
2050 if (IS_ERR(handle)) {
2051 ret = PTR_ERR(handle);
2052 mlog_errno(ret);
2053 goto out;
2055 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2056 OCFS2_JOURNAL_ACCESS_WRITE);
2057 if (ret) {
2058 mlog_errno(ret);
2059 goto out_commit;
2062 di->i_xattr_loc = 0;
2064 spin_lock(&oi->ip_lock);
2065 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2066 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2067 spin_unlock(&oi->ip_lock);
2069 ret = ocfs2_journal_dirty(handle, di_bh);
2070 if (ret < 0)
2071 mlog_errno(ret);
2072 out_commit:
2073 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2074 out:
2075 if (ref_tree)
2076 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2077 brelse(ref_root_bh);
2078 return ret;
2081 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2082 struct ocfs2_dinode *di)
2084 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2085 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2086 int free;
2088 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2089 return 0;
2091 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2092 struct ocfs2_inline_data *idata = &di->id2.i_data;
2093 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2094 } else if (ocfs2_inode_is_fast_symlink(inode)) {
2095 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2096 le64_to_cpu(di->i_size);
2097 } else {
2098 struct ocfs2_extent_list *el = &di->id2.i_list;
2099 free = (le16_to_cpu(el->l_count) -
2100 le16_to_cpu(el->l_next_free_rec)) *
2101 sizeof(struct ocfs2_extent_rec);
2103 if (free >= xattrsize)
2104 return 1;
2106 return 0;
2110 * ocfs2_xattr_ibody_find()
2112 * Find extended attribute in inode block and
2113 * fill search info into struct ocfs2_xattr_search.
2115 static int ocfs2_xattr_ibody_find(struct inode *inode,
2116 int name_index,
2117 const char *name,
2118 struct ocfs2_xattr_search *xs)
2120 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2121 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2122 int ret;
2123 int has_space = 0;
2125 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2126 return 0;
2128 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2129 down_read(&oi->ip_alloc_sem);
2130 has_space = ocfs2_xattr_has_space_inline(inode, di);
2131 up_read(&oi->ip_alloc_sem);
2132 if (!has_space)
2133 return 0;
2136 xs->xattr_bh = xs->inode_bh;
2137 xs->end = (void *)di + inode->i_sb->s_blocksize;
2138 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2139 xs->header = (struct ocfs2_xattr_header *)
2140 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2141 else
2142 xs->header = (struct ocfs2_xattr_header *)
2143 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2144 xs->base = (void *)xs->header;
2145 xs->here = xs->header->xh_entries;
2147 /* Find the named attribute. */
2148 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2149 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2150 if (ret && ret != -ENODATA)
2151 return ret;
2152 xs->not_found = ret;
2155 return 0;
2159 * ocfs2_xattr_ibody_set()
2161 * Set, replace or remove an extended attribute into inode block.
2164 static int ocfs2_xattr_ibody_set(struct inode *inode,
2165 struct ocfs2_xattr_info *xi,
2166 struct ocfs2_xattr_search *xs,
2167 struct ocfs2_xattr_set_ctxt *ctxt)
2169 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2170 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2171 int ret;
2173 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2174 return -ENOSPC;
2176 down_write(&oi->ip_alloc_sem);
2177 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2178 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2179 ret = -ENOSPC;
2180 goto out;
2184 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2185 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2186 out:
2187 up_write(&oi->ip_alloc_sem);
2189 return ret;
2193 * ocfs2_xattr_block_find()
2195 * Find extended attribute in external block and
2196 * fill search info into struct ocfs2_xattr_search.
2198 static int ocfs2_xattr_block_find(struct inode *inode,
2199 int name_index,
2200 const char *name,
2201 struct ocfs2_xattr_search *xs)
2203 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2204 struct buffer_head *blk_bh = NULL;
2205 struct ocfs2_xattr_block *xb;
2206 int ret = 0;
2208 if (!di->i_xattr_loc)
2209 return ret;
2211 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2212 &blk_bh);
2213 if (ret < 0) {
2214 mlog_errno(ret);
2215 return ret;
2218 xs->xattr_bh = blk_bh;
2219 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2221 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2222 xs->header = &xb->xb_attrs.xb_header;
2223 xs->base = (void *)xs->header;
2224 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2225 xs->here = xs->header->xh_entries;
2227 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2228 } else
2229 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2230 name_index,
2231 name, xs);
2233 if (ret && ret != -ENODATA) {
2234 xs->xattr_bh = NULL;
2235 goto cleanup;
2237 xs->not_found = ret;
2238 return 0;
2239 cleanup:
2240 brelse(blk_bh);
2242 return ret;
2245 static int ocfs2_create_xattr_block(handle_t *handle,
2246 struct inode *inode,
2247 struct buffer_head *inode_bh,
2248 struct ocfs2_alloc_context *meta_ac,
2249 struct buffer_head **ret_bh,
2250 int indexed)
2252 int ret;
2253 u16 suballoc_bit_start;
2254 u32 num_got;
2255 u64 first_blkno;
2256 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
2257 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2258 struct buffer_head *new_bh = NULL;
2259 struct ocfs2_xattr_block *xblk;
2261 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2262 OCFS2_JOURNAL_ACCESS_CREATE);
2263 if (ret < 0) {
2264 mlog_errno(ret);
2265 goto end;
2268 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2269 &suballoc_bit_start, &num_got,
2270 &first_blkno);
2271 if (ret < 0) {
2272 mlog_errno(ret);
2273 goto end;
2276 new_bh = sb_getblk(inode->i_sb, first_blkno);
2277 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2279 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2280 new_bh,
2281 OCFS2_JOURNAL_ACCESS_CREATE);
2282 if (ret < 0) {
2283 mlog_errno(ret);
2284 goto end;
2287 /* Initialize ocfs2_xattr_block */
2288 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2289 memset(xblk, 0, inode->i_sb->s_blocksize);
2290 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2291 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2292 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2293 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2294 xblk->xb_blkno = cpu_to_le64(first_blkno);
2296 if (indexed) {
2297 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2298 xr->xt_clusters = cpu_to_le32(1);
2299 xr->xt_last_eb_blk = 0;
2300 xr->xt_list.l_tree_depth = 0;
2301 xr->xt_list.l_count = cpu_to_le16(
2302 ocfs2_xattr_recs_per_xb(inode->i_sb));
2303 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2304 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2307 ret = ocfs2_journal_dirty(handle, new_bh);
2308 if (ret < 0) {
2309 mlog_errno(ret);
2310 goto end;
2312 di->i_xattr_loc = cpu_to_le64(first_blkno);
2313 ocfs2_journal_dirty(handle, inode_bh);
2315 *ret_bh = new_bh;
2316 new_bh = NULL;
2318 end:
2319 brelse(new_bh);
2320 return ret;
2324 * ocfs2_xattr_block_set()
2326 * Set, replace or remove an extended attribute into external block.
2329 static int ocfs2_xattr_block_set(struct inode *inode,
2330 struct ocfs2_xattr_info *xi,
2331 struct ocfs2_xattr_search *xs,
2332 struct ocfs2_xattr_set_ctxt *ctxt)
2334 struct buffer_head *new_bh = NULL;
2335 handle_t *handle = ctxt->handle;
2336 struct ocfs2_xattr_block *xblk = NULL;
2337 int ret;
2339 if (!xs->xattr_bh) {
2340 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2341 ctxt->meta_ac, &new_bh, 0);
2342 if (ret) {
2343 mlog_errno(ret);
2344 goto end;
2347 xs->xattr_bh = new_bh;
2348 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2349 xs->header = &xblk->xb_attrs.xb_header;
2350 xs->base = (void *)xs->header;
2351 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2352 xs->here = xs->header->xh_entries;
2353 } else
2354 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2356 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2357 /* Set extended attribute into external block */
2358 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2359 OCFS2_HAS_XATTR_FL);
2360 if (!ret || ret != -ENOSPC)
2361 goto end;
2363 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2364 if (ret)
2365 goto end;
2368 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2370 end:
2372 return ret;
2375 /* Check whether the new xattr can be inserted into the inode. */
2376 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2377 struct ocfs2_xattr_info *xi,
2378 struct ocfs2_xattr_search *xs)
2380 u64 value_size;
2381 struct ocfs2_xattr_entry *last;
2382 int free, i;
2383 size_t min_offs = xs->end - xs->base;
2385 if (!xs->header)
2386 return 0;
2388 last = xs->header->xh_entries;
2390 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2391 size_t offs = le16_to_cpu(last->xe_name_offset);
2392 if (offs < min_offs)
2393 min_offs = offs;
2394 last += 1;
2397 free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2398 if (free < 0)
2399 return 0;
2401 BUG_ON(!xs->not_found);
2403 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2404 value_size = OCFS2_XATTR_ROOT_SIZE;
2405 else
2406 value_size = OCFS2_XATTR_SIZE(xi->value_len);
2408 if (free >= sizeof(struct ocfs2_xattr_entry) +
2409 OCFS2_XATTR_SIZE(strlen(xi->name)) + value_size)
2410 return 1;
2412 return 0;
2415 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2416 struct ocfs2_dinode *di,
2417 struct ocfs2_xattr_info *xi,
2418 struct ocfs2_xattr_search *xis,
2419 struct ocfs2_xattr_search *xbs,
2420 int *clusters_need,
2421 int *meta_need,
2422 int *credits_need)
2424 int ret = 0, old_in_xb = 0;
2425 int clusters_add = 0, meta_add = 0, credits = 0;
2426 struct buffer_head *bh = NULL;
2427 struct ocfs2_xattr_block *xb = NULL;
2428 struct ocfs2_xattr_entry *xe = NULL;
2429 struct ocfs2_xattr_value_root *xv = NULL;
2430 char *base = NULL;
2431 int name_offset, name_len = 0;
2432 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2433 xi->value_len);
2434 u64 value_size;
2437 * Calculate the clusters we need to write.
2438 * No matter whether we replace an old one or add a new one,
2439 * we need this for writing.
2441 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
2442 credits += new_clusters *
2443 ocfs2_clusters_to_blocks(inode->i_sb, 1);
2445 if (xis->not_found && xbs->not_found) {
2446 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2448 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2449 clusters_add += new_clusters;
2450 credits += ocfs2_calc_extend_credits(inode->i_sb,
2451 &def_xv.xv.xr_list,
2452 new_clusters);
2455 goto meta_guess;
2458 if (!xis->not_found) {
2459 xe = xis->here;
2460 name_offset = le16_to_cpu(xe->xe_name_offset);
2461 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2462 base = xis->base;
2463 credits += OCFS2_INODE_UPDATE_CREDITS;
2464 } else {
2465 int i, block_off = 0;
2466 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2467 xe = xbs->here;
2468 name_offset = le16_to_cpu(xe->xe_name_offset);
2469 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2470 i = xbs->here - xbs->header->xh_entries;
2471 old_in_xb = 1;
2473 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2474 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2475 bucket_xh(xbs->bucket),
2476 i, &block_off,
2477 &name_offset);
2478 base = bucket_block(xbs->bucket, block_off);
2479 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2480 } else {
2481 base = xbs->base;
2482 credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2487 * delete a xattr doesn't need metadata and cluster allocation.
2488 * so just calculate the credits and return.
2490 * The credits for removing the value tree will be extended
2491 * by ocfs2_remove_extent itself.
2493 if (!xi->value) {
2494 if (!ocfs2_xattr_is_local(xe))
2495 credits += ocfs2_remove_extent_credits(inode->i_sb);
2497 goto out;
2500 /* do cluster allocation guess first. */
2501 value_size = le64_to_cpu(xe->xe_value_size);
2503 if (old_in_xb) {
2505 * In xattr set, we always try to set the xe in inode first,
2506 * so if it can be inserted into inode successfully, the old
2507 * one will be removed from the xattr block, and this xattr
2508 * will be inserted into inode as a new xattr in inode.
2510 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2511 clusters_add += new_clusters;
2512 credits += ocfs2_remove_extent_credits(inode->i_sb) +
2513 OCFS2_INODE_UPDATE_CREDITS;
2514 if (!ocfs2_xattr_is_local(xe))
2515 credits += ocfs2_calc_extend_credits(
2516 inode->i_sb,
2517 &def_xv.xv.xr_list,
2518 new_clusters);
2519 goto out;
2523 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
2524 /* the new values will be stored outside. */
2525 u32 old_clusters = 0;
2527 if (!ocfs2_xattr_is_local(xe)) {
2528 old_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2529 value_size);
2530 xv = (struct ocfs2_xattr_value_root *)
2531 (base + name_offset + name_len);
2532 value_size = OCFS2_XATTR_ROOT_SIZE;
2533 } else
2534 xv = &def_xv.xv;
2536 if (old_clusters >= new_clusters) {
2537 credits += ocfs2_remove_extent_credits(inode->i_sb);
2538 goto out;
2539 } else {
2540 meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2541 clusters_add += new_clusters - old_clusters;
2542 credits += ocfs2_calc_extend_credits(inode->i_sb,
2543 &xv->xr_list,
2544 new_clusters -
2545 old_clusters);
2546 if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2547 goto out;
2549 } else {
2551 * Now the new value will be stored inside. So if the new
2552 * value is smaller than the size of value root or the old
2553 * value, we don't need any allocation, otherwise we have
2554 * to guess metadata allocation.
2556 if ((ocfs2_xattr_is_local(xe) && value_size >= xi->value_len) ||
2557 (!ocfs2_xattr_is_local(xe) &&
2558 OCFS2_XATTR_ROOT_SIZE >= xi->value_len))
2559 goto out;
2562 meta_guess:
2563 /* calculate metadata allocation. */
2564 if (di->i_xattr_loc) {
2565 if (!xbs->xattr_bh) {
2566 ret = ocfs2_read_xattr_block(inode,
2567 le64_to_cpu(di->i_xattr_loc),
2568 &bh);
2569 if (ret) {
2570 mlog_errno(ret);
2571 goto out;
2574 xb = (struct ocfs2_xattr_block *)bh->b_data;
2575 } else
2576 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2579 * If there is already an xattr tree, good, we can calculate
2580 * like other b-trees. Otherwise we may have the chance of
2581 * create a tree, the credit calculation is borrowed from
2582 * ocfs2_calc_extend_credits with root_el = NULL. And the
2583 * new tree will be cluster based, so no meta is needed.
2585 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2586 struct ocfs2_extent_list *el =
2587 &xb->xb_attrs.xb_root.xt_list;
2588 meta_add += ocfs2_extend_meta_needed(el);
2589 credits += ocfs2_calc_extend_credits(inode->i_sb,
2590 el, 1);
2591 } else
2592 credits += OCFS2_SUBALLOC_ALLOC + 1;
2595 * This cluster will be used either for new bucket or for
2596 * new xattr block.
2597 * If the cluster size is the same as the bucket size, one
2598 * more is needed since we may need to extend the bucket
2599 * also.
2601 clusters_add += 1;
2602 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2603 if (OCFS2_XATTR_BUCKET_SIZE ==
2604 OCFS2_SB(inode->i_sb)->s_clustersize) {
2605 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2606 clusters_add += 1;
2608 } else {
2609 meta_add += 1;
2610 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2612 out:
2613 if (clusters_need)
2614 *clusters_need = clusters_add;
2615 if (meta_need)
2616 *meta_need = meta_add;
2617 if (credits_need)
2618 *credits_need = credits;
2619 brelse(bh);
2620 return ret;
2623 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2624 struct ocfs2_dinode *di,
2625 struct ocfs2_xattr_info *xi,
2626 struct ocfs2_xattr_search *xis,
2627 struct ocfs2_xattr_search *xbs,
2628 struct ocfs2_xattr_set_ctxt *ctxt,
2629 int extra_meta,
2630 int *credits)
2632 int clusters_add, meta_add, ret;
2633 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2635 memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2637 ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2639 ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2640 &clusters_add, &meta_add, credits);
2641 if (ret) {
2642 mlog_errno(ret);
2643 return ret;
2646 meta_add += extra_meta;
2647 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2648 "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2650 if (meta_add) {
2651 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2652 &ctxt->meta_ac);
2653 if (ret) {
2654 mlog_errno(ret);
2655 goto out;
2659 if (clusters_add) {
2660 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2661 if (ret)
2662 mlog_errno(ret);
2664 out:
2665 if (ret) {
2666 if (ctxt->meta_ac) {
2667 ocfs2_free_alloc_context(ctxt->meta_ac);
2668 ctxt->meta_ac = NULL;
2672 * We cannot have an error and a non null ctxt->data_ac.
2676 return ret;
2679 static int __ocfs2_xattr_set_handle(struct inode *inode,
2680 struct ocfs2_dinode *di,
2681 struct ocfs2_xattr_info *xi,
2682 struct ocfs2_xattr_search *xis,
2683 struct ocfs2_xattr_search *xbs,
2684 struct ocfs2_xattr_set_ctxt *ctxt)
2686 int ret = 0, credits, old_found;
2688 if (!xi->value) {
2689 /* Remove existing extended attribute */
2690 if (!xis->not_found)
2691 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2692 else if (!xbs->not_found)
2693 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2694 } else {
2695 /* We always try to set extended attribute into inode first*/
2696 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2697 if (!ret && !xbs->not_found) {
2699 * If succeed and that extended attribute existing in
2700 * external block, then we will remove it.
2702 xi->value = NULL;
2703 xi->value_len = 0;
2705 old_found = xis->not_found;
2706 xis->not_found = -ENODATA;
2707 ret = ocfs2_calc_xattr_set_need(inode,
2710 xis,
2711 xbs,
2712 NULL,
2713 NULL,
2714 &credits);
2715 xis->not_found = old_found;
2716 if (ret) {
2717 mlog_errno(ret);
2718 goto out;
2721 ret = ocfs2_extend_trans(ctxt->handle, credits +
2722 ctxt->handle->h_buffer_credits);
2723 if (ret) {
2724 mlog_errno(ret);
2725 goto out;
2727 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2728 } else if (ret == -ENOSPC) {
2729 if (di->i_xattr_loc && !xbs->xattr_bh) {
2730 ret = ocfs2_xattr_block_find(inode,
2731 xi->name_index,
2732 xi->name, xbs);
2733 if (ret)
2734 goto out;
2736 old_found = xis->not_found;
2737 xis->not_found = -ENODATA;
2738 ret = ocfs2_calc_xattr_set_need(inode,
2741 xis,
2742 xbs,
2743 NULL,
2744 NULL,
2745 &credits);
2746 xis->not_found = old_found;
2747 if (ret) {
2748 mlog_errno(ret);
2749 goto out;
2752 ret = ocfs2_extend_trans(ctxt->handle, credits +
2753 ctxt->handle->h_buffer_credits);
2754 if (ret) {
2755 mlog_errno(ret);
2756 goto out;
2760 * If no space in inode, we will set extended attribute
2761 * into external block.
2763 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2764 if (ret)
2765 goto out;
2766 if (!xis->not_found) {
2768 * If succeed and that extended attribute
2769 * existing in inode, we will remove it.
2771 xi->value = NULL;
2772 xi->value_len = 0;
2773 xbs->not_found = -ENODATA;
2774 ret = ocfs2_calc_xattr_set_need(inode,
2777 xis,
2778 xbs,
2779 NULL,
2780 NULL,
2781 &credits);
2782 if (ret) {
2783 mlog_errno(ret);
2784 goto out;
2787 ret = ocfs2_extend_trans(ctxt->handle, credits +
2788 ctxt->handle->h_buffer_credits);
2789 if (ret) {
2790 mlog_errno(ret);
2791 goto out;
2793 ret = ocfs2_xattr_ibody_set(inode, xi,
2794 xis, ctxt);
2799 if (!ret) {
2800 /* Update inode ctime. */
2801 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2802 xis->inode_bh,
2803 OCFS2_JOURNAL_ACCESS_WRITE);
2804 if (ret) {
2805 mlog_errno(ret);
2806 goto out;
2809 inode->i_ctime = CURRENT_TIME;
2810 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
2811 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
2812 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
2814 out:
2815 return ret;
2819 * This function only called duing creating inode
2820 * for init security/acl xattrs of the new inode.
2821 * All transanction credits have been reserved in mknod.
2823 int ocfs2_xattr_set_handle(handle_t *handle,
2824 struct inode *inode,
2825 struct buffer_head *di_bh,
2826 int name_index,
2827 const char *name,
2828 const void *value,
2829 size_t value_len,
2830 int flags,
2831 struct ocfs2_alloc_context *meta_ac,
2832 struct ocfs2_alloc_context *data_ac)
2834 struct ocfs2_dinode *di;
2835 int ret;
2837 struct ocfs2_xattr_info xi = {
2838 .name_index = name_index,
2839 .name = name,
2840 .value = value,
2841 .value_len = value_len,
2844 struct ocfs2_xattr_search xis = {
2845 .not_found = -ENODATA,
2848 struct ocfs2_xattr_search xbs = {
2849 .not_found = -ENODATA,
2852 struct ocfs2_xattr_set_ctxt ctxt = {
2853 .handle = handle,
2854 .meta_ac = meta_ac,
2855 .data_ac = data_ac,
2858 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2859 return -EOPNOTSUPP;
2862 * In extreme situation, may need xattr bucket when
2863 * block size is too small. And we have already reserved
2864 * the credits for bucket in mknod.
2866 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
2867 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2868 if (!xbs.bucket) {
2869 mlog_errno(-ENOMEM);
2870 return -ENOMEM;
2874 xis.inode_bh = xbs.inode_bh = di_bh;
2875 di = (struct ocfs2_dinode *)di_bh->b_data;
2877 down_write(&OCFS2_I(inode)->ip_xattr_sem);
2879 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2880 if (ret)
2881 goto cleanup;
2882 if (xis.not_found) {
2883 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2884 if (ret)
2885 goto cleanup;
2888 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
2890 cleanup:
2891 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2892 brelse(xbs.xattr_bh);
2893 ocfs2_xattr_bucket_free(xbs.bucket);
2895 return ret;
2899 * ocfs2_xattr_set()
2901 * Set, replace or remove an extended attribute for this inode.
2902 * value is NULL to remove an existing extended attribute, else either
2903 * create or replace an extended attribute.
2905 int ocfs2_xattr_set(struct inode *inode,
2906 int name_index,
2907 const char *name,
2908 const void *value,
2909 size_t value_len,
2910 int flags)
2912 struct buffer_head *di_bh = NULL;
2913 struct ocfs2_dinode *di;
2914 int ret, credits, ref_meta = 0, ref_credits = 0;
2915 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2916 struct inode *tl_inode = osb->osb_tl_inode;
2917 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2918 struct ocfs2_refcount_tree *ref_tree = NULL;
2920 struct ocfs2_xattr_info xi = {
2921 .name_index = name_index,
2922 .name = name,
2923 .value = value,
2924 .value_len = value_len,
2927 struct ocfs2_xattr_search xis = {
2928 .not_found = -ENODATA,
2931 struct ocfs2_xattr_search xbs = {
2932 .not_found = -ENODATA,
2935 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2936 return -EOPNOTSUPP;
2939 * Only xbs will be used on indexed trees. xis doesn't need a
2940 * bucket.
2942 xbs.bucket = ocfs2_xattr_bucket_new(inode);
2943 if (!xbs.bucket) {
2944 mlog_errno(-ENOMEM);
2945 return -ENOMEM;
2948 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2949 if (ret < 0) {
2950 mlog_errno(ret);
2951 goto cleanup_nolock;
2953 xis.inode_bh = xbs.inode_bh = di_bh;
2954 di = (struct ocfs2_dinode *)di_bh->b_data;
2956 down_write(&OCFS2_I(inode)->ip_xattr_sem);
2958 * Scan inode and external block to find the same name
2959 * extended attribute and collect search infomation.
2961 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2962 if (ret)
2963 goto cleanup;
2964 if (xis.not_found) {
2965 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2966 if (ret)
2967 goto cleanup;
2970 if (xis.not_found && xbs.not_found) {
2971 ret = -ENODATA;
2972 if (flags & XATTR_REPLACE)
2973 goto cleanup;
2974 ret = 0;
2975 if (!value)
2976 goto cleanup;
2977 } else {
2978 ret = -EEXIST;
2979 if (flags & XATTR_CREATE)
2980 goto cleanup;
2983 /* Check whether the value is refcounted and do some prepartion. */
2984 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
2985 (!xis.not_found || !xbs.not_found)) {
2986 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
2987 &xis, &xbs, &ref_tree,
2988 &ref_meta, &ref_credits);
2989 if (ret) {
2990 mlog_errno(ret);
2991 goto cleanup;
2995 mutex_lock(&tl_inode->i_mutex);
2997 if (ocfs2_truncate_log_needs_flush(osb)) {
2998 ret = __ocfs2_flush_truncate_log(osb);
2999 if (ret < 0) {
3000 mutex_unlock(&tl_inode->i_mutex);
3001 mlog_errno(ret);
3002 goto cleanup;
3005 mutex_unlock(&tl_inode->i_mutex);
3007 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3008 &xbs, &ctxt, ref_meta, &credits);
3009 if (ret) {
3010 mlog_errno(ret);
3011 goto cleanup;
3014 /* we need to update inode's ctime field, so add credit for it. */
3015 credits += OCFS2_INODE_UPDATE_CREDITS;
3016 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3017 if (IS_ERR(ctxt.handle)) {
3018 ret = PTR_ERR(ctxt.handle);
3019 mlog_errno(ret);
3020 goto cleanup;
3023 ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3025 ocfs2_commit_trans(osb, ctxt.handle);
3027 if (ctxt.data_ac)
3028 ocfs2_free_alloc_context(ctxt.data_ac);
3029 if (ctxt.meta_ac)
3030 ocfs2_free_alloc_context(ctxt.meta_ac);
3031 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3032 ocfs2_schedule_truncate_log_flush(osb, 1);
3033 ocfs2_run_deallocs(osb, &ctxt.dealloc);
3035 cleanup:
3036 if (ref_tree)
3037 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3038 up_write(&OCFS2_I(inode)->ip_xattr_sem);
3039 if (!value && !ret) {
3040 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3041 if (ret)
3042 mlog_errno(ret);
3044 ocfs2_inode_unlock(inode, 1);
3045 cleanup_nolock:
3046 brelse(di_bh);
3047 brelse(xbs.xattr_bh);
3048 ocfs2_xattr_bucket_free(xbs.bucket);
3050 return ret;
3054 * Find the xattr extent rec which may contains name_hash.
3055 * e_cpos will be the first name hash of the xattr rec.
3056 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3058 static int ocfs2_xattr_get_rec(struct inode *inode,
3059 u32 name_hash,
3060 u64 *p_blkno,
3061 u32 *e_cpos,
3062 u32 *num_clusters,
3063 struct ocfs2_extent_list *el)
3065 int ret = 0, i;
3066 struct buffer_head *eb_bh = NULL;
3067 struct ocfs2_extent_block *eb;
3068 struct ocfs2_extent_rec *rec = NULL;
3069 u64 e_blkno = 0;
3071 if (el->l_tree_depth) {
3072 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3073 &eb_bh);
3074 if (ret) {
3075 mlog_errno(ret);
3076 goto out;
3079 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3080 el = &eb->h_list;
3082 if (el->l_tree_depth) {
3083 ocfs2_error(inode->i_sb,
3084 "Inode %lu has non zero tree depth in "
3085 "xattr tree block %llu\n", inode->i_ino,
3086 (unsigned long long)eb_bh->b_blocknr);
3087 ret = -EROFS;
3088 goto out;
3092 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3093 rec = &el->l_recs[i];
3095 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3096 e_blkno = le64_to_cpu(rec->e_blkno);
3097 break;
3101 if (!e_blkno) {
3102 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3103 "record (%u, %u, 0) in xattr", inode->i_ino,
3104 le32_to_cpu(rec->e_cpos),
3105 ocfs2_rec_clusters(el, rec));
3106 ret = -EROFS;
3107 goto out;
3110 *p_blkno = le64_to_cpu(rec->e_blkno);
3111 *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3112 if (e_cpos)
3113 *e_cpos = le32_to_cpu(rec->e_cpos);
3114 out:
3115 brelse(eb_bh);
3116 return ret;
/*
 * Callback invoked by ocfs2_iterate_xattr_buckets() for each bucket it
 * reads; @para is the opaque argument passed through by the caller.
 * A non-zero return stops the iteration and is returned to the caller.
 */
typedef int (xattr_bucket_func)(struct inode *inode,
				struct ocfs2_xattr_bucket *bucket,
				void *para);
3123 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3124 struct ocfs2_xattr_bucket *bucket,
3125 int name_index,
3126 const char *name,
3127 u32 name_hash,
3128 u16 *xe_index,
3129 int *found)
3131 int i, ret = 0, cmp = 1, block_off, new_offset;
3132 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3133 size_t name_len = strlen(name);
3134 struct ocfs2_xattr_entry *xe = NULL;
3135 char *xe_name;
3138 * We don't use binary search in the bucket because there
3139 * may be multiple entries with the same name hash.
3141 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3142 xe = &xh->xh_entries[i];
3144 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3145 continue;
3146 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3147 break;
3149 cmp = name_index - ocfs2_xattr_get_type(xe);
3150 if (!cmp)
3151 cmp = name_len - xe->xe_name_len;
3152 if (cmp)
3153 continue;
3155 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3158 &block_off,
3159 &new_offset);
3160 if (ret) {
3161 mlog_errno(ret);
3162 break;
3166 xe_name = bucket_block(bucket, block_off) + new_offset;
3167 if (!memcmp(name, xe_name, name_len)) {
3168 *xe_index = i;
3169 *found = 1;
3170 ret = 0;
3171 break;
3175 return ret;
3179 * Find the specified xattr entry in a series of buckets.
3180 * This series start from p_blkno and last for num_clusters.
3181 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3182 * the num of the valid buckets.
3184 * Return the buffer_head this xattr should reside in. And if the xattr's
3185 * hash is in the gap of 2 buckets, return the lower bucket.
3187 static int ocfs2_xattr_bucket_find(struct inode *inode,
3188 int name_index,
3189 const char *name,
3190 u32 name_hash,
3191 u64 p_blkno,
3192 u32 first_hash,
3193 u32 num_clusters,
3194 struct ocfs2_xattr_search *xs)
3196 int ret, found = 0;
3197 struct ocfs2_xattr_header *xh = NULL;
3198 struct ocfs2_xattr_entry *xe = NULL;
3199 u16 index = 0;
3200 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3201 int low_bucket = 0, bucket, high_bucket;
3202 struct ocfs2_xattr_bucket *search;
3203 u32 last_hash;
3204 u64 blkno, lower_blkno = 0;
3206 search = ocfs2_xattr_bucket_new(inode);
3207 if (!search) {
3208 ret = -ENOMEM;
3209 mlog_errno(ret);
3210 goto out;
3213 ret = ocfs2_read_xattr_bucket(search, p_blkno);
3214 if (ret) {
3215 mlog_errno(ret);
3216 goto out;
3219 xh = bucket_xh(search);
3220 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3221 while (low_bucket <= high_bucket) {
3222 ocfs2_xattr_bucket_relse(search);
3224 bucket = (low_bucket + high_bucket) / 2;
3225 blkno = p_blkno + bucket * blk_per_bucket;
3226 ret = ocfs2_read_xattr_bucket(search, blkno);
3227 if (ret) {
3228 mlog_errno(ret);
3229 goto out;
3232 xh = bucket_xh(search);
3233 xe = &xh->xh_entries[0];
3234 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3235 high_bucket = bucket - 1;
3236 continue;
3240 * Check whether the hash of the last entry in our
3241 * bucket is larger than the search one. for an empty
3242 * bucket, the last one is also the first one.
3244 if (xh->xh_count)
3245 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3247 last_hash = le32_to_cpu(xe->xe_name_hash);
3249 /* record lower_blkno which may be the insert place. */
3250 lower_blkno = blkno;
3252 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3253 low_bucket = bucket + 1;
3254 continue;
3257 /* the searched xattr should reside in this bucket if exists. */
3258 ret = ocfs2_find_xe_in_bucket(inode, search,
3259 name_index, name, name_hash,
3260 &index, &found);
3261 if (ret) {
3262 mlog_errno(ret);
3263 goto out;
3265 break;
3269 * Record the bucket we have found.
3270 * When the xattr's hash value is in the gap of 2 buckets, we will
3271 * always set it to the previous bucket.
3273 if (!lower_blkno)
3274 lower_blkno = p_blkno;
3276 /* This should be in cache - we just read it during the search */
3277 ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3278 if (ret) {
3279 mlog_errno(ret);
3280 goto out;
3283 xs->header = bucket_xh(xs->bucket);
3284 xs->base = bucket_block(xs->bucket, 0);
3285 xs->end = xs->base + inode->i_sb->s_blocksize;
3287 if (found) {
3288 xs->here = &xs->header->xh_entries[index];
3289 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3290 (unsigned long long)bucket_blkno(xs->bucket), index);
3291 } else
3292 ret = -ENODATA;
3294 out:
3295 ocfs2_xattr_bucket_free(search);
3296 return ret;
3299 static int ocfs2_xattr_index_block_find(struct inode *inode,
3300 struct buffer_head *root_bh,
3301 int name_index,
3302 const char *name,
3303 struct ocfs2_xattr_search *xs)
3305 int ret;
3306 struct ocfs2_xattr_block *xb =
3307 (struct ocfs2_xattr_block *)root_bh->b_data;
3308 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3309 struct ocfs2_extent_list *el = &xb_root->xt_list;
3310 u64 p_blkno = 0;
3311 u32 first_hash, num_clusters = 0;
3312 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3314 if (le16_to_cpu(el->l_next_free_rec) == 0)
3315 return -ENODATA;
3317 mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3318 name, name_hash, name_index);
3320 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3321 &num_clusters, el);
3322 if (ret) {
3323 mlog_errno(ret);
3324 goto out;
3327 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3329 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3330 "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3331 first_hash);
3333 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3334 p_blkno, first_hash, num_clusters, xs);
3336 out:
3337 return ret;
3340 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3341 u64 blkno,
3342 u32 clusters,
3343 xattr_bucket_func *func,
3344 void *para)
3346 int i, ret = 0;
3347 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3348 u32 num_buckets = clusters * bpc;
3349 struct ocfs2_xattr_bucket *bucket;
3351 bucket = ocfs2_xattr_bucket_new(inode);
3352 if (!bucket) {
3353 mlog_errno(-ENOMEM);
3354 return -ENOMEM;
3357 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3358 clusters, (unsigned long long)blkno);
3360 for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3361 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3362 if (ret) {
3363 mlog_errno(ret);
3364 break;
3368 * The real bucket num in this series of blocks is stored
3369 * in the 1st bucket.
3371 if (i == 0)
3372 num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3374 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3375 (unsigned long long)blkno,
3376 le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3377 if (func) {
3378 ret = func(inode, bucket, para);
3379 if (ret && ret != -ERANGE)
3380 mlog_errno(ret);
3381 /* Fall through to bucket_relse() */
3384 ocfs2_xattr_bucket_relse(bucket);
3385 if (ret)
3386 break;
3389 ocfs2_xattr_bucket_free(bucket);
3390 return ret;
3393 struct ocfs2_xattr_tree_list {
3394 char *buffer;
3395 size_t buffer_size;
3396 size_t result;
3399 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3400 struct ocfs2_xattr_header *xh,
3401 int index,
3402 int *block_off,
3403 int *new_offset)
3405 u16 name_offset;
3407 if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3408 return -EINVAL;
3410 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3412 *block_off = name_offset >> sb->s_blocksize_bits;
3413 *new_offset = name_offset % sb->s_blocksize;
3415 return 0;
3418 static int ocfs2_list_xattr_bucket(struct inode *inode,
3419 struct ocfs2_xattr_bucket *bucket,
3420 void *para)
3422 int ret = 0, type;
3423 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3424 int i, block_off, new_offset;
3425 const char *prefix, *name;
3427 for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3428 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3429 type = ocfs2_xattr_get_type(entry);
3430 prefix = ocfs2_xattr_prefix(type);
3432 if (prefix) {
3433 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3434 bucket_xh(bucket),
3436 &block_off,
3437 &new_offset);
3438 if (ret)
3439 break;
3441 name = (const char *)bucket_block(bucket, block_off) +
3442 new_offset;
3443 ret = ocfs2_xattr_list_entry(xl->buffer,
3444 xl->buffer_size,
3445 &xl->result,
3446 prefix, name,
3447 entry->xe_name_len);
3448 if (ret)
3449 break;
3453 return ret;
3456 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3457 struct buffer_head *blk_bh,
3458 xattr_tree_rec_func *rec_func,
3459 void *para)
3461 struct ocfs2_xattr_block *xb =
3462 (struct ocfs2_xattr_block *)blk_bh->b_data;
3463 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3464 int ret = 0;
3465 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3466 u64 p_blkno = 0;
3468 if (!el->l_next_free_rec || !rec_func)
3469 return 0;
3471 while (name_hash > 0) {
3472 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3473 &e_cpos, &num_clusters, el);
3474 if (ret) {
3475 mlog_errno(ret);
3476 break;
3479 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3480 num_clusters, para);
3481 if (ret) {
3482 if (ret != -ERANGE)
3483 mlog_errno(ret);
3484 break;
3487 if (e_cpos == 0)
3488 break;
3490 name_hash = e_cpos - 1;
3493 return ret;
3497 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
3498 struct buffer_head *root_bh,
3499 u64 blkno, u32 cpos, u32 len, void *para)
3501 return ocfs2_iterate_xattr_buckets(inode, blkno, len,
3502 ocfs2_list_xattr_bucket, para);
3505 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3506 struct buffer_head *blk_bh,
3507 char *buffer,
3508 size_t buffer_size)
3510 int ret;
3511 struct ocfs2_xattr_tree_list xl = {
3512 .buffer = buffer,
3513 .buffer_size = buffer_size,
3514 .result = 0,
3517 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
3518 ocfs2_list_xattr_tree_rec, &xl);
3519 if (ret) {
3520 mlog_errno(ret);
3521 goto out;
3524 ret = xl.result;
3525 out:
3526 return ret;
3529 static int cmp_xe(const void *a, const void *b)
3531 const struct ocfs2_xattr_entry *l = a, *r = b;
3532 u32 l_hash = le32_to_cpu(l->xe_name_hash);
3533 u32 r_hash = le32_to_cpu(r->xe_name_hash);
3535 if (l_hash > r_hash)
3536 return 1;
3537 if (l_hash < r_hash)
3538 return -1;
3539 return 0;
3542 static void swap_xe(void *a, void *b, int size)
3544 struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3546 tmp = *l;
3547 memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3548 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3552 * When the ocfs2_xattr_block is filled up, new bucket will be created
3553 * and all the xattr entries will be moved to the new bucket.
3554 * The header goes at the start of the bucket, and the names+values are
3555 * filled from the end. This is why *target starts as the last buffer.
3556 * Note: we need to sort the entries since they are not saved in order
3557 * in the ocfs2_xattr_block.
3559 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3560 struct buffer_head *xb_bh,
3561 struct ocfs2_xattr_bucket *bucket)
3563 int i, blocksize = inode->i_sb->s_blocksize;
3564 int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3565 u16 offset, size, off_change;
3566 struct ocfs2_xattr_entry *xe;
3567 struct ocfs2_xattr_block *xb =
3568 (struct ocfs2_xattr_block *)xb_bh->b_data;
3569 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3570 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3571 u16 count = le16_to_cpu(xb_xh->xh_count);
3572 char *src = xb_bh->b_data;
3573 char *target = bucket_block(bucket, blks - 1);
3575 mlog(0, "cp xattr from block %llu to bucket %llu\n",
3576 (unsigned long long)xb_bh->b_blocknr,
3577 (unsigned long long)bucket_blkno(bucket));
3579 for (i = 0; i < blks; i++)
3580 memset(bucket_block(bucket, i), 0, blocksize);
3583 * Since the xe_name_offset is based on ocfs2_xattr_header,
3584 * there is a offset change corresponding to the change of
3585 * ocfs2_xattr_header's position.
3587 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3588 xe = &xb_xh->xh_entries[count - 1];
3589 offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3590 size = blocksize - offset;
3592 /* copy all the names and values. */
3593 memcpy(target + offset, src + offset, size);
3595 /* Init new header now. */
3596 xh->xh_count = xb_xh->xh_count;
3597 xh->xh_num_buckets = cpu_to_le16(1);
3598 xh->xh_name_value_len = cpu_to_le16(size);
3599 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3601 /* copy all the entries. */
3602 target = bucket_block(bucket, 0);
3603 offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3604 size = count * sizeof(struct ocfs2_xattr_entry);
3605 memcpy(target + offset, (char *)xb_xh + offset, size);
3607 /* Change the xe offset for all the xe because of the move. */
3608 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3609 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3610 for (i = 0; i < count; i++)
3611 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3613 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3614 offset, size, off_change);
3616 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3617 cmp_xe, swap_xe);
3621 * After we move xattr from block to index btree, we have to
3622 * update ocfs2_xattr_search to the new xe and base.
3624 * When the entry is in xattr block, xattr_bh indicates the storage place.
3625 * While if the entry is in index b-tree, "bucket" indicates the
3626 * real place of the xattr.
3628 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3629 struct ocfs2_xattr_search *xs,
3630 struct buffer_head *old_bh)
3632 char *buf = old_bh->b_data;
3633 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3634 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3635 int i;
3637 xs->header = bucket_xh(xs->bucket);
3638 xs->base = bucket_block(xs->bucket, 0);
3639 xs->end = xs->base + inode->i_sb->s_blocksize;
3641 if (xs->not_found)
3642 return;
3644 i = xs->here - old_xh->xh_entries;
3645 xs->here = &xs->header->xh_entries[i];
3648 static int ocfs2_xattr_create_index_block(struct inode *inode,
3649 struct ocfs2_xattr_search *xs,
3650 struct ocfs2_xattr_set_ctxt *ctxt)
3652 int ret;
3653 u32 bit_off, len;
3654 u64 blkno;
3655 handle_t *handle = ctxt->handle;
3656 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3657 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3658 struct buffer_head *xb_bh = xs->xattr_bh;
3659 struct ocfs2_xattr_block *xb =
3660 (struct ocfs2_xattr_block *)xb_bh->b_data;
3661 struct ocfs2_xattr_tree_root *xr;
3662 u16 xb_flags = le16_to_cpu(xb->xb_flags);
3664 mlog(0, "create xattr index block for %llu\n",
3665 (unsigned long long)xb_bh->b_blocknr);
3667 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3668 BUG_ON(!xs->bucket);
3671 * XXX:
3672 * We can use this lock for now, and maybe move to a dedicated mutex
3673 * if performance becomes a problem later.
3675 down_write(&oi->ip_alloc_sem);
3677 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3678 OCFS2_JOURNAL_ACCESS_WRITE);
3679 if (ret) {
3680 mlog_errno(ret);
3681 goto out;
3684 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3685 1, 1, &bit_off, &len);
3686 if (ret) {
3687 mlog_errno(ret);
3688 goto out;
3692 * The bucket may spread in many blocks, and
3693 * we will only touch the 1st block and the last block
3694 * in the whole bucket(one for entry and one for data).
3696 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3698 mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3699 (unsigned long long)blkno);
3701 ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3702 if (ret) {
3703 mlog_errno(ret);
3704 goto out;
3707 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3708 OCFS2_JOURNAL_ACCESS_CREATE);
3709 if (ret) {
3710 mlog_errno(ret);
3711 goto out;
3714 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3715 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3717 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3719 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3720 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3721 offsetof(struct ocfs2_xattr_block, xb_attrs));
3723 xr = &xb->xb_attrs.xb_root;
3724 xr->xt_clusters = cpu_to_le32(1);
3725 xr->xt_last_eb_blk = 0;
3726 xr->xt_list.l_tree_depth = 0;
3727 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3728 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3730 xr->xt_list.l_recs[0].e_cpos = 0;
3731 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3732 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3734 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3736 ocfs2_journal_dirty(handle, xb_bh);
3738 out:
3739 up_write(&oi->ip_alloc_sem);
3741 return ret;
3744 static int cmp_xe_offset(const void *a, const void *b)
3746 const struct ocfs2_xattr_entry *l = a, *r = b;
3747 u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3748 u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3750 if (l_name_offset < r_name_offset)
3751 return 1;
3752 if (l_name_offset > r_name_offset)
3753 return -1;
3754 return 0;
3758 * defrag a xattr bucket if we find that the bucket has some
3759 * holes beteen name/value pairs.
3760 * We will move all the name/value pairs to the end of the bucket
3761 * so that we can spare some space for insertion.
3763 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3764 handle_t *handle,
3765 struct ocfs2_xattr_bucket *bucket)
3767 int ret, i;
3768 size_t end, offset, len, value_len;
3769 struct ocfs2_xattr_header *xh;
3770 char *entries, *buf, *bucket_buf = NULL;
3771 u64 blkno = bucket_blkno(bucket);
3772 u16 xh_free_start;
3773 size_t blocksize = inode->i_sb->s_blocksize;
3774 struct ocfs2_xattr_entry *xe;
3777 * In order to make the operation more efficient and generic,
3778 * we copy all the blocks into a contiguous memory and do the
3779 * defragment there, so if anything is error, we will not touch
3780 * the real block.
3782 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3783 if (!bucket_buf) {
3784 ret = -EIO;
3785 goto out;
3788 buf = bucket_buf;
3789 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3790 memcpy(buf, bucket_block(bucket, i), blocksize);
3792 ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
3793 OCFS2_JOURNAL_ACCESS_WRITE);
3794 if (ret < 0) {
3795 mlog_errno(ret);
3796 goto out;
3799 xh = (struct ocfs2_xattr_header *)bucket_buf;
3800 entries = (char *)xh->xh_entries;
3801 xh_free_start = le16_to_cpu(xh->xh_free_start);
3803 mlog(0, "adjust xattr bucket in %llu, count = %u, "
3804 "xh_free_start = %u, xh_name_value_len = %u.\n",
3805 (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
3806 xh_free_start, le16_to_cpu(xh->xh_name_value_len));
3809 * sort all the entries by their offset.
3810 * the largest will be the first, so that we can
3811 * move them to the end one by one.
3813 sort(entries, le16_to_cpu(xh->xh_count),
3814 sizeof(struct ocfs2_xattr_entry),
3815 cmp_xe_offset, swap_xe);
3817 /* Move all name/values to the end of the bucket. */
3818 xe = xh->xh_entries;
3819 end = OCFS2_XATTR_BUCKET_SIZE;
3820 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
3821 offset = le16_to_cpu(xe->xe_name_offset);
3822 if (ocfs2_xattr_is_local(xe))
3823 value_len = OCFS2_XATTR_SIZE(
3824 le64_to_cpu(xe->xe_value_size));
3825 else
3826 value_len = OCFS2_XATTR_ROOT_SIZE;
3827 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
3830 * We must make sure that the name/value pair
3831 * exist in the same block. So adjust end to
3832 * the previous block end if needed.
3834 if (((end - len) / blocksize !=
3835 (end - 1) / blocksize))
3836 end = end - end % blocksize;
3838 if (end > offset + len) {
3839 memmove(bucket_buf + end - len,
3840 bucket_buf + offset, len);
3841 xe->xe_name_offset = cpu_to_le16(end - len);
3844 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
3845 "bucket %llu\n", (unsigned long long)blkno);
3847 end -= len;
3850 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
3851 "bucket %llu\n", (unsigned long long)blkno);
3853 if (xh_free_start == end)
3854 goto out;
3856 memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
3857 xh->xh_free_start = cpu_to_le16(end);
3859 /* sort the entries by their name_hash. */
3860 sort(entries, le16_to_cpu(xh->xh_count),
3861 sizeof(struct ocfs2_xattr_entry),
3862 cmp_xe, swap_xe);
3864 buf = bucket_buf;
3865 for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
3866 memcpy(bucket_block(bucket, i), buf, blocksize);
3867 ocfs2_xattr_bucket_journal_dirty(handle, bucket);
3869 out:
3870 kfree(bucket_buf);
3871 return ret;
3875 * prev_blkno points to the start of an existing extent. new_blkno
3876 * points to a newly allocated extent. Because we know each of our
3877 * clusters contains more than bucket, we can easily split one cluster
3878 * at a bucket boundary. So we take the last cluster of the existing
3879 * extent and split it down the middle. We move the last half of the
3880 * buckets in the last cluster of the existing extent over to the new
3881 * extent.
3883 * first_bh is the buffer at prev_blkno so we can update the existing
3884 * extent's bucket count. header_bh is the bucket were we were hoping
3885 * to insert our xattr. If the bucket move places the target in the new
3886 * extent, we'll update first_bh and header_bh after modifying the old
3887 * extent.
3889 * first_hash will be set as the 1st xe's name_hash in the new extent.
3891 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3892 handle_t *handle,
3893 struct ocfs2_xattr_bucket *first,
3894 struct ocfs2_xattr_bucket *target,
3895 u64 new_blkno,
3896 u32 num_clusters,
3897 u32 *first_hash)
3899 int ret;
3900 struct super_block *sb = inode->i_sb;
3901 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
3902 int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
3903 int to_move = num_buckets / 2;
3904 u64 src_blkno;
3905 u64 last_cluster_blkno = bucket_blkno(first) +
3906 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
3908 BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
3909 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
3911 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3912 (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
3914 ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
3915 last_cluster_blkno, new_blkno,
3916 to_move, first_hash);
3917 if (ret) {
3918 mlog_errno(ret);
3919 goto out;
3922 /* This is the first bucket that got moved */
3923 src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
3926 * If the target bucket was part of the moved buckets, we need to
3927 * update first and target.
3929 if (bucket_blkno(target) >= src_blkno) {
3930 /* Find the block for the new target bucket */
3931 src_blkno = new_blkno +
3932 (bucket_blkno(target) - src_blkno);
3934 ocfs2_xattr_bucket_relse(first);
3935 ocfs2_xattr_bucket_relse(target);
3938 * These shouldn't fail - the buffers are in the
3939 * journal from ocfs2_cp_xattr_bucket().
3941 ret = ocfs2_read_xattr_bucket(first, new_blkno);
3942 if (ret) {
3943 mlog_errno(ret);
3944 goto out;
3946 ret = ocfs2_read_xattr_bucket(target, src_blkno);
3947 if (ret)
3948 mlog_errno(ret);
3952 out:
3953 return ret;
3957 * Find the suitable pos when we divide a bucket into 2.
3958 * We have to make sure the xattrs with the same hash value exist
3959 * in the same bucket.
3961 * If this ocfs2_xattr_header covers more than one hash value, find a
3962 * place where the hash value changes. Try to find the most even split.
3963 * The most common case is that all entries have different hash values,
3964 * and the first check we make will find a place to split.
3966 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
3968 struct ocfs2_xattr_entry *entries = xh->xh_entries;
3969 int count = le16_to_cpu(xh->xh_count);
3970 int delta, middle = count / 2;
3973 * We start at the middle. Each step gets farther away in both
3974 * directions. We therefore hit the change in hash value
3975 * nearest to the middle. Note that this loop does not execute for
3976 * count < 2.
3978 for (delta = 0; delta < middle; delta++) {
3979 /* Let's check delta earlier than middle */
3980 if (cmp_xe(&entries[middle - delta - 1],
3981 &entries[middle - delta]))
3982 return middle - delta;
3984 /* For even counts, don't walk off the end */
3985 if ((middle + delta + 1) == count)
3986 continue;
3988 /* Now try delta past middle */
3989 if (cmp_xe(&entries[middle + delta],
3990 &entries[middle + delta + 1]))
3991 return middle + delta + 1;
3994 /* Every entry had the same hash */
3995 return count;
3999 * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4000 * first_hash will record the 1st hash of the new bucket.
4002 * Normally half of the xattrs will be moved. But we have to make
4003 * sure that the xattrs with the same hash value are stored in the
4004 * same bucket. If all the xattrs in this bucket have the same hash
4005 * value, the new bucket will be initialized as an empty one and the
4006 * first_hash will be initialized as (hash_value+1).
4008 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4009 handle_t *handle,
4010 u64 blk,
4011 u64 new_blk,
4012 u32 *first_hash,
4013 int new_bucket_head)
4015 int ret, i;
4016 int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
4017 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4018 struct ocfs2_xattr_header *xh;
4019 struct ocfs2_xattr_entry *xe;
4020 int blocksize = inode->i_sb->s_blocksize;
4022 mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4023 (unsigned long long)blk, (unsigned long long)new_blk);
4025 s_bucket = ocfs2_xattr_bucket_new(inode);
4026 t_bucket = ocfs2_xattr_bucket_new(inode);
4027 if (!s_bucket || !t_bucket) {
4028 ret = -ENOMEM;
4029 mlog_errno(ret);
4030 goto out;
4033 ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4034 if (ret) {
4035 mlog_errno(ret);
4036 goto out;
4039 ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4040 OCFS2_JOURNAL_ACCESS_WRITE);
4041 if (ret) {
4042 mlog_errno(ret);
4043 goto out;
4047 * Even if !new_bucket_head, we're overwriting t_bucket. Thus,
4048 * there's no need to read it.
4050 ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4051 if (ret) {
4052 mlog_errno(ret);
4053 goto out;
4057 * Hey, if we're overwriting t_bucket, what difference does
4058 * ACCESS_CREATE vs ACCESS_WRITE make? See the comment in the
4059 * same part of ocfs2_cp_xattr_bucket().
4061 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4062 new_bucket_head ?
4063 OCFS2_JOURNAL_ACCESS_CREATE :
4064 OCFS2_JOURNAL_ACCESS_WRITE);
4065 if (ret) {
4066 mlog_errno(ret);
4067 goto out;
4070 xh = bucket_xh(s_bucket);
4071 count = le16_to_cpu(xh->xh_count);
4072 start = ocfs2_xattr_find_divide_pos(xh);
4074 if (start == count) {
4075 xe = &xh->xh_entries[start-1];
4078 * initialized a new empty bucket here.
4079 * The hash value is set as one larger than
4080 * that of the last entry in the previous bucket.
4082 for (i = 0; i < t_bucket->bu_blocks; i++)
4083 memset(bucket_block(t_bucket, i), 0, blocksize);
4085 xh = bucket_xh(t_bucket);
4086 xh->xh_free_start = cpu_to_le16(blocksize);
4087 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4088 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4090 goto set_num_buckets;
4093 /* copy the whole bucket to the new first. */
4094 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4096 /* update the new bucket. */
4097 xh = bucket_xh(t_bucket);
4100 * Calculate the total name/value len and xh_free_start for
4101 * the old bucket first.
4103 name_offset = OCFS2_XATTR_BUCKET_SIZE;
4104 name_value_len = 0;
4105 for (i = 0; i < start; i++) {
4106 xe = &xh->xh_entries[i];
4107 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
4108 if (ocfs2_xattr_is_local(xe))
4109 xe_len +=
4110 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4111 else
4112 xe_len += OCFS2_XATTR_ROOT_SIZE;
4113 name_value_len += xe_len;
4114 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4115 name_offset = le16_to_cpu(xe->xe_name_offset);
4119 * Now begin the modification to the new bucket.
4121 * In the new bucket, We just move the xattr entry to the beginning
4122 * and don't touch the name/value. So there will be some holes in the
4123 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4124 * called.
4126 xe = &xh->xh_entries[start];
4127 len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4128 mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4129 (int)((char *)xe - (char *)xh),
4130 (int)((char *)xh->xh_entries - (char *)xh));
4131 memmove((char *)xh->xh_entries, (char *)xe, len);
4132 xe = &xh->xh_entries[count - start];
4133 len = sizeof(struct ocfs2_xattr_entry) * start;
4134 memset((char *)xe, 0, len);
4136 le16_add_cpu(&xh->xh_count, -start);
4137 le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4139 /* Calculate xh_free_start for the new bucket. */
4140 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4141 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4142 xe = &xh->xh_entries[i];
4143 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
4144 if (ocfs2_xattr_is_local(xe))
4145 xe_len +=
4146 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4147 else
4148 xe_len += OCFS2_XATTR_ROOT_SIZE;
4149 if (le16_to_cpu(xe->xe_name_offset) <
4150 le16_to_cpu(xh->xh_free_start))
4151 xh->xh_free_start = xe->xe_name_offset;
4154 set_num_buckets:
4155 /* set xh->xh_num_buckets for the new xh. */
4156 if (new_bucket_head)
4157 xh->xh_num_buckets = cpu_to_le16(1);
4158 else
4159 xh->xh_num_buckets = 0;
4161 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4163 /* store the first_hash of the new bucket. */
4164 if (first_hash)
4165 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4168 * Now only update the 1st block of the old bucket. If we
4169 * just added a new empty bucket, there is no need to modify
4170 * it.
4172 if (start == count)
4173 goto out;
4175 xh = bucket_xh(s_bucket);
4176 memset(&xh->xh_entries[start], 0,
4177 sizeof(struct ocfs2_xattr_entry) * (count - start));
4178 xh->xh_count = cpu_to_le16(start);
4179 xh->xh_free_start = cpu_to_le16(name_offset);
4180 xh->xh_name_value_len = cpu_to_le16(name_value_len);
4182 ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4184 out:
4185 ocfs2_xattr_bucket_free(s_bucket);
4186 ocfs2_xattr_bucket_free(t_bucket);
4188 return ret;
4192 * Copy xattr from one bucket to another bucket.
4194 * The caller must make sure that the journal transaction
4195 * has enough space for journaling.
4197 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4198 handle_t *handle,
4199 u64 s_blkno,
4200 u64 t_blkno,
4201 int t_is_new)
4203 int ret;
4204 struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4206 BUG_ON(s_blkno == t_blkno);
4208 mlog(0, "cp bucket %llu to %llu, target is %d\n",
4209 (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4210 t_is_new);
4212 s_bucket = ocfs2_xattr_bucket_new(inode);
4213 t_bucket = ocfs2_xattr_bucket_new(inode);
4214 if (!s_bucket || !t_bucket) {
4215 ret = -ENOMEM;
4216 mlog_errno(ret);
4217 goto out;
4220 ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4221 if (ret)
4222 goto out;
4225 * Even if !t_is_new, we're overwriting t_bucket. Thus,
4226 * there's no need to read it.
4228 ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4229 if (ret)
4230 goto out;
4233 * Hey, if we're overwriting t_bucket, what difference does
4234 * ACCESS_CREATE vs ACCESS_WRITE make? Well, if we allocated a new
4235 * cluster to fill, we came here from
4236 * ocfs2_mv_xattr_buckets(), and it is really new -
4237 * ACCESS_CREATE is required. But we also might have moved data
4238 * out of t_bucket before extending back into it.
4239 * ocfs2_add_new_xattr_bucket() can do this - its call to
4240 * ocfs2_add_new_xattr_cluster() may have created a new extent
4241 * and copied out the end of the old extent. Then it re-extends
4242 * the old extent back to create space for new xattrs. That's
4243 * how we get here, and the bucket isn't really new.
4245 ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4246 t_is_new ?
4247 OCFS2_JOURNAL_ACCESS_CREATE :
4248 OCFS2_JOURNAL_ACCESS_WRITE);
4249 if (ret)
4250 goto out;
4252 ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4253 ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4255 out:
4256 ocfs2_xattr_bucket_free(t_bucket);
4257 ocfs2_xattr_bucket_free(s_bucket);
4259 return ret;
4263 * src_blk points to the start of an existing extent. last_blk points to
4264 * last cluster in that extent. to_blk points to a newly allocated
4265 * extent. We copy the buckets from the cluster at last_blk to the new
4266 * extent. If start_bucket is non-zero, we skip that many buckets before
4267 * we start copying. The new extent's xh_num_buckets gets set to the
4268 * number of buckets we copied. The old extent's xh_num_buckets shrinks
4269 * by the same amount.
4271 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4272 u64 src_blk, u64 last_blk, u64 to_blk,
4273 unsigned int start_bucket,
4274 u32 *first_hash)
4276 int i, ret, credits;
4277 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4278 int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4279 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4280 struct ocfs2_xattr_bucket *old_first, *new_first;
4282 mlog(0, "mv xattrs from cluster %llu to %llu\n",
4283 (unsigned long long)last_blk, (unsigned long long)to_blk);
4285 BUG_ON(start_bucket >= num_buckets);
4286 if (start_bucket) {
4287 num_buckets -= start_bucket;
4288 last_blk += (start_bucket * blks_per_bucket);
4291 /* The first bucket of the original extent */
4292 old_first = ocfs2_xattr_bucket_new(inode);
4293 /* The first bucket of the new extent */
4294 new_first = ocfs2_xattr_bucket_new(inode);
4295 if (!old_first || !new_first) {
4296 ret = -ENOMEM;
4297 mlog_errno(ret);
4298 goto out;
4301 ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4302 if (ret) {
4303 mlog_errno(ret);
4304 goto out;
4308 * We need to update the first bucket of the old extent and all
4309 * the buckets going to the new extent.
4311 credits = ((num_buckets + 1) * blks_per_bucket) +
4312 handle->h_buffer_credits;
4313 ret = ocfs2_extend_trans(handle, credits);
4314 if (ret) {
4315 mlog_errno(ret);
4316 goto out;
4319 ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4320 OCFS2_JOURNAL_ACCESS_WRITE);
4321 if (ret) {
4322 mlog_errno(ret);
4323 goto out;
4326 for (i = 0; i < num_buckets; i++) {
4327 ret = ocfs2_cp_xattr_bucket(inode, handle,
4328 last_blk + (i * blks_per_bucket),
4329 to_blk + (i * blks_per_bucket),
4331 if (ret) {
4332 mlog_errno(ret);
4333 goto out;
4338 * Get the new bucket ready before we dirty anything
4339 * (This actually shouldn't fail, because we already dirtied
4340 * it once in ocfs2_cp_xattr_bucket()).
4342 ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4343 if (ret) {
4344 mlog_errno(ret);
4345 goto out;
4347 ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4348 OCFS2_JOURNAL_ACCESS_WRITE);
4349 if (ret) {
4350 mlog_errno(ret);
4351 goto out;
4354 /* Now update the headers */
4355 le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4356 ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4358 bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4359 ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4361 if (first_hash)
4362 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4364 out:
4365 ocfs2_xattr_bucket_free(new_first);
4366 ocfs2_xattr_bucket_free(old_first);
4367 return ret;
4371 * Move some xattrs in this cluster to the new cluster.
4372 * This function should only be called when bucket size == cluster size.
4373 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4375 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4376 handle_t *handle,
4377 u64 prev_blk,
4378 u64 new_blk,
4379 u32 *first_hash)
4381 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4382 int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4384 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4386 ret = ocfs2_extend_trans(handle, credits);
4387 if (ret) {
4388 mlog_errno(ret);
4389 return ret;
4392 /* Move half of the xattr in start_blk to the next bucket. */
4393 return ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4394 new_blk, first_hash, 1);
4398 * Move some xattrs from the old cluster to the new one since they are not
4399 * contiguous in ocfs2 xattr tree.
4401 * new_blk starts a new separate cluster, and we will move some xattrs from
4402 * prev_blk to it. v_start will be set as the first name hash value in this
4403 * new cluster so that it can be used as e_cpos during tree insertion and
4404 * don't collide with our original b-tree operations. first_bh and header_bh
4405 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4406 * to extend the insert bucket.
4408 * The problem is how much xattr should we move to the new one and when should
4409 * we update first_bh and header_bh?
4410 * 1. If cluster size > bucket size, that means the previous cluster has more
4411 * than 1 bucket, so just move half nums of bucket into the new cluster and
4412 * update the first_bh and header_bh if the insert bucket has been moved
4413 * to the new cluster.
4414 * 2. If cluster_size == bucket_size:
4415 * a) If the previous extent rec has more than one cluster and the insert
4416 * place isn't in the last cluster, copy the entire last cluster to the
4417 * new one. This time, we don't need to upate the first_bh and header_bh
4418 * since they will not be moved into the new cluster.
4419 * b) Otherwise, move the bottom half of the xattrs in the last cluster into
4420 * the new one. And we set the extend flag to zero if the insert place is
4421 * moved into the new allocated cluster since no extend is needed.
4423 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4424 handle_t *handle,
4425 struct ocfs2_xattr_bucket *first,
4426 struct ocfs2_xattr_bucket *target,
4427 u64 new_blk,
4428 u32 prev_clusters,
4429 u32 *v_start,
4430 int *extend)
4432 int ret;
4434 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4435 (unsigned long long)bucket_blkno(first), prev_clusters,
4436 (unsigned long long)new_blk);
4438 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4439 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4440 handle,
4441 first, target,
4442 new_blk,
4443 prev_clusters,
4444 v_start);
4445 if (ret)
4446 mlog_errno(ret);
4447 } else {
4448 /* The start of the last cluster in the first extent */
4449 u64 last_blk = bucket_blkno(first) +
4450 ((prev_clusters - 1) *
4451 ocfs2_clusters_to_blocks(inode->i_sb, 1));
4453 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4454 ret = ocfs2_mv_xattr_buckets(inode, handle,
4455 bucket_blkno(first),
4456 last_blk, new_blk, 0,
4457 v_start);
4458 if (ret)
4459 mlog_errno(ret);
4460 } else {
4461 ret = ocfs2_divide_xattr_cluster(inode, handle,
4462 last_blk, new_blk,
4463 v_start);
4464 if (ret)
4465 mlog_errno(ret);
4467 if ((bucket_blkno(target) == last_blk) && extend)
4468 *extend = 0;
4472 return ret;
4476 * Add a new cluster for xattr storage.
4478 * If the new cluster is contiguous with the previous one, it will be
4479 * appended to the same extent record, and num_clusters will be updated.
4480 * If not, we will insert a new extent for it and move some xattrs in
4481 * the last cluster into the new allocated one.
4482 * We also need to limit the maximum size of a btree leaf, otherwise we'll
4483 * lose the benefits of hashing because we'll have to search large leaves.
4484 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4485 * if it's bigger).
4487 * first_bh is the first block of the previous extent rec and header_bh
4488 * indicates the bucket we will insert the new xattrs. They will be updated
4489 * when the header_bh is moved into the new cluster.
4491 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4492 struct buffer_head *root_bh,
4493 struct ocfs2_xattr_bucket *first,
4494 struct ocfs2_xattr_bucket *target,
4495 u32 *num_clusters,
4496 u32 prev_cpos,
4497 int *extend,
4498 struct ocfs2_xattr_set_ctxt *ctxt)
4500 int ret;
4501 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4502 u32 prev_clusters = *num_clusters;
4503 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4504 u64 block;
4505 handle_t *handle = ctxt->handle;
4506 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4507 struct ocfs2_extent_tree et;
4509 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4510 "previous xattr blkno = %llu\n",
4511 (unsigned long long)OCFS2_I(inode)->ip_blkno,
4512 prev_cpos, (unsigned long long)bucket_blkno(first));
4514 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4516 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4517 OCFS2_JOURNAL_ACCESS_WRITE);
4518 if (ret < 0) {
4519 mlog_errno(ret);
4520 goto leave;
4523 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4524 clusters_to_add, &bit_off, &num_bits);
4525 if (ret < 0) {
4526 if (ret != -ENOSPC)
4527 mlog_errno(ret);
4528 goto leave;
4531 BUG_ON(num_bits > clusters_to_add);
4533 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4534 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4535 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4537 if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
4538 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4539 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4541 * If this cluster is contiguous with the old one and
4542 * adding this new cluster, we don't surpass the limit of
4543 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4544 * initialized and used like other buckets in the previous
4545 * cluster.
4546 * So add it as a contiguous one. The caller will handle
4547 * its init process.
4549 v_start = prev_cpos + prev_clusters;
4550 *num_clusters = prev_clusters + num_bits;
4551 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4552 num_bits);
4553 } else {
4554 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4555 handle,
4556 first,
4557 target,
4558 block,
4559 prev_clusters,
4560 &v_start,
4561 extend);
4562 if (ret) {
4563 mlog_errno(ret);
4564 goto leave;
4568 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4569 num_bits, (unsigned long long)block, v_start);
4570 ret = ocfs2_insert_extent(handle, &et, v_start, block,
4571 num_bits, 0, ctxt->meta_ac);
4572 if (ret < 0) {
4573 mlog_errno(ret);
4574 goto leave;
4577 ret = ocfs2_journal_dirty(handle, root_bh);
4578 if (ret < 0)
4579 mlog_errno(ret);
4581 leave:
4582 return ret;
4586 * We are given an extent. 'first' is the bucket at the very front of
4587 * the extent. The extent has space for an additional bucket past
4588 * bucket_xh(first)->xh_num_buckets. 'target_blkno' is the block number
4589 * of the target bucket. We wish to shift every bucket past the target
4590 * down one, filling in that additional space. When we get back to the
4591 * target, we split the target between itself and the now-empty bucket
4592 * at target+1 (aka, target_blkno + blks_per_bucket).
4594 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4595 handle_t *handle,
4596 struct ocfs2_xattr_bucket *first,
4597 u64 target_blk,
4598 u32 num_clusters)
4600 int ret, credits;
4601 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4602 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4603 u64 end_blk;
4604 u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4606 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4607 "from %llu, len = %u\n", (unsigned long long)target_blk,
4608 (unsigned long long)bucket_blkno(first), num_clusters);
4610 /* The extent must have room for an additional bucket */
4611 BUG_ON(new_bucket >=
4612 (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4614 /* end_blk points to the last existing bucket */
4615 end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4618 * end_blk is the start of the last existing bucket.
4619 * Thus, (end_blk - target_blk) covers the target bucket and
4620 * every bucket after it up to, but not including, the last
4621 * existing bucket. Then we add the last existing bucket, the
4622 * new bucket, and the first bucket (3 * blk_per_bucket).
4624 credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4625 handle->h_buffer_credits;
4626 ret = ocfs2_extend_trans(handle, credits);
4627 if (ret) {
4628 mlog_errno(ret);
4629 goto out;
4632 ret = ocfs2_xattr_bucket_journal_access(handle, first,
4633 OCFS2_JOURNAL_ACCESS_WRITE);
4634 if (ret) {
4635 mlog_errno(ret);
4636 goto out;
4639 while (end_blk != target_blk) {
4640 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4641 end_blk + blk_per_bucket, 0);
4642 if (ret)
4643 goto out;
4644 end_blk -= blk_per_bucket;
4647 /* Move half of the xattr in target_blkno to the next bucket. */
4648 ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4649 target_blk + blk_per_bucket, NULL, 0);
4651 le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4652 ocfs2_xattr_bucket_journal_dirty(handle, first);
4654 out:
4655 return ret;
4659 * Add new xattr bucket in an extent record and adjust the buckets
4660 * accordingly. xb_bh is the ocfs2_xattr_block, and target is the
4661 * bucket we want to insert into.
4663 * In the easy case, we will move all the buckets after target down by
4664 * one. Half of target's xattrs will be moved to the next bucket.
4666 * If current cluster is full, we'll allocate a new one. This may not
4667 * be contiguous. The underlying calls will make sure that there is
4668 * space for the insert, shifting buckets around if necessary.
4669 * 'target' may be moved by those calls.
4671 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4672 struct buffer_head *xb_bh,
4673 struct ocfs2_xattr_bucket *target,
4674 struct ocfs2_xattr_set_ctxt *ctxt)
4676 struct ocfs2_xattr_block *xb =
4677 (struct ocfs2_xattr_block *)xb_bh->b_data;
4678 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4679 struct ocfs2_extent_list *el = &xb_root->xt_list;
4680 u32 name_hash =
4681 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
4682 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4683 int ret, num_buckets, extend = 1;
4684 u64 p_blkno;
4685 u32 e_cpos, num_clusters;
4686 /* The bucket at the front of the extent */
4687 struct ocfs2_xattr_bucket *first;
4689 mlog(0, "Add new xattr bucket starting from %llu\n",
4690 (unsigned long long)bucket_blkno(target));
4692 /* The first bucket of the original extent */
4693 first = ocfs2_xattr_bucket_new(inode);
4694 if (!first) {
4695 ret = -ENOMEM;
4696 mlog_errno(ret);
4697 goto out;
4700 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4701 &num_clusters, el);
4702 if (ret) {
4703 mlog_errno(ret);
4704 goto out;
4707 ret = ocfs2_read_xattr_bucket(first, p_blkno);
4708 if (ret) {
4709 mlog_errno(ret);
4710 goto out;
4713 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4714 if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
4716 * This can move first+target if the target bucket moves
4717 * to the new extent.
4719 ret = ocfs2_add_new_xattr_cluster(inode,
4720 xb_bh,
4721 first,
4722 target,
4723 &num_clusters,
4724 e_cpos,
4725 &extend,
4726 ctxt);
4727 if (ret) {
4728 mlog_errno(ret);
4729 goto out;
4733 if (extend) {
4734 ret = ocfs2_extend_xattr_bucket(inode,
4735 ctxt->handle,
4736 first,
4737 bucket_blkno(target),
4738 num_clusters);
4739 if (ret)
4740 mlog_errno(ret);
4743 out:
4744 ocfs2_xattr_bucket_free(first);
4746 return ret;
4749 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4750 struct ocfs2_xattr_bucket *bucket,
4751 int offs)
4753 int block_off = offs >> inode->i_sb->s_blocksize_bits;
4755 offs = offs % inode->i_sb->s_blocksize;
4756 return bucket_block(bucket, block_off) + offs;
4760 * Handle the normal xattr set, including replace, delete and new.
4762 * Note: "local" indicates the real data's locality. So we can't
4763 * just its bucket locality by its length.
4765 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4766 struct ocfs2_xattr_info *xi,
4767 struct ocfs2_xattr_search *xs,
4768 u32 name_hash,
4769 int local)
4771 struct ocfs2_xattr_entry *last, *xe;
4772 int name_len = strlen(xi->name);
4773 struct ocfs2_xattr_header *xh = xs->header;
4774 u16 count = le16_to_cpu(xh->xh_count), start;
4775 size_t blocksize = inode->i_sb->s_blocksize;
4776 char *val;
4777 size_t offs, size, new_size;
4779 last = &xh->xh_entries[count];
4780 if (!xs->not_found) {
4781 xe = xs->here;
4782 offs = le16_to_cpu(xe->xe_name_offset);
4783 if (ocfs2_xattr_is_local(xe))
4784 size = OCFS2_XATTR_SIZE(name_len) +
4785 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4786 else
4787 size = OCFS2_XATTR_SIZE(name_len) +
4788 OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4791 * If the new value will be stored outside, xi->value has been
4792 * initalized as an empty ocfs2_xattr_value_root, and the same
4793 * goes with xi->value_len, so we can set new_size safely here.
4794 * See ocfs2_xattr_set_in_bucket.
4796 new_size = OCFS2_XATTR_SIZE(name_len) +
4797 OCFS2_XATTR_SIZE(xi->value_len);
4799 le16_add_cpu(&xh->xh_name_value_len, -size);
4800 if (xi->value) {
4801 if (new_size > size)
4802 goto set_new_name_value;
4804 /* Now replace the old value with new one. */
4805 if (local)
4806 xe->xe_value_size = cpu_to_le64(xi->value_len);
4807 else
4808 xe->xe_value_size = 0;
4810 val = ocfs2_xattr_bucket_get_val(inode,
4811 xs->bucket, offs);
4812 memset(val + OCFS2_XATTR_SIZE(name_len), 0,
4813 size - OCFS2_XATTR_SIZE(name_len));
4814 if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
4815 memcpy(val + OCFS2_XATTR_SIZE(name_len),
4816 xi->value, xi->value_len);
4818 le16_add_cpu(&xh->xh_name_value_len, new_size);
4819 ocfs2_xattr_set_local(xe, local);
4820 return;
4821 } else {
4823 * Remove the old entry if there is more than one.
4824 * We don't remove the last entry so that we can
4825 * use it to indicate the hash value of the empty
4826 * bucket.
4828 last -= 1;
4829 le16_add_cpu(&xh->xh_count, -1);
4830 if (xh->xh_count) {
4831 memmove(xe, xe + 1,
4832 (void *)last - (void *)xe);
4833 memset(last, 0,
4834 sizeof(struct ocfs2_xattr_entry));
4835 } else
4836 xh->xh_free_start =
4837 cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4839 return;
4841 } else {
4842 /* find a new entry for insert. */
4843 int low = 0, high = count - 1, tmp;
4844 struct ocfs2_xattr_entry *tmp_xe;
4846 while (low <= high && count) {
4847 tmp = (low + high) / 2;
4848 tmp_xe = &xh->xh_entries[tmp];
4850 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
4851 low = tmp + 1;
4852 else if (name_hash <
4853 le32_to_cpu(tmp_xe->xe_name_hash))
4854 high = tmp - 1;
4855 else {
4856 low = tmp;
4857 break;
4861 xe = &xh->xh_entries[low];
4862 if (low != count)
4863 memmove(xe + 1, xe, (void *)last - (void *)xe);
4865 le16_add_cpu(&xh->xh_count, 1);
4866 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4867 xe->xe_name_hash = cpu_to_le32(name_hash);
4868 xe->xe_name_len = name_len;
4869 ocfs2_xattr_set_type(xe, xi->name_index);
4872 set_new_name_value:
4873 /* Insert the new name+value. */
4874 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4877 * We must make sure that the name/value pair
4878 * exists in the same block.
4880 offs = le16_to_cpu(xh->xh_free_start);
4881 start = offs - size;
4883 if (start >> inode->i_sb->s_blocksize_bits !=
4884 (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4885 offs = offs - offs % blocksize;
4886 xh->xh_free_start = cpu_to_le16(offs);
4889 val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
4890 xe->xe_name_offset = cpu_to_le16(offs - size);
4892 memset(val, 0, size);
4893 memcpy(val, xi->name, name_len);
4894 memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4896 xe->xe_value_size = cpu_to_le64(xi->value_len);
4897 ocfs2_xattr_set_local(xe, local);
4898 xs->here = xe;
4899 le16_add_cpu(&xh->xh_free_start, -size);
4900 le16_add_cpu(&xh->xh_name_value_len, size);
4902 return;
4906 * Set the xattr entry in the specified bucket.
4907 * The bucket is indicated by xs->bucket and it should have the enough
4908 * space for the xattr insertion.
4910 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4911 handle_t *handle,
4912 struct ocfs2_xattr_info *xi,
4913 struct ocfs2_xattr_search *xs,
4914 u32 name_hash,
4915 int local)
4917 int ret;
4918 u64 blkno;
4920 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4921 (unsigned long)xi->value_len, xi->name_index,
4922 (unsigned long long)bucket_blkno(xs->bucket));
4924 if (!xs->bucket->bu_bhs[1]) {
4925 blkno = bucket_blkno(xs->bucket);
4926 ocfs2_xattr_bucket_relse(xs->bucket);
4927 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
4928 if (ret) {
4929 mlog_errno(ret);
4930 goto out;
4934 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4935 OCFS2_JOURNAL_ACCESS_WRITE);
4936 if (ret < 0) {
4937 mlog_errno(ret);
4938 goto out;
4941 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4942 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4944 out:
4945 return ret;
4949 * Truncate the specified xe_off entry in xattr bucket.
4950 * bucket is indicated by header_bh and len is the new length.
4951 * Both the ocfs2_xattr_value_root and the entry will be updated here.
4953 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4955 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4956 struct ocfs2_xattr_bucket *bucket,
4957 int xe_off,
4958 int len,
4959 struct ocfs2_xattr_set_ctxt *ctxt)
4961 int ret, offset;
4962 u64 value_blk;
4963 struct ocfs2_xattr_entry *xe;
4964 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4965 size_t blocksize = inode->i_sb->s_blocksize;
4966 struct ocfs2_xattr_value_buf vb = {
4967 .vb_access = ocfs2_journal_access,
4970 xe = &xh->xh_entries[xe_off];
4972 BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4974 offset = le16_to_cpu(xe->xe_name_offset) +
4975 OCFS2_XATTR_SIZE(xe->xe_name_len);
4977 value_blk = offset / blocksize;
4979 /* We don't allow ocfs2_xattr_value to be stored in different block. */
4980 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4982 vb.vb_bh = bucket->bu_bhs[value_blk];
4983 BUG_ON(!vb.vb_bh);
4985 vb.vb_xv = (struct ocfs2_xattr_value_root *)
4986 (vb.vb_bh->b_data + offset % blocksize);
4989 * From here on out we have to dirty the bucket. The generic
4990 * value calls only modify one of the bucket's bhs, but we need
4991 * to send the bucket at once. So if they error, they *could* have
4992 * modified something. We have to assume they did, and dirty
4993 * the whole bucket. This leaves us in a consistent state.
4995 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4996 xe_off, (unsigned long long)bucket_blkno(bucket), len);
4997 ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
4998 if (ret) {
4999 mlog_errno(ret);
5000 goto out;
5003 ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5004 OCFS2_JOURNAL_ACCESS_WRITE);
5005 if (ret) {
5006 mlog_errno(ret);
5007 goto out;
5010 xe->xe_value_size = cpu_to_le64(len);
5012 ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5014 out:
5015 return ret;
5018 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
5019 struct ocfs2_xattr_search *xs,
5020 int len,
5021 struct ocfs2_xattr_set_ctxt *ctxt)
5023 int ret, offset;
5024 struct ocfs2_xattr_entry *xe = xs->here;
5025 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
5027 BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
5029 offset = xe - xh->xh_entries;
5030 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
5031 offset, len, ctxt);
5032 if (ret)
5033 mlog_errno(ret);
5035 return ret;
5038 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
5039 handle_t *handle,
5040 struct ocfs2_xattr_search *xs,
5041 char *val,
5042 int value_len)
5044 int ret, offset, block_off;
5045 struct ocfs2_xattr_value_root *xv;
5046 struct ocfs2_xattr_entry *xe = xs->here;
5047 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5048 void *base;
5049 struct ocfs2_xattr_value_buf vb = {
5050 .vb_access = ocfs2_journal_access,
5053 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
5055 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
5056 xe - xh->xh_entries,
5057 &block_off,
5058 &offset);
5059 if (ret) {
5060 mlog_errno(ret);
5061 goto out;
5064 base = bucket_block(xs->bucket, block_off);
5065 xv = (struct ocfs2_xattr_value_root *)(base + offset +
5066 OCFS2_XATTR_SIZE(xe->xe_name_len));
5068 vb.vb_xv = xv;
5069 vb.vb_bh = xs->bucket->bu_bhs[block_off];
5070 ret = __ocfs2_xattr_set_value_outside(inode, handle,
5071 &vb, val, value_len);
5072 if (ret)
5073 mlog_errno(ret);
5074 out:
5075 return ret;
5078 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5079 struct buffer_head *root_bh,
5080 u64 blkno,
5081 u32 cpos,
5082 u32 len,
5083 void *para)
5085 int ret;
5086 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5087 struct inode *tl_inode = osb->osb_tl_inode;
5088 handle_t *handle;
5089 struct ocfs2_xattr_block *xb =
5090 (struct ocfs2_xattr_block *)root_bh->b_data;
5091 struct ocfs2_alloc_context *meta_ac = NULL;
5092 struct ocfs2_cached_dealloc_ctxt dealloc;
5093 struct ocfs2_extent_tree et;
5095 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5096 ocfs2_delete_xattr_in_bucket, para);
5097 if (ret) {
5098 mlog_errno(ret);
5099 return ret;
5102 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5104 ocfs2_init_dealloc_ctxt(&dealloc);
5106 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5107 cpos, len, (unsigned long long)blkno);
5109 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5110 len);
5112 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5113 if (ret) {
5114 mlog_errno(ret);
5115 return ret;
5118 mutex_lock(&tl_inode->i_mutex);
5120 if (ocfs2_truncate_log_needs_flush(osb)) {
5121 ret = __ocfs2_flush_truncate_log(osb);
5122 if (ret < 0) {
5123 mlog_errno(ret);
5124 goto out;
5128 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5129 if (IS_ERR(handle)) {
5130 ret = -ENOMEM;
5131 mlog_errno(ret);
5132 goto out;
5135 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5136 OCFS2_JOURNAL_ACCESS_WRITE);
5137 if (ret) {
5138 mlog_errno(ret);
5139 goto out_commit;
5142 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5143 &dealloc);
5144 if (ret) {
5145 mlog_errno(ret);
5146 goto out_commit;
5149 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5151 ret = ocfs2_journal_dirty(handle, root_bh);
5152 if (ret) {
5153 mlog_errno(ret);
5154 goto out_commit;
5157 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5158 if (ret)
5159 mlog_errno(ret);
5161 out_commit:
5162 ocfs2_commit_trans(osb, handle);
5163 out:
5164 ocfs2_schedule_truncate_log_flush(osb, 1);
5166 mutex_unlock(&tl_inode->i_mutex);
5168 if (meta_ac)
5169 ocfs2_free_alloc_context(meta_ac);
5171 ocfs2_run_deallocs(osb, &dealloc);
5173 return ret;
5176 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
5177 handle_t *handle,
5178 struct ocfs2_xattr_search *xs)
5180 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5181 struct ocfs2_xattr_entry *last = &xh->xh_entries[
5182 le16_to_cpu(xh->xh_count) - 1];
5183 int ret = 0;
5185 ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5186 OCFS2_JOURNAL_ACCESS_WRITE);
5187 if (ret) {
5188 mlog_errno(ret);
5189 return;
5192 /* Remove the old entry. */
5193 memmove(xs->here, xs->here + 1,
5194 (void *)last - (void *)xs->here);
5195 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
5196 le16_add_cpu(&xh->xh_count, -1);
5198 ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5202 * Set the xattr name/value in the bucket specified in xs.
5204 * As the new value in xi may be stored in the bucket or in an outside cluster,
5205 * we divide the whole process into 3 steps:
5206 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
5207 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
5208 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
5209 * 4. If the clusters for the new outside value can't be allocated, we need
5210 * to free the xattr we allocated in set.
5212 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5213 struct ocfs2_xattr_info *xi,
5214 struct ocfs2_xattr_search *xs,
5215 struct ocfs2_xattr_set_ctxt *ctxt)
5217 int ret, local = 1;
5218 size_t value_len;
5219 char *val = (char *)xi->value;
5220 struct ocfs2_xattr_entry *xe = xs->here;
5221 u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
5222 strlen(xi->name));
5224 if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5226 * We need to truncate the xattr storage first.
5228 * If both the old and new value are stored to
5229 * outside block, we only need to truncate
5230 * the storage and then set the value outside.
5232 * If the new value should be stored within block,
5233 * we should free all the outside block first and
5234 * the modification to the xattr block will be done
5235 * by following steps.
5237 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5238 value_len = xi->value_len;
5239 else
5240 value_len = 0;
5242 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5243 value_len,
5244 ctxt);
5245 if (ret)
5246 goto out;
5248 if (value_len)
5249 goto set_value_outside;
5252 value_len = xi->value_len;
5253 /* So we have to handle the inside block change now. */
5254 if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5256 * If the new value will be stored outside of block,
5257 * initalize a new empty value root and insert it first.
5259 local = 0;
5260 xi->value = &def_xv;
5261 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
5264 ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5265 name_hash, local);
5266 if (ret) {
5267 mlog_errno(ret);
5268 goto out;
5271 if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5272 goto out;
5274 /* allocate the space now for the outside block storage. */
5275 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5276 value_len, ctxt);
5277 if (ret) {
5278 mlog_errno(ret);
5280 if (xs->not_found) {
5282 * We can't allocate enough clusters for outside
5283 * storage and we have allocated xattr already,
5284 * so need to remove it.
5286 ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5288 goto out;
5291 set_value_outside:
5292 ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5293 xs, val, value_len);
5294 out:
5295 return ret;
5299 * check whether the xattr bucket is filled up with the same hash value.
5300 * If we want to insert the xattr with the same hash, return -ENOSPC.
5301 * If we want to insert a xattr with different hash value, go ahead
5302 * and ocfs2_divide_xattr_bucket will handle this.
5304 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5305 struct ocfs2_xattr_bucket *bucket,
5306 const char *name)
5308 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5309 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5311 if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5312 return 0;
5314 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5315 xh->xh_entries[0].xe_name_hash) {
5316 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5317 "hash = %u\n",
5318 (unsigned long long)bucket_blkno(bucket),
5319 le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5320 return -ENOSPC;
5323 return 0;
5326 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5327 struct ocfs2_xattr_info *xi,
5328 struct ocfs2_xattr_search *xs,
5329 struct ocfs2_xattr_set_ctxt *ctxt)
5331 struct ocfs2_xattr_header *xh;
5332 struct ocfs2_xattr_entry *xe;
5333 u16 count, header_size, xh_free_start;
5334 int free, max_free, need, old;
5335 size_t value_size = 0, name_len = strlen(xi->name);
5336 size_t blocksize = inode->i_sb->s_blocksize;
5337 int ret, allocation = 0;
5339 mlog_entry("Set xattr %s in xattr index block\n", xi->name);
5341 try_again:
5342 xh = xs->header;
5343 count = le16_to_cpu(xh->xh_count);
5344 xh_free_start = le16_to_cpu(xh->xh_free_start);
5345 header_size = sizeof(struct ocfs2_xattr_header) +
5346 count * sizeof(struct ocfs2_xattr_entry);
5347 max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5348 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5350 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5351 "of %u which exceed block size\n",
5352 (unsigned long long)bucket_blkno(xs->bucket),
5353 header_size);
5355 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
5356 value_size = OCFS2_XATTR_ROOT_SIZE;
5357 else if (xi->value)
5358 value_size = OCFS2_XATTR_SIZE(xi->value_len);
5360 if (xs->not_found)
5361 need = sizeof(struct ocfs2_xattr_entry) +
5362 OCFS2_XATTR_SIZE(name_len) + value_size;
5363 else {
5364 need = value_size + OCFS2_XATTR_SIZE(name_len);
5367 * We only replace the old value if the new length is smaller
5368 * than the old one. Otherwise we will allocate new space in the
5369 * bucket to store it.
5371 xe = xs->here;
5372 if (ocfs2_xattr_is_local(xe))
5373 old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5374 else
5375 old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5377 if (old >= value_size)
5378 need = 0;
5381 free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5383 * We need to make sure the new name/value pair
5384 * can exist in the same block.
5386 if (xh_free_start % blocksize < need)
5387 free -= xh_free_start % blocksize;
5389 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5390 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5391 " %u\n", xs->not_found,
5392 (unsigned long long)bucket_blkno(xs->bucket),
5393 free, need, max_free, le16_to_cpu(xh->xh_free_start),
5394 le16_to_cpu(xh->xh_name_value_len));
5396 if (free < need ||
5397 (xs->not_found &&
5398 count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5399 if (need <= max_free &&
5400 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5402 * We can create the space by defragment. Since only the
5403 * name/value will be moved, the xe shouldn't be changed
5404 * in xs.
5406 ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5407 xs->bucket);
5408 if (ret) {
5409 mlog_errno(ret);
5410 goto out;
5413 xh_free_start = le16_to_cpu(xh->xh_free_start);
5414 free = xh_free_start - header_size
5415 - OCFS2_XATTR_HEADER_GAP;
5416 if (xh_free_start % blocksize < need)
5417 free -= xh_free_start % blocksize;
5419 if (free >= need)
5420 goto xattr_set;
5422 mlog(0, "Can't get enough space for xattr insert by "
5423 "defragment. Need %u bytes, but we have %d, so "
5424 "allocate new bucket for it.\n", need, free);
5428 * We have to add new buckets or clusters and one
5429 * allocation should leave us enough space for insert.
5431 BUG_ON(allocation);
5434 * We do not allow for overlapping ranges between buckets. And
5435 * the maximum number of collisions we will allow for then is
5436 * one bucket's worth, so check it here whether we need to
5437 * add a new bucket for the insert.
5439 ret = ocfs2_check_xattr_bucket_collision(inode,
5440 xs->bucket,
5441 xi->name);
5442 if (ret) {
5443 mlog_errno(ret);
5444 goto out;
5447 ret = ocfs2_add_new_xattr_bucket(inode,
5448 xs->xattr_bh,
5449 xs->bucket,
5450 ctxt);
5451 if (ret) {
5452 mlog_errno(ret);
5453 goto out;
5457 * ocfs2_add_new_xattr_bucket() will have updated
5458 * xs->bucket if it moved, but it will not have updated
5459 * any of the other search fields. Thus, we drop it and
5460 * re-search. Everything should be cached, so it'll be
5461 * quick.
5463 ocfs2_xattr_bucket_relse(xs->bucket);
5464 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5465 xi->name_index,
5466 xi->name, xs);
5467 if (ret && ret != -ENODATA)
5468 goto out;
5469 xs->not_found = ret;
5470 allocation = 1;
5471 goto try_again;
5474 xattr_set:
5475 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5476 out:
5477 mlog_exit(ret);
5478 return ret;
5481 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5482 struct ocfs2_xattr_bucket *bucket,
5483 void *para)
5485 int ret = 0, ref_credits;
5486 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5487 u16 i;
5488 struct ocfs2_xattr_entry *xe;
5489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5490 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5491 int credits = ocfs2_remove_extent_credits(osb->sb) +
5492 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5493 struct ocfs2_xattr_value_root *xv;
5494 struct ocfs2_rm_xattr_bucket_para *args =
5495 (struct ocfs2_rm_xattr_bucket_para *)para;
5497 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5499 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5500 xe = &xh->xh_entries[i];
5501 if (ocfs2_xattr_is_local(xe))
5502 continue;
5504 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5505 i, &xv, NULL);
5507 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5508 args->ref_ci,
5509 args->ref_root_bh,
5510 &ctxt.meta_ac,
5511 &ref_credits);
5513 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5514 if (IS_ERR(ctxt.handle)) {
5515 ret = PTR_ERR(ctxt.handle);
5516 mlog_errno(ret);
5517 break;
5520 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5521 i, 0, &ctxt);
5523 ocfs2_commit_trans(osb, ctxt.handle);
5524 if (ctxt.meta_ac) {
5525 ocfs2_free_alloc_context(ctxt.meta_ac);
5526 ctxt.meta_ac = NULL;
5528 if (ret) {
5529 mlog_errno(ret);
5530 break;
5534 if (ctxt.meta_ac)
5535 ocfs2_free_alloc_context(ctxt.meta_ac);
5536 ocfs2_schedule_truncate_log_flush(osb, 1);
5537 ocfs2_run_deallocs(osb, &ctxt.dealloc);
5538 return ret;
5542 * Whenever we modify a xattr value root in the bucket(e.g, CoW
5543 * or change the extent record flag), we need to recalculate
5544 * the metaecc for the whole bucket. So it is done here.
5546 * Note:
5547 * We have to give the extra credits for the caller.
5549 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5550 handle_t *handle,
5551 void *para)
5553 int ret;
5554 struct ocfs2_xattr_bucket *bucket =
5555 (struct ocfs2_xattr_bucket *)para;
5557 ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5558 OCFS2_JOURNAL_ACCESS_WRITE);
5559 if (ret) {
5560 mlog_errno(ret);
5561 return ret;
5564 ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5566 return 0;
5570 * Special action we need if the xattr value is refcounted.
5572 * 1. If the xattr is refcounted, lock the tree.
5573 * 2. CoW the xattr if we are setting the new value and the value
5574 * will be stored outside.
5575 * 3. In other case, decrease_refcount will work for us, so just
5576 * lock the refcount tree, calculate the meta and credits is OK.
5578 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5579 * currently CoW is a completed transaction, while this function
5580 * will also lock the allocators and let us deadlock. So we will
5581 * CoW the whole xattr value.
5583 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5584 struct ocfs2_dinode *di,
5585 struct ocfs2_xattr_info *xi,
5586 struct ocfs2_xattr_search *xis,
5587 struct ocfs2_xattr_search *xbs,
5588 struct ocfs2_refcount_tree **ref_tree,
5589 int *meta_add,
5590 int *credits)
5592 int ret = 0;
5593 struct ocfs2_xattr_block *xb;
5594 struct ocfs2_xattr_entry *xe;
5595 char *base;
5596 u32 p_cluster, num_clusters;
5597 unsigned int ext_flags;
5598 int name_offset, name_len;
5599 struct ocfs2_xattr_value_buf vb;
5600 struct ocfs2_xattr_bucket *bucket = NULL;
5601 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5602 struct ocfs2_post_refcount refcount;
5603 struct ocfs2_post_refcount *p = NULL;
5604 struct buffer_head *ref_root_bh = NULL;
5606 if (!xis->not_found) {
5607 xe = xis->here;
5608 name_offset = le16_to_cpu(xe->xe_name_offset);
5609 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5610 base = xis->base;
5611 vb.vb_bh = xis->inode_bh;
5612 vb.vb_access = ocfs2_journal_access_di;
5613 } else {
5614 int i, block_off = 0;
5615 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5616 xe = xbs->here;
5617 name_offset = le16_to_cpu(xe->xe_name_offset);
5618 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5619 i = xbs->here - xbs->header->xh_entries;
5621 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5622 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5623 bucket_xh(xbs->bucket),
5624 i, &block_off,
5625 &name_offset);
5626 if (ret) {
5627 mlog_errno(ret);
5628 goto out;
5630 base = bucket_block(xbs->bucket, block_off);
5631 vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5632 vb.vb_access = ocfs2_journal_access;
5634 if (ocfs2_meta_ecc(osb)) {
5635 /*create parameters for ocfs2_post_refcount. */
5636 bucket = xbs->bucket;
5637 refcount.credits = bucket->bu_blocks;
5638 refcount.para = bucket;
5639 refcount.func =
5640 ocfs2_xattr_bucket_post_refcount;
5641 p = &refcount;
5643 } else {
5644 base = xbs->base;
5645 vb.vb_bh = xbs->xattr_bh;
5646 vb.vb_access = ocfs2_journal_access_xb;
5650 if (ocfs2_xattr_is_local(xe))
5651 goto out;
5653 vb.vb_xv = (struct ocfs2_xattr_value_root *)
5654 (base + name_offset + name_len);
5656 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5657 &num_clusters, &vb.vb_xv->xr_list,
5658 &ext_flags);
5659 if (ret) {
5660 mlog_errno(ret);
5661 goto out;
5665 * We just need to check the 1st extent record, since we always
5666 * CoW the whole xattr. So there shouldn't be a xattr with
5667 * some REFCOUNT extent recs after the 1st one.
5669 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5670 goto out;
5672 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5673 1, ref_tree, &ref_root_bh);
5674 if (ret) {
5675 mlog_errno(ret);
5676 goto out;
5680 * If we are deleting the xattr or the new size will be stored inside,
5681 * cool, leave it there, the xattr truncate process will remove them
5682 * for us(it still needs the refcount tree lock and the meta, credits).
5683 * And the worse case is that every cluster truncate will split the
5684 * refcount tree, and make the original extent become 3. So we will need
5685 * 2 * cluster more extent recs at most.
5687 if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
5689 ret = ocfs2_refcounted_xattr_delete_need(inode,
5690 &(*ref_tree)->rf_ci,
5691 ref_root_bh, vb.vb_xv,
5692 meta_add, credits);
5693 if (ret)
5694 mlog_errno(ret);
5695 goto out;
5698 ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5699 *ref_tree, ref_root_bh, 0,
5700 le32_to_cpu(vb.vb_xv->xr_clusters), p);
5701 if (ret)
5702 mlog_errno(ret);
5704 out:
5705 brelse(ref_root_bh);
5706 return ret;
5710 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5711 * The physical clusters will be added to refcount tree.
5713 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5714 struct ocfs2_xattr_value_root *xv,
5715 struct ocfs2_extent_tree *value_et,
5716 struct ocfs2_caching_info *ref_ci,
5717 struct buffer_head *ref_root_bh,
5718 struct ocfs2_cached_dealloc_ctxt *dealloc,
5719 struct ocfs2_post_refcount *refcount)
5721 int ret = 0;
5722 u32 clusters = le32_to_cpu(xv->xr_clusters);
5723 u32 cpos, p_cluster, num_clusters;
5724 struct ocfs2_extent_list *el = &xv->xr_list;
5725 unsigned int ext_flags;
5727 cpos = 0;
5728 while (cpos < clusters) {
5729 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5730 &num_clusters, el, &ext_flags);
5732 cpos += num_clusters;
5733 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5734 continue;
5736 BUG_ON(!p_cluster);
5738 ret = ocfs2_add_refcount_flag(inode, value_et,
5739 ref_ci, ref_root_bh,
5740 cpos - num_clusters,
5741 p_cluster, num_clusters,
5742 dealloc, refcount);
5743 if (ret) {
5744 mlog_errno(ret);
5745 break;
5749 return ret;
5753 * Given a normal ocfs2_xattr_header, refcount all the entries which
5754 * have value stored outside.
5755 * Used for xattrs stored in inode and ocfs2_xattr_block.
5757 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5758 struct ocfs2_xattr_value_buf *vb,
5759 struct ocfs2_xattr_header *header,
5760 struct ocfs2_caching_info *ref_ci,
5761 struct buffer_head *ref_root_bh,
5762 struct ocfs2_cached_dealloc_ctxt *dealloc)
5765 struct ocfs2_xattr_entry *xe;
5766 struct ocfs2_xattr_value_root *xv;
5767 struct ocfs2_extent_tree et;
5768 int i, ret = 0;
5770 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5771 xe = &header->xh_entries[i];
5773 if (ocfs2_xattr_is_local(xe))
5774 continue;
5776 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5777 le16_to_cpu(xe->xe_name_offset) +
5778 OCFS2_XATTR_SIZE(xe->xe_name_len));
5780 vb->vb_xv = xv;
5781 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5783 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5784 ref_ci, ref_root_bh,
5785 dealloc, NULL);
5786 if (ret) {
5787 mlog_errno(ret);
5788 break;
5792 return ret;
5795 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5796 struct buffer_head *fe_bh,
5797 struct ocfs2_caching_info *ref_ci,
5798 struct buffer_head *ref_root_bh,
5799 struct ocfs2_cached_dealloc_ctxt *dealloc)
5801 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5802 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5803 (fe_bh->b_data + inode->i_sb->s_blocksize -
5804 le16_to_cpu(di->i_xattr_inline_size));
5805 struct ocfs2_xattr_value_buf vb = {
5806 .vb_bh = fe_bh,
5807 .vb_access = ocfs2_journal_access_di,
5810 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5811 ref_ci, ref_root_bh, dealloc);
/*
 * Argument bundle handed, through the opaque 'para' pointer, to
 * ocfs2_xattr_bucket_value_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
	/* Caching info forwarded to the refcount helpers. */
	struct ocfs2_caching_info *ref_ci;
	/* Refcount tree root buffer forwarded to the refcount helpers. */
	struct buffer_head *ref_root_bh;
	/* Dealloc context forwarded to the refcount helpers. */
	struct ocfs2_cached_dealloc_ctxt *dealloc;
};
5820 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5821 struct ocfs2_xattr_bucket *bucket,
5822 int offset,
5823 struct ocfs2_xattr_value_root **xv,
5824 struct buffer_head **bh)
5826 int ret, block_off, name_offset;
5827 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5828 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5829 void *base;
5831 ret = ocfs2_xattr_bucket_get_name_value(sb,
5832 bucket_xh(bucket),
5833 offset,
5834 &block_off,
5835 &name_offset);
5836 if (ret) {
5837 mlog_errno(ret);
5838 goto out;
5841 base = bucket_block(bucket, block_off);
5843 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
5844 OCFS2_XATTR_SIZE(xe->xe_name_len));
5846 if (bh)
5847 *bh = bucket->bu_bhs[block_off];
5848 out:
5849 return ret;
5853 * For a given xattr bucket, refcount all the entries which
5854 * have value stored outside.
5856 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
5857 struct ocfs2_xattr_bucket *bucket,
5858 void *para)
5860 int i, ret = 0;
5861 struct ocfs2_extent_tree et;
5862 struct ocfs2_xattr_tree_value_refcount_para *ref =
5863 (struct ocfs2_xattr_tree_value_refcount_para *)para;
5864 struct ocfs2_xattr_header *xh =
5865 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
5866 struct ocfs2_xattr_entry *xe;
5867 struct ocfs2_xattr_value_buf vb = {
5868 .vb_access = ocfs2_journal_access,
5870 struct ocfs2_post_refcount refcount = {
5871 .credits = bucket->bu_blocks,
5872 .para = bucket,
5873 .func = ocfs2_xattr_bucket_post_refcount,
5875 struct ocfs2_post_refcount *p = NULL;
5877 /* We only need post_refcount if we support metaecc. */
5878 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
5879 p = &refcount;
5881 mlog(0, "refcount bucket %llu, count = %u\n",
5882 (unsigned long long)bucket_blkno(bucket),
5883 le16_to_cpu(xh->xh_count));
5884 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5885 xe = &xh->xh_entries[i];
5887 if (ocfs2_xattr_is_local(xe))
5888 continue;
5890 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
5891 &vb.vb_xv, &vb.vb_bh);
5892 if (ret) {
5893 mlog_errno(ret);
5894 break;
5897 ocfs2_init_xattr_value_extent_tree(&et,
5898 INODE_CACHE(inode), &vb);
5900 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
5901 &et, ref->ref_ci,
5902 ref->ref_root_bh,
5903 ref->dealloc, p);
5904 if (ret) {
5905 mlog_errno(ret);
5906 break;
5910 return ret;
5914 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
5915 struct buffer_head *root_bh,
5916 u64 blkno, u32 cpos, u32 len, void *para)
5918 return ocfs2_iterate_xattr_buckets(inode, blkno, len,
5919 ocfs2_xattr_bucket_value_refcount,
5920 para);
5923 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
5924 struct buffer_head *blk_bh,
5925 struct ocfs2_caching_info *ref_ci,
5926 struct buffer_head *ref_root_bh,
5927 struct ocfs2_cached_dealloc_ctxt *dealloc)
5929 int ret = 0;
5930 struct ocfs2_xattr_block *xb =
5931 (struct ocfs2_xattr_block *)blk_bh->b_data;
5933 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
5934 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
5935 struct ocfs2_xattr_value_buf vb = {
5936 .vb_bh = blk_bh,
5937 .vb_access = ocfs2_journal_access_xb,
5940 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5941 ref_ci, ref_root_bh,
5942 dealloc);
5943 } else {
5944 struct ocfs2_xattr_tree_value_refcount_para para = {
5945 .ref_ci = ref_ci,
5946 .ref_root_bh = ref_root_bh,
5947 .dealloc = dealloc,
5950 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
5951 ocfs2_refcount_xattr_tree_rec,
5952 &para);
5955 return ret;
5958 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
5959 struct buffer_head *fe_bh,
5960 struct ocfs2_caching_info *ref_ci,
5961 struct buffer_head *ref_root_bh,
5962 struct ocfs2_cached_dealloc_ctxt *dealloc)
5964 int ret = 0;
5965 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5966 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5967 struct buffer_head *blk_bh = NULL;
5969 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
5970 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
5971 ref_ci, ref_root_bh,
5972 dealloc);
5973 if (ret) {
5974 mlog_errno(ret);
5975 goto out;
5979 if (!di->i_xattr_loc)
5980 goto out;
5982 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
5983 &blk_bh);
5984 if (ret < 0) {
5985 mlog_errno(ret);
5986 goto out;
5989 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
5990 ref_root_bh, dealloc);
5991 if (ret)
5992 mlog_errno(ret);
5994 brelse(blk_bh);
5995 out:
5997 return ret;
/*
 * Predicate used during reflink: returns non-zero when the given entry
 * should be carried over to the new inode.
 */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);

/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are inode bh for the old and new inode.
 */
struct ocfs2_xattr_reflink {
	struct inode *old_inode;
	struct inode *new_inode;
	struct buffer_head *old_bh;
	struct buffer_head *new_bh;
	/* refcount tree caching info and root block */
	struct ocfs2_caching_info *ref_ci;
	struct buffer_head *ref_root_bh;
	struct ocfs2_cached_dealloc_ctxt *dealloc;
	/* optional filter; NULL means every xattr is reflinked */
	should_xattr_reflinked *xattr_reflinked;
};
/*
 * Given a xattr header and xe offset,
 * return the proper xv and the corresponding bh.
 * xattr in inode, block and xattr tree have different implementations.
 *
 * ret_bh may be NULL when the caller does not need the buffer head;
 * para is private data for the implementation.
 */
typedef int (get_xattr_value_root)(struct super_block *sb,
				   struct buffer_head *bh,
				   struct ocfs2_xattr_header *xh,
				   int offset,
				   struct ocfs2_xattr_value_root **xv,
				   struct buffer_head **ret_bh,
				   void *para);
6030 * Calculate all the xattr value root metadata stored in this xattr header and
6031 * credits we need if we create them from the scratch.
6032 * We use get_xattr_value_root so that all types of xattr container can use it.
6034 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6035 struct buffer_head *bh,
6036 struct ocfs2_xattr_header *xh,
6037 int *metas, int *credits,
6038 int *num_recs,
6039 get_xattr_value_root *func,
6040 void *para)
6042 int i, ret = 0;
6043 struct ocfs2_xattr_value_root *xv;
6044 struct ocfs2_xattr_entry *xe;
6046 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6047 xe = &xh->xh_entries[i];
6048 if (ocfs2_xattr_is_local(xe))
6049 continue;
6051 ret = func(sb, bh, xh, i, &xv, NULL, para);
6052 if (ret) {
6053 mlog_errno(ret);
6054 break;
6057 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6058 le16_to_cpu(xv->xr_list.l_next_free_rec);
6060 *credits += ocfs2_calc_extend_credits(sb,
6061 &def_xv.xv.xr_list,
6062 le32_to_cpu(xv->xr_clusters));
6065 * If the value is a tree with depth > 1, We don't go deep
6066 * to the extent block, so just calculate a maximum record num.
6068 if (!xv->xr_list.l_tree_depth)
6069 *num_recs += xv->xr_list.l_next_free_rec;
6070 else
6071 *num_recs += ocfs2_clusters_for_bytes(sb,
6072 XATTR_SIZE_MAX);
6075 return ret;
6078 /* Used by xattr inode and block to return the right xv and buffer_head. */
6079 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6080 struct buffer_head *bh,
6081 struct ocfs2_xattr_header *xh,
6082 int offset,
6083 struct ocfs2_xattr_value_root **xv,
6084 struct buffer_head **ret_bh,
6085 void *para)
6087 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6089 *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6090 le16_to_cpu(xe->xe_name_offset) +
6091 OCFS2_XATTR_SIZE(xe->xe_name_len));
6093 if (ret_bh)
6094 *ret_bh = bh;
6096 return 0;
6100 * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6101 * It is only used for inline xattr and xattr block.
6103 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6104 struct ocfs2_xattr_header *xh,
6105 struct buffer_head *ref_root_bh,
6106 int *credits,
6107 struct ocfs2_alloc_context **meta_ac)
6109 int ret, meta_add = 0, num_recs = 0;
6110 struct ocfs2_refcount_block *rb =
6111 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6113 *credits = 0;
6115 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6116 &meta_add, credits, &num_recs,
6117 ocfs2_get_xattr_value_root,
6118 NULL);
6119 if (ret) {
6120 mlog_errno(ret);
6121 goto out;
6125 * We need to add/modify num_recs in refcount tree, so just calculate
6126 * an approximate number we need for refcount tree change.
6127 * Sometimes we need to split the tree, and after split, half recs
6128 * will be moved to the new block, and a new block can only provide
6129 * half number of recs. So we multiple new blocks by 2.
6131 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6132 meta_add += num_recs;
6133 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6134 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6135 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6136 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6137 else
6138 *credits += 1;
6140 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6141 if (ret)
6142 mlog_errno(ret);
6144 out:
6145 return ret;
6149 * Given a xattr header, reflink all the xattrs in this container.
6150 * It can be used for inode, block and bucket.
6152 * NOTE:
6153 * Before we call this function, the caller has memcpy the xattr in
6154 * old_xh to the new_xh.
6156 * If args.xattr_reflinked is set, call it to decide whether the xe should
6157 * be reflinked or not. If not, remove it from the new xattr header.
6159 static int ocfs2_reflink_xattr_header(handle_t *handle,
6160 struct ocfs2_xattr_reflink *args,
6161 struct buffer_head *old_bh,
6162 struct ocfs2_xattr_header *xh,
6163 struct buffer_head *new_bh,
6164 struct ocfs2_xattr_header *new_xh,
6165 struct ocfs2_xattr_value_buf *vb,
6166 struct ocfs2_alloc_context *meta_ac,
6167 get_xattr_value_root *func,
6168 void *para)
6170 int ret = 0, i, j;
6171 struct super_block *sb = args->old_inode->i_sb;
6172 struct buffer_head *value_bh;
6173 struct ocfs2_xattr_entry *xe, *last;
6174 struct ocfs2_xattr_value_root *xv, *new_xv;
6175 struct ocfs2_extent_tree data_et;
6176 u32 clusters, cpos, p_cluster, num_clusters;
6177 unsigned int ext_flags = 0;
6179 mlog(0, "reflink xattr in container %llu, count = %u\n",
6180 (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6182 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6183 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6184 xe = &xh->xh_entries[i];
6186 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6187 xe = &new_xh->xh_entries[j];
6189 le16_add_cpu(&new_xh->xh_count, -1);
6190 if (new_xh->xh_count) {
6191 memmove(xe, xe + 1,
6192 (void *)last - (void *)xe);
6193 memset(last, 0,
6194 sizeof(struct ocfs2_xattr_entry));
6198 * We don't want j to increase in the next round since
6199 * it is already moved ahead.
6201 j--;
6202 continue;
6205 if (ocfs2_xattr_is_local(xe))
6206 continue;
6208 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6209 if (ret) {
6210 mlog_errno(ret);
6211 break;
6214 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6215 if (ret) {
6216 mlog_errno(ret);
6217 break;
6221 * For the xattr which has l_tree_depth = 0, all the extent
6222 * recs have already be copied to the new xh with the
6223 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6224 * increase the refount count int the refcount tree.
6226 * For the xattr which has l_tree_depth > 0, we need
6227 * to initialize it to the empty default value root,
6228 * and then insert the extents one by one.
6230 if (xv->xr_list.l_tree_depth) {
6231 memcpy(new_xv, &def_xv, sizeof(def_xv));
6232 vb->vb_xv = new_xv;
6233 vb->vb_bh = value_bh;
6234 ocfs2_init_xattr_value_extent_tree(&data_et,
6235 INODE_CACHE(args->new_inode), vb);
6238 clusters = le32_to_cpu(xv->xr_clusters);
6239 cpos = 0;
6240 while (cpos < clusters) {
6241 ret = ocfs2_xattr_get_clusters(args->old_inode,
6242 cpos,
6243 &p_cluster,
6244 &num_clusters,
6245 &xv->xr_list,
6246 &ext_flags);
6247 if (ret) {
6248 mlog_errno(ret);
6249 goto out;
6252 BUG_ON(!p_cluster);
6254 if (xv->xr_list.l_tree_depth) {
6255 ret = ocfs2_insert_extent(handle,
6256 &data_et, cpos,
6257 ocfs2_clusters_to_blocks(
6258 args->old_inode->i_sb,
6259 p_cluster),
6260 num_clusters, ext_flags,
6261 meta_ac);
6262 if (ret) {
6263 mlog_errno(ret);
6264 goto out;
6268 ret = ocfs2_increase_refcount(handle, args->ref_ci,
6269 args->ref_root_bh,
6270 p_cluster, num_clusters,
6271 meta_ac, args->dealloc);
6272 if (ret) {
6273 mlog_errno(ret);
6274 goto out;
6277 cpos += num_clusters;
6281 out:
6282 return ret;
6285 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6287 int ret = 0, credits = 0;
6288 handle_t *handle;
6289 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6290 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6291 int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6292 int header_off = osb->sb->s_blocksize - inline_size;
6293 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6294 (args->old_bh->b_data + header_off);
6295 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6296 (args->new_bh->b_data + header_off);
6297 struct ocfs2_alloc_context *meta_ac = NULL;
6298 struct ocfs2_inode_info *new_oi;
6299 struct ocfs2_dinode *new_di;
6300 struct ocfs2_xattr_value_buf vb = {
6301 .vb_bh = args->new_bh,
6302 .vb_access = ocfs2_journal_access_di,
6305 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6306 &credits, &meta_ac);
6307 if (ret) {
6308 mlog_errno(ret);
6309 goto out;
6312 handle = ocfs2_start_trans(osb, credits);
6313 if (IS_ERR(handle)) {
6314 ret = PTR_ERR(handle);
6315 mlog_errno(ret);
6316 goto out;
6319 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6320 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6321 if (ret) {
6322 mlog_errno(ret);
6323 goto out_commit;
6326 memcpy(args->new_bh->b_data + header_off,
6327 args->old_bh->b_data + header_off, inline_size);
6329 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6330 new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6332 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6333 args->new_bh, new_xh, &vb, meta_ac,
6334 ocfs2_get_xattr_value_root, NULL);
6335 if (ret) {
6336 mlog_errno(ret);
6337 goto out_commit;
6340 new_oi = OCFS2_I(args->new_inode);
6341 spin_lock(&new_oi->ip_lock);
6342 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6343 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6344 spin_unlock(&new_oi->ip_lock);
6346 ocfs2_journal_dirty(handle, args->new_bh);
6348 out_commit:
6349 ocfs2_commit_trans(osb, handle);
6351 out:
6352 if (meta_ac)
6353 ocfs2_free_alloc_context(meta_ac);
6354 return ret;
6357 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6358 struct buffer_head *fe_bh,
6359 struct buffer_head **ret_bh,
6360 int indexed)
6362 int ret;
6363 handle_t *handle;
6364 struct ocfs2_alloc_context *meta_ac;
6365 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6367 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6368 if (ret < 0) {
6369 mlog_errno(ret);
6370 return ret;
6373 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6374 if (IS_ERR(handle)) {
6375 ret = PTR_ERR(handle);
6376 mlog_errno(ret);
6377 goto out;
6380 mlog(0, "create new xattr block for inode %llu, index = %d\n",
6381 (unsigned long long)fe_bh->b_blocknr, indexed);
6382 ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6383 meta_ac, ret_bh, indexed);
6384 if (ret)
6385 mlog_errno(ret);
6387 ocfs2_commit_trans(osb, handle);
6388 out:
6389 ocfs2_free_alloc_context(meta_ac);
6390 return ret;
6393 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6394 struct buffer_head *blk_bh,
6395 struct buffer_head *new_blk_bh)
6397 int ret = 0, credits = 0;
6398 handle_t *handle;
6399 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6400 struct ocfs2_dinode *new_di;
6401 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6402 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6403 struct ocfs2_xattr_block *xb =
6404 (struct ocfs2_xattr_block *)blk_bh->b_data;
6405 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6406 struct ocfs2_xattr_block *new_xb =
6407 (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6408 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6409 struct ocfs2_alloc_context *meta_ac;
6410 struct ocfs2_xattr_value_buf vb = {
6411 .vb_bh = new_blk_bh,
6412 .vb_access = ocfs2_journal_access_xb,
6415 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6416 &credits, &meta_ac);
6417 if (ret) {
6418 mlog_errno(ret);
6419 return ret;
6422 /* One more credits in case we need to add xattr flags in new inode. */
6423 handle = ocfs2_start_trans(osb, credits + 1);
6424 if (IS_ERR(handle)) {
6425 ret = PTR_ERR(handle);
6426 mlog_errno(ret);
6427 goto out;
6430 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6431 ret = ocfs2_journal_access_di(handle,
6432 INODE_CACHE(args->new_inode),
6433 args->new_bh,
6434 OCFS2_JOURNAL_ACCESS_WRITE);
6435 if (ret) {
6436 mlog_errno(ret);
6437 goto out_commit;
6441 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6442 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6443 if (ret) {
6444 mlog_errno(ret);
6445 goto out_commit;
6448 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6449 osb->sb->s_blocksize - header_off);
6451 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6452 new_blk_bh, new_xh, &vb, meta_ac,
6453 ocfs2_get_xattr_value_root, NULL);
6454 if (ret) {
6455 mlog_errno(ret);
6456 goto out_commit;
6459 ocfs2_journal_dirty(handle, new_blk_bh);
6461 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6462 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6463 spin_lock(&new_oi->ip_lock);
6464 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6465 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6466 spin_unlock(&new_oi->ip_lock);
6468 ocfs2_journal_dirty(handle, args->new_bh);
6471 out_commit:
6472 ocfs2_commit_trans(osb, handle);
6474 out:
6475 ocfs2_free_alloc_context(meta_ac);
6476 return ret;
/*
 * State shared by the callbacks that reflink an indexed xattr tree.
 */
struct ocfs2_reflink_xattr_tree_args {
	/* the overall reflink request */
	struct ocfs2_xattr_reflink *reflink;
	/* xattr blocks of the old and the new inode */
	struct buffer_head *old_blk_bh;
	struct buffer_head *new_blk_bh;
	/* scratch buckets for the source and destination of a copy */
	struct ocfs2_xattr_bucket *old_bucket;
	struct ocfs2_xattr_bucket *new_bucket;
};
6488 * NOTE:
6489 * We have to handle the case that both old bucket and new bucket
6490 * will call this function to get the right ret_bh.
6491 * So The caller must give us the right bh.
6493 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6494 struct buffer_head *bh,
6495 struct ocfs2_xattr_header *xh,
6496 int offset,
6497 struct ocfs2_xattr_value_root **xv,
6498 struct buffer_head **ret_bh,
6499 void *para)
6501 struct ocfs2_reflink_xattr_tree_args *args =
6502 (struct ocfs2_reflink_xattr_tree_args *)para;
6503 struct ocfs2_xattr_bucket *bucket;
6505 if (bh == args->old_bucket->bu_bhs[0])
6506 bucket = args->old_bucket;
6507 else
6508 bucket = args->new_bucket;
6510 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6511 xv, ret_bh);
/*
 * Totals accumulated over the buckets of one xattr extent record by
 * ocfs2_calc_value_tree_metas().
 */
struct ocfs2_value_tree_metas {
	int num_metas;	/* metadata blocks to reserve */
	int credits;	/* journal credits */
	int num_recs;	/* refcount records that may be added/modified */
};
/*
 * get_xattr_value_root implementation for metadata accounting in a
 * bucket: para is the bucket itself, bh and xh are not needed.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *bkt = para;

	return ocfs2_get_xattr_tree_value_root(sb, bkt, offset,
					       xv, ret_bh);
}
6535 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6536 struct ocfs2_xattr_bucket *bucket,
6537 void *para)
6539 struct ocfs2_value_tree_metas *metas =
6540 (struct ocfs2_value_tree_metas *)para;
6541 struct ocfs2_xattr_header *xh =
6542 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6544 /* Add the credits for this bucket first. */
6545 metas->credits += bucket->bu_blocks;
6546 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6547 xh, &metas->num_metas,
6548 &metas->credits, &metas->num_recs,
6549 ocfs2_value_tree_metas_in_bucket,
6550 bucket);
6554 * Given a xattr extent rec starting from blkno and having len clusters,
6555 * iterate all the buckets calculate how much metadata we need for reflinking
6556 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6558 static int ocfs2_lock_reflink_xattr_rec_allocators(
6559 struct ocfs2_reflink_xattr_tree_args *args,
6560 struct ocfs2_extent_tree *xt_et,
6561 u64 blkno, u32 len, int *credits,
6562 struct ocfs2_alloc_context **meta_ac,
6563 struct ocfs2_alloc_context **data_ac)
6565 int ret, num_free_extents;
6566 struct ocfs2_value_tree_metas metas;
6567 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6568 struct ocfs2_refcount_block *rb;
6570 memset(&metas, 0, sizeof(metas));
6572 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6573 ocfs2_calc_value_tree_metas, &metas);
6574 if (ret) {
6575 mlog_errno(ret);
6576 goto out;
6579 *credits = metas.credits;
6582 * Calculate we need for refcount tree change.
6584 * We need to add/modify num_recs in refcount tree, so just calculate
6585 * an approximate number we need for refcount tree change.
6586 * Sometimes we need to split the tree, and after split, half recs
6587 * will be moved to the new block, and a new block can only provide
6588 * half number of recs. So we multiple new blocks by 2.
6589 * In the end, we have to add credits for modifying the already
6590 * existed refcount block.
6592 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6593 metas.num_recs =
6594 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6595 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6596 metas.num_metas += metas.num_recs;
6597 *credits += metas.num_recs +
6598 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6599 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6600 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6601 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6602 else
6603 *credits += 1;
6605 /* count in the xattr tree change. */
6606 num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6607 if (num_free_extents < 0) {
6608 ret = num_free_extents;
6609 mlog_errno(ret);
6610 goto out;
6613 if (num_free_extents < len)
6614 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6616 *credits += ocfs2_calc_extend_credits(osb->sb,
6617 xt_et->et_root_el, len);
6619 if (metas.num_metas) {
6620 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6621 meta_ac);
6622 if (ret) {
6623 mlog_errno(ret);
6624 goto out;
6628 if (len) {
6629 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6630 if (ret)
6631 mlog_errno(ret);
6633 out:
6634 if (ret) {
6635 if (*meta_ac) {
6636 ocfs2_free_alloc_context(*meta_ac);
6637 meta_ac = NULL;
6641 return ret;
6644 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6645 u64 blkno, u64 new_blkno, u32 clusters,
6646 struct ocfs2_alloc_context *meta_ac,
6647 struct ocfs2_alloc_context *data_ac,
6648 struct ocfs2_reflink_xattr_tree_args *args)
6650 int i, j, ret = 0;
6651 struct super_block *sb = args->reflink->old_inode->i_sb;
6652 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6653 u32 num_buckets = clusters * bpc;
6654 int bpb = args->old_bucket->bu_blocks;
6655 struct ocfs2_xattr_value_buf vb = {
6656 .vb_access = ocfs2_journal_access,
6659 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6660 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6661 if (ret) {
6662 mlog_errno(ret);
6663 break;
6666 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6667 if (ret) {
6668 mlog_errno(ret);
6669 break;
6673 * The real bucket num in this series of blocks is stored
6674 * in the 1st bucket.
6676 if (i == 0)
6677 num_buckets = le16_to_cpu(
6678 bucket_xh(args->old_bucket)->xh_num_buckets);
6680 ret = ocfs2_xattr_bucket_journal_access(handle,
6681 args->new_bucket,
6682 OCFS2_JOURNAL_ACCESS_CREATE);
6683 if (ret) {
6684 mlog_errno(ret);
6685 break;
6688 for (j = 0; j < bpb; j++)
6689 memcpy(bucket_block(args->new_bucket, j),
6690 bucket_block(args->old_bucket, j),
6691 sb->s_blocksize);
6693 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6695 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6696 args->old_bucket->bu_bhs[0],
6697 bucket_xh(args->old_bucket),
6698 args->new_bucket->bu_bhs[0],
6699 bucket_xh(args->new_bucket),
6700 &vb, meta_ac,
6701 ocfs2_get_reflink_xattr_value_root,
6702 args);
6703 if (ret) {
6704 mlog_errno(ret);
6705 break;
6709 * Re-access and dirty the bucket to calculate metaecc.
6710 * Because we may extend the transaction in reflink_xattr_header
6711 * which will let the already accessed block gone.
6713 ret = ocfs2_xattr_bucket_journal_access(handle,
6714 args->new_bucket,
6715 OCFS2_JOURNAL_ACCESS_WRITE);
6716 if (ret) {
6717 mlog_errno(ret);
6718 break;
6721 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6722 ocfs2_xattr_bucket_relse(args->old_bucket);
6723 ocfs2_xattr_bucket_relse(args->new_bucket);
6726 ocfs2_xattr_bucket_relse(args->old_bucket);
6727 ocfs2_xattr_bucket_relse(args->new_bucket);
6728 return ret;
6731 * Create the same xattr extent record in the new inode's xattr tree.
6733 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6734 struct buffer_head *root_bh,
6735 u64 blkno,
6736 u32 cpos,
6737 u32 len,
6738 void *para)
6740 int ret, credits = 0;
6741 u32 p_cluster, num_clusters;
6742 u64 new_blkno;
6743 handle_t *handle;
6744 struct ocfs2_reflink_xattr_tree_args *args =
6745 (struct ocfs2_reflink_xattr_tree_args *)para;
6746 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6747 struct ocfs2_alloc_context *meta_ac = NULL;
6748 struct ocfs2_alloc_context *data_ac = NULL;
6749 struct ocfs2_extent_tree et;
6751 ocfs2_init_xattr_tree_extent_tree(&et,
6752 INODE_CACHE(args->reflink->new_inode),
6753 args->new_blk_bh);
6755 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6756 len, &credits,
6757 &meta_ac, &data_ac);
6758 if (ret) {
6759 mlog_errno(ret);
6760 goto out;
6763 handle = ocfs2_start_trans(osb, credits);
6764 if (IS_ERR(handle)) {
6765 ret = PTR_ERR(handle);
6766 mlog_errno(ret);
6767 goto out;
6770 ret = ocfs2_claim_clusters(osb, handle, data_ac,
6771 len, &p_cluster, &num_clusters);
6772 if (ret) {
6773 mlog_errno(ret);
6774 goto out_commit;
6777 new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6779 mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6780 (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6781 ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6782 meta_ac, data_ac, args);
6783 if (ret) {
6784 mlog_errno(ret);
6785 goto out_commit;
6788 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6789 (unsigned long long)new_blkno, len, cpos);
6790 ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6791 len, 0, meta_ac);
6792 if (ret)
6793 mlog_errno(ret);
6795 out_commit:
6796 ocfs2_commit_trans(osb, handle);
6798 out:
6799 if (meta_ac)
6800 ocfs2_free_alloc_context(meta_ac);
6801 if (data_ac)
6802 ocfs2_free_alloc_context(data_ac);
6803 return ret;
6807 * Create reflinked xattr buckets.
6808 * We will add bucket one by one, and refcount all the xattrs in the bucket
6809 * if they are stored outside.
6811 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6812 struct buffer_head *blk_bh,
6813 struct buffer_head *new_blk_bh)
6815 int ret;
6816 struct ocfs2_reflink_xattr_tree_args para;
6818 memset(&para, 0, sizeof(para));
6819 para.reflink = args;
6820 para.old_blk_bh = blk_bh;
6821 para.new_blk_bh = new_blk_bh;
6823 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6824 if (!para.old_bucket) {
6825 mlog_errno(-ENOMEM);
6826 return -ENOMEM;
6829 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6830 if (!para.new_bucket) {
6831 ret = -ENOMEM;
6832 mlog_errno(ret);
6833 goto out;
6836 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
6837 ocfs2_reflink_xattr_rec,
6838 &para);
6839 if (ret)
6840 mlog_errno(ret);
6842 out:
6843 ocfs2_xattr_bucket_free(para.old_bucket);
6844 ocfs2_xattr_bucket_free(para.new_bucket);
6845 return ret;
6848 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
6849 struct buffer_head *blk_bh)
6851 int ret, indexed = 0;
6852 struct buffer_head *new_blk_bh = NULL;
6853 struct ocfs2_xattr_block *xb =
6854 (struct ocfs2_xattr_block *)blk_bh->b_data;
6857 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
6858 indexed = 1;
6860 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
6861 &new_blk_bh, indexed);
6862 if (ret) {
6863 mlog_errno(ret);
6864 goto out;
6867 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
6868 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
6869 else
6870 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
6871 if (ret)
6872 mlog_errno(ret);
6874 out:
6875 brelse(new_blk_bh);
6876 return ret;
6879 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
6881 int type = ocfs2_xattr_get_type(xe);
6883 return type != OCFS2_XATTR_INDEX_SECURITY &&
6884 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
6885 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
6888 int ocfs2_reflink_xattrs(struct inode *old_inode,
6889 struct buffer_head *old_bh,
6890 struct inode *new_inode,
6891 struct buffer_head *new_bh,
6892 bool preserve_security)
6894 int ret;
6895 struct ocfs2_xattr_reflink args;
6896 struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
6897 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
6898 struct buffer_head *blk_bh = NULL;
6899 struct ocfs2_cached_dealloc_ctxt dealloc;
6900 struct ocfs2_refcount_tree *ref_tree;
6901 struct buffer_head *ref_root_bh = NULL;
6903 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6904 le64_to_cpu(di->i_refcount_loc),
6905 1, &ref_tree, &ref_root_bh);
6906 if (ret) {
6907 mlog_errno(ret);
6908 goto out;
6911 ocfs2_init_dealloc_ctxt(&dealloc);
6913 args.old_inode = old_inode;
6914 args.new_inode = new_inode;
6915 args.old_bh = old_bh;
6916 args.new_bh = new_bh;
6917 args.ref_ci = &ref_tree->rf_ci;
6918 args.ref_root_bh = ref_root_bh;
6919 args.dealloc = &dealloc;
6920 if (preserve_security)
6921 args.xattr_reflinked = NULL;
6922 else
6923 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
6925 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6926 ret = ocfs2_reflink_xattr_inline(&args);
6927 if (ret) {
6928 mlog_errno(ret);
6929 goto out_unlock;
6933 if (!di->i_xattr_loc)
6934 goto out_unlock;
6936 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
6937 &blk_bh);
6938 if (ret < 0) {
6939 mlog_errno(ret);
6940 goto out_unlock;
6943 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
6944 if (ret)
6945 mlog_errno(ret);
6947 brelse(blk_bh);
6949 out_unlock:
6950 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6951 ref_tree, 1);
6952 brelse(ref_root_bh);
6954 if (ocfs2_dealloc_has_cluster(&dealloc)) {
6955 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
6956 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
6959 out:
6960 return ret;
6964 * Initialize security and acl for a already created inode.
6965 * Used for reflink a non-preserve-security file.
6967 * It uses common api like ocfs2_xattr_set, so the caller
6968 * must not hold any lock expect i_mutex.
6970 int ocfs2_init_security_and_acl(struct inode *dir,
6971 struct inode *inode)
6973 int ret = 0;
6974 struct buffer_head *dir_bh = NULL;
6975 struct ocfs2_security_xattr_info si = {
6976 .enable = 1,
6979 ret = ocfs2_init_security_get(inode, dir, &si);
6980 if (!ret) {
6981 ret = ocfs2_xattr_security_set(inode, si.name,
6982 si.value, si.value_len,
6983 XATTR_CREATE);
6984 if (ret) {
6985 mlog_errno(ret);
6986 goto leave;
6988 } else if (ret != -EOPNOTSUPP) {
6989 mlog_errno(ret);
6990 goto leave;
6993 ret = ocfs2_inode_lock(dir, &dir_bh, 0);
6994 if (ret) {
6995 mlog_errno(ret);
6996 goto leave;
6999 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7000 if (ret)
7001 mlog_errno(ret);
7003 ocfs2_inode_unlock(dir, 0);
7004 brelse(dir_bh);
7005 leave:
7006 return ret;
/*
 * 'security' attributes support
 */
7011 static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
7012 size_t list_size, const char *name,
7013 size_t name_len)
7015 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7016 const size_t total_len = prefix_len + name_len + 1;
7018 if (list && total_len <= list_size) {
7019 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7020 memcpy(list + prefix_len, name, name_len);
7021 list[prefix_len + name_len] = '\0';
7023 return total_len;
7026 static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
7027 void *buffer, size_t size)
7029 if (strcmp(name, "") == 0)
7030 return -EINVAL;
7031 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
7032 buffer, size);
7035 static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
7036 const void *value, size_t size, int flags)
7038 if (strcmp(name, "") == 0)
7039 return -EINVAL;
7041 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
7042 size, flags);
7045 int ocfs2_init_security_get(struct inode *inode,
7046 struct inode *dir,
7047 struct ocfs2_security_xattr_info *si)
7049 /* check whether ocfs2 support feature xattr */
7050 if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7051 return -EOPNOTSUPP;
7052 return security_inode_init_security(inode, dir, &si->name, &si->value,
7053 &si->value_len);
7056 int ocfs2_init_security_set(handle_t *handle,
7057 struct inode *inode,
7058 struct buffer_head *di_bh,
7059 struct ocfs2_security_xattr_info *si,
7060 struct ocfs2_alloc_context *xattr_ac,
7061 struct ocfs2_alloc_context *data_ac)
7063 return ocfs2_xattr_set_handle(handle, inode, di_bh,
7064 OCFS2_XATTR_INDEX_SECURITY,
7065 si->name, si->value, si->value_len, 0,
7066 xattr_ac, data_ac);
7069 struct xattr_handler ocfs2_xattr_security_handler = {
7070 .prefix = XATTR_SECURITY_PREFIX,
7071 .list = ocfs2_xattr_security_list,
7072 .get = ocfs2_xattr_security_get,
7073 .set = ocfs2_xattr_security_set,
/*
 * 'trusted' attributes support
 */
7079 static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
7080 size_t list_size, const char *name,
7081 size_t name_len)
7083 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7084 const size_t total_len = prefix_len + name_len + 1;
7086 if (list && total_len <= list_size) {
7087 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7088 memcpy(list + prefix_len, name, name_len);
7089 list[prefix_len + name_len] = '\0';
7091 return total_len;
7094 static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
7095 void *buffer, size_t size)
7097 if (strcmp(name, "") == 0)
7098 return -EINVAL;
7099 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
7100 buffer, size);
7103 static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
7104 const void *value, size_t size, int flags)
7106 if (strcmp(name, "") == 0)
7107 return -EINVAL;
7109 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
7110 size, flags);
7113 struct xattr_handler ocfs2_xattr_trusted_handler = {
7114 .prefix = XATTR_TRUSTED_PREFIX,
7115 .list = ocfs2_xattr_trusted_list,
7116 .get = ocfs2_xattr_trusted_get,
7117 .set = ocfs2_xattr_trusted_set,
/*
 * 'user' attributes support
 */
7123 static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
7124 size_t list_size, const char *name,
7125 size_t name_len)
7127 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7128 const size_t total_len = prefix_len + name_len + 1;
7129 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7131 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7132 return 0;
7134 if (list && total_len <= list_size) {
7135 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7136 memcpy(list + prefix_len, name, name_len);
7137 list[prefix_len + name_len] = '\0';
7139 return total_len;
7142 static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
7143 void *buffer, size_t size)
7145 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7147 if (strcmp(name, "") == 0)
7148 return -EINVAL;
7149 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7150 return -EOPNOTSUPP;
7151 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
7152 buffer, size);
7155 static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
7156 const void *value, size_t size, int flags)
7158 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
7160 if (strcmp(name, "") == 0)
7161 return -EINVAL;
7162 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7163 return -EOPNOTSUPP;
7165 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
7166 size, flags);
7169 struct xattr_handler ocfs2_xattr_user_handler = {
7170 .prefix = XATTR_USER_PREFIX,
7171 .list = ocfs2_xattr_user_list,
7172 .get = ocfs2_xattr_user_get,
7173 .set = ocfs2_xattr_user_set,