container quota patch queue v0.07
[ct-quota-pq.git] / ext4-add-directory-tree-2.6.32-rc5.patch
blob5e18fed08a8a897a1c6b3b3eb483dc748cf5239e
1 ext4: Add directory tree id support
3 This patch adds directory tree id support. It has much in common with
4 project-id in XFS. One may assign an id to a fs hierarchy,
5 most probably to a full subtree. Each entry from the hierarchy
6 may be accounted in a directory tree quota, which will appear in later patches.
8 Assumptions:
9 (1) Tree-id is embedded inside the inode
10 (2) Tree-id is inherited from the parent dir on inode creation
11 (3) An inode cannot reside in different directory trees
13 New incompat feature EXT4_FEATURE_INCOMPAT_TREEID
14 mount option: "treeid"
15 mount flag: EXT4_MOUNT_TREEID
16 new ext4_inode field: i_tree
17 ioctl EXT4_IOC_GET_TREEID / EXT4_IOC_SET_TREEID
18 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
19 index 984ca0c..bdea63e 100644
20 --- a/fs/ext4/ext4.h
21 +++ b/fs/ext4/ext4.h
22 @@ -395,6 +395,8 @@ struct ext4_new_group_data {
23 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
24 #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
25 #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
26 +#define EXT4_IOC_GET_TREEID _IOR('f', 16, unsigned int)
27 +#define EXT4_IOC_SET_TREEID _IOW('f', 17, unsigned int)
30 * ioctl commands in 32 bit emulation
31 @@ -491,6 +493,7 @@ struct ext4_inode {
32 __le32 i_crtime; /* File Creation time */
33 __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
34 __le32 i_version_hi; /* high 32 bits for 64-bit version */
35 + __le32 i_tree; /* directory tree quota id */
38 struct move_extent {
39 @@ -702,6 +705,7 @@ struct ext4_inode_info {
40 struct list_head i_aio_dio_complete_list;
41 /* current io_end structure for async DIO write*/
42 ext4_io_end_t *cur_aio_dio;
43 + unsigned int i_tree; /* directory tree quota id */
47 @@ -748,6 +752,7 @@ struct ext4_inode_info {
48 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
49 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
50 #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
51 +#define EXT4_MOUNT_TREEID 0x40000000 /* Directory tree id */
53 #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
54 #define set_opt(o, opt) o |= EXT4_MOUNT_##opt
55 @@ -1107,6 +1112,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
56 #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
57 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100
58 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
59 +#define EXT4_FEATURE_INCOMPAT_TREEID 0x0400
61 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
62 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
63 @@ -1114,7 +1120,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
64 EXT4_FEATURE_INCOMPAT_META_BG| \
65 EXT4_FEATURE_INCOMPAT_EXTENTS| \
66 EXT4_FEATURE_INCOMPAT_64BIT| \
67 - EXT4_FEATURE_INCOMPAT_FLEX_BG)
68 + EXT4_FEATURE_INCOMPAT_FLEX_BG| \
69 + EXT4_FEATURE_INCOMPAT_TREEID)
70 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
71 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
72 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
73 @@ -1682,6 +1689,37 @@ static inline void ext4_unlock_group(struct super_block *sb,
75 spin_unlock(ext4_group_lock_ptr(sb, group));
77 +/**
78 + Directory tree-id assumptions:
79 + (1) Tree id is embedded inside the inode
80 + (2) Tree id is inherited from the parent dir
81 + (3) An inode cannot belong to different directory trees
83 + The default directory tree (with id == 0) has special meaning.
84 + A directory which belongs to the default tree may contain entries
85 + from other trees. It may be used for subtree manipulation.
86 + */
87 +enum {
88 + EXT4_TREEID_SAME = 1,
89 + EXT4_TREEID_COMMON,
90 + EXT4_TREEID_CROSS,
91 +};
92 +/**
93 + * Check directory tree id assumptions:
94 + * @dstino: parent directory inode
95 + * Return:
96 + * EXT4_TREEID_COMMON: Destination dir belongs to the default tree
97 + * EXT4_TREEID_SAME: Both belong to the same tree
98 + * EXT4_TREEID_CROSS: Inodes belong to different trees
99 + */
100 +static inline int ext4_is_same_tree(struct inode *dstino, struct inode *inode)
102 + if ((EXT4_I(inode)->i_tree == EXT4_I(dstino)->i_tree))
103 + return EXT4_TREEID_SAME;
104 + else if ((EXT4_I(dstino)->i_tree == 0))
105 + return EXT4_TREEID_COMMON;
106 + return EXT4_TREEID_CROSS;
110 * Inodes and files operations
111 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
112 index f3624ea..d20c228 100644
113 --- a/fs/ext4/ialloc.c
114 +++ b/fs/ext4/ialloc.c
115 @@ -1013,6 +1013,9 @@ got:
117 ei->i_flags =
118 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
119 + if (test_opt(inode->i_sb, TREEID))
120 + ei->i_tree = EXT4_I(dir)->i_tree;
122 ei->i_file_acl = 0;
123 ei->i_dtime = 0;
124 ei->i_block_group = group;
125 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
126 index 0a0b986..c7ffc7b 100644
127 --- a/fs/ext4/inode.c
128 +++ b/fs/ext4/inode.c
129 @@ -4876,6 +4876,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
130 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
133 + if (test_opt(inode->i_sb, TREEID))
134 + ei->i_tree = le32_to_cpu(raw_inode->i_tree);
136 ret = 0;
137 if (ei->i_file_acl &&
138 ((ei->i_file_acl <
139 @@ -5033,6 +5036,9 @@ static int ext4_do_update_inode(handle_t *handle,
140 raw_inode->i_uid_high = 0;
141 raw_inode->i_gid_high = 0;
143 + if (test_opt(inode->i_sb, TREEID))
144 + raw_inode->i_tree = cpu_to_le32(ei->i_tree);
146 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
148 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
149 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
150 index c1cdf61..135e16c 100644
151 --- a/fs/ext4/ioctl.c
152 +++ b/fs/ext4/ioctl.c
153 @@ -164,6 +164,85 @@ setversion_out:
154 mnt_drop_write(filp->f_path.mnt);
155 return err;
157 + case EXT4_IOC_GET_TREEID:
158 + if (test_opt(inode->i_sb, TREEID))
159 + return put_user(ei->i_tree, (unsigned int __user*) arg);
160 + else
161 + return -EINVAL;
162 + case EXT4_IOC_SET_TREEID: {
163 + int err;
164 + struct ext4_iloc iloc;
165 + unsigned int treeid;
166 + handle_t *handle = NULL;
167 + struct inode *dir = IS_ROOT(filp->f_dentry) ?
168 + NULL : filp->f_dentry->d_parent->d_inode;
170 + if (!test_opt(inode->i_sb, TREEID))
171 + return -EINVAL;
173 + if (!is_owner_or_cap(inode))
174 + return -EACCES;
176 + if (get_user(treeid, (unsigned int __user *) arg))
177 + return -EFAULT;
179 + err = mnt_want_write(filp->f_path.mnt);
180 + if (err)
181 + return err;
182 + /*
183 + * Nested directory quota trees are not allowed!
184 + * We have to hold dir->i_mutex for entire operation
185 + * in order to prevent from races with same parent's ioctl.
186 + */
187 + if (dir)
188 + mutex_lock(&dir->i_mutex);
189 + mutex_lock(&inode->i_mutex);
190 + err = -EXDEV;
191 + if (dir && (EXT4_I(dir)->i_tree != 0 &&
192 + treeid != EXT4_I(dir)->i_tree))
193 + goto settree_out;
195 + //// FIXME if S_ISDIR(inode->i_mode) then we have to check
196 + //// tree id for each entry from this directory.
198 + err = -EPERM;
199 + /* Is it quota file? Do not allow user to mess with it */
200 + if (IS_NOQUOTA(inode))
201 + goto settree_out;
203 + if (!capable(CAP_SYS_RESOURCE))
204 + goto settree_out;
206 + err = 0;
207 + if (ei->i_tree == treeid)
208 + /* Nothing to do */
209 + goto settree_out;
211 + //// FIXME: Reserve enough blocks for tree quota transfer ?????
212 + handle = ext4_journal_start(inode, 1);
213 + if (IS_ERR(handle)) {
214 + err = PTR_ERR(handle);
215 + goto settree_out;
217 + if (IS_SYNC(inode))
218 + ext4_handle_sync(handle);
219 + err = ext4_reserve_inode_write(handle, inode, &iloc);
220 + if (err)
221 + goto settree_err;
223 + ////FIXME: tree quota transfer here
224 + ei->i_tree = treeid;
225 + inode->i_mtime = ext4_current_time(inode);
226 + err = ext4_mark_iloc_dirty(handle, inode, &iloc);
227 +settree_err:
228 + ext4_journal_stop(handle);
229 +settree_out:
230 + if (dir)
231 + mutex_unlock(&dir->i_mutex);
232 + mutex_unlock(&inode->i_mutex);
233 + mnt_drop_write(filp->f_path.mnt);
234 + return err;
236 #ifdef CONFIG_JBD2_DEBUG
237 case EXT4_IOC_WAIT_FOR_READONLY:
239 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
240 index 4ad3881..0228db7 100644
241 --- a/fs/ext4/namei.c
242 +++ b/fs/ext4/namei.c
243 @@ -1086,6 +1086,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
244 return ERR_CAST(inode);
247 + /* Check directory tree id parent/child relationship */
248 + if ((test_opt(dir->i_sb, TREEID)) &&
249 + ext4_is_same_tree(dir, inode) == EXT4_TREEID_CROSS) {
250 + printk(KERN_WARNING "WARN: Bad tree hierarchy dev:%s "
251 + "dir{ino:%lu, tr:%u} ino{ino:%lu, tr:%u}\n",
252 + dir->i_sb->s_id, dir->i_ino,
253 + EXT4_I(dir)->i_tree,
254 + inode->i_ino, EXT4_I(inode)->i_tree);
257 return d_splice_alias(inode, dentry);
259 @@ -2328,7 +2337,14 @@ static int ext4_link(struct dentry *old_dentry,
261 if (inode->i_nlink == 0)
262 return -ENOENT;
264 + /*
265 + According to directory tree quota assumptions one inode
266 + cannot belong to different quota trees.
267 + Unlike rename(), link() is only possible in the same tree.
268 + */
269 + if (test_opt(inode->i_sb, TREEID) &&
270 + ext4_is_same_tree(dir, inode) != EXT4_TREEID_SAME)
271 + return -EXDEV;
272 retry:
273 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
274 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
275 @@ -2378,6 +2394,28 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
276 * in separate transaction */
277 if (new_dentry->d_inode)
278 vfs_dq_init(new_dentry->d_inode);
280 + /*
281 + Check for directory treeid hierarchy assumptions
282 + */
283 + //// FIXME: Seems what i_nlink check is racy
284 + //// We have take old_dentry->d_inode->i_mutex here, Is it possible?
285 + if (test_opt(old_dir->i_sb, TREEID)) {
286 + int same = ext4_is_same_tree(new_dir, old_dentry->d_inode);
287 + if ((S_ISDIR(old_dentry->d_inode->i_mode))) {
288 + if (same == EXT4_TREEID_CROSS)
289 + return -EXDEV;
290 + } else {
291 + /*
292 + * If the inode has more than one link then it is a bad
293 + * idea to move one of these entries between trees.
294 + */
295 + if (old_dentry->d_inode->i_nlink > 1 &&
296 + same != EXT4_TREEID_SAME)
297 + return -EXDEV;
301 handle = ext4_journal_start(old_dir, 2 *
302 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
303 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
304 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
305 index 75a291a..55661d1 100644
306 --- a/fs/ext4/super.c
307 +++ b/fs/ext4/super.c
308 @@ -831,6 +831,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
309 seq_puts(seq, ",debug");
310 if (test_opt(sb, OLDALLOC))
311 seq_puts(seq, ",oldalloc");
312 + if (test_opt(sb, TREEID))
313 + seq_puts(seq, ",treeid");
314 #ifdef CONFIG_EXT4_FS_XATTR
315 if (test_opt(sb, XATTR_USER) &&
316 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
317 @@ -1080,7 +1082,7 @@ enum {
318 Opt_usrquota, Opt_grpquota, Opt_i_version,
319 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
320 Opt_block_validity, Opt_noblock_validity,
321 - Opt_inode_readahead_blks, Opt_journal_ioprio
322 + Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_treeid
325 static const match_table_t tokens = {
326 @@ -1142,6 +1144,7 @@ static const match_table_t tokens = {
327 {Opt_noblock_validity, "noblock_validity"},
328 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
329 {Opt_journal_ioprio, "journal_ioprio=%u"},
330 + {Opt_treeid, "treeid"},
331 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
332 {Opt_auto_da_alloc, "auto_da_alloc"},
333 {Opt_noauto_da_alloc, "noauto_da_alloc"},
334 @@ -1565,6 +1568,26 @@ set_qf_format:
335 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
336 option);
337 break;
338 + case Opt_treeid:
339 + /* ///////////////////////////////////////////////
340 + * FIXME: Kill this after implementing lib2fs
341 + * INCOMPAT_TREEID feature support.
342 + */
343 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
344 + EXT4_FEATURE_INCOMPAT_TREEID))
345 + EXT4_SET_INCOMPAT_FEATURE(sb,
346 + EXT4_FEATURE_INCOMPAT_TREEID);
347 + ////////////////////////////////////////////////////
348 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
349 + EXT4_FEATURE_INCOMPAT_TREEID)) {
350 + ext4_msg(sb, KERN_ERR, "treeid option specified"
351 + " with no directory treeid feature "
352 + "enabled. run tune2fs");
353 + return 0;
355 + set_opt(sbi->s_mount_opt, TREEID);
356 + break;
358 case Opt_noauto_da_alloc:
359 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
360 break;