container quota patch queue v0.07
[ct-quota-pq.git] / ext4-add-directory-tree.patch
blobf1e4ec114e98519836fa74d8548606f14ee40e7d
1 ext4: Add directory tree id support
3 This patch adds directory tree id support, which has much in common with
4 project-id in XFS. One may assign an id to a fs hierarchy,
5 most probably to a full subtree. Each entry from the hierarchy
6 may be accounted in directory tree quota, which will appear in later patches.
8 Assumptions:
9 (1) Tree-id is embedded inside inode
10 (2) Tree-id is inherited from parent dir on inode create
11 (3) Inode can not reside in different directory trees
13 New incompat feature EXT4_FEATURE_INCOMPAT_TREEID
14 mount option: "treeid"
15 mount flag: EXT4_MOUNT_TREEID
16 new ext4_inode field: i_tree
17 ioctl EXT4_IOC_GET_TREEID / EXT4_IOC_SET_TREEID
18 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
19 index 3c4b0f5..bd76c0f 100644
20 --- a/fs/ext4/ext4.h
21 +++ b/fs/ext4/ext4.h
22 @@ -396,6 +396,8 @@ struct ext4_new_group_data {
23 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
24 #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
25 #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
26 +#define EXT4_IOC_GET_TREEID _IOR('f', 16, unsigned int)
27 +#define EXT4_IOC_SET_TREEID _IOW('f', 17, unsigned int)
30 * ioctl commands in 32 bit emulation
31 @@ -471,7 +473,7 @@ struct ext4_inode {
32 __le16 l_i_file_acl_high;
33 __le16 l_i_uid_high; /* these 2 fields */
34 __le16 l_i_gid_high; /* were reserved2[0] */
35 - __u32 l_i_reserved2;
36 + __u32 l_i_tree_id; /* reserved for 2-level disk quota */
37 } linux2;
38 struct {
39 __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
40 @@ -585,7 +587,7 @@ do { \
41 #define i_gid_low i_gid
42 #define i_uid_high osd2.linux2.l_i_uid_high
43 #define i_gid_high osd2.linux2.l_i_gid_high
44 -#define i_reserved2 osd2.linux2.l_i_reserved2
45 +#define i_disk_tree_id osd2.linux2.l_i_tree_id
47 #elif defined(__GNU__)
49 @@ -705,6 +707,7 @@ struct ext4_inode_info {
50 struct list_head i_aio_dio_complete_list;
51 /* current io_end structure for async DIO write*/
52 ext4_io_end_t *cur_aio_dio;
53 + unsigned int i_tree_id; /* directory tree quota id */
57 @@ -752,6 +755,7 @@ struct ext4_inode_info {
58 #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
59 #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
60 #define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
61 +#define EXT4_MOUNT_TREEID 0x40000000 /* Dirtectory tree id */
63 #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
64 #define set_opt(o, opt) o |= EXT4_MOUNT_##opt
65 @@ -1111,6 +1115,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
66 #define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
67 #define EXT4_FEATURE_INCOMPAT_MMP 0x0100
68 #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
69 +#define EXT4_FEATURE_INCOMPAT_TREEID 0x0400
71 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR
72 #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
73 @@ -1118,7 +1123,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
74 EXT4_FEATURE_INCOMPAT_META_BG| \
75 EXT4_FEATURE_INCOMPAT_EXTENTS| \
76 EXT4_FEATURE_INCOMPAT_64BIT| \
77 - EXT4_FEATURE_INCOMPAT_FLEX_BG)
78 + EXT4_FEATURE_INCOMPAT_FLEX_BG| \
79 + EXT4_FEATURE_INCOMPAT_TREEID)
80 #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
81 EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
82 EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
83 @@ -1686,6 +1692,37 @@ static inline void ext4_unlock_group(struct super_block *sb,
85 spin_unlock(ext4_group_lock_ptr(sb, group));
87 +/**
88 + Directory tree-id assumptions:
89 + (1) Tree id is embedded inside inode
90 + (2) Tree id is inherited from parent dir
91 + (3) Inode can not belong to different directory trees
93 + Default directory tree (with id == 0) has special meaning.
94 + A directory which belongs to the default tree may contain entries from
95 + other trees. It may be used for subtree manipulation.
96 + */
97 +enum {
98 + EXT4_TREEID_SAME = 1,
99 + EXT4_TREEID_COMMON,
100 + EXT4_TREEID_CROSS,
102 +/**
103 + * Check directory tree id assumptions:
104 + * @dstino: parent directory inode
105 + * Return:
106 + * EXT4_TREEID_COMMON: Destination dir belongs to default tree
107 + * EXT4_TREEID_SAME: Both belong to the same tree
108 + * EXT4_TREEID_CROSS: Inodes belong to different trees
109 + */
110 +static inline int ext4_is_same_tree(struct inode *dstino, struct inode *inode)
112 + if ((EXT4_I(inode)->i_tree_id == EXT4_I(dstino)->i_tree_id))
113 + return EXT4_TREEID_SAME;
114 + else if ((EXT4_I(dstino)->i_tree_id == 0))
115 + return EXT4_TREEID_COMMON;
116 + return EXT4_TREEID_CROSS;
120 * Inodes and files operations
121 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
122 index f3624ea..d7ca92d 100644
123 --- a/fs/ext4/ialloc.c
124 +++ b/fs/ext4/ialloc.c
125 @@ -1013,6 +1013,9 @@ got:
127 ei->i_flags =
128 ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
129 + if (test_opt(inode->i_sb, TREEID))
130 + ei->i_tree_id = EXT4_I(dir)->i_tree_id;
132 ei->i_file_acl = 0;
133 ei->i_dtime = 0;
134 ei->i_block_group = group;
135 diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
136 index 30ffac2..e67e6a1 100644
137 --- a/fs/ext4/inode.c
138 +++ b/fs/ext4/inode.c
139 @@ -5087,6 +5087,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
140 (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
143 + if (test_opt(inode->i_sb, TREEID))
144 + ei->i_tree_id = le32_to_cpu(raw_inode->i_disk_tree_id);
146 ret = 0;
147 if (ei->i_file_acl &&
148 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
149 @@ -5239,6 +5242,9 @@ static int ext4_do_update_inode(handle_t *handle,
150 raw_inode->i_uid_high = 0;
151 raw_inode->i_gid_high = 0;
153 + if (test_opt(inode->i_sb, TREEID))
154 + raw_inode->i_disk_tree_id = cpu_to_le32(ei->i_tree_id);
156 raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
158 EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
159 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
160 index d1fe495..bf6b500 100644
161 --- a/fs/ext4/ioctl.c
162 +++ b/fs/ext4/ioctl.c
163 @@ -176,6 +176,90 @@ setversion_out:
164 mnt_drop_write(filp->f_path.mnt);
165 return err;
167 + case EXT4_IOC_GET_TREEID:
168 + if (test_opt(inode->i_sb, TREEID))
169 + return put_user(ei->i_tree_id, (unsigned int __user*) arg);
170 + else
171 + return -EINVAL;
172 + case EXT4_IOC_SET_TREEID: {
173 + int err;
174 + struct ext4_iloc iloc;
175 + unsigned int treeid;
176 + handle_t *handle = NULL;
177 + struct inode *dir = IS_ROOT(filp->f_dentry) ?
178 + NULL : filp->f_dentry->d_parent->d_inode;
180 + if (!test_opt(inode->i_sb, TREEID))
181 + return -EINVAL;
183 + if (!is_owner_or_cap(inode))
184 + return -EACCES;
186 + if (get_user(treeid, (unsigned int __user *) arg))
187 + return -EFAULT;
189 + err = mnt_want_write(filp->f_path.mnt);
190 + if (err)
191 + return err;
192 + /*
193 + * Nested directory quota trees are not allowed!
194 + * We have to hold dir->i_mutex for the entire operation
195 + * in order to prevent races with ioctls on the same parent.
196 + */
197 + if (dir)
198 + mutex_lock(&dir->i_mutex);
199 + mutex_lock(&inode->i_mutex);
200 + err = -EXDEV;
201 + if (dir && (EXT4_I(dir)->i_tree_id != 0 &&
202 + treeid != EXT4_I(dir)->i_tree_id))
203 + goto settree_out;
205 + //// FIXME if S_ISDIR(inode->i_mode) then we have to check
206 + //// tree id for each entry from this directory.
208 + err = -EPERM;
209 + /* Is it quota file? Do not allow user to mess with it */
210 + if (IS_NOQUOTA(inode))
211 + goto settree_out;
213 + if (!capable(CAP_SYS_RESOURCE))
214 + goto settree_out;
216 + err = 0;
217 + if (ei->i_tree_id == treeid)
218 + /* Nothing to do */
219 + goto settree_out;
221 + /*
222 + Reserve blocks for quota transfer (user+group+tree)
223 + plus one block for inode
224 + */
225 + err = EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
226 + EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb) + 1;
227 + handle = ext4_journal_start(inode, err);
228 + if (IS_ERR(handle)) {
229 + err = PTR_ERR(handle);
230 + goto settree_out;
232 + if (IS_SYNC(inode))
233 + ext4_handle_sync(handle);
234 + err = ext4_reserve_inode_write(handle, inode, &iloc);
235 + if (err)
236 + goto settree_err;
238 + ////FIXME: tree quota transfer here
239 + ei->i_tree_id = treeid;
240 + inode->i_mtime = ext4_current_time(inode);
241 + err = ext4_mark_iloc_dirty(handle, inode, &iloc);
242 +settree_err:
243 + ext4_journal_stop(handle);
244 +settree_out:
245 + if (dir)
246 + mutex_unlock(&dir->i_mutex);
247 + mutex_unlock(&inode->i_mutex);
248 + mnt_drop_write(filp->f_path.mnt);
249 + return err;
251 #ifdef CONFIG_JBD2_DEBUG
252 case EXT4_IOC_WAIT_FOR_READONLY:
254 diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
255 index 17a17e1..87d446e 100644
256 --- a/fs/ext4/namei.c
257 +++ b/fs/ext4/namei.c
258 @@ -1086,6 +1086,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
259 return ERR_CAST(inode);
262 + /* DEBUG: Check directory tree id parent/child relationship */
263 + if ((test_opt(dir->i_sb, TREEID)) &&
264 + ext4_is_same_tree(dir, inode) == EXT4_TREEID_CROSS) {
265 + printk(KERN_WARNING "WARN: Bad tree hierarchy dev:%s "
266 + "dir{ino:%lu, tr:%u} ino{ino:%lu, tr:%u}\n",
267 + dir->i_sb->s_id, dir->i_ino,
268 + EXT4_I(dir)->i_tree_id,
269 + inode->i_ino, EXT4_I(inode)->i_tree_id);
272 return d_splice_alias(inode, dentry);
274 @@ -2314,7 +2323,14 @@ static int ext4_link(struct dentry *old_dentry,
276 if (inode->i_nlink == 0)
277 return -ENOENT;
279 + /*
280 + According to directory tree quota assumptions one inode
281 + can not belong to different quota trees.
282 + Unlike rename(), link() is only possible in the same tree.
283 + */
284 + if (test_opt(inode->i_sb, TREEID) &&
285 + ext4_is_same_tree(dir, inode) != EXT4_TREEID_SAME)
286 + return -EXDEV;
287 retry:
288 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
289 EXT4_INDEX_EXTRA_TRANS_BLOCKS);
290 @@ -2364,6 +2380,28 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
291 * in separate transaction */
292 if (new_dentry->d_inode)
293 vfs_dq_init(new_dentry->d_inode);
295 + /*
296 + Check for directory treeid hierarchy assumptions
297 + */
298 + //// FIXME: Seems that the i_nlink check is racy
299 + //// We have to take old_dentry->d_inode->i_mutex here. Is it possible?
300 + if (test_opt(old_dir->i_sb, TREEID)) {
301 + int same = ext4_is_same_tree(new_dir, old_dentry->d_inode);
302 + if ((S_ISDIR(old_dentry->d_inode->i_mode))) {
303 + if (same == EXT4_TREEID_CROSS)
304 + return -EXDEV;
305 + } else {
306 + /*
307 + * If the inode has more than one link then it is a bad
308 + * idea to move one of these entries between trees.
309 + */
310 + if (old_dentry->d_inode->i_nlink > 1 &&
311 + same != EXT4_TREEID_SAME)
312 + return -EXDEV;
316 handle = ext4_journal_start(old_dir, 2 *
317 EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
318 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
319 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
320 index 9df0d27..6b9e4f3 100644
321 --- a/fs/ext4/super.c
322 +++ b/fs/ext4/super.c
323 @@ -840,6 +840,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
324 seq_puts(seq, ",debug");
325 if (test_opt(sb, OLDALLOC))
326 seq_puts(seq, ",oldalloc");
327 + if (test_opt(sb, TREEID))
328 + seq_puts(seq, ",treeid");
329 #ifdef CONFIG_EXT4_FS_XATTR
330 if (test_opt(sb, XATTR_USER) &&
331 !(def_mount_opts & EXT4_DEFM_XATTR_USER))
332 @@ -1095,7 +1097,8 @@ enum {
333 Opt_usrquota, Opt_grpquota, Opt_i_version,
334 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
335 Opt_block_validity, Opt_noblock_validity,
336 - Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_akpm_lock_hack
337 + Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_akpm_lock_hack,
338 + Opt_treeid
341 static const match_table_t tokens = {
342 @@ -1159,6 +1162,7 @@ static const match_table_t tokens = {
343 {Opt_noblock_validity, "noblock_validity"},
344 {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
345 {Opt_journal_ioprio, "journal_ioprio=%u"},
346 + {Opt_treeid, "treeid"},
347 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
348 {Opt_auto_da_alloc, "auto_da_alloc"},
349 {Opt_noauto_da_alloc, "noauto_da_alloc"},
350 @@ -1587,6 +1591,26 @@ set_qf_format:
351 *journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
352 option);
353 break;
354 + case Opt_treeid:
355 + /* ///////////////////////////////////////////////
356 + * FIXME: Kill this after implementing lib2fs
357 + * INCOMPAT_TREEID feature support.
358 + */
359 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
360 + EXT4_FEATURE_INCOMPAT_TREEID))
361 + EXT4_SET_INCOMPAT_FEATURE(sb,
362 + EXT4_FEATURE_INCOMPAT_TREEID);
363 + ////////////////////////////////////////////////////
364 + if (!EXT4_HAS_INCOMPAT_FEATURE(sb,
365 + EXT4_FEATURE_INCOMPAT_TREEID)) {
366 + ext4_msg(sb, KERN_ERR, "treeid option specified"
367 + " with no directory treeid feature "
368 + "enabled. run tune2fs");
369 + return 0;
371 + set_opt(sbi->s_mount_opt, TREEID);
372 + break;
374 case Opt_noauto_da_alloc:
375 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
376 break;