/*
 * Copyright (c) 2003-2007 Erez Zadok
 * Copyright (c) 2003-2006 Charles P. Wright
 * Copyright (c) 2005-2007 Josef 'Jeff' Sipek
 * Copyright (c) 2005      Arun M. Krishnakumar
 * Copyright (c) 2004-2006 David P. Quigley
 * Copyright (c) 2003-2004 Mohammad Nayyer Zubair
 * Copyright (c) 2003      Puja Gupta
 * Copyright (c) 2003      Harikesavan Krishnan
 * Copyright (c) 2003-2007 Stony Brook University
 * Copyright (c) 2003-2007 The Research Foundation of SUNY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
21 #include <linux/dcache.h>
22 #include <linux/file.h>
23 #include <linux/list.h>
26 #include <linux/module.h>
27 #include <linux/mount.h>
28 #include <linux/namei.h>
29 #include <linux/page-flags.h>
30 #include <linux/pagemap.h>
31 #include <linux/poll.h>
32 #include <linux/security.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35 #include <linux/spinlock.h>
36 #include <linux/smp_lock.h>
37 #include <linux/statfs.h>
38 #include <linux/string.h>
39 #include <linux/vmalloc.h>
40 #include <linux/writeback.h>
41 #include <linux/buffer_head.h>
42 #include <linux/xattr.h>
43 #include <linux/fs_stack.h>
44 #include <linux/magic.h>
45 #include <linux/log2.h>
46 #include <linux/poison.h>
47 #include <linux/mman.h>
48 #include <linux/backing-dev.h>
50 #include <asm/system.h>
52 #include <linux/union_fs.h>
/* the file system name */
#define UNIONFS_NAME "unionfs"

/* unionfs root inode number */
#define UNIONFS_ROOT_INO 1

/* number of times we try to get a unique temporary file name */
#define GET_TMPNAM_MAX_RETRY 5

/* maximum number of branches we support, to avoid memory blowup */
#define UNIONFS_MAX_BRANCHES 128
/* Operations vectors defined in specific files. */
extern struct file_operations unionfs_main_fops;
extern struct file_operations unionfs_dir_fops;
extern struct inode_operations unionfs_main_iops;
extern struct inode_operations unionfs_dir_iops;
extern struct inode_operations unionfs_symlink_iops;
extern struct super_operations unionfs_sops;
extern struct dentry_operations unionfs_dops;
extern struct address_space_operations unionfs_aops;
/* How long should an entry be allowed to persist */
#define RDCACHE_JIFFIES (5*HZ)
/* file private data. */
struct unionfs_file_info {
	/*
	 * NOTE(review): the line numbers embedded in this copy jump from 80
	 * to 85 at this point, so member lines that preceded rdstate are
	 * absent here.  Restore them from the authoritative header before
	 * relying on this layout.
	 */
	struct unionfs_dir_state *rdstate;
	struct file **lower_files;
	int *saved_branch_ids; /* IDs of branches when file was opened */
};
90 /* unionfs inode data in memory */
91 struct unionfs_inode_info
{
96 /* Stuff for readdir over NFS. */
98 struct list_head readdircache
;
103 /* The lower inodes */
104 struct inode
**lower_inodes
;
105 /* to keep track of reads/writes for unlinks before closes */
108 struct inode vfs_inode
;
/* unionfs dentry data in memory */
struct unionfs_dentry_info {
	/*
	 * The semaphore is used to lock the dentry as soon as we get into a
	 * unionfs function from the VFS.  Our lock ordering is that children
	 * go before their parents.
	 *
	 * NOTE(review): the lock member this comment describes, and the other
	 * members that stood before lower_paths (original lines 118-123), are
	 * absent from this copy.  Restore them from the authoritative header.
	 */
	struct path *lower_paths;
};
127 /* These are the pointers to our various objects. */
128 struct unionfs_data
{
129 struct super_block
*sb
;
130 atomic_t open_files
; /* number of open files on branch */
132 int branch_id
; /* unique branch ID at re/mount time */
135 /* unionfs super-block data in memory */
136 struct unionfs_sb_info
{
142 * This rwsem is used to make sure that a branch management
144 * 1) will not begin before all currently in-flight operations
146 * 2) any new operations do not execute until the currently
147 * running branch management operation completes.
149 * The write_lock_owner records the PID of the task which grabbed
150 * the rw_sem for writing. If the same task also tries to grab the
151 * read lock, we allow it. This prevents a self-deadlock when
152 * branch-management is used on a pivot_root'ed union, because we
153 * have to ->lookup paths which belong to the same union.
155 struct rw_semaphore rwsem
;
156 pid_t write_lock_owner
; /* PID of rw_sem owner (write lock) */
157 int high_branch_id
; /* last unique branch ID given */
158 struct unionfs_data
*data
;
162 * structure for making the linked list of entries by readdir on left branch
163 * to compare with entries on right branch
165 struct filldir_node
{
166 struct list_head file_list
; /* list for directory entries */
167 char *name
; /* name entry */
168 int hash
; /* name hash */
169 int namelen
; /* name len since name is not 0 terminated */
172 * we can check for duplicate whiteouts and files in the same branch
173 * in order to return -EIO.
177 /* is this a whiteout entry? */
180 /* Inline name, so we don't need to separately kmalloc small ones */
181 char iname
[DNAME_INLINE_LEN_MIN
];
184 /* Directory hash table. */
185 struct unionfs_dir_state
{
186 unsigned int cookie
; /* the cookie, based off of rdversion */
187 unsigned int offset
; /* The entry we have returned. */
189 loff_t dirpos
; /* offset within the lower level directory */
190 int size
; /* How big is the hash table? */
191 int hashentries
; /* How many entries have been inserted? */
192 unsigned long access
;
194 /* This cache list is used when the inode keeps us around. */
195 struct list_head cache
;
196 struct list_head list
[0];
/* externs needed for fanout.h or sioq.h */
extern int unionfs_get_nlinks(const struct inode *inode);

/*
 * include miscellaneous macros
 * NOTE(review): the #include lines this comment introduced (original lines
 * 203-204) are absent from this copy; restore them here.
 */

/* externs for cache creation/deletion routines */
extern void unionfs_destroy_filldir_cache(void);
extern int unionfs_init_filldir_cache(void);
extern int unionfs_init_inode_cache(void);
extern void unionfs_destroy_inode_cache(void);
extern int unionfs_init_dentry_cache(void);
extern void unionfs_destroy_dentry_cache(void);
214 /* Initialize and free readdir-specific state. */
215 extern int init_rdstate(struct file
*file
);
216 extern struct unionfs_dir_state
*alloc_rdstate(struct inode
*inode
,
218 extern struct unionfs_dir_state
*find_rdstate(struct inode
*inode
,
220 extern void free_rdstate(struct unionfs_dir_state
*state
);
221 extern int add_filldir_node(struct unionfs_dir_state
*rdstate
,
222 const char *name
, int namelen
, int bindex
,
224 extern struct filldir_node
*find_filldir_node(struct unionfs_dir_state
*rdstate
,
225 const char *name
, int namelen
);
227 extern struct dentry
**alloc_new_dentries(int objs
);
228 extern struct unionfs_data
*alloc_new_data(int objs
);
/* We can only use 32-bits of offset for rdstate --- blech! */
#define DIREOF (0xfffff)
#define RDOFFBITS 20		/* This is the number of bits in DIREOF. */
#define MAXRDCOOKIE (0xfff)
234 /* Turn an rdstate into an offset. */
235 static inline off_t
rdstate2offset(struct unionfs_dir_state
*buf
)
239 tmp
= ((buf
->cookie
& MAXRDCOOKIE
) << RDOFFBITS
)
240 | (buf
->offset
& DIREOF
);
244 static inline void unionfs_read_lock(struct super_block
*sb
)
246 if (UNIONFS_SB(sb
)->write_lock_owner
&&
247 UNIONFS_SB(sb
)->write_lock_owner
== current
->pid
)
249 down_read(&UNIONFS_SB(sb
)->rwsem
);
251 static inline void unionfs_read_unlock(struct super_block
*sb
)
253 if (UNIONFS_SB(sb
)->write_lock_owner
&&
254 UNIONFS_SB(sb
)->write_lock_owner
== current
->pid
)
256 up_read(&UNIONFS_SB(sb
)->rwsem
);
258 static inline void unionfs_write_lock(struct super_block
*sb
)
260 down_write(&UNIONFS_SB(sb
)->rwsem
);
261 UNIONFS_SB(sb
)->write_lock_owner
= current
->pid
;
263 static inline void unionfs_write_unlock(struct super_block
*sb
)
265 up_write(&UNIONFS_SB(sb
)->rwsem
);
266 UNIONFS_SB(sb
)->write_lock_owner
= 0;
/*
 * Lock two dentries, d1 first and then d2, after possibly exchanging them.
 *
 * NOTE(review): the second parameter line, the condition guarding the
 * exchange and the two assignments completing it (original lines 270-272
 * and 274-276) are missing from this copy.  Only "tmp = d1" and the two
 * lock calls survive.  The address comparison below is an assumption that
 * gives every caller the same locking order -- confirm against the
 * authoritative header.
 */
static inline void unionfs_double_lock_dentry(struct dentry *d1,
					      struct dentry *d2)
{
	if (d2 < d1) {
		struct dentry *tmp = d1;

		d1 = d2;
		d2 = tmp;
	}
	unionfs_lock_dentry(d1);
	unionfs_lock_dentry(d2);
}
/* Dentry private data, branch-start bookkeeping and lower nameidata helpers. */
extern int new_dentry_private_data(struct dentry *dentry);
extern void free_dentry_private_data(struct dentry *dentry);
extern void update_bstart(struct dentry *dentry);
extern int init_lower_nd(struct nameidata *nd, unsigned int flags);
extern void release_lower_nd(struct nameidata *nd, int err);
/* replicates the directory structure up to given dentry in given branch */
extern struct dentry *create_parents(struct inode *dir, struct dentry *dentry,
				     const char *name, int bindex);
extern int make_dir_opaque(struct dentry *dir, int bindex);

extern int unionfs_partial_lookup(struct dentry *dentry);

/*
 * Pass an unionfs dentry and an index and it will try to create a whiteout.
 * On error, it will proceed to a branch to the left.
 */
extern int create_whiteout(struct dentry *dentry, int start);
306 /* copies a file from dbstart to newbindex branch */
307 extern int copyup_file(struct inode
*dir
, struct file
*file
, int bstart
,
308 int newbindex
, loff_t size
);
309 extern int copyup_named_file(struct inode
*dir
, struct file
*file
,
310 char *name
, int bstart
, int new_bindex
,
312 /* copies a dentry from dbstart to newbindex branch */
313 extern int copyup_dentry(struct inode
*dir
, struct dentry
*dentry
,
314 int bstart
, int new_bindex
, const char *name
,
315 int namelen
, struct file
**copyup_file
, loff_t len
);
316 /* helper functions for post-copyup actions */
317 extern void unionfs_postcopyup_setmnt(struct dentry
*dentry
);
318 extern void unionfs_postcopyup_release(struct dentry
*dentry
);
extern int remove_whiteouts(struct dentry *dentry,
			    struct dentry *lower_dentry, int bindex);

extern int do_delete_whiteouts(struct dentry *dentry, int bindex,
			       struct unionfs_dir_state *namelist);

/* Is this directory empty: 0 if it is empty, -ENOTEMPTY if not. */
extern int check_empty(struct dentry *dentry,
		       struct unionfs_dir_state **namelist);
/* Delete whiteouts from this directory in branch bindex. */
extern int delete_whiteouts(struct dentry *dentry, int bindex,
			    struct unionfs_dir_state *namelist);

/* Re-lookup a lower dentry. */
extern int unionfs_refresh_lower_dentry(struct dentry *dentry, int bindex);

extern void unionfs_reinterpose(struct dentry *this_dentry);
extern struct super_block *unionfs_duplicate_super(struct super_block *sb);
/* Locking functions. */
extern int unionfs_setlk(struct file *file, int cmd, struct file_lock *fl);
extern int unionfs_getlk(struct file *file, struct file_lock *fl);
343 /* Common file operations. */
344 extern int unionfs_file_revalidate(struct file
*file
, bool willwrite
);
345 extern int unionfs_open(struct inode
*inode
, struct file
*file
);
346 extern int unionfs_file_release(struct inode
*inode
, struct file
*file
);
347 extern int unionfs_flush(struct file
*file
, fl_owner_t id
);
348 extern long unionfs_ioctl(struct file
*file
, unsigned int cmd
,
350 extern int unionfs_fsync(struct file
*file
, struct dentry
*dentry
,
352 extern int unionfs_fasync(int fd
, struct file
*file
, int flag
);
354 /* Inode operations */
355 extern int unionfs_rename(struct inode
*old_dir
, struct dentry
*old_dentry
,
356 struct inode
*new_dir
, struct dentry
*new_dentry
);
357 extern int unionfs_unlink(struct inode
*dir
, struct dentry
*dentry
);
358 extern int unionfs_rmdir(struct inode
*dir
, struct dentry
*dentry
);
360 extern bool __unionfs_d_revalidate_chain(struct dentry
*dentry
,
361 struct nameidata
*nd
, bool willwrite
);
362 extern bool is_newer_lower(const struct dentry
*dentry
);
/* The values for unionfs_interpose's flag. */
#define INTERPOSE_DEFAULT	0
#define INTERPOSE_LOOKUP	1
#define INTERPOSE_REVAL		2
#define INTERPOSE_REVAL_NEG	3
#define INTERPOSE_PARTIAL	4
/* flag is one of the INTERPOSE_* values. */
extern struct dentry *unionfs_interpose(struct dentry *this_dentry,
					struct super_block *sb, int flag);
#ifdef CONFIG_UNION_FS_XATTR
/* Extended attribute functions. */
extern void *unionfs_xattr_alloc(size_t size, size_t limit);
/*
 * NOTE(review): the body of this helper (original lines 378-380) is missing
 * from this copy; a plain kfree() of the buffer is assumed from the name --
 * confirm it matches how unionfs_xattr_alloc() allocates.
 */
static inline void unionfs_xattr_kfree(const void *p)
{
	kfree(p);
}
extern ssize_t unionfs_getxattr(struct dentry *dentry, const char *name,
				void *value, size_t size);
extern int unionfs_removexattr(struct dentry *dentry, const char *name);
/*
 * NOTE(review): the last line of this prototype (original line 385) is
 * missing from this copy; "size_t size" follows the VFS ->listxattr method
 * signature -- confirm against the definition.
 */
extern ssize_t unionfs_listxattr(struct dentry *dentry, char *list,
				 size_t size);
extern int unionfs_setxattr(struct dentry *dentry, const char *name,
			    const void *value, size_t size, int flags);
#endif /* CONFIG_UNION_FS_XATTR */
390 /* The root directory is unhashed, but isn't deleted. */
391 static inline int d_deleted(struct dentry
*d
)
393 return d_unhashed(d
) && (d
!= d
->d_sb
->s_root
);
/* Lookup worker; defined outside this header. */
struct dentry *unionfs_lookup_backend(struct dentry *dentry,
				      struct nameidata *nd, int lookupmode);
/* unionfs_permission, check if we should bypass error to facilitate copyup */
#define IS_COPYUP_ERR(err) ((err) == -EROFS)

/* unionfs_open, check if we need to copyup the file */
#define OPEN_WRITE_FLAGS (O_WRONLY | O_RDWR | O_APPEND)
#define IS_WRITE_FLAG(flag) ((flag) & OPEN_WRITE_FLAGS)
406 static inline int branchperms(const struct super_block
*sb
, int index
)
409 return UNIONFS_SB(sb
)->data
[index
].branchperms
;
412 static inline int set_branchperms(struct super_block
*sb
, int index
, int perms
)
415 UNIONFS_SB(sb
)->data
[index
].branchperms
= perms
;
419 /* Is this file on a read-only branch? */
420 static inline int is_robranch_super(const struct super_block
*sb
, int index
)
424 ret
= (!(branchperms(sb
, index
) & MAY_WRITE
)) ? -EROFS
: 0;
428 /* Is this file on a read-only branch? */
429 static inline int is_robranch_idx(const struct dentry
*dentry
, int index
)
431 struct super_block
*lower_sb
;
435 if (!(branchperms(dentry
->d_sb
, index
) & MAY_WRITE
))
438 lower_sb
= unionfs_lower_super_idx(dentry
->d_sb
, index
);
439 BUG_ON(lower_sb
== NULL
);
441 * test sb flags directly, not IS_RDONLY(lower_inode) because the
442 * lower_dentry could be a negative.
444 if (lower_sb
->s_flags
& MS_RDONLY
)
450 static inline int is_robranch(const struct dentry
*dentry
)
454 index
= UNIONFS_D(dentry
)->bstart
;
457 return is_robranch_idx(dentry
, index
);
/* What do we use for whiteouts. */
#define UNIONFS_WHPFX ".wh."
#define UNIONFS_WHLEN 4
/*
 * If a directory contains this file, then it is opaque.  We start with the
 * .wh. flag so that it is blocked by lookup.
 */
#define UNIONFS_DIR_OPAQUE_NAME "__dir_opaque"
#define UNIONFS_DIR_OPAQUE UNIONFS_WHPFX UNIONFS_DIR_OPAQUE_NAME
/* Whiteout-name and branch-option helpers; defined outside this header. */
extern char *alloc_whname(const char *name, int len);
extern int check_branch(struct nameidata *nd);
extern int __parse_branch_mode(const char *name);
extern int parse_branch_mode(const char *name);
479 * These two functions are here because it is kind of daft to copy and paste
480 * the contents of the two functions to 32+ places in unionfs
482 static inline struct dentry
*lock_parent(struct dentry
*dentry
)
484 struct dentry
*dir
= dget(dentry
->d_parent
);
486 mutex_lock(&dir
->d_inode
->i_mutex
);
490 static inline void unlock_dir(struct dentry
*dir
)
492 mutex_unlock(&dir
->d_inode
->i_mutex
);
/*
 * Take a reference on the lower vfsmount of "dentry" in branch "bindex" and
 * return it.  A NULL dentry or a negative bindex is a bug.
 *
 * NOTE(review): the second parameter line, the guard in front of pr_debug(),
 * the pr_debug() arguments and the return (original lines 497-498, 505, 507
 * and 510) are missing from this copy.  The parameter is taken from its use
 * in the body, the arguments from the format string, and the "if (!mnt)"
 * guard by analogy with unionfs_mntput() -- confirm against the
 * authoritative header.
 */
static inline struct vfsmount *unionfs_mntget(struct dentry *dentry,
					      int bindex)
{
	struct vfsmount *mnt;

	BUG_ON(!dentry || bindex < 0);

	mnt = mntget(unionfs_lower_mnt_idx(dentry, bindex));
#ifdef CONFIG_UNION_FS_DEBUG
	if (!mnt)
		pr_debug("unionfs: mntget: mnt=%p bindex=%d\n",
			 mnt, bindex);
#endif /* CONFIG_UNION_FS_DEBUG */

	return mnt;
}
/*
 * Drop a reference on the lower vfsmount of "dentry" in branch "bindex".
 * The call is ignored when dentry is NULL and bindex is negative; exactly
 * one of the two being invalid is a bug.
 *
 * NOTE(review): the statement under the first if and the final statement of
 * the function (original lines 518 and 533) are missing from this copy.  An
 * early return and mntput(mnt) are assumed from the function's name and from
 * mnt being otherwise unused -- confirm against the authoritative header.
 */
static inline void unionfs_mntput(struct dentry *dentry, int bindex)
{
	struct vfsmount *mnt;

	if (!dentry && bindex < 0)
		return;
	BUG_ON(!dentry || bindex < 0);

	mnt = unionfs_lower_mnt_idx(dentry, bindex);
#ifdef CONFIG_UNION_FS_DEBUG
	/*
	 * Directories can have NULL lower objects in between start/end, but
	 * NOT if at the start/end range.  We cannot verify that this dentry
	 * is a type=DIR, because it may already be a negative dentry.  But
	 * if dbstart is greater than dbend, we know that this couldn't have
	 * been a regular file: it had to have been a directory.
	 */
	if (!mnt && !(bindex > dbstart(dentry) && bindex < dbend(dentry)))
		pr_debug("unionfs: mntput: mnt=%p bindex=%d\n", mnt, bindex);
#endif /* CONFIG_UNION_FS_DEBUG */
	mntput(mnt);
}
/*
 * Debugging hooks.  With CONFIG_UNION_FS_DEBUG each hook forwards its
 * argument plus the call site (file, function, line) to a checker defined
 * outside this header; without it each hook expands to an empty statement.
 * __func__ is the standard C99 spelling of the gcc-specific __FUNCTION__.
 */
#ifdef CONFIG_UNION_FS_DEBUG

/* useful for tracking code reachability */
#define UDBG pr_debug("DBG:%s:%s:%d\n", __FILE__, __func__, __LINE__)

#define unionfs_check_inode(i)	__unionfs_check_inode((i),	\
	__FILE__, __func__, __LINE__)
#define unionfs_check_dentry(d)	__unionfs_check_dentry((d),	\
	__FILE__, __func__, __LINE__)
#define unionfs_check_file(f)	__unionfs_check_file((f),	\
	__FILE__, __func__, __LINE__)
#define unionfs_check_nd(n)	__unionfs_check_nd((n),		\
	__FILE__, __func__, __LINE__)
#define show_branch_counts(sb)	__show_branch_counts((sb),	\
	__FILE__, __func__, __LINE__)
#define show_inode_times(i)	__show_inode_times((i),		\
	__FILE__, __func__, __LINE__)
#define show_dinode_times(d)	__show_dinode_times((d),	\
	__FILE__, __func__, __LINE__)
#define show_inode_counts(i)	__show_inode_counts((i),	\
	__FILE__, __func__, __LINE__)

extern void __unionfs_check_inode(const struct inode *inode, const char *fname,
				  const char *fxn, int line);
/*
 * NOTE(review): the last line of this prototype (original line 562) is
 * missing from this copy; "int line" is taken from the four arguments the
 * unionfs_check_dentry() macro passes and from the sibling prototypes.
 */
extern void __unionfs_check_dentry(const struct dentry *dentry,
				   const char *fname, const char *fxn,
				   int line);
extern void __unionfs_check_file(const struct file *file,
				 const char *fname, const char *fxn, int line);
extern void __unionfs_check_nd(const struct nameidata *nd,
			       const char *fname, const char *fxn, int line);
extern void __show_branch_counts(const struct super_block *sb,
				 const char *file, const char *fxn, int line);
extern void __show_inode_times(const struct inode *inode,
			       const char *file, const char *fxn, int line);
extern void __show_dinode_times(const struct dentry *dentry,
				const char *file, const char *fxn, int line);
extern void __show_inode_counts(const struct inode *inode,
				const char *file, const char *fxn, int line);

#else /* not CONFIG_UNION_FS_DEBUG */

/* we leave useful hooks for these check functions throughout the code */
#define unionfs_check_inode(i)		do { } while (0)
#define unionfs_check_dentry(d)		do { } while (0)
#define unionfs_check_file(f)		do { } while (0)
#define unionfs_check_nd(n)		do { } while (0)
#define show_branch_counts(sb)		do { } while (0)
#define show_inode_times(i)		do { } while (0)
#define show_dinode_times(d)		do { } while (0)
#define show_inode_counts(i)		do { } while (0)

#endif /* not CONFIG_UNION_FS_DEBUG */
590 #endif /* not _UNION_H_ */