/*
 * fs/revoke.c - Invalidate all current open file descriptors of an inode.
 *
 * Copyright (C) 2006-2007  Pekka Enberg
 *
 * This file is released under the GPLv2.
 */
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/magic.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/module.h>
#include <linux/mount.h>
#include <linux/sched.h>
#include <linux/revoked_fs_i.h>
#include <linux/syscalls.h>
/*
 * fileset - an array of file pointers.
 * @files:	the array of file pointers
 * @nr:		number of elements in the array
 * @end:	index to next unused file pointer
 */
struct fileset {
	struct file	**files;
	unsigned long	nr;
	unsigned long	end;
};

/*
 * revoke_details - details of the revoke operation
 * @inode:		invalidate open file descriptors of this inode
 * @fset:		set of files that point to a revoked inode
 * @restore_start:	index to the first file pointer that is currently in
 *			use by a file descriptor but the real file has not
 *			been revoked yet
 */
struct revoke_details {
	struct inode	*inode;
	struct fileset	*fset;
	unsigned long	restore_start;
};

static struct kmem_cache *revokefs_inode_cache;
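
/*
 * The code below relies on struct revokefs_inode_info and the revokefs_i()
 * helper from <linux/revoked_fs_i.h>, which is not part of this file.  As a
 * rough sketch only (field names inferred from how they are used here, not
 * authoritative), the per-inode bookkeeping looks something like:
 *
 *	struct revokefs_inode_info {
 *		struct task_struct	*owner;	  // task whose descriptor was revoked
 *		struct file		*file;	  // original struct file, kept for restore
 *		unsigned int		fd;	  // descriptor number in the owner's table
 *		struct inode		vfs_inode;
 *	};
 *
 *	static inline struct revokefs_inode_info *revokefs_i(struct inode *inode)
 *	{
 *		return container_of(inode, struct revokefs_inode_info, vfs_inode);
 *	}
 */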
static inline bool fset_is_full(struct fileset *set)
{
	return set->nr == set->end;
}

static inline struct file *fset_get_filp(struct fileset *set)
{
	return set->files[set->end++];
}
static struct fileset *alloc_fset(unsigned long size)
{
	struct fileset *fset;

	fset = kzalloc(sizeof *fset, GFP_KERNEL);
	if (!fset)
		return NULL;

	fset->files = kcalloc(size, sizeof(struct file *), GFP_KERNEL);
	if (!fset->files) {
		kfree(fset);
		return NULL;
	}
	fset->nr = size;
	return fset;
}
static void free_fset(struct fileset *fset)
{
	int i;

	for (i = fset->end; i < fset->nr; i++) {
		if (fset->files[i])
			fput(fset->files[i]);
	}
	kfree(fset->files);
	kfree(fset);
}
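
/*
 * The fileset is filled up front with placeholder files and then consumed one
 * entry at a time; free_fset() only drops the entries that were never handed
 * out.  Illustrative usage pattern (error handling omitted, see revoke_fds()
 * and do_revoke() below for the real callers):
 *
 *	struct fileset *fset = __alloc_revoke_fset(nr_fds);
 *
 *	while (!fset_is_full(fset)) {
 *		struct file *placeholder = fset_get_filp(fset);
 *		// ... install placeholder in some descriptor table ...
 *	}
 *	free_fset(fset);	// puts only files[end..nr-1]
 */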
/*
 * Revoked file descriptors point to inodes in the revokefs filesystem.
 */
static struct vfsmount *revokefs_mnt;

static struct file *get_revoked_file(void)
{
	struct dentry *dentry;
	struct inode *inode;
	struct file *filp;
	struct qstr name;

	filp = get_empty_filp();
	if (!filp)
		goto err;

	inode = new_inode(revokefs_mnt->mnt_sb);
	if (!inode)
		goto err_filp;

	name.name = "revoked_file";
	name.len = strlen(name.name);
	dentry = d_alloc(revokefs_mnt->mnt_sb->s_root, &name);
	if (!dentry)
		goto err_inode;

	d_instantiate(dentry, inode);

	filp->f_mapping = inode->i_mapping;
	filp->f_dentry = dget(dentry);
	filp->f_vfsmnt = mntget(revokefs_mnt);
	filp->f_op = fops_get(inode->i_fop);

	return filp;

err_inode:
	iput(inode);
err_filp:
	put_filp(filp);
err:
	return NULL;
}
static inline bool can_revoke_file(struct file *file, struct inode *inode,
				   struct file *to_exclude)
{
	if (!file || file == to_exclude)
		return false;

	return file->f_dentry->d_inode == inode;
}
/*
 * LOCKING: task_lock(owner)
 */
static int revoke_fds(struct task_struct *owner, struct inode *inode,
		      struct file *to_exclude, struct fileset *fset)
{
	struct files_struct *files;
	struct fdtable *fdt;
	unsigned int fd;
	int err = 0;

	files = get_files_struct(owner);
	if (!files)
		goto out;

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);

	for (fd = 0; fd < fdt->max_fds; fd++) {
		struct revokefs_inode_info *info;
		struct file *filp, *new_filp;
		struct inode *new_inode;

		filp = fcheck_files(files, fd);
		if (!can_revoke_file(filp, inode, to_exclude))
			continue;

		if (!filp->f_op->revoke) {
			err = -EOPNOTSUPP;
			goto failed;
		}

		if (fset_is_full(fset)) {
			err = -ENOMEM;
			goto failed;
		}

		new_filp = fset_get_filp(fset);

		/*
		 * Replace original struct file pointer with a pointer to
		 * a 'revoked file.'  After this point, we don't need to worry
		 * about racing with sys_close or sys_dup.
		 */
		rcu_assign_pointer(fdt->fd[fd], new_filp);

		/*
		 * Hold on to task until we can take down the file and its
		 * mmaps.
		 */
		get_task_struct(owner);

		new_inode = new_filp->f_dentry->d_inode;
		make_revoked_inode(new_inode, inode->i_mode & S_IFMT);

		info = revokefs_i(new_inode);
		info->fd = fd;
		info->file = filp;
		info->owner = owner;
	}
failed:
	spin_unlock(&files->file_lock);
	put_files_struct(files);
out:
	return err;
}
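
/*
 * After revoke_fds(), every matching descriptor in the task's table points at
 * a revokefs placeholder instead of the original file:
 *
 *	fdt->fd[fd]  --->  placeholder (revokefs inode)
 *	                     revokefs_i(inode)->file  = original struct file
 *	                     revokefs_i(inode)->fd    = fd
 *	                     revokefs_i(inode)->owner = task
 *
 * That recorded triple is enough either to finish the revoke for good
 * (revoke_files()) or to roll the descriptor back on failure
 * (restore_files()).
 */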
static inline bool can_revoke_vma(struct vm_area_struct *vma,
				  struct inode *inode, struct file *to_exclude)
{
	struct file *file = vma->vm_file;

	if (vma->vm_flags & VM_REVOKED)
		return false;

	if (!file || file == to_exclude)
		return false;

	return file->f_path.dentry->d_inode == inode;
}
static int __revoke_break_cow(struct task_struct *tsk, struct inode *inode,
			      struct file *to_exclude)
{
	struct mm_struct *mm = tsk->mm;
	struct vm_area_struct *vma;
	int err = 0;

	down_read(&mm->mmap_sem);
	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		int ret;

		if (vma->vm_flags & VM_SHARED)
			continue;

		if (!can_revoke_vma(vma, inode, to_exclude))
			continue;

		ret = get_user_pages(tsk, tsk->mm, vma->vm_start,
				     vma_pages(vma), 1, 1, NULL, NULL);
		if (ret < 0) {
			err = ret;
			break;
		}

		unlink_file_vma(vma);
	}
	up_read(&mm->mmap_sem);
	return err;
}
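
/*
 * Calling get_user_pages() with write=1 and force=1 on a private mapping
 * forces copy-on-write up front: after the call, every page the task can
 * still touch is its own anonymous copy rather than a page-cache page of the
 * inode being revoked.  unlink_file_vma() then detaches the vma from the
 * mapping's i_mmap tree so later work on the mapping no longer sees it.
 */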
static int revoke_break_cow(struct fileset *fset, struct inode *inode,
			    struct file *to_exclude)
{
	unsigned long i;
	int err = 0;

	for (i = 0; i < fset->end; i++) {
		struct revokefs_inode_info *info;
		struct file *this;

		this = fset->files[i];
		info = revokefs_i(this->f_dentry->d_inode);

		err = __revoke_break_cow(info->owner, inode, to_exclude);
		if (err)
			break;
	}
	return err;
}
/*
 * LOCKING: down_write(&mm->mmap_sem)
 *	      -> spin_lock(&mapping->i_mmap_lock)
 */
static int revoke_vma(struct vm_area_struct *vma, struct zap_details *details)
{
	unsigned long restart_addr, start_addr, end_addr;
	int need_break;

	start_addr = vma->vm_start;
	end_addr = vma->vm_end;

again:
	restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr,
				      details);

	need_break = need_resched() || need_lockbreak(details->i_mmap_lock);
	if (need_break)
		goto out_need_break;

	if (restart_addr < end_addr) {
		start_addr = restart_addr;
		goto again;
	}
	vma->vm_flags |= VM_REVOKED;
	return 0;

out_need_break:
	spin_unlock(details->i_mmap_lock);
	cond_resched();
	spin_lock(details->i_mmap_lock);
	return -EAGAIN;
}
/*
 * LOCKING: spin_lock(&mapping->i_mmap_lock)
 */
static int revoke_mm(struct mm_struct *mm, struct address_space *mapping,
		     struct file *to_exclude)
{
	struct vm_area_struct *vma;
	struct zap_details details;
	int err = 0;

	details.i_mmap_lock = &mapping->i_mmap_lock;

	/*
	 * If ->mmap_sem is under contention, we continue scanning other
	 * mms and try again later.
	 */
	if (!down_write_trylock(&mm->mmap_sem)) {
		err = -EAGAIN;
		goto out;
	}

	for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
		if (!(vma->vm_flags & VM_SHARED))
			continue;

		if (!can_revoke_vma(vma, mapping->host, to_exclude))
			continue;

		err = revoke_vma(vma, &details);
		if (err)
			break;

		__unlink_file_vma(vma);
	}
	up_write(&mm->mmap_sem);
out:
	return err;
}
/*
 * LOCKING: spin_lock(&mapping->i_mmap_lock)
 */
static void revoke_mapping_tree(struct address_space *mapping,
				struct file *to_exclude)
{
	struct vm_area_struct *vma;
	struct prio_tree_iter iter;
	int try_again;

restart:
	try_again = 0;
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) {
		int err;

		if (!(vma->vm_flags & VM_SHARED))
			continue;

		if (likely(!can_revoke_vma(vma, mapping->host, to_exclude)))
			continue;

		err = revoke_mm(vma->vm_mm, mapping, to_exclude);
		if (err == -EAGAIN) {
			try_again = 1;
			continue;
		}
		if (err)
			break;
	}
	if (try_again)
		goto restart;
}
/*
 * LOCKING: spin_lock(&mapping->i_mmap_lock)
 */
static void revoke_mapping_list(struct address_space *mapping,
				struct file *to_exclude)
{
	struct vm_area_struct *vma;
	int try_again;

restart:
	try_again = 0;
	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) {
		int err;

		if (likely(!can_revoke_vma(vma, mapping->host, to_exclude)))
			continue;

		err = revoke_mm(vma->vm_mm, mapping, to_exclude);
		if (err == -EAGAIN) {
			try_again = 1;
			continue;
		}
		if (err)
			break;
	}
	if (try_again)
		goto restart;
}
static void revoke_mapping(struct address_space *mapping, struct file *to_exclude)
{
	spin_lock(&mapping->i_mmap_lock);
	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
		revoke_mapping_tree(mapping, to_exclude);
	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
		revoke_mapping_list(mapping, to_exclude);
	spin_unlock(&mapping->i_mmap_lock);
}
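
/*
 * Lock ordering note: revoke_mapping() holds mapping->i_mmap_lock while
 * revoke_mm() still needs each mm's mmap_sem, which is normally taken before
 * i_mmap_lock.  Using down_write_trylock() and bubbling -EAGAIN back up
 * avoids that inversion; vmas that were finished are marked VM_REVOKED, so
 * can_revoke_vma() skips them when the scan is retried.
 */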
static void restore_file(struct revokefs_inode_info *info)
{
	struct files_struct *files;
	struct fdtable *fdt;
	struct file *filp;

	files = get_files_struct(info->owner);
	if (!files)
		goto out;

	spin_lock(&files->file_lock);
	fdt = files_fdtable(files);

	filp = fdt->fd[info->fd];
	if (filp) {
		rcu_assign_pointer(fdt->fd[info->fd], info->file);
		FD_SET(info->fd, fdt->close_on_exec);
	}
	spin_unlock(&files->file_lock);
	put_files_struct(files);
out:
	put_task_struct(info->owner);
	info->owner = NULL;	/* To avoid double-restore. */
}
static void restore_files(struct revoke_details *details)
{
	unsigned long i;

	for (i = details->restore_start; i < details->fset->end; i++) {
		struct revokefs_inode_info *info;
		struct file *filp;

		filp = details->fset->files[i];
		info = revokefs_i(filp->f_dentry->d_inode);

		if (info->owner)
			restore_file(info);
	}
}
static int revoke_files(struct revoke_details *details)
{
	unsigned long i;
	int err = 0;

	for (i = 0; i < details->fset->end; i++) {
		struct revokefs_inode_info *info;
		struct file *this, *filp;
		struct inode *inode;

		this = details->fset->files[i];
		inode = this->f_dentry->d_inode;
		info = revokefs_i(inode);
		filp = info->file;

		/*
		 * Increase count before attempting to close file as
		 * a partially closed file can no longer be restored.
		 */
		details->restore_start++;

		err = filp->f_op->revoke(filp, inode->i_mapping);
		put_task_struct(info->owner);
		info->owner = NULL;	/* To avoid restoring closed file. */

		fput(filp);
		if (err)
			break;
	}
	return err;
}
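
/*
 * details->restore_start is advanced before ->revoke() is attempted, so the
 * window [restore_start, fset->end) always describes descriptors that were
 * taken over but whose original files are still intact.  restore_files()
 * walks exactly that window, which is why a partially closed file is never
 * handed back to its owner.
 */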
/*
 * Returns the maximum number of file descriptors pointing to an inode.
 *
 * LOCKING: read_lock(&tasklist_lock)
 */
static unsigned long inode_fds(struct inode *inode, struct file *to_exclude)
{
	struct task_struct *g, *p;
	unsigned long nr_fds = 0;

	do_each_thread(g, p) {
		struct files_struct *files;
		struct fdtable *fdt;
		unsigned int fd;

		files = get_files_struct(p);
		if (!files)
			continue;

		spin_lock(&files->file_lock);
		fdt = files_fdtable(files);
		for (fd = 0; fd < fdt->max_fds; fd++) {
			struct file *file;

			file = fcheck_files(files, fd);
			if (can_revoke_file(file, inode, to_exclude)) {
				nr_fds += fdt->max_fds;
				break;
			}
		}
		spin_unlock(&files->file_lock);
		put_files_struct(files);
	} while_each_thread(g, p);

	return nr_fds;
}
static struct fileset *__alloc_revoke_fset(unsigned long size)
{
	struct fileset *fset;
	unsigned long i;

	fset = alloc_fset(size);
	if (!fset)
		return NULL;

	for (i = 0; i < fset->nr; i++) {
		struct file *filp;

		filp = get_revoked_file();
		if (!filp)
			goto failed;

		fset->files[i] = filp;
	}
	return fset;

failed:
	free_fset(fset);
	return NULL;
}
static int do_revoke(struct inode *inode, struct file *to_exclude)
{
	struct revoke_details details;
	struct fileset *fset = NULL;
	struct task_struct *g, *p;
	unsigned long nr_fds;
	int err = 0;

	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) {
		err = -EPERM;
		goto out;
	}

retry:
	if (signal_pending(current)) {
		err = -ERESTARTSYS;
		goto out;
	}

	read_lock(&tasklist_lock);
	nr_fds = inode_fds(inode, to_exclude);
	read_unlock(&tasklist_lock);

	if (!nr_fds)
		goto out;

	/*
	 * Pre-allocate memory because the first pass is done under
	 * tasklist_lock.
	 */
	fset = __alloc_revoke_fset(nr_fds);
	if (!fset) {
		err = -ENOMEM;
		goto out;
	}

	read_lock(&tasklist_lock);

	/*
	 * If someone forked while we were allocating memory, try again.
	 */
	if (inode_fds(inode, to_exclude) > fset->nr) {
		read_unlock(&tasklist_lock);
		free_fset(fset);
		fset = NULL;
		goto retry;
	}

	details.inode = inode;
	details.fset = fset;
	details.restore_start = 0;

	/*
	 * First revoke the descriptors. After we are done, no one can start
	 * new operations on them.
	 */
	do_each_thread(g, p) {
		err = revoke_fds(p, inode, to_exclude, fset);
		if (err)
			goto exit_loop;
	} while_each_thread(g, p);
exit_loop:
	read_unlock(&tasklist_lock);

	if (err)
		goto out_restore;

	/*
	 * Take down shared memory mappings.
	 */
	revoke_mapping(inode->i_mapping, to_exclude);

	/*
	 * Break COW for private mappings.
	 */
	err = revoke_break_cow(fset, inode, to_exclude);
	if (err)
		goto out_restore;

	/*
	 * Now, revoke the files for good.
	 */
	err = revoke_files(&details);
	if (err)
		goto out_restore;

out_free:
	free_fset(fset);
out:
	return err;

out_restore:
	restore_files(&details);
	goto out_free;
}
asmlinkage long sys_revokeat(int dfd, const char __user *filename)
{
	struct nameidata nd;
	int err;

	err = __user_walk_fd(dfd, filename, 0, &nd);
	if (!err) {
		err = do_revoke(nd.dentry->d_inode, NULL);
		path_release(&nd);
	}

	return err;
}

asmlinkage long sys_frevoke(unsigned int fd)
{
	struct file *file = fget(fd);
	int err = -EBADF;

	if (file) {
		err = do_revoke(file->f_dentry->d_inode, file);
		fput(file);
	}

	return err;
}
int generic_file_revoke(struct file *file, struct address_space *new_mapping)
{
	struct address_space *mapping = file->f_mapping;
	int err;

	/*
	 * Flush pending writes.
	 */
	err = do_fsync(file, 1);
	if (err)
		goto out;

	file->f_mapping = new_mapping;

	/*
	 * Make pending reads fail.
	 */
	err = invalidate_inode_pages2(mapping);
out:
	return err;
}
EXPORT_SYMBOL(generic_file_revoke);
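
/*
 * generic_file_revoke() is intended to back the ->revoke file operation for
 * filesystems whose regular file data lives in the page cache.  A hedged
 * sketch of how a filesystem might opt in ("examplefs" and the other methods
 * shown are placeholders, not part of this file):
 *
 *	const struct file_operations examplefs_file_operations = {
 *		.read	= do_sync_read,
 *		.write	= do_sync_write,
 *		.mmap	= generic_file_mmap,
 *		.revoke	= generic_file_revoke,
 *	};
 *
 * revoke_fds() refuses descriptors whose f_op lacks ->revoke, so filesystems
 * that do not set the operation simply cannot be revoked.
 */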
/*
 * Filesystem for revoked files.
 */

static struct inode *revokefs_alloc_inode(struct super_block *sb)
{
	struct revokefs_inode_info *info;

	info = kmem_cache_alloc(revokefs_inode_cache, GFP_KERNEL);
	if (!info)
		return NULL;

	return &info->vfs_inode;
}

static void revokefs_destroy_inode(struct inode *inode)
{
	kmem_cache_free(revokefs_inode_cache, revokefs_i(inode));
}

static struct super_operations revokefs_super_ops = {
	.alloc_inode	= revokefs_alloc_inode,
	.destroy_inode	= revokefs_destroy_inode,
	.drop_inode	= generic_delete_inode,
};

static int revokefs_get_sb(struct file_system_type *fs_type,
			   int flags, const char *dev_name, void *data,
			   struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "revoke:", &revokefs_super_ops,
			     REVOKEFS_MAGIC, mnt);
}

static struct file_system_type revokefs_fs_type = {
	.name		= "revokefs",
	.get_sb		= revokefs_get_sb,
	.kill_sb	= kill_anon_super,
};

static void revokefs_init_inode(struct kmem_cache *cache, void *obj)
{
	struct revokefs_inode_info *info = obj;

	inode_init_once(&info->vfs_inode);
}
static int __init revokefs_init(void)
{
	int err = -ENOMEM;

	revokefs_inode_cache =
		kmem_cache_create("revokefs_inode_cache",
				  sizeof(struct revokefs_inode_info),
				  0,
				  (SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT |
				   SLAB_MEM_SPREAD), revokefs_init_inode);
	if (!revokefs_inode_cache)
		goto out;

	err = register_filesystem(&revokefs_fs_type);
	if (err)
		goto err_register;

	revokefs_mnt = kern_mount(&revokefs_fs_type);
	if (IS_ERR(revokefs_mnt)) {
		err = PTR_ERR(revokefs_mnt);
		goto err_mnt;
	}
	return 0;

err_mnt:
	unregister_filesystem(&revokefs_fs_type);
err_register:
	kmem_cache_destroy(revokefs_inode_cache);
out:
	return err;
}

late_initcall(revokefs_init);