Add linux-next specific files for 20110716
[linux-2.6/next.git] / fs / namei.c
blobf6d52d26281b779604757ca3491a4c60b35a755f
1 /*
2 * linux/fs/namei.c
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
7 /*
8 * Some corrections by tytso.
9 */
11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12 * lookup logic.
14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
17 #include <linux/init.h>
18 #include <linux/module.h>
19 #include <linux/slab.h>
20 #include <linux/fs.h>
21 #include <linux/namei.h>
22 #include <linux/pagemap.h>
23 #include <linux/fsnotify.h>
24 #include <linux/personality.h>
25 #include <linux/security.h>
26 #include <linux/ima.h>
27 #include <linux/syscalls.h>
28 #include <linux/mount.h>
29 #include <linux/audit.h>
30 #include <linux/capability.h>
31 #include <linux/file.h>
32 #include <linux/fcntl.h>
33 #include <linux/device_cgroup.h>
34 #include <linux/fs_struct.h>
35 #include <asm/uaccess.h>
37 #include "internal.h"
39 /* [Feb-1997 T. Schoebel-Theuer]
40 * Fundamental changes in the pathname lookup mechanisms (namei)
41 * were necessary because of omirr. The reason is that omirr needs
42 * to know the _real_ pathname, not the user-supplied one, in case
43 * of symlinks (and also when transname replacements occur).
45 * The new code replaces the old recursive symlink resolution with
46 * an iterative one (in case of non-nested symlink chains). It does
47 * this with calls to <fs>_follow_link().
48 * As a side effect, dir_namei(), _namei() and follow_link() are now
49 * replaced with a single function lookup_dentry() that can handle all
50 * the special cases of the former code.
52 * With the new dcache, the pathname is stored at each inode, at least as
53 * long as the refcount of the inode is positive. As a side effect, the
54 * size of the dcache depends on the inode cache and thus is dynamic.
56 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
57 * resolution to correspond with current state of the code.
59 * Note that the symlink resolution is not *completely* iterative.
60 * There is still a significant amount of tail- and mid- recursion in
61 * the algorithm. Also, note that <fs>_readlink() is not used in
62 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
63 * may return different results than <fs>_follow_link(). Many virtual
64 * filesystems (including /proc) exhibit this behavior.
67 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
68 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
69 * and the name already exists in form of a symlink, try to create the new
70 * name indicated by the symlink. The old code always complained that the
71 * name already exists, due to not following the symlink even if its target
72 * is nonexistent. The new semantics affects also mknod() and link() when
73 * the name is a symlink pointing to a non-existent name.
75 * I don't know which semantics is the right one, since I have no access
76 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
77 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
78 * "old" one. Personally, I think the new semantics is much more logical.
79 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
80 * file does succeed in both HP-UX and SunOs, but not in Solaris
81 * and in the old Linux semantics.
84 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
85 * semantics. See the comments in "open_namei" and "do_link" below.
87 * [10-Sep-98 Alan Modra] Another symlink change.
90 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
91 * inside the path - always follow.
92 * in the last component in creation/removal/renaming - never follow.
93 * if LOOKUP_FOLLOW passed - follow.
94 * if the pathname has trailing slashes - follow.
95 * otherwise - don't follow.
96 * (applied in that order).
98 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
99 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
100 * During the 2.4 we need to fix the userland stuff depending on it -
101 * hopefully we will be able to get rid of that wart in 2.5. So far only
102 * XEmacs seems to be relying on it...
105 * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland)
106 * implemented. Let's see if raised priority of ->s_vfs_rename_mutex gives
107 * any extra contention...
110 /* In order to reduce some races, while at the same time doing additional
111 * checking and hopefully speeding things up, we copy filenames to the
112 * kernel data space before using them..
114 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
115 * PATH_MAX includes the nul terminator --RR.
117 static int do_getname(const char __user *filename, char *page)
119 int retval;
120 unsigned long len = PATH_MAX;
122 if (!segment_eq(get_fs(), KERNEL_DS)) {
123 if ((unsigned long) filename >= TASK_SIZE)
124 return -EFAULT;
125 if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
126 len = TASK_SIZE - (unsigned long) filename;
129 retval = strncpy_from_user(page, filename, len);
130 if (retval > 0) {
131 if (retval < len)
132 return 0;
133 return -ENAMETOOLONG;
134 } else if (!retval)
135 retval = -ENOENT;
136 return retval;
139 static char *getname_flags(const char __user * filename, int flags)
141 char *tmp, *result;
143 result = ERR_PTR(-ENOMEM);
144 tmp = __getname();
145 if (tmp) {
146 int retval = do_getname(filename, tmp);
148 result = tmp;
149 if (retval < 0) {
150 if (retval != -ENOENT || !(flags & LOOKUP_EMPTY)) {
151 __putname(tmp);
152 result = ERR_PTR(retval);
156 audit_getname(result);
157 return result;
160 char *getname(const char __user * filename)
162 return getname_flags(filename, 0);
#ifdef CONFIG_AUDITSYSCALL
/*
 * Release a name buffer.  With a dummy audit context the buffer goes
 * straight to __putname(); otherwise it is passed to audit_putname().
 */
void putname(const char *name)
{
	if (likely(audit_dummy_context())) {
		__putname(name);
		return;
	}
	audit_putname(name);
}
EXPORT_SYMBOL(putname);
#endif
177 * This does basic POSIX ACL permission checking
179 static int acl_permission_check(struct inode *inode, int mask)
181 int (*check_acl)(struct inode *inode, int mask);
182 unsigned int mode = inode->i_mode;
184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK;
186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms;
189 if (current_fsuid() == inode->i_uid)
190 mode >>= 6;
191 else {
192 check_acl = inode->i_op->check_acl;
193 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
194 int error = check_acl(inode, mask);
195 if (error != -EAGAIN)
196 return error;
199 if (in_group_p(inode->i_gid))
200 mode >>= 3;
203 other_perms:
205 * If the DACs are ok we don't need any capability check.
207 if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
208 return 0;
209 return -EACCES;
213 * generic_permission - check for access rights on a Posix-like filesystem
214 * @inode: inode to check access rights for
215 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
216 * @flags: IPERM_FLAG_ flags.
218 * Used to check for read/write/execute permissions on a file.
219 * We use "fsuid" for this, letting us set arbitrary permissions
220 * for filesystem access without changing the "normal" uids which
221 * are used for other things.
223 * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
224 * request cannot be satisfied (eg. requires blocking or too much complexity).
225 * It would then be called again in ref-walk mode.
227 int generic_permission(struct inode *inode, int mask)
229 int ret;
232 * Do the basic POSIX ACL permission checks.
234 ret = acl_permission_check(inode, mask);
235 if (ret != -EACCES)
236 return ret;
238 if (S_ISDIR(inode->i_mode)) {
239 /* DACs are overridable for directories */
240 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
241 return 0;
242 if (!(mask & MAY_WRITE))
243 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
244 return 0;
245 return -EACCES;
248 * Read/write DACs are always overridable.
249 * Executable DACs are overridable when there is
250 * at least one exec bit set.
252 if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
253 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
254 return 0;
257 * Searching includes executable on directories, else just read.
259 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
260 if (mask == MAY_READ)
261 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
262 return 0;
264 return -EACCES;
268 * inode_permission - check for access rights to a given inode
269 * @inode: inode to check permission on
270 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
272 * Used to check for read/write/execute permissions on an inode.
273 * We use "fsuid" for this, letting us set arbitrary permissions
274 * for filesystem access without changing the "normal" uids which
275 * are used for other things.
277 int inode_permission(struct inode *inode, int mask)
279 int retval;
281 if (mask & MAY_WRITE) {
282 umode_t mode = inode->i_mode;
285 * Nobody gets write access to a read-only fs.
287 if (IS_RDONLY(inode) &&
288 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
289 return -EROFS;
292 * Nobody gets write access to an immutable file.
294 if (IS_IMMUTABLE(inode))
295 return -EACCES;
298 if (inode->i_op->permission)
299 retval = inode->i_op->permission(inode, mask);
300 else
301 retval = generic_permission(inode, mask);
303 if (retval)
304 return retval;
306 retval = devcgroup_inode_permission(inode, mask);
307 if (retval)
308 return retval;
310 return security_inode_permission(inode, mask);
314 * path_get - get a reference to a path
315 * @path: path to get the reference to
317 * Given a path increment the reference count to the dentry and the vfsmount.
319 void path_get(struct path *path)
321 mntget(path->mnt);
322 dget(path->dentry);
324 EXPORT_SYMBOL(path_get);
327 * path_put - put a reference to a path
328 * @path: path to put the reference to
330 * Given a path decrement the reference count to the dentry and the vfsmount.
332 void path_put(struct path *path)
334 dput(path->dentry);
335 mntput(path->mnt);
337 EXPORT_SYMBOL(path_put);
/*
 * Path walking has 2 modes, rcu-walk and ref-walk (see
 * Documentation/filesystems/path-lookup.txt).  In situations when we can't
 * continue in RCU mode, we attempt to drop out of rcu-walk mode and grab
 * normal reference counts on dentries and vfsmounts to transition to ref-walk
 * mode.  Refcounts are grabbed at the last known good point before rcu-walk
 * got stuck, so ref-walk may continue from there. If this is not successful
 * (eg. a seqcount has changed), then failure is returned and it's up to the
 * caller to restart the path walk from the beginning in ref-walk mode.
 */
351 * unlazy_walk - try to switch to ref-walk mode.
352 * @nd: nameidata pathwalk data
353 * @dentry: child of nd->path.dentry or NULL
354 * Returns: 0 on success, -ECHILD on failure
356 * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry
357 * for ref-walk mode. @dentry must be a path found by a do_lookup call on
358 * @nd or NULL. Must be called from rcu-walk context.
360 static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
362 struct fs_struct *fs = current->fs;
363 struct dentry *parent = nd->path.dentry;
364 int want_root = 0;
366 BUG_ON(!(nd->flags & LOOKUP_RCU));
367 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
368 want_root = 1;
369 spin_lock(&fs->lock);
370 if (nd->root.mnt != fs->root.mnt ||
371 nd->root.dentry != fs->root.dentry)
372 goto err_root;
374 spin_lock(&parent->d_lock);
375 if (!dentry) {
376 if (!__d_rcu_to_refcount(parent, nd->seq))
377 goto err_parent;
378 BUG_ON(nd->inode != parent->d_inode);
379 } else {
380 if (dentry->d_parent != parent)
381 goto err_parent;
382 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
383 if (!__d_rcu_to_refcount(dentry, nd->seq))
384 goto err_child;
386 * If the sequence check on the child dentry passed, then
387 * the child has not been removed from its parent. This
388 * means the parent dentry must be valid and able to take
389 * a reference at this point.
391 BUG_ON(!IS_ROOT(dentry) && dentry->d_parent != parent);
392 BUG_ON(!parent->d_count);
393 parent->d_count++;
394 spin_unlock(&dentry->d_lock);
396 spin_unlock(&parent->d_lock);
397 if (want_root) {
398 path_get(&nd->root);
399 spin_unlock(&fs->lock);
401 mntget(nd->path.mnt);
403 rcu_read_unlock();
404 br_read_unlock(vfsmount_lock);
405 nd->flags &= ~LOOKUP_RCU;
406 return 0;
408 err_child:
409 spin_unlock(&dentry->d_lock);
410 err_parent:
411 spin_unlock(&parent->d_lock);
412 err_root:
413 if (want_root)
414 spin_unlock(&fs->lock);
415 return -ECHILD;
419 * release_open_intent - free up open intent resources
420 * @nd: pointer to nameidata
422 void release_open_intent(struct nameidata *nd)
424 struct file *file = nd->intent.open.file;
426 if (file && !IS_ERR(file)) {
427 if (file->f_path.dentry == NULL)
428 put_filp(file);
429 else
430 fput(file);
434 static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
436 return dentry->d_op->d_revalidate(dentry, nd);
440 * complete_walk - successful completion of path walk
441 * @nd: pointer nameidata
443 * If we had been in RCU mode, drop out of it and legitimize nd->path.
444 * Revalidate the final result, unless we'd already done that during
445 * the path walk or the filesystem doesn't ask for it. Return 0 on
446 * success, -error on failure. In case of failure caller does not
447 * need to drop nd->path.
449 static int complete_walk(struct nameidata *nd)
451 struct dentry *dentry = nd->path.dentry;
452 int status;
454 if (nd->flags & LOOKUP_RCU) {
455 nd->flags &= ~LOOKUP_RCU;
456 if (!(nd->flags & LOOKUP_ROOT))
457 nd->root.mnt = NULL;
458 spin_lock(&dentry->d_lock);
459 if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) {
460 spin_unlock(&dentry->d_lock);
461 rcu_read_unlock();
462 br_read_unlock(vfsmount_lock);
463 return -ECHILD;
465 BUG_ON(nd->inode != dentry->d_inode);
466 spin_unlock(&dentry->d_lock);
467 mntget(nd->path.mnt);
468 rcu_read_unlock();
469 br_read_unlock(vfsmount_lock);
472 if (likely(!(nd->flags & LOOKUP_JUMPED)))
473 return 0;
475 if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
476 return 0;
478 if (likely(!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)))
479 return 0;
481 /* Note: we do not d_invalidate() */
482 status = d_revalidate(dentry, nd);
483 if (status > 0)
484 return 0;
486 if (!status)
487 status = -ESTALE;
489 path_put(&nd->path);
490 return status;
493 static __always_inline void set_root(struct nameidata *nd)
495 if (!nd->root.mnt)
496 get_fs_root(current->fs, &nd->root);
499 static int link_path_walk(const char *, struct nameidata *);
501 static __always_inline void set_root_rcu(struct nameidata *nd)
503 if (!nd->root.mnt) {
504 struct fs_struct *fs = current->fs;
505 unsigned seq;
507 do {
508 seq = read_seqcount_begin(&fs->seq);
509 nd->root = fs->root;
510 nd->seq = __read_seqcount_begin(&nd->root.dentry->d_seq);
511 } while (read_seqcount_retry(&fs->seq, seq));
515 static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
517 int ret;
519 if (IS_ERR(link))
520 goto fail;
522 if (*link == '/') {
523 set_root(nd);
524 path_put(&nd->path);
525 nd->path = nd->root;
526 path_get(&nd->root);
527 nd->flags |= LOOKUP_JUMPED;
529 nd->inode = nd->path.dentry->d_inode;
531 ret = link_path_walk(link, nd);
532 return ret;
533 fail:
534 path_put(&nd->path);
535 return PTR_ERR(link);
538 static void path_put_conditional(struct path *path, struct nameidata *nd)
540 dput(path->dentry);
541 if (path->mnt != nd->path.mnt)
542 mntput(path->mnt);
545 static inline void path_to_nameidata(const struct path *path,
546 struct nameidata *nd)
548 if (!(nd->flags & LOOKUP_RCU)) {
549 dput(nd->path.dentry);
550 if (nd->path.mnt != path->mnt)
551 mntput(nd->path.mnt);
553 nd->path.mnt = path->mnt;
554 nd->path.dentry = path->dentry;
557 static inline void put_link(struct nameidata *nd, struct path *link, void *cookie)
559 struct inode *inode = link->dentry->d_inode;
560 if (!IS_ERR(cookie) && inode->i_op->put_link)
561 inode->i_op->put_link(link->dentry, nd, cookie);
562 path_put(link);
565 static __always_inline int
566 follow_link(struct path *link, struct nameidata *nd, void **p)
568 int error;
569 struct dentry *dentry = link->dentry;
571 BUG_ON(nd->flags & LOOKUP_RCU);
573 if (link->mnt == nd->path.mnt)
574 mntget(link->mnt);
576 if (unlikely(current->total_link_count >= 40)) {
577 *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */
578 path_put(&nd->path);
579 return -ELOOP;
581 cond_resched();
582 current->total_link_count++;
584 touch_atime(link->mnt, dentry);
585 nd_set_link(nd, NULL);
587 error = security_inode_follow_link(link->dentry, nd);
588 if (error) {
589 *p = ERR_PTR(error); /* no ->put_link(), please */
590 path_put(&nd->path);
591 return error;
594 nd->last_type = LAST_BIND;
595 *p = dentry->d_inode->i_op->follow_link(dentry, nd);
596 error = PTR_ERR(*p);
597 if (!IS_ERR(*p)) {
598 char *s = nd_get_link(nd);
599 error = 0;
600 if (s)
601 error = __vfs_follow_link(nd, s);
602 else if (nd->last_type == LAST_BIND) {
603 nd->flags |= LOOKUP_JUMPED;
604 nd->inode = nd->path.dentry->d_inode;
605 if (nd->inode->i_op->follow_link) {
606 /* stepped on a _really_ weird one */
607 path_put(&nd->path);
608 error = -ELOOP;
612 return error;
615 static int follow_up_rcu(struct path *path)
617 struct vfsmount *parent;
618 struct dentry *mountpoint;
620 parent = path->mnt->mnt_parent;
621 if (parent == path->mnt)
622 return 0;
623 mountpoint = path->mnt->mnt_mountpoint;
624 path->dentry = mountpoint;
625 path->mnt = parent;
626 return 1;
629 int follow_up(struct path *path)
631 struct vfsmount *parent;
632 struct dentry *mountpoint;
634 br_read_lock(vfsmount_lock);
635 parent = path->mnt->mnt_parent;
636 if (parent == path->mnt) {
637 br_read_unlock(vfsmount_lock);
638 return 0;
640 mntget(parent);
641 mountpoint = dget(path->mnt->mnt_mountpoint);
642 br_read_unlock(vfsmount_lock);
643 dput(path->dentry);
644 path->dentry = mountpoint;
645 mntput(path->mnt);
646 path->mnt = parent;
647 return 1;
651 * Perform an automount
652 * - return -EISDIR to tell follow_managed() to stop and return the path we
653 * were called with.
655 static int follow_automount(struct path *path, unsigned flags,
656 bool *need_mntput)
658 struct vfsmount *mnt;
659 int err;
661 if (!path->dentry->d_op || !path->dentry->d_op->d_automount)
662 return -EREMOTE;
664 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
665 * and this is the terminal part of the path.
667 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE))
668 return -EISDIR; /* we actually want to stop here */
670 /* We want to mount if someone is trying to open/create a file of any
671 * type under the mountpoint, wants to traverse through the mountpoint
672 * or wants to open the mounted directory.
674 * We don't want to mount if someone's just doing a stat and they've
675 * set AT_SYMLINK_NOFOLLOW - unless they're stat'ing a directory and
676 * appended a '/' to the name.
678 if (!(flags & LOOKUP_FOLLOW) &&
679 !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY |
680 LOOKUP_OPEN | LOOKUP_CREATE)))
681 return -EISDIR;
683 current->total_link_count++;
684 if (current->total_link_count >= 40)
685 return -ELOOP;
687 mnt = path->dentry->d_op->d_automount(path);
688 if (IS_ERR(mnt)) {
690 * The filesystem is allowed to return -EISDIR here to indicate
691 * it doesn't want to automount. For instance, autofs would do
692 * this so that its userspace daemon can mount on this dentry.
694 * However, we can only permit this if it's a terminal point in
695 * the path being looked up; if it wasn't then the remainder of
696 * the path is inaccessible and we should say so.
698 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE))
699 return -EREMOTE;
700 return PTR_ERR(mnt);
703 if (!mnt) /* mount collision */
704 return 0;
706 if (!*need_mntput) {
707 /* lock_mount() may release path->mnt on error */
708 mntget(path->mnt);
709 *need_mntput = true;
711 err = finish_automount(mnt, path);
713 switch (err) {
714 case -EBUSY:
715 /* Someone else made a mount here whilst we were busy */
716 return 0;
717 case 0:
718 path_put(path);
719 path->mnt = mnt;
720 path->dentry = dget(mnt->mnt_root);
721 return 0;
722 default:
723 return err;
729 * Handle a dentry that is managed in some way.
730 * - Flagged for transit management (autofs)
731 * - Flagged as mountpoint
732 * - Flagged as automount point
734 * This may only be called in refwalk mode.
736 * Serialization is taken care of in namespace.c
738 static int follow_managed(struct path *path, unsigned flags)
740 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
741 unsigned managed;
742 bool need_mntput = false;
743 int ret = 0;
745 /* Given that we're not holding a lock here, we retain the value in a
746 * local variable for each dentry as we look at it so that we don't see
747 * the components of that value change under us */
748 while (managed = ACCESS_ONCE(path->dentry->d_flags),
749 managed &= DCACHE_MANAGED_DENTRY,
750 unlikely(managed != 0)) {
751 /* Allow the filesystem to manage the transit without i_mutex
752 * being held. */
753 if (managed & DCACHE_MANAGE_TRANSIT) {
754 BUG_ON(!path->dentry->d_op);
755 BUG_ON(!path->dentry->d_op->d_manage);
756 ret = path->dentry->d_op->d_manage(path->dentry, false);
757 if (ret < 0)
758 break;
761 /* Transit to a mounted filesystem. */
762 if (managed & DCACHE_MOUNTED) {
763 struct vfsmount *mounted = lookup_mnt(path);
764 if (mounted) {
765 dput(path->dentry);
766 if (need_mntput)
767 mntput(path->mnt);
768 path->mnt = mounted;
769 path->dentry = dget(mounted->mnt_root);
770 need_mntput = true;
771 continue;
774 /* Something is mounted on this dentry in another
775 * namespace and/or whatever was mounted there in this
776 * namespace got unmounted before we managed to get the
777 * vfsmount_lock */
780 /* Handle an automount point */
781 if (managed & DCACHE_NEED_AUTOMOUNT) {
782 ret = follow_automount(path, flags, &need_mntput);
783 if (ret < 0)
784 break;
785 continue;
788 /* We didn't change the current path point */
789 break;
792 if (need_mntput && path->mnt == mnt)
793 mntput(path->mnt);
794 if (ret == -EISDIR)
795 ret = 0;
796 return ret;
799 int follow_down_one(struct path *path)
801 struct vfsmount *mounted;
803 mounted = lookup_mnt(path);
804 if (mounted) {
805 dput(path->dentry);
806 mntput(path->mnt);
807 path->mnt = mounted;
808 path->dentry = dget(mounted->mnt_root);
809 return 1;
811 return 0;
814 static inline bool managed_dentry_might_block(struct dentry *dentry)
816 return (dentry->d_flags & DCACHE_MANAGE_TRANSIT &&
817 dentry->d_op->d_manage(dentry, true) < 0);
821 * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if
822 * we meet a managed dentry that would need blocking.
824 static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
825 struct inode **inode)
827 for (;;) {
828 struct vfsmount *mounted;
830 * Don't forget we might have a non-mountpoint managed dentry
831 * that wants to block transit.
833 *inode = path->dentry->d_inode;
834 if (unlikely(managed_dentry_might_block(path->dentry)))
835 return false;
837 if (!d_mountpoint(path->dentry))
838 break;
840 mounted = __lookup_mnt(path->mnt, path->dentry, 1);
841 if (!mounted)
842 break;
843 path->mnt = mounted;
844 path->dentry = mounted->mnt_root;
845 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
847 return true;
850 static void follow_mount_rcu(struct nameidata *nd)
852 while (d_mountpoint(nd->path.dentry)) {
853 struct vfsmount *mounted;
854 mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
855 if (!mounted)
856 break;
857 nd->path.mnt = mounted;
858 nd->path.dentry = mounted->mnt_root;
859 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
863 static int follow_dotdot_rcu(struct nameidata *nd)
865 set_root_rcu(nd);
867 while (1) {
868 if (nd->path.dentry == nd->root.dentry &&
869 nd->path.mnt == nd->root.mnt) {
870 break;
872 if (nd->path.dentry != nd->path.mnt->mnt_root) {
873 struct dentry *old = nd->path.dentry;
874 struct dentry *parent = old->d_parent;
875 unsigned seq;
877 seq = read_seqcount_begin(&parent->d_seq);
878 if (read_seqcount_retry(&old->d_seq, nd->seq))
879 goto failed;
880 nd->path.dentry = parent;
881 nd->seq = seq;
882 break;
884 if (!follow_up_rcu(&nd->path))
885 break;
886 nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
888 follow_mount_rcu(nd);
889 nd->inode = nd->path.dentry->d_inode;
890 return 0;
892 failed:
893 nd->flags &= ~LOOKUP_RCU;
894 if (!(nd->flags & LOOKUP_ROOT))
895 nd->root.mnt = NULL;
896 rcu_read_unlock();
897 br_read_unlock(vfsmount_lock);
898 return -ECHILD;
902 * Follow down to the covering mount currently visible to userspace. At each
903 * point, the filesystem owning that dentry may be queried as to whether the
904 * caller is permitted to proceed or not.
906 int follow_down(struct path *path)
908 unsigned managed;
909 int ret;
911 while (managed = ACCESS_ONCE(path->dentry->d_flags),
912 unlikely(managed & DCACHE_MANAGED_DENTRY)) {
913 /* Allow the filesystem to manage the transit without i_mutex
914 * being held.
916 * We indicate to the filesystem if someone is trying to mount
917 * something here. This gives autofs the chance to deny anyone
918 * other than its daemon the right to mount on its
919 * superstructure.
921 * The filesystem may sleep at this point.
923 if (managed & DCACHE_MANAGE_TRANSIT) {
924 BUG_ON(!path->dentry->d_op);
925 BUG_ON(!path->dentry->d_op->d_manage);
926 ret = path->dentry->d_op->d_manage(
927 path->dentry, false);
928 if (ret < 0)
929 return ret == -EISDIR ? 0 : ret;
932 /* Transit to a mounted filesystem. */
933 if (managed & DCACHE_MOUNTED) {
934 struct vfsmount *mounted = lookup_mnt(path);
935 if (!mounted)
936 break;
937 dput(path->dentry);
938 mntput(path->mnt);
939 path->mnt = mounted;
940 path->dentry = dget(mounted->mnt_root);
941 continue;
944 /* Don't handle automount points here */
945 break;
947 return 0;
951 * Skip to top of mountpoint pile in refwalk mode for follow_dotdot()
953 static void follow_mount(struct path *path)
955 while (d_mountpoint(path->dentry)) {
956 struct vfsmount *mounted = lookup_mnt(path);
957 if (!mounted)
958 break;
959 dput(path->dentry);
960 mntput(path->mnt);
961 path->mnt = mounted;
962 path->dentry = dget(mounted->mnt_root);
966 static void follow_dotdot(struct nameidata *nd)
968 set_root(nd);
970 while(1) {
971 struct dentry *old = nd->path.dentry;
973 if (nd->path.dentry == nd->root.dentry &&
974 nd->path.mnt == nd->root.mnt) {
975 break;
977 if (nd->path.dentry != nd->path.mnt->mnt_root) {
978 /* rare case of legitimate dget_parent()... */
979 nd->path.dentry = dget_parent(nd->path.dentry);
980 dput(old);
981 break;
983 if (!follow_up(&nd->path))
984 break;
986 follow_mount(&nd->path);
987 nd->inode = nd->path.dentry->d_inode;
991 * Allocate a dentry with name and parent, and perform a parent
992 * directory ->lookup on it. Returns the new dentry, or ERR_PTR
993 * on error. parent->d_inode->i_mutex must be held. d_lookup must
994 * have verified that no child exists while under i_mutex.
996 static struct dentry *d_alloc_and_lookup(struct dentry *parent,
997 struct qstr *name, struct nameidata *nd)
999 struct inode *inode = parent->d_inode;
1000 struct dentry *dentry;
1001 struct dentry *old;
1003 /* Don't create child dentry for a dead directory. */
1004 if (unlikely(IS_DEADDIR(inode)))
1005 return ERR_PTR(-ENOENT);
1007 dentry = d_alloc(parent, name);
1008 if (unlikely(!dentry))
1009 return ERR_PTR(-ENOMEM);
1011 old = inode->i_op->lookup(inode, dentry, nd);
1012 if (unlikely(old)) {
1013 dput(dentry);
1014 dentry = old;
1016 return dentry;
1020 * We already have a dentry, but require a lookup to be performed on the parent
1021 * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error.
1022 * parent->d_inode->i_mutex must be held. d_lookup must have verified that no
1023 * child exists while under i_mutex.
1025 static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry,
1026 struct nameidata *nd)
1028 struct inode *inode = parent->d_inode;
1029 struct dentry *old;
1031 /* Don't create child dentry for a dead directory. */
1032 if (unlikely(IS_DEADDIR(inode)))
1033 return ERR_PTR(-ENOENT);
1035 old = inode->i_op->lookup(inode, dentry, nd);
1036 if (unlikely(old)) {
1037 dput(dentry);
1038 dentry = old;
1040 return dentry;
1044 * It's more convoluted than I'd like it to be, but... it's still fairly
1045 * small and for now I'd prefer to have fast path as straight as possible.
1046 * It _is_ time-critical.
1048 static int do_lookup(struct nameidata *nd, struct qstr *name,
1049 struct path *path, struct inode **inode)
1051 struct vfsmount *mnt = nd->path.mnt;
1052 struct dentry *dentry, *parent = nd->path.dentry;
1053 int need_reval = 1;
1054 int status = 1;
1055 int err;
1058 * Rename seqlock is not required here because in the off chance
1059 * of a false negative due to a concurrent rename, we're going to
1060 * do the non-racy lookup, below.
1062 if (nd->flags & LOOKUP_RCU) {
1063 unsigned seq;
1064 *inode = nd->inode;
1065 dentry = __d_lookup_rcu(parent, name, &seq, inode);
1066 if (!dentry)
1067 goto unlazy;
1069 /* Memory barrier in read_seqcount_begin of child is enough */
1070 if (__read_seqcount_retry(&parent->d_seq, nd->seq))
1071 return -ECHILD;
1072 nd->seq = seq;
1074 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1075 status = d_revalidate(dentry, nd);
1076 if (unlikely(status <= 0)) {
1077 if (status != -ECHILD)
1078 need_reval = 0;
1079 goto unlazy;
1082 if (unlikely(d_need_lookup(dentry)))
1083 goto unlazy;
1084 path->mnt = mnt;
1085 path->dentry = dentry;
1086 if (unlikely(!__follow_mount_rcu(nd, path, inode)))
1087 goto unlazy;
1088 if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT))
1089 goto unlazy;
1090 return 0;
1091 unlazy:
1092 if (unlazy_walk(nd, dentry))
1093 return -ECHILD;
1094 } else {
1095 dentry = __d_lookup(parent, name);
1098 if (dentry && unlikely(d_need_lookup(dentry))) {
1099 dput(dentry);
1100 dentry = NULL;
1102 retry:
1103 if (unlikely(!dentry)) {
1104 struct inode *dir = parent->d_inode;
1105 BUG_ON(nd->inode != dir);
1107 mutex_lock(&dir->i_mutex);
1108 dentry = d_lookup(parent, name);
1109 if (likely(!dentry)) {
1110 dentry = d_alloc_and_lookup(parent, name, nd);
1111 if (IS_ERR(dentry)) {
1112 mutex_unlock(&dir->i_mutex);
1113 return PTR_ERR(dentry);
1115 /* known good */
1116 need_reval = 0;
1117 status = 1;
1118 } else if (unlikely(d_need_lookup(dentry))) {
1119 dentry = d_inode_lookup(parent, dentry, nd);
1120 if (IS_ERR(dentry)) {
1121 mutex_unlock(&dir->i_mutex);
1122 return PTR_ERR(dentry);
1124 /* known good */
1125 need_reval = 0;
1126 status = 1;
1128 mutex_unlock(&dir->i_mutex);
1130 if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
1131 status = d_revalidate(dentry, nd);
1132 if (unlikely(status <= 0)) {
1133 if (status < 0) {
1134 dput(dentry);
1135 return status;
1137 if (!d_invalidate(dentry)) {
1138 dput(dentry);
1139 dentry = NULL;
1140 need_reval = 1;
1141 goto retry;
1145 path->mnt = mnt;
1146 path->dentry = dentry;
1147 err = follow_managed(path, nd->flags);
1148 if (unlikely(err < 0)) {
1149 path_put_conditional(path, nd);
1150 return err;
1152 *inode = path->dentry->d_inode;
1153 return 0;
1156 static inline int may_lookup(struct nameidata *nd)
1158 if (nd->flags & LOOKUP_RCU) {
1159 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
1160 if (err != -ECHILD)
1161 return err;
1162 if (unlazy_walk(nd, NULL))
1163 return -ECHILD;
1165 return inode_permission(nd->inode, MAY_EXEC);
1168 static inline int handle_dots(struct nameidata *nd, int type)
1170 if (type == LAST_DOTDOT) {
1171 if (nd->flags & LOOKUP_RCU) {
1172 if (follow_dotdot_rcu(nd))
1173 return -ECHILD;
1174 } else
1175 follow_dotdot(nd);
1177 return 0;
1180 static void terminate_walk(struct nameidata *nd)
1182 if (!(nd->flags & LOOKUP_RCU)) {
1183 path_put(&nd->path);
1184 } else {
1185 nd->flags &= ~LOOKUP_RCU;
1186 if (!(nd->flags & LOOKUP_ROOT))
1187 nd->root.mnt = NULL;
1188 rcu_read_unlock();
1189 br_read_unlock(vfsmount_lock);
1193 static inline int walk_component(struct nameidata *nd, struct path *path,
1194 struct qstr *name, int type, int follow)
1196 struct inode *inode;
1197 int err;
1199 * "." and ".." are special - ".." especially so because it has
1200 * to be able to know about the current root directory and
1201 * parent relationships.
1203 if (unlikely(type != LAST_NORM))
1204 return handle_dots(nd, type);
1205 err = do_lookup(nd, name, path, &inode);
1206 if (unlikely(err)) {
1207 terminate_walk(nd);
1208 return err;
1210 if (!inode) {
1211 path_to_nameidata(path, nd);
1212 terminate_walk(nd);
1213 return -ENOENT;
1215 if (unlikely(inode->i_op->follow_link) && follow) {
1216 if (nd->flags & LOOKUP_RCU) {
1217 if (unlikely(unlazy_walk(nd, path->dentry))) {
1218 terminate_walk(nd);
1219 return -ECHILD;
1222 BUG_ON(inode != path->dentry->d_inode);
1223 return 1;
1225 path_to_nameidata(path, nd);
1226 nd->inode = inode;
1227 return 0;
1231 * This limits recursive symlink follows to 8, while
1232 * limiting consecutive symlinks to 40.
1234 * Without that kind of total limit, nasty chains of consecutive
1235 * symlinks can cause almost arbitrarily long lookups.
1237 static inline int nested_symlink(struct path *path, struct nameidata *nd)
1239 int res;
1241 if (unlikely(current->link_count >= MAX_NESTED_LINKS)) {
1242 path_put_conditional(path, nd);
1243 path_put(&nd->path);
1244 return -ELOOP;
1246 BUG_ON(nd->depth >= MAX_NESTED_LINKS);
1248 nd->depth++;
1249 current->link_count++;
1251 do {
1252 struct path link = *path;
1253 void *cookie;
1255 res = follow_link(&link, nd, &cookie);
1256 if (!res)
1257 res = walk_component(nd, path, &nd->last,
1258 nd->last_type, LOOKUP_FOLLOW);
1259 put_link(nd, &link, cookie);
1260 } while (res > 0);
1262 current->link_count--;
1263 nd->depth--;
1264 return res;
1268 * Name resolution.
1269 * This is the basic name resolution function, turning a pathname into
1270 * the final dentry. We expect 'base' to be positive and a directory.
1272 * Returns 0 and nd will have valid dentry and mnt on success.
1273 * Returns error and drops reference to input namei data on failure.
1275 static int link_path_walk(const char *name, struct nameidata *nd)
1277 struct path next;
1278 int err;
1279 unsigned int lookup_flags = nd->flags;
1281 while (*name=='/')
1282 name++;
1283 if (!*name)
1284 return 0;
1286 /* At this point we know we have a real path component. */
1287 for(;;) {
1288 unsigned long hash;
1289 struct qstr this;
1290 unsigned int c;
1291 int type;
1293 nd->flags |= LOOKUP_CONTINUE;
1295 err = may_lookup(nd);
1296 if (err)
1297 break;
1299 this.name = name;
1300 c = *(const unsigned char *)name;
1302 hash = init_name_hash();
1303 do {
1304 name++;
1305 hash = partial_name_hash(c, hash);
1306 c = *(const unsigned char *)name;
1307 } while (c && (c != '/'));
1308 this.len = name - (const char *) this.name;
1309 this.hash = end_name_hash(hash);
1311 type = LAST_NORM;
1312 if (this.name[0] == '.') switch (this.len) {
1313 case 2:
1314 if (this.name[1] == '.') {
1315 type = LAST_DOTDOT;
1316 nd->flags |= LOOKUP_JUMPED;
1318 break;
1319 case 1:
1320 type = LAST_DOT;
1322 if (likely(type == LAST_NORM)) {
1323 struct dentry *parent = nd->path.dentry;
1324 nd->flags &= ~LOOKUP_JUMPED;
1325 if (unlikely(parent->d_flags & DCACHE_OP_HASH)) {
1326 err = parent->d_op->d_hash(parent, nd->inode,
1327 &this);
1328 if (err < 0)
1329 break;
1333 /* remove trailing slashes? */
1334 if (!c)
1335 goto last_component;
1336 while (*++name == '/');
1337 if (!*name)
1338 goto last_component;
1340 err = walk_component(nd, &next, &this, type, LOOKUP_FOLLOW);
1341 if (err < 0)
1342 return err;
1344 if (err) {
1345 err = nested_symlink(&next, nd);
1346 if (err)
1347 return err;
1349 err = -ENOTDIR;
1350 if (!nd->inode->i_op->lookup)
1351 break;
1352 continue;
1353 /* here ends the main loop */
1355 last_component:
1356 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1357 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1358 nd->last = this;
1359 nd->last_type = type;
1360 return 0;
1362 terminate_walk(nd);
1363 return err;
1366 static int path_init(int dfd, const char *name, unsigned int flags,
1367 struct nameidata *nd, struct file **fp)
1369 int retval = 0;
1370 int fput_needed;
1371 struct file *file;
1373 nd->last_type = LAST_ROOT; /* if there are only slashes... */
1374 nd->flags = flags | LOOKUP_JUMPED;
1375 nd->depth = 0;
1376 if (flags & LOOKUP_ROOT) {
1377 struct inode *inode = nd->root.dentry->d_inode;
1378 if (*name) {
1379 if (!inode->i_op->lookup)
1380 return -ENOTDIR;
1381 retval = inode_permission(inode, MAY_EXEC);
1382 if (retval)
1383 return retval;
1385 nd->path = nd->root;
1386 nd->inode = inode;
1387 if (flags & LOOKUP_RCU) {
1388 br_read_lock(vfsmount_lock);
1389 rcu_read_lock();
1390 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1391 } else {
1392 path_get(&nd->path);
1394 return 0;
1397 nd->root.mnt = NULL;
1399 if (*name=='/') {
1400 if (flags & LOOKUP_RCU) {
1401 br_read_lock(vfsmount_lock);
1402 rcu_read_lock();
1403 set_root_rcu(nd);
1404 } else {
1405 set_root(nd);
1406 path_get(&nd->root);
1408 nd->path = nd->root;
1409 } else if (dfd == AT_FDCWD) {
1410 if (flags & LOOKUP_RCU) {
1411 struct fs_struct *fs = current->fs;
1412 unsigned seq;
1414 br_read_lock(vfsmount_lock);
1415 rcu_read_lock();
1417 do {
1418 seq = read_seqcount_begin(&fs->seq);
1419 nd->path = fs->pwd;
1420 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1421 } while (read_seqcount_retry(&fs->seq, seq));
1422 } else {
1423 get_fs_pwd(current->fs, &nd->path);
1425 } else {
1426 struct dentry *dentry;
1428 file = fget_raw_light(dfd, &fput_needed);
1429 retval = -EBADF;
1430 if (!file)
1431 goto out_fail;
1433 dentry = file->f_path.dentry;
1435 if (*name) {
1436 retval = -ENOTDIR;
1437 if (!S_ISDIR(dentry->d_inode->i_mode))
1438 goto fput_fail;
1440 retval = inode_permission(dentry->d_inode, MAY_EXEC);
1441 if (retval)
1442 goto fput_fail;
1445 nd->path = file->f_path;
1446 if (flags & LOOKUP_RCU) {
1447 if (fput_needed)
1448 *fp = file;
1449 nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
1450 br_read_lock(vfsmount_lock);
1451 rcu_read_lock();
1452 } else {
1453 path_get(&file->f_path);
1454 fput_light(file, fput_needed);
1458 nd->inode = nd->path.dentry->d_inode;
1459 return 0;
1461 fput_fail:
1462 fput_light(file, fput_needed);
1463 out_fail:
1464 return retval;
1467 static inline int lookup_last(struct nameidata *nd, struct path *path)
1469 if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len])
1470 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
1472 nd->flags &= ~LOOKUP_PARENT;
1473 return walk_component(nd, path, &nd->last, nd->last_type,
1474 nd->flags & LOOKUP_FOLLOW);
1477 /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1478 static int path_lookupat(int dfd, const char *name,
1479 unsigned int flags, struct nameidata *nd)
1481 struct file *base = NULL;
1482 struct path path;
1483 int err;
1486 * Path walking is largely split up into 2 different synchronisation
1487 * schemes, rcu-walk and ref-walk (explained in
1488 * Documentation/filesystems/path-lookup.txt). These share much of the
1489 * path walk code, but some things particularly setup, cleanup, and
1490 * following mounts are sufficiently divergent that functions are
1491 * duplicated. Typically there is a function foo(), and its RCU
1492 * analogue, foo_rcu().
1494 * -ECHILD is the error number of choice (just to avoid clashes) that
1495 * is returned if some aspect of an rcu-walk fails. Such an error must
1496 * be handled by restarting a traditional ref-walk (which will always
1497 * be able to complete).
1499 err = path_init(dfd, name, flags | LOOKUP_PARENT, nd, &base);
1501 if (unlikely(err))
1502 return err;
1504 current->total_link_count = 0;
1505 err = link_path_walk(name, nd);
1507 if (!err && !(flags & LOOKUP_PARENT)) {
1508 err = lookup_last(nd, &path);
1509 while (err > 0) {
1510 void *cookie;
1511 struct path link = path;
1512 nd->flags |= LOOKUP_PARENT;
1513 err = follow_link(&link, nd, &cookie);
1514 if (!err)
1515 err = lookup_last(nd, &path);
1516 put_link(nd, &link, cookie);
1520 if (!err)
1521 err = complete_walk(nd);
1523 if (!err && nd->flags & LOOKUP_DIRECTORY) {
1524 if (!nd->inode->i_op->lookup) {
1525 path_put(&nd->path);
1526 err = -ENOTDIR;
1530 if (base)
1531 fput(base);
1533 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) {
1534 path_put(&nd->root);
1535 nd->root.mnt = NULL;
1537 return err;
1540 static int do_path_lookup(int dfd, const char *name,
1541 unsigned int flags, struct nameidata *nd)
1543 int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd);
1544 if (unlikely(retval == -ECHILD))
1545 retval = path_lookupat(dfd, name, flags, nd);
1546 if (unlikely(retval == -ESTALE))
1547 retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd);
1549 if (likely(!retval)) {
1550 if (unlikely(!audit_dummy_context())) {
1551 if (nd->path.dentry && nd->inode)
1552 audit_inode(name, nd->path.dentry);
1555 return retval;
1558 int kern_path_parent(const char *name, struct nameidata *nd)
1560 return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd);
1563 int kern_path(const char *name, unsigned int flags, struct path *path)
1565 struct nameidata nd;
1566 int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
1567 if (!res)
1568 *path = nd.path;
1569 return res;
1573 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
1574 * @dentry: pointer to dentry of the base directory
1575 * @mnt: pointer to vfs mount of the base directory
1576 * @name: pointer to file name
1577 * @flags: lookup flags
1578 * @nd: pointer to nameidata
1580 int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1581 const char *name, unsigned int flags,
1582 struct nameidata *nd)
1584 nd->root.dentry = dentry;
1585 nd->root.mnt = mnt;
1586 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1587 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd);
1590 static struct dentry *__lookup_hash(struct qstr *name,
1591 struct dentry *base, struct nameidata *nd)
1593 struct inode *inode = base->d_inode;
1594 struct dentry *dentry;
1595 int err;
1597 err = inode_permission(inode, MAY_EXEC);
1598 if (err)
1599 return ERR_PTR(err);
1602 * Don't bother with __d_lookup: callers are for creat as
1603 * well as unlink, so a lot of the time it would cost
1604 * a double lookup.
1606 dentry = d_lookup(base, name);
1608 if (dentry && d_need_lookup(dentry)) {
1610 * __lookup_hash is called with the parent dir's i_mutex already
1611 * held, so we are good to go here.
1613 dentry = d_inode_lookup(base, dentry, nd);
1614 if (IS_ERR(dentry))
1615 return dentry;
1618 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1619 int status = d_revalidate(dentry, nd);
1620 if (unlikely(status <= 0)) {
1622 * The dentry failed validation.
1623 * If d_revalidate returned 0 attempt to invalidate
1624 * the dentry otherwise d_revalidate is asking us
1625 * to return a fail status.
1627 if (status < 0) {
1628 dput(dentry);
1629 return ERR_PTR(status);
1630 } else if (!d_invalidate(dentry)) {
1631 dput(dentry);
1632 dentry = NULL;
1637 if (!dentry)
1638 dentry = d_alloc_and_lookup(base, name, nd);
1640 return dentry;
1644 * Restricted form of lookup. Doesn't follow links, single-component only,
1645 * needs parent already locked. Doesn't follow mounts.
1646 * SMP-safe.
1648 static struct dentry *lookup_hash(struct nameidata *nd)
1650 return __lookup_hash(&nd->last, nd->path.dentry, nd);
1654 * lookup_one_len - filesystem helper to lookup single pathname component
1655 * @name: pathname component to lookup
1656 * @base: base directory to lookup from
1657 * @len: maximum length @len should be interpreted to
1659 * Note that this routine is purely a helper for filesystem usage and should
1660 * not be called by generic code. Also note that by using this function the
1661 * nameidata argument is passed to the filesystem methods and a filesystem
1662 * using this helper needs to be prepared for that.
1664 struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1666 struct qstr this;
1667 unsigned long hash;
1668 unsigned int c;
1670 WARN_ON_ONCE(!mutex_is_locked(&base->d_inode->i_mutex));
1672 this.name = name;
1673 this.len = len;
1674 if (!len)
1675 return ERR_PTR(-EACCES);
1677 hash = init_name_hash();
1678 while (len--) {
1679 c = *(const unsigned char *)name++;
1680 if (c == '/' || c == '\0')
1681 return ERR_PTR(-EACCES);
1682 hash = partial_name_hash(c, hash);
1684 this.hash = end_name_hash(hash);
1686 * See if the low-level filesystem might want
1687 * to use its own hash..
1689 if (base->d_flags & DCACHE_OP_HASH) {
1690 int err = base->d_op->d_hash(base, base->d_inode, &this);
1691 if (err < 0)
1692 return ERR_PTR(err);
1695 return __lookup_hash(&this, base, NULL);
1698 int user_path_at(int dfd, const char __user *name, unsigned flags,
1699 struct path *path)
1701 struct nameidata nd;
1702 char *tmp = getname_flags(name, flags);
1703 int err = PTR_ERR(tmp);
1704 if (!IS_ERR(tmp)) {
1706 BUG_ON(flags & LOOKUP_PARENT);
1708 err = do_path_lookup(dfd, tmp, flags, &nd);
1709 putname(tmp);
1710 if (!err)
1711 *path = nd.path;
1713 return err;
1716 static int user_path_parent(int dfd, const char __user *path,
1717 struct nameidata *nd, char **name)
1719 char *s = getname(path);
1720 int error;
1722 if (IS_ERR(s))
1723 return PTR_ERR(s);
1725 error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd);
1726 if (error)
1727 putname(s);
1728 else
1729 *name = s;
1731 return error;
1735 * It's inline, so penalty for filesystems that don't use sticky bit is
1736 * minimal.
1738 static inline int check_sticky(struct inode *dir, struct inode *inode)
1740 uid_t fsuid = current_fsuid();
1742 if (!(dir->i_mode & S_ISVTX))
1743 return 0;
1744 if (current_user_ns() != inode_userns(inode))
1745 goto other_userns;
1746 if (inode->i_uid == fsuid)
1747 return 0;
1748 if (dir->i_uid == fsuid)
1749 return 0;
1751 other_userns:
1752 return !ns_capable(inode_userns(inode), CAP_FOWNER);
1756 * Check whether we can remove a link victim from directory dir, check
1757 * whether the type of victim is right.
1758 * 1. We can't do it if dir is read-only (done in permission())
1759 * 2. We should have write and exec permissions on dir
1760 * 3. We can't remove anything from append-only dir
1761 * 4. We can't do anything with immutable dir (done in permission())
1762 * 5. If the sticky bit on dir is set we should either
1763 * a. be owner of dir, or
1764 * b. be owner of victim, or
1765 * c. have CAP_FOWNER capability
1766 * 6. If the victim is append-only or immutable we can't do antyhing with
1767 * links pointing to it.
1768 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
1769 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
1770 * 9. We can't remove a root or mountpoint.
1771 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
1772 * nfs_async_unlink().
1774 static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1776 int error;
1778 if (!victim->d_inode)
1779 return -ENOENT;
1781 BUG_ON(victim->d_parent->d_inode != dir);
1782 audit_inode_child(victim, dir);
1784 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1785 if (error)
1786 return error;
1787 if (IS_APPEND(dir))
1788 return -EPERM;
1789 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
1790 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
1791 return -EPERM;
1792 if (isdir) {
1793 if (!S_ISDIR(victim->d_inode->i_mode))
1794 return -ENOTDIR;
1795 if (IS_ROOT(victim))
1796 return -EBUSY;
1797 } else if (S_ISDIR(victim->d_inode->i_mode))
1798 return -EISDIR;
1799 if (IS_DEADDIR(dir))
1800 return -ENOENT;
1801 if (victim->d_flags & DCACHE_NFSFS_RENAMED)
1802 return -EBUSY;
1803 return 0;
1806 /* Check whether we can create an object with dentry child in directory
1807 * dir.
1808 * 1. We can't do it if child already exists (open has special treatment for
1809 * this case, but since we are inlined it's OK)
1810 * 2. We can't do it if dir is read-only (done in permission())
1811 * 3. We should have write and exec permissions on dir
1812 * 4. We can't do it if dir is immutable (done in permission())
1814 static inline int may_create(struct inode *dir, struct dentry *child)
1816 if (child->d_inode)
1817 return -EEXIST;
1818 if (IS_DEADDIR(dir))
1819 return -ENOENT;
1820 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1824 * p1 and p2 should be directories on the same fs.
1826 struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1828 struct dentry *p;
1830 if (p1 == p2) {
1831 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
1832 return NULL;
1835 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1837 p = d_ancestor(p2, p1);
1838 if (p) {
1839 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
1840 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
1841 return p;
1844 p = d_ancestor(p1, p2);
1845 if (p) {
1846 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
1847 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
1848 return p;
1851 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
1852 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
1853 return NULL;
1856 void unlock_rename(struct dentry *p1, struct dentry *p2)
1858 mutex_unlock(&p1->d_inode->i_mutex);
1859 if (p1 != p2) {
1860 mutex_unlock(&p2->d_inode->i_mutex);
1861 mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1865 int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1866 struct nameidata *nd)
1868 int error = may_create(dir, dentry);
1870 if (error)
1871 return error;
1873 if (!dir->i_op->create)
1874 return -EACCES; /* shouldn't it be ENOSYS? */
1875 mode &= S_IALLUGO;
1876 mode |= S_IFREG;
1877 error = security_inode_create(dir, dentry, mode);
1878 if (error)
1879 return error;
1880 error = dir->i_op->create(dir, dentry, mode, nd);
1881 if (!error)
1882 fsnotify_create(dir, dentry);
1883 return error;
1886 static int may_open(struct path *path, int acc_mode, int flag)
1888 struct dentry *dentry = path->dentry;
1889 struct inode *inode = dentry->d_inode;
1890 int error;
1892 /* O_PATH? */
1893 if (!acc_mode)
1894 return 0;
1896 if (!inode)
1897 return -ENOENT;
1899 switch (inode->i_mode & S_IFMT) {
1900 case S_IFLNK:
1901 return -ELOOP;
1902 case S_IFDIR:
1903 if (acc_mode & MAY_WRITE)
1904 return -EISDIR;
1905 break;
1906 case S_IFBLK:
1907 case S_IFCHR:
1908 if (path->mnt->mnt_flags & MNT_NODEV)
1909 return -EACCES;
1910 /*FALLTHRU*/
1911 case S_IFIFO:
1912 case S_IFSOCK:
1913 flag &= ~O_TRUNC;
1914 break;
1917 error = inode_permission(inode, acc_mode);
1918 if (error)
1919 return error;
1922 * An append-only file must be opened in append mode for writing.
1924 if (IS_APPEND(inode)) {
1925 if ((flag & O_ACCMODE) != O_RDONLY && !(flag & O_APPEND))
1926 return -EPERM;
1927 if (flag & O_TRUNC)
1928 return -EPERM;
1931 /* O_NOATIME can only be set by the owner or superuser */
1932 if (flag & O_NOATIME && !inode_owner_or_capable(inode))
1933 return -EPERM;
1936 * Ensure there are no outstanding leases on the file.
1938 return break_lease(inode, flag);
1941 static int handle_truncate(struct file *filp)
1943 struct path *path = &filp->f_path;
1944 struct inode *inode = path->dentry->d_inode;
1945 int error = get_write_access(inode);
1946 if (error)
1947 return error;
1949 * Refuse to truncate files with mandatory locks held on them.
1951 error = locks_verify_locked(inode);
1952 if (!error)
1953 error = security_path_truncate(path);
1954 if (!error) {
1955 error = do_truncate(path->dentry, 0,
1956 ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
1957 filp);
1959 put_write_access(inode);
1960 return error;
/*
 * Note that while the flag value (low two bits) for sys_open means:
 *	00 - read-only
 *	01 - write-only
 *	10 - read-write
 *	11 - special
 * it is changed into
 *	00 - no permissions needed
 *	01 - read-permission
 *	10 - write-permission
 *	11 - read-write
 * for the internal routines (ie open_namei()/follow_link() etc)
 * This is more logical, and also allows the 00 "no perm needed"
 * to be used for symlinks (where the permissions are checked
 * later).
 *
 * The conversion is a plain increment, skipped when the access-mode
 * bits are already 11 (incrementing would carry out of them).
 */
static inline int open_to_namei_flags(int flag)
{
	if ((flag+1) & O_ACCMODE)
		flag++;
	return flag;
}
1988 * Handle the last step of open()
1990 static struct file *do_last(struct nameidata *nd, struct path *path,
1991 const struct open_flags *op, const char *pathname)
1993 struct dentry *dir = nd->path.dentry;
1994 struct dentry *dentry;
1995 int open_flag = op->open_flag;
1996 int will_truncate = open_flag & O_TRUNC;
1997 int want_write = 0;
1998 int acc_mode = op->acc_mode;
1999 struct file *filp;
2000 int error;
2002 nd->flags &= ~LOOKUP_PARENT;
2003 nd->flags |= op->intent;
2005 switch (nd->last_type) {
2006 case LAST_DOTDOT:
2007 case LAST_DOT:
2008 error = handle_dots(nd, nd->last_type);
2009 if (error)
2010 return ERR_PTR(error);
2011 /* fallthrough */
2012 case LAST_ROOT:
2013 error = complete_walk(nd);
2014 if (error)
2015 return ERR_PTR(error);
2016 audit_inode(pathname, nd->path.dentry);
2017 if (open_flag & O_CREAT) {
2018 error = -EISDIR;
2019 goto exit;
2021 goto ok;
2022 case LAST_BIND:
2023 error = complete_walk(nd);
2024 if (error)
2025 return ERR_PTR(error);
2026 audit_inode(pathname, dir);
2027 goto ok;
2030 if (!(open_flag & O_CREAT)) {
2031 int symlink_ok = 0;
2032 if (nd->last.name[nd->last.len])
2033 nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
2034 if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW))
2035 symlink_ok = 1;
2036 /* we _can_ be in RCU mode here */
2037 error = walk_component(nd, path, &nd->last, LAST_NORM,
2038 !symlink_ok);
2039 if (error < 0)
2040 return ERR_PTR(error);
2041 if (error) /* symlink */
2042 return NULL;
2043 /* sayonara */
2044 error = complete_walk(nd);
2045 if (error)
2046 return ERR_PTR(-ECHILD);
2048 error = -ENOTDIR;
2049 if (nd->flags & LOOKUP_DIRECTORY) {
2050 if (!nd->inode->i_op->lookup)
2051 goto exit;
2053 audit_inode(pathname, nd->path.dentry);
2054 goto ok;
2057 /* create side of things */
2058 error = complete_walk(nd);
2059 if (error)
2060 return ERR_PTR(error);
2062 audit_inode(pathname, dir);
2063 error = -EISDIR;
2064 /* trailing slashes? */
2065 if (nd->last.name[nd->last.len])
2066 goto exit;
2068 mutex_lock(&dir->d_inode->i_mutex);
2070 dentry = lookup_hash(nd);
2071 error = PTR_ERR(dentry);
2072 if (IS_ERR(dentry)) {
2073 mutex_unlock(&dir->d_inode->i_mutex);
2074 goto exit;
2077 path->dentry = dentry;
2078 path->mnt = nd->path.mnt;
2080 /* Negative dentry, just create the file */
2081 if (!dentry->d_inode) {
2082 int mode = op->mode;
2083 if (!IS_POSIXACL(dir->d_inode))
2084 mode &= ~current_umask();
2086 * This write is needed to ensure that a
2087 * rw->ro transition does not occur between
2088 * the time when the file is created and when
2089 * a permanent write count is taken through
2090 * the 'struct file' in nameidata_to_filp().
2092 error = mnt_want_write(nd->path.mnt);
2093 if (error)
2094 goto exit_mutex_unlock;
2095 want_write = 1;
2096 /* Don't check for write permission, don't truncate */
2097 open_flag &= ~O_TRUNC;
2098 will_truncate = 0;
2099 acc_mode = MAY_OPEN;
2100 error = security_path_mknod(&nd->path, dentry, mode, 0);
2101 if (error)
2102 goto exit_mutex_unlock;
2103 error = vfs_create(dir->d_inode, dentry, mode, nd);
2104 if (error)
2105 goto exit_mutex_unlock;
2106 mutex_unlock(&dir->d_inode->i_mutex);
2107 dput(nd->path.dentry);
2108 nd->path.dentry = dentry;
2109 goto common;
2113 * It already exists.
2115 mutex_unlock(&dir->d_inode->i_mutex);
2116 audit_inode(pathname, path->dentry);
2118 error = -EEXIST;
2119 if (open_flag & O_EXCL)
2120 goto exit_dput;
2122 error = follow_managed(path, nd->flags);
2123 if (error < 0)
2124 goto exit_dput;
2126 error = -ENOENT;
2127 if (!path->dentry->d_inode)
2128 goto exit_dput;
2130 if (path->dentry->d_inode->i_op->follow_link)
2131 return NULL;
2133 path_to_nameidata(path, nd);
2134 nd->inode = path->dentry->d_inode;
2135 error = -EISDIR;
2136 if (S_ISDIR(nd->inode->i_mode))
2137 goto exit;
2139 if (!S_ISREG(nd->inode->i_mode))
2140 will_truncate = 0;
2142 if (will_truncate) {
2143 error = mnt_want_write(nd->path.mnt);
2144 if (error)
2145 goto exit;
2146 want_write = 1;
2148 common:
2149 error = may_open(&nd->path, acc_mode, open_flag);
2150 if (error)
2151 goto exit;
2152 filp = nameidata_to_filp(nd);
2153 if (!IS_ERR(filp)) {
2154 error = ima_file_check(filp, op->acc_mode);
2155 if (error) {
2156 fput(filp);
2157 filp = ERR_PTR(error);
2160 if (!IS_ERR(filp)) {
2161 if (will_truncate) {
2162 error = handle_truncate(filp);
2163 if (error) {
2164 fput(filp);
2165 filp = ERR_PTR(error);
2169 out:
2170 if (want_write)
2171 mnt_drop_write(nd->path.mnt);
2172 path_put(&nd->path);
2173 return filp;
2175 exit_mutex_unlock:
2176 mutex_unlock(&dir->d_inode->i_mutex);
2177 exit_dput:
2178 path_put_conditional(path, nd);
2179 exit:
2180 filp = ERR_PTR(error);
2181 goto out;
2184 static struct file *path_openat(int dfd, const char *pathname,
2185 struct nameidata *nd, const struct open_flags *op, int flags)
2187 struct file *base = NULL;
2188 struct file *filp;
2189 struct path path;
2190 int error;
2192 filp = get_empty_filp();
2193 if (!filp)
2194 return ERR_PTR(-ENFILE);
2196 filp->f_flags = op->open_flag;
2197 nd->intent.open.file = filp;
2198 nd->intent.open.flags = open_to_namei_flags(op->open_flag);
2199 nd->intent.open.create_mode = op->mode;
2201 error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base);
2202 if (unlikely(error))
2203 goto out_filp;
2205 current->total_link_count = 0;
2206 error = link_path_walk(pathname, nd);
2207 if (unlikely(error))
2208 goto out_filp;
2210 filp = do_last(nd, &path, op, pathname);
2211 while (unlikely(!filp)) { /* trailing symlink */
2212 struct path link = path;
2213 void *cookie;
2214 if (!(nd->flags & LOOKUP_FOLLOW)) {
2215 path_put_conditional(&path, nd);
2216 path_put(&nd->path);
2217 filp = ERR_PTR(-ELOOP);
2218 break;
2220 nd->flags |= LOOKUP_PARENT;
2221 nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
2222 error = follow_link(&link, nd, &cookie);
2223 if (unlikely(error))
2224 filp = ERR_PTR(error);
2225 else
2226 filp = do_last(nd, &path, op, pathname);
2227 put_link(nd, &link, cookie);
2229 out:
2230 if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT))
2231 path_put(&nd->root);
2232 if (base)
2233 fput(base);
2234 release_open_intent(nd);
2235 return filp;
2237 out_filp:
2238 filp = ERR_PTR(error);
2239 goto out;
2242 struct file *do_filp_open(int dfd, const char *pathname,
2243 const struct open_flags *op, int flags)
2245 struct nameidata nd;
2246 struct file *filp;
2248 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU);
2249 if (unlikely(filp == ERR_PTR(-ECHILD)))
2250 filp = path_openat(dfd, pathname, &nd, op, flags);
2251 if (unlikely(filp == ERR_PTR(-ESTALE)))
2252 filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL);
2253 return filp;
2256 struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2257 const char *name, const struct open_flags *op, int flags)
2259 struct nameidata nd;
2260 struct file *file;
2262 nd.root.mnt = mnt;
2263 nd.root.dentry = dentry;
2265 flags |= LOOKUP_ROOT;
2267 if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
2268 return ERR_PTR(-ELOOP);
2270 file = path_openat(-1, name, &nd, op, flags | LOOKUP_RCU);
2271 if (unlikely(file == ERR_PTR(-ECHILD)))
2272 file = path_openat(-1, name, &nd, op, flags);
2273 if (unlikely(file == ERR_PTR(-ESTALE)))
2274 file = path_openat(-1, name, &nd, op, flags | LOOKUP_REVAL);
2275 return file;
2279 * lookup_create - lookup a dentry, creating it if it doesn't exist
2280 * @nd: nameidata info
2281 * @is_dir: directory flag
2283 * Simple function to lookup and return a dentry and create it
2284 * if it doesn't exist. Is SMP-safe.
2286 * Returns with nd->path.dentry->d_inode->i_mutex locked.
2288 struct dentry *lookup_create(struct nameidata *nd, int is_dir)
2290 struct dentry *dentry = ERR_PTR(-EEXIST);
2292 mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2294 * Yucky last component or no last component at all?
2295 * (foo/., foo/.., /////)
2297 if (nd->last_type != LAST_NORM)
2298 goto fail;
2299 nd->flags &= ~LOOKUP_PARENT;
2300 nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL;
2301 nd->intent.open.flags = O_EXCL;
2304 * Do the final lookup.
2306 dentry = lookup_hash(nd);
2307 if (IS_ERR(dentry))
2308 goto fail;
2310 if (dentry->d_inode)
2311 goto eexist;
2313 * Special case - lookup gave negative, but... we had foo/bar/
2314 * From the vfs_mknod() POV we just have a negative dentry -
2315 * all is fine. Let's be bastards - you had / on the end, you've
2316 * been asking for (non-existent) directory. -ENOENT for you.
2318 if (unlikely(!is_dir && nd->last.name[nd->last.len])) {
2319 dput(dentry);
2320 dentry = ERR_PTR(-ENOENT);
2322 return dentry;
2323 eexist:
2324 dput(dentry);
2325 dentry = ERR_PTR(-EEXIST);
2326 fail:
2327 return dentry;
2329 EXPORT_SYMBOL_GPL(lookup_create);
2331 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2333 int error = may_create(dir, dentry);
2335 if (error)
2336 return error;
2338 if ((S_ISCHR(mode) || S_ISBLK(mode)) &&
2339 !ns_capable(inode_userns(dir), CAP_MKNOD))
2340 return -EPERM;
2342 if (!dir->i_op->mknod)
2343 return -EPERM;
2345 error = devcgroup_inode_mknod(mode, dev);
2346 if (error)
2347 return error;
2349 error = security_inode_mknod(dir, dentry, mode, dev);
2350 if (error)
2351 return error;
2353 error = dir->i_op->mknod(dir, dentry, mode, dev);
2354 if (!error)
2355 fsnotify_create(dir, dentry);
2356 return error;
/*
 * Validate the file-type bits of a mknod() @mode.
 *
 * Returns 0 for regular files, character/block devices, FIFOs, sockets
 * and for a mode with no type bits at all; -EPERM for directories;
 * -EINVAL for any other type.
 */
static int may_mknod(mode_t mode)
{
	switch (mode & S_IFMT) {
	case S_IFREG:
	case S_IFCHR:
	case S_IFBLK:
	case S_IFIFO:
	case S_IFSOCK:
	case 0: /* zero mode translates to S_IFREG */
		return 0;
	case S_IFDIR:
		return -EPERM;
	default:
		return -EINVAL;
	}
}
2376 SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
2377 unsigned, dev)
2379 int error;
2380 char *tmp;
2381 struct dentry *dentry;
2382 struct nameidata nd;
2384 if (S_ISDIR(mode))
2385 return -EPERM;
2387 error = user_path_parent(dfd, filename, &nd, &tmp);
2388 if (error)
2389 return error;
2391 dentry = lookup_create(&nd, 0);
2392 if (IS_ERR(dentry)) {
2393 error = PTR_ERR(dentry);
2394 goto out_unlock;
2396 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2397 mode &= ~current_umask();
2398 error = may_mknod(mode);
2399 if (error)
2400 goto out_dput;
2401 error = mnt_want_write(nd.path.mnt);
2402 if (error)
2403 goto out_dput;
2404 error = security_path_mknod(&nd.path, dentry, mode, dev);
2405 if (error)
2406 goto out_drop_write;
2407 switch (mode & S_IFMT) {
2408 case 0: case S_IFREG:
2409 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,NULL);
2410 break;
2411 case S_IFCHR: case S_IFBLK:
2412 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,
2413 new_decode_dev(dev));
2414 break;
2415 case S_IFIFO: case S_IFSOCK:
2416 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0);
2417 break;
2419 out_drop_write:
2420 mnt_drop_write(nd.path.mnt);
2421 out_dput:
2422 dput(dentry);
2423 out_unlock:
2424 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2425 path_put(&nd.path);
2426 putname(tmp);
2428 return error;
2431 SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev)
2433 return sys_mknodat(AT_FDCWD, filename, mode, dev);
2436 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2438 int error = may_create(dir, dentry);
2440 if (error)
2441 return error;
2443 if (!dir->i_op->mkdir)
2444 return -EPERM;
2446 mode &= (S_IRWXUGO|S_ISVTX);
2447 error = security_inode_mkdir(dir, dentry, mode);
2448 if (error)
2449 return error;
2451 error = dir->i_op->mkdir(dir, dentry, mode);
2452 if (!error)
2453 fsnotify_mkdir(dir, dentry);
2454 return error;
2457 SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
2459 int error = 0;
2460 char * tmp;
2461 struct dentry *dentry;
2462 struct nameidata nd;
2464 error = user_path_parent(dfd, pathname, &nd, &tmp);
2465 if (error)
2466 goto out_err;
2468 dentry = lookup_create(&nd, 1);
2469 error = PTR_ERR(dentry);
2470 if (IS_ERR(dentry))
2471 goto out_unlock;
2473 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2474 mode &= ~current_umask();
2475 error = mnt_want_write(nd.path.mnt);
2476 if (error)
2477 goto out_dput;
2478 error = security_path_mkdir(&nd.path, dentry, mode);
2479 if (error)
2480 goto out_drop_write;
2481 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode);
2482 out_drop_write:
2483 mnt_drop_write(nd.path.mnt);
2484 out_dput:
2485 dput(dentry);
2486 out_unlock:
2487 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2488 path_put(&nd.path);
2489 putname(tmp);
2490 out_err:
2491 return error;
2494 SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode)
2496 return sys_mkdirat(AT_FDCWD, pathname, mode);
2500 * The dentry_unhash() helper will try to drop the dentry early: we
2501 * should have a usage count of 2 if we're the only user of this
2502 * dentry, and if that is true (possibly after pruning the dcache),
2503 * then we drop the dentry now.
2505 * A low-level filesystem can, if it choses, legally
2506 * do a
2508 * if (!d_unhashed(dentry))
2509 * return -EBUSY;
2511 * if it cannot handle the case of removing a directory
2512 * that is still in use by something else..
2514 void dentry_unhash(struct dentry *dentry)
2516 shrink_dcache_parent(dentry);
2517 spin_lock(&dentry->d_lock);
2518 if (dentry->d_count == 1)
2519 __d_drop(dentry);
2520 spin_unlock(&dentry->d_lock);
2523 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
2525 int error = may_delete(dir, dentry, 1);
2527 if (error)
2528 return error;
2530 if (!dir->i_op->rmdir)
2531 return -EPERM;
2533 mutex_lock(&dentry->d_inode->i_mutex);
2535 error = -EBUSY;
2536 if (d_mountpoint(dentry))
2537 goto out;
2539 error = security_inode_rmdir(dir, dentry);
2540 if (error)
2541 goto out;
2543 shrink_dcache_parent(dentry);
2544 error = dir->i_op->rmdir(dir, dentry);
2545 if (error)
2546 goto out;
2548 dentry->d_inode->i_flags |= S_DEAD;
2549 dont_mount(dentry);
2551 out:
2552 mutex_unlock(&dentry->d_inode->i_mutex);
2553 if (!error)
2554 d_delete(dentry);
2555 return error;
2558 static long do_rmdir(int dfd, const char __user *pathname)
2560 int error = 0;
2561 char * name;
2562 struct dentry *dentry;
2563 struct nameidata nd;
2565 error = user_path_parent(dfd, pathname, &nd, &name);
2566 if (error)
2567 return error;
2569 switch(nd.last_type) {
2570 case LAST_DOTDOT:
2571 error = -ENOTEMPTY;
2572 goto exit1;
2573 case LAST_DOT:
2574 error = -EINVAL;
2575 goto exit1;
2576 case LAST_ROOT:
2577 error = -EBUSY;
2578 goto exit1;
2581 nd.flags &= ~LOOKUP_PARENT;
2583 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2584 dentry = lookup_hash(&nd);
2585 error = PTR_ERR(dentry);
2586 if (IS_ERR(dentry))
2587 goto exit2;
2588 if (!dentry->d_inode) {
2589 error = -ENOENT;
2590 goto exit3;
2592 error = mnt_want_write(nd.path.mnt);
2593 if (error)
2594 goto exit3;
2595 error = security_path_rmdir(&nd.path, dentry);
2596 if (error)
2597 goto exit4;
2598 error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
2599 exit4:
2600 mnt_drop_write(nd.path.mnt);
2601 exit3:
2602 dput(dentry);
2603 exit2:
2604 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2605 exit1:
2606 path_put(&nd.path);
2607 putname(name);
2608 return error;
2611 SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
2613 return do_rmdir(AT_FDCWD, pathname);
2616 int vfs_unlink(struct inode *dir, struct dentry *dentry)
2618 int error = may_delete(dir, dentry, 0);
2620 if (error)
2621 return error;
2623 if (!dir->i_op->unlink)
2624 return -EPERM;
2626 mutex_lock(&dentry->d_inode->i_mutex);
2627 if (d_mountpoint(dentry))
2628 error = -EBUSY;
2629 else {
2630 error = security_inode_unlink(dir, dentry);
2631 if (!error) {
2632 error = dir->i_op->unlink(dir, dentry);
2633 if (!error)
2634 dont_mount(dentry);
2637 mutex_unlock(&dentry->d_inode->i_mutex);
2639 /* We don't d_delete() NFS sillyrenamed files--they still exist. */
2640 if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
2641 fsnotify_link_count(dentry->d_inode);
2642 d_delete(dentry);
2645 return error;
2649 * Make sure that the actual truncation of the file will occur outside its
2650 * directory's i_mutex. Truncate can take a long time if there is a lot of
2651 * writeout happening, and we don't want to prevent access to the directory
2652 * while waiting on the I/O.
2654 static long do_unlinkat(int dfd, const char __user *pathname)
2656 int error;
2657 char *name;
2658 struct dentry *dentry;
2659 struct nameidata nd;
2660 struct inode *inode = NULL;
2662 error = user_path_parent(dfd, pathname, &nd, &name);
2663 if (error)
2664 return error;
2666 error = -EISDIR;
2667 if (nd.last_type != LAST_NORM)
2668 goto exit1;
2670 nd.flags &= ~LOOKUP_PARENT;
2672 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2673 dentry = lookup_hash(&nd);
2674 error = PTR_ERR(dentry);
2675 if (!IS_ERR(dentry)) {
2676 /* Why not before? Because we want correct error value */
2677 if (nd.last.name[nd.last.len])
2678 goto slashes;
2679 inode = dentry->d_inode;
2680 if (!inode)
2681 goto slashes;
2682 ihold(inode);
2683 error = mnt_want_write(nd.path.mnt);
2684 if (error)
2685 goto exit2;
2686 error = security_path_unlink(&nd.path, dentry);
2687 if (error)
2688 goto exit3;
2689 error = vfs_unlink(nd.path.dentry->d_inode, dentry);
2690 exit3:
2691 mnt_drop_write(nd.path.mnt);
2692 exit2:
2693 dput(dentry);
2695 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2696 if (inode)
2697 iput(inode); /* truncate the inode here */
2698 exit1:
2699 path_put(&nd.path);
2700 putname(name);
2701 return error;
2703 slashes:
2704 error = !dentry->d_inode ? -ENOENT :
2705 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
2706 goto exit2;
2709 SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
2711 if ((flag & ~AT_REMOVEDIR) != 0)
2712 return -EINVAL;
2714 if (flag & AT_REMOVEDIR)
2715 return do_rmdir(dfd, pathname);
2717 return do_unlinkat(dfd, pathname);
2720 SYSCALL_DEFINE1(unlink, const char __user *, pathname)
2722 return do_unlinkat(AT_FDCWD, pathname);
2725 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2727 int error = may_create(dir, dentry);
2729 if (error)
2730 return error;
2732 if (!dir->i_op->symlink)
2733 return -EPERM;
2735 error = security_inode_symlink(dir, dentry, oldname);
2736 if (error)
2737 return error;
2739 error = dir->i_op->symlink(dir, dentry, oldname);
2740 if (!error)
2741 fsnotify_create(dir, dentry);
2742 return error;
2745 SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
2746 int, newdfd, const char __user *, newname)
2748 int error;
2749 char *from;
2750 char *to;
2751 struct dentry *dentry;
2752 struct nameidata nd;
2754 from = getname(oldname);
2755 if (IS_ERR(from))
2756 return PTR_ERR(from);
2758 error = user_path_parent(newdfd, newname, &nd, &to);
2759 if (error)
2760 goto out_putname;
2762 dentry = lookup_create(&nd, 0);
2763 error = PTR_ERR(dentry);
2764 if (IS_ERR(dentry))
2765 goto out_unlock;
2767 error = mnt_want_write(nd.path.mnt);
2768 if (error)
2769 goto out_dput;
2770 error = security_path_symlink(&nd.path, dentry, from);
2771 if (error)
2772 goto out_drop_write;
2773 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
2774 out_drop_write:
2775 mnt_drop_write(nd.path.mnt);
2776 out_dput:
2777 dput(dentry);
2778 out_unlock:
2779 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2780 path_put(&nd.path);
2781 putname(to);
2782 out_putname:
2783 putname(from);
2784 return error;
2787 SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
2789 return sys_symlinkat(oldname, AT_FDCWD, newname);
2792 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
2794 struct inode *inode = old_dentry->d_inode;
2795 int error;
2797 if (!inode)
2798 return -ENOENT;
2800 error = may_create(dir, new_dentry);
2801 if (error)
2802 return error;
2804 if (dir->i_sb != inode->i_sb)
2805 return -EXDEV;
2808 * A link to an append-only or immutable file cannot be created.
2810 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2811 return -EPERM;
2812 if (!dir->i_op->link)
2813 return -EPERM;
2814 if (S_ISDIR(inode->i_mode))
2815 return -EPERM;
2817 error = security_inode_link(old_dentry, dir, new_dentry);
2818 if (error)
2819 return error;
2821 mutex_lock(&inode->i_mutex);
2822 /* Make sure we don't allow creating hardlink to an unlinked file */
2823 if (inode->i_nlink == 0)
2824 error = -ENOENT;
2825 else
2826 error = dir->i_op->link(old_dentry, dir, new_dentry);
2827 mutex_unlock(&inode->i_mutex);
2828 if (!error)
2829 fsnotify_link(dir, inode, new_dentry);
2830 return error;
2834 * Hardlinks are often used in delicate situations. We avoid
2835 * security-related surprises by not following symlinks on the
2836 * newname. --KAB
2838 * We don't follow them on the oldname either to be compatible
2839 * with linux 2.0, and to avoid hard-linking to directories
2840 * and other special files. --ADM
2842 SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
2843 int, newdfd, const char __user *, newname, int, flags)
2845 struct dentry *new_dentry;
2846 struct nameidata nd;
2847 struct path old_path;
2848 int how = 0;
2849 int error;
2850 char *to;
2852 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
2853 return -EINVAL;
2855 * To use null names we require CAP_DAC_READ_SEARCH
2856 * This ensures that not everyone will be able to create
2857 * handlink using the passed filedescriptor.
2859 if (flags & AT_EMPTY_PATH) {
2860 if (!capable(CAP_DAC_READ_SEARCH))
2861 return -ENOENT;
2862 how = LOOKUP_EMPTY;
2865 if (flags & AT_SYMLINK_FOLLOW)
2866 how |= LOOKUP_FOLLOW;
2868 error = user_path_at(olddfd, oldname, how, &old_path);
2869 if (error)
2870 return error;
2872 error = user_path_parent(newdfd, newname, &nd, &to);
2873 if (error)
2874 goto out;
2875 error = -EXDEV;
2876 if (old_path.mnt != nd.path.mnt)
2877 goto out_release;
2878 new_dentry = lookup_create(&nd, 0);
2879 error = PTR_ERR(new_dentry);
2880 if (IS_ERR(new_dentry))
2881 goto out_unlock;
2882 error = mnt_want_write(nd.path.mnt);
2883 if (error)
2884 goto out_dput;
2885 error = security_path_link(old_path.dentry, &nd.path, new_dentry);
2886 if (error)
2887 goto out_drop_write;
2888 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
2889 out_drop_write:
2890 mnt_drop_write(nd.path.mnt);
2891 out_dput:
2892 dput(new_dentry);
2893 out_unlock:
2894 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2895 out_release:
2896 path_put(&nd.path);
2897 putname(to);
2898 out:
2899 path_put(&old_path);
2901 return error;
2904 SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
2906 return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
2910 * The worst of all namespace operations - renaming directory. "Perverted"
2911 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
2912 * Problems:
2913 * a) we can get into loop creation. Check is done in is_subdir().
2914 * b) race potential - two innocent renames can create a loop together.
2915 * That's where 4.4 screws up. Current fix: serialization on
2916 * sb->s_vfs_rename_mutex. We might be more accurate, but that's another
2917 * story.
2918 * c) we have to lock _three_ objects - parents and victim (if it exists).
2919 * And that - after we got ->i_mutex on parents (until then we don't know
2920 * whether the target exists). Solution: try to be smart with locking
2921 * order for inodes. We rely on the fact that tree topology may change
2922 * only under ->s_vfs_rename_mutex _and_ that parent of the object we
2923 * move will be locked. Thus we can rank directories by the tree
2924 * (ancestors first) and rank all non-directories after them.
2925 * That works since everybody except rename does "lock parent, lookup,
2926 * lock child" and rename is under ->s_vfs_rename_mutex.
2927 * HOWEVER, it relies on the assumption that any object with ->lookup()
2928 * has no more than 1 dentry. If "hybrid" objects will ever appear,
2929 * we'd better make sure that there's no link(2) for them.
2930 * d) conversion from fhandle to dentry may come in the wrong moment - when
2931 * we are removing the target. Solution: we will have to grab ->i_mutex
2932 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
2933 * ->i_mutex on parents, which works but leads to some truly excessive
2934 * locking].
2936 static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2937 struct inode *new_dir, struct dentry *new_dentry)
2939 int error = 0;
2940 struct inode *target = new_dentry->d_inode;
2943 * If we are going to change the parent - check write permissions,
2944 * we'll need to flip '..'.
2946 if (new_dir != old_dir) {
2947 error = inode_permission(old_dentry->d_inode, MAY_WRITE);
2948 if (error)
2949 return error;
2952 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2953 if (error)
2954 return error;
2956 if (target)
2957 mutex_lock(&target->i_mutex);
2959 error = -EBUSY;
2960 if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry))
2961 goto out;
2963 if (target)
2964 shrink_dcache_parent(new_dentry);
2965 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
2966 if (error)
2967 goto out;
2969 if (target) {
2970 target->i_flags |= S_DEAD;
2971 dont_mount(new_dentry);
2973 out:
2974 if (target)
2975 mutex_unlock(&target->i_mutex);
2976 if (!error)
2977 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
2978 d_move(old_dentry,new_dentry);
2979 return error;
2982 static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
2983 struct inode *new_dir, struct dentry *new_dentry)
2985 struct inode *target = new_dentry->d_inode;
2986 int error;
2988 error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
2989 if (error)
2990 return error;
2992 dget(new_dentry);
2993 if (target)
2994 mutex_lock(&target->i_mutex);
2996 error = -EBUSY;
2997 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
2998 goto out;
3000 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
3001 if (error)
3002 goto out;
3004 if (target)
3005 dont_mount(new_dentry);
3006 if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
3007 d_move(old_dentry, new_dentry);
3008 out:
3009 if (target)
3010 mutex_unlock(&target->i_mutex);
3011 dput(new_dentry);
3012 return error;
3015 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
3016 struct inode *new_dir, struct dentry *new_dentry)
3018 int error;
3019 int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
3020 const unsigned char *old_name;
3022 if (old_dentry->d_inode == new_dentry->d_inode)
3023 return 0;
3025 error = may_delete(old_dir, old_dentry, is_dir);
3026 if (error)
3027 return error;
3029 if (!new_dentry->d_inode)
3030 error = may_create(new_dir, new_dentry);
3031 else
3032 error = may_delete(new_dir, new_dentry, is_dir);
3033 if (error)
3034 return error;
3036 if (!old_dir->i_op->rename)
3037 return -EPERM;
3039 old_name = fsnotify_oldname_init(old_dentry->d_name.name);
3041 if (is_dir)
3042 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
3043 else
3044 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
3045 if (!error)
3046 fsnotify_move(old_dir, new_dir, old_name, is_dir,
3047 new_dentry->d_inode, old_dentry);
3048 fsnotify_oldname_free(old_name);
3050 return error;
3053 SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
3054 int, newdfd, const char __user *, newname)
3056 struct dentry *old_dir, *new_dir;
3057 struct dentry *old_dentry, *new_dentry;
3058 struct dentry *trap;
3059 struct nameidata oldnd, newnd;
3060 char *from;
3061 char *to;
3062 int error;
3064 error = user_path_parent(olddfd, oldname, &oldnd, &from);
3065 if (error)
3066 goto exit;
3068 error = user_path_parent(newdfd, newname, &newnd, &to);
3069 if (error)
3070 goto exit1;
3072 error = -EXDEV;
3073 if (oldnd.path.mnt != newnd.path.mnt)
3074 goto exit2;
3076 old_dir = oldnd.path.dentry;
3077 error = -EBUSY;
3078 if (oldnd.last_type != LAST_NORM)
3079 goto exit2;
3081 new_dir = newnd.path.dentry;
3082 if (newnd.last_type != LAST_NORM)
3083 goto exit2;
3085 oldnd.flags &= ~LOOKUP_PARENT;
3086 newnd.flags &= ~LOOKUP_PARENT;
3087 newnd.flags |= LOOKUP_RENAME_TARGET;
3089 trap = lock_rename(new_dir, old_dir);
3091 old_dentry = lookup_hash(&oldnd);
3092 error = PTR_ERR(old_dentry);
3093 if (IS_ERR(old_dentry))
3094 goto exit3;
3095 /* source must exist */
3096 error = -ENOENT;
3097 if (!old_dentry->d_inode)
3098 goto exit4;
3099 /* unless the source is a directory trailing slashes give -ENOTDIR */
3100 if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
3101 error = -ENOTDIR;
3102 if (oldnd.last.name[oldnd.last.len])
3103 goto exit4;
3104 if (newnd.last.name[newnd.last.len])
3105 goto exit4;
3107 /* source should not be ancestor of target */
3108 error = -EINVAL;
3109 if (old_dentry == trap)
3110 goto exit4;
3111 new_dentry = lookup_hash(&newnd);
3112 error = PTR_ERR(new_dentry);
3113 if (IS_ERR(new_dentry))
3114 goto exit4;
3115 /* target should not be an ancestor of source */
3116 error = -ENOTEMPTY;
3117 if (new_dentry == trap)
3118 goto exit5;
3120 error = mnt_want_write(oldnd.path.mnt);
3121 if (error)
3122 goto exit5;
3123 error = security_path_rename(&oldnd.path, old_dentry,
3124 &newnd.path, new_dentry);
3125 if (error)
3126 goto exit6;
3127 error = vfs_rename(old_dir->d_inode, old_dentry,
3128 new_dir->d_inode, new_dentry);
3129 exit6:
3130 mnt_drop_write(oldnd.path.mnt);
3131 exit5:
3132 dput(new_dentry);
3133 exit4:
3134 dput(old_dentry);
3135 exit3:
3136 unlock_rename(new_dir, old_dir);
3137 exit2:
3138 path_put(&newnd.path);
3139 putname(to);
3140 exit1:
3141 path_put(&oldnd.path);
3142 putname(from);
3143 exit:
3144 return error;
3147 SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname)
3149 return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname);
3152 int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link)
3154 int len;
3156 len = PTR_ERR(link);
3157 if (IS_ERR(link))
3158 goto out;
3160 len = strlen(link);
3161 if (len > (unsigned) buflen)
3162 len = buflen;
3163 if (copy_to_user(buffer, link, len))
3164 len = -EFAULT;
3165 out:
3166 return len;
3170 * A helper for ->readlink(). This should be used *ONLY* for symlinks that
3171 * have ->follow_link() touching nd only in nd_set_link(). Using (or not
3172 * using) it for any given inode is up to filesystem.
3174 int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
3176 struct nameidata nd;
3177 void *cookie;
3178 int res;
3180 nd.depth = 0;
3181 cookie = dentry->d_inode->i_op->follow_link(dentry, &nd);
3182 if (IS_ERR(cookie))
3183 return PTR_ERR(cookie);
3185 res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd));
3186 if (dentry->d_inode->i_op->put_link)
3187 dentry->d_inode->i_op->put_link(dentry, &nd, cookie);
3188 return res;
/* Exported entry point that simply forwards to __vfs_follow_link(). */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	return __vfs_follow_link(nd, link);
}
3196 /* get the link contents into pagecache */
3197 static char *page_getlink(struct dentry * dentry, struct page **ppage)
3199 char *kaddr;
3200 struct page *page;
3201 struct address_space *mapping = dentry->d_inode->i_mapping;
3202 page = read_mapping_page(mapping, 0, NULL);
3203 if (IS_ERR(page))
3204 return (char*)page;
3205 *ppage = page;
3206 kaddr = kmap(page);
3207 nd_terminate_link(kaddr, dentry->d_inode->i_size, PAGE_SIZE - 1);
3208 return kaddr;
3211 int page_readlink(struct dentry *dentry, char __user *buffer, int buflen)
3213 struct page *page = NULL;
3214 char *s = page_getlink(dentry, &page);
3215 int res = vfs_readlink(dentry,buffer,buflen,s);
3216 if (page) {
3217 kunmap(page);
3218 page_cache_release(page);
3220 return res;
3223 void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
3225 struct page *page = NULL;
3226 nd_set_link(nd, page_getlink(dentry, &page));
3227 return page;
/*
 * ->put_link() counterpart of page_follow_link_light(): unmap and release
 * the page passed back as @cookie.  A NULL cookie is a no-op.
 */
void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
	struct page *page = cookie;

	if (!page)
		return;

	kunmap(page);
	page_cache_release(page);
}
3241 * The nofs argument instructs pagecache_write_begin to pass AOP_FLAG_NOFS
3243 int __page_symlink(struct inode *inode, const char *symname, int len, int nofs)
3245 struct address_space *mapping = inode->i_mapping;
3246 struct page *page;
3247 void *fsdata;
3248 int err;
3249 char *kaddr;
3250 unsigned int flags = AOP_FLAG_UNINTERRUPTIBLE;
3251 if (nofs)
3252 flags |= AOP_FLAG_NOFS;
3254 retry:
3255 err = pagecache_write_begin(NULL, mapping, 0, len-1,
3256 flags, &page, &fsdata);
3257 if (err)
3258 goto fail;
3260 kaddr = kmap_atomic(page, KM_USER0);
3261 memcpy(kaddr, symname, len-1);
3262 kunmap_atomic(kaddr, KM_USER0);
3264 err = pagecache_write_end(NULL, mapping, 0, len-1, len-1,
3265 page, fsdata);
3266 if (err < 0)
3267 goto fail;
3268 if (err < len-1)
3269 goto retry;
3271 mark_inode_dirty(inode);
3272 return 0;
3273 fail:
3274 return err;
3277 int page_symlink(struct inode *inode, const char *symname, int len)
3279 return __page_symlink(inode, symname, len,
3280 !(mapping_gfp_mask(inode->i_mapping) & __GFP_FS));
3283 const struct inode_operations page_symlink_inode_operations = {
3284 .readlink = generic_readlink,
3285 .follow_link = page_follow_link_light,
3286 .put_link = page_put_link,
/* Symbols from this file made available through EXPORT_SYMBOL(). */
EXPORT_SYMBOL(user_path_at);
EXPORT_SYMBOL(follow_down_one);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_one_len);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(__page_symlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(kern_path_parent);
EXPORT_SYMBOL(kern_path);
EXPORT_SYMBOL(vfs_path_lookup);
EXPORT_SYMBOL(inode_permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);
EXPORT_SYMBOL(dentry_unhash);
EXPORT_SYMBOL(generic_readlink);