staging: erofs: fix warning Comparison to bool
[linux/fpc-iii.git] / fs / nfs / dir.c
blob57b6a45576adf028ee056c8c7bcf54707446e968
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * linux/fs/nfs/dir.c
5 * Copyright (C) 1992 Rick Sladkey
7 * nfs directory handling functions
9 * 10 Apr 1996 Added silly rename for unlink --okir
10 * 28 Sep 1996 Improved directory cache --okir
11 * 23 Aug 1997 Claus Heine claus@momo.math.rwth-aachen.de
12 * Re-implemented silly rename for unlink, newly implemented
13 * silly rename for nfs_rename() following the suggestions
14 * of Olaf Kirch (okir) found in this file.
15 * Following Linus comments on my original hack, this version
16 * depends only on the dcache stuff and doesn't touch the inode
17 * layer (iput() and friends).
18 * 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM
21 #include <linux/module.h>
22 #include <linux/time.h>
23 #include <linux/errno.h>
24 #include <linux/stat.h>
25 #include <linux/fcntl.h>
26 #include <linux/string.h>
27 #include <linux/kernel.h>
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/sunrpc/clnt.h>
31 #include <linux/nfs_fs.h>
32 #include <linux/nfs_mount.h>
33 #include <linux/pagemap.h>
34 #include <linux/pagevec.h>
35 #include <linux/namei.h>
36 #include <linux/mount.h>
37 #include <linux/swap.h>
38 #include <linux/sched.h>
39 #include <linux/kmemleak.h>
40 #include <linux/xattr.h>
42 #include "delegation.h"
43 #include "iostat.h"
44 #include "internal.h"
45 #include "fscache.h"
47 #include "nfstrace.h"
49 /* #define NFS_DEBUG_VERBOSE 1 */
51 static int nfs_opendir(struct inode *, struct file *);
52 static int nfs_closedir(struct inode *, struct file *);
53 static int nfs_readdir(struct file *, struct dir_context *);
54 static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
55 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
56 static void nfs_readdir_clear_array(struct page*);
58 const struct file_operations nfs_dir_operations = {
59 .llseek = nfs_llseek_dir,
60 .read = generic_read_dir,
61 .iterate = nfs_readdir,
62 .open = nfs_opendir,
63 .release = nfs_closedir,
64 .fsync = nfs_fsync_dir,
67 const struct address_space_operations nfs_dir_aops = {
68 .freepage = nfs_readdir_clear_array,
71 static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred)
73 struct nfs_inode *nfsi = NFS_I(dir);
74 struct nfs_open_dir_context *ctx;
75 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
76 if (ctx != NULL) {
77 ctx->duped = 0;
78 ctx->attr_gencount = nfsi->attr_gencount;
79 ctx->dir_cookie = 0;
80 ctx->dup_cookie = 0;
81 ctx->cred = get_cred(cred);
82 spin_lock(&dir->i_lock);
83 list_add(&ctx->list, &nfsi->open_files);
84 spin_unlock(&dir->i_lock);
85 return ctx;
87 return ERR_PTR(-ENOMEM);
90 static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
92 spin_lock(&dir->i_lock);
93 list_del(&ctx->list);
94 spin_unlock(&dir->i_lock);
95 put_cred(ctx->cred);
96 kfree(ctx);
100 * Open file
102 static int
103 nfs_opendir(struct inode *inode, struct file *filp)
105 int res = 0;
106 struct nfs_open_dir_context *ctx;
108 dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
110 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
112 ctx = alloc_nfs_open_dir_context(inode, current_cred());
113 if (IS_ERR(ctx)) {
114 res = PTR_ERR(ctx);
115 goto out;
117 filp->private_data = ctx;
118 out:
119 return res;
122 static int
123 nfs_closedir(struct inode *inode, struct file *filp)
125 put_nfs_open_dir_context(file_inode(filp), filp->private_data);
126 return 0;
129 struct nfs_cache_array_entry {
130 u64 cookie;
131 u64 ino;
132 struct qstr string;
133 unsigned char d_type;
136 struct nfs_cache_array {
137 int size;
138 int eof_index;
139 u64 last_cookie;
140 struct nfs_cache_array_entry array[0];
143 struct readdirvec {
144 unsigned long nr;
145 unsigned long index;
146 struct page *pages[NFS_MAX_READDIR_RAPAGES];
149 typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
150 typedef struct {
151 struct file *file;
152 struct page *page;
153 struct dir_context *ctx;
154 unsigned long page_index;
155 struct readdirvec pvec;
156 u64 *dir_cookie;
157 u64 last_cookie;
158 loff_t current_index;
159 decode_dirent_t decode;
161 unsigned long timestamp;
162 unsigned long gencount;
163 unsigned int cache_entry_index;
164 bool plus;
165 bool eof;
166 } nfs_readdir_descriptor_t;
169 * we are freeing strings created by nfs_add_to_readdir_array()
171 static
172 void nfs_readdir_clear_array(struct page *page)
174 struct nfs_cache_array *array;
175 int i;
177 array = kmap_atomic(page);
178 for (i = 0; i < array->size; i++)
179 kfree(array->array[i].string.name);
180 kunmap_atomic(array);
184 * the caller is responsible for freeing qstr.name
185 * when called by nfs_readdir_add_to_array, the strings will be freed in
186 * nfs_clear_readdir_array()
188 static
189 int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
191 string->len = len;
192 string->name = kmemdup(name, len, GFP_KERNEL);
193 if (string->name == NULL)
194 return -ENOMEM;
196 * Avoid a kmemleak false positive. The pointer to the name is stored
197 * in a page cache page which kmemleak does not scan.
199 kmemleak_not_leak(string->name);
200 string->hash = full_name_hash(NULL, name, len);
201 return 0;
204 static
205 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
207 struct nfs_cache_array *array = kmap(page);
208 struct nfs_cache_array_entry *cache_entry;
209 int ret;
211 cache_entry = &array->array[array->size];
213 /* Check that this entry lies within the page bounds */
214 ret = -ENOSPC;
215 if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
216 goto out;
218 cache_entry->cookie = entry->prev_cookie;
219 cache_entry->ino = entry->ino;
220 cache_entry->d_type = entry->d_type;
221 ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
222 if (ret)
223 goto out;
224 array->last_cookie = entry->cookie;
225 array->size++;
226 if (entry->eof != 0)
227 array->eof_index = array->size;
228 out:
229 kunmap(page);
230 return ret;
233 static
234 int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
236 loff_t diff = desc->ctx->pos - desc->current_index;
237 unsigned int index;
239 if (diff < 0)
240 goto out_eof;
241 if (diff >= array->size) {
242 if (array->eof_index >= 0)
243 goto out_eof;
244 return -EAGAIN;
247 index = (unsigned int)diff;
248 *desc->dir_cookie = array->array[index].cookie;
249 desc->cache_entry_index = index;
250 return 0;
251 out_eof:
252 desc->eof = true;
253 return -EBADCOOKIE;
256 static bool
257 nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
259 if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
260 return false;
261 smp_rmb();
262 return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
265 static
266 int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
268 int i;
269 loff_t new_pos;
270 int status = -EAGAIN;
272 for (i = 0; i < array->size; i++) {
273 if (array->array[i].cookie == *desc->dir_cookie) {
274 struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
275 struct nfs_open_dir_context *ctx = desc->file->private_data;
277 new_pos = desc->current_index + i;
278 if (ctx->attr_gencount != nfsi->attr_gencount ||
279 !nfs_readdir_inode_mapping_valid(nfsi)) {
280 ctx->duped = 0;
281 ctx->attr_gencount = nfsi->attr_gencount;
282 } else if (new_pos < desc->ctx->pos) {
283 if (ctx->duped > 0
284 && ctx->dup_cookie == *desc->dir_cookie) {
285 if (printk_ratelimit()) {
286 pr_notice("NFS: directory %pD2 contains a readdir loop."
287 "Please contact your server vendor. "
288 "The file: %.*s has duplicate cookie %llu\n",
289 desc->file, array->array[i].string.len,
290 array->array[i].string.name, *desc->dir_cookie);
292 status = -ELOOP;
293 goto out;
295 ctx->dup_cookie = *desc->dir_cookie;
296 ctx->duped = -1;
298 desc->ctx->pos = new_pos;
299 desc->cache_entry_index = i;
300 return 0;
303 if (array->eof_index >= 0) {
304 status = -EBADCOOKIE;
305 if (*desc->dir_cookie == array->last_cookie)
306 desc->eof = true;
308 out:
309 return status;
312 static
313 int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
315 struct nfs_cache_array *array;
316 int status;
318 array = kmap(desc->page);
320 if (*desc->dir_cookie == 0)
321 status = nfs_readdir_search_for_pos(array, desc);
322 else
323 status = nfs_readdir_search_for_cookie(array, desc);
325 if (status == -EAGAIN) {
326 desc->last_cookie = array->last_cookie;
327 desc->current_index += array->size;
328 desc->page_index++;
330 kunmap(desc->page);
331 return status;
334 /* Fill a page with xdr information before transferring to the cache page */
335 static
336 int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
337 struct nfs_entry *entry, struct file *file, struct inode *inode)
339 struct nfs_open_dir_context *ctx = file->private_data;
340 const struct cred *cred = ctx->cred;
341 unsigned long timestamp, gencount;
342 int error;
344 again:
345 timestamp = jiffies;
346 gencount = nfs_inc_attr_generation_counter();
347 error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
348 NFS_SERVER(inode)->dtsize, desc->plus);
349 if (error < 0) {
350 /* We requested READDIRPLUS, but the server doesn't grok it */
351 if (error == -ENOTSUPP && desc->plus) {
352 NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
353 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
354 desc->plus = false;
355 goto again;
357 goto error;
359 desc->timestamp = timestamp;
360 desc->gencount = gencount;
361 error:
362 return error;
365 static int xdr_decode(nfs_readdir_descriptor_t *desc,
366 struct nfs_entry *entry, struct xdr_stream *xdr)
368 int error;
370 error = desc->decode(xdr, entry, desc->plus);
371 if (error)
372 return error;
373 entry->fattr->time_start = desc->timestamp;
374 entry->fattr->gencount = desc->gencount;
375 return 0;
378 /* Match file and dirent using either filehandle or fileid
379 * Note: caller is responsible for checking the fsid
381 static
382 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
384 struct inode *inode;
385 struct nfs_inode *nfsi;
387 if (d_really_is_negative(dentry))
388 return 0;
390 inode = d_inode(dentry);
391 if (is_bad_inode(inode) || NFS_STALE(inode))
392 return 0;
394 nfsi = NFS_I(inode);
395 if (entry->fattr->fileid != nfsi->fileid)
396 return 0;
397 if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
398 return 0;
399 return 1;
402 static
403 bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
405 if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
406 return false;
407 if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags))
408 return true;
409 if (ctx->pos == 0)
410 return true;
411 return false;
415 * This function is called by the lookup and getattr code to request the
416 * use of readdirplus to accelerate any future lookups in the same
417 * directory.
419 void nfs_advise_use_readdirplus(struct inode *dir)
421 struct nfs_inode *nfsi = NFS_I(dir);
423 if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
424 !list_empty(&nfsi->open_files))
425 set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
429 * This function is mainly for use by nfs_getattr().
431 * If this is an 'ls -l', we want to force use of readdirplus.
432 * Do this by checking if there is an active file descriptor
433 * and calling nfs_advise_use_readdirplus, then forcing a
434 * cache flush.
436 void nfs_force_use_readdirplus(struct inode *dir)
438 struct nfs_inode *nfsi = NFS_I(dir);
440 if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
441 !list_empty(&nfsi->open_files)) {
442 set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
443 invalidate_mapping_pages(dir->i_mapping, 0, -1);
447 static
448 void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry)
450 struct qstr filename = QSTR_INIT(entry->name, entry->len);
451 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
452 struct dentry *dentry;
453 struct dentry *alias;
454 struct inode *dir = d_inode(parent);
455 struct inode *inode;
456 int status;
458 if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
459 return;
460 if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
461 return;
462 if (filename.len == 0)
463 return;
464 /* Validate that the name doesn't contain any illegal '\0' */
465 if (strnlen(filename.name, filename.len) != filename.len)
466 return;
467 /* ...or '/' */
468 if (strnchr(filename.name, filename.len, '/'))
469 return;
470 if (filename.name[0] == '.') {
471 if (filename.len == 1)
472 return;
473 if (filename.len == 2 && filename.name[1] == '.')
474 return;
476 filename.hash = full_name_hash(parent, filename.name, filename.len);
478 dentry = d_lookup(parent, &filename);
479 again:
480 if (!dentry) {
481 dentry = d_alloc_parallel(parent, &filename, &wq);
482 if (IS_ERR(dentry))
483 return;
485 if (!d_in_lookup(dentry)) {
486 /* Is there a mountpoint here? If so, just exit */
487 if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
488 &entry->fattr->fsid))
489 goto out;
490 if (nfs_same_file(dentry, entry)) {
491 if (!entry->fh->size)
492 goto out;
493 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
494 status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
495 if (!status)
496 nfs_setsecurity(d_inode(dentry), entry->fattr, entry->label);
497 goto out;
498 } else {
499 d_invalidate(dentry);
500 dput(dentry);
501 dentry = NULL;
502 goto again;
505 if (!entry->fh->size) {
506 d_lookup_done(dentry);
507 goto out;
510 inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label);
511 alias = d_splice_alias(inode, dentry);
512 d_lookup_done(dentry);
513 if (alias) {
514 if (IS_ERR(alias))
515 goto out;
516 dput(dentry);
517 dentry = alias;
519 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
520 out:
521 dput(dentry);
524 /* Perform conversion from xdr to cache array */
525 static
526 int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
527 struct page **xdr_pages, struct page *page, unsigned int buflen)
529 struct xdr_stream stream;
530 struct xdr_buf buf;
531 struct page *scratch;
532 struct nfs_cache_array *array;
533 unsigned int count = 0;
534 int status;
535 int max_rapages = NFS_MAX_READDIR_RAPAGES;
537 desc->pvec.index = desc->page_index;
538 desc->pvec.nr = 0;
540 scratch = alloc_page(GFP_KERNEL);
541 if (scratch == NULL)
542 return -ENOMEM;
544 if (buflen == 0)
545 goto out_nopages;
547 xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
548 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
550 do {
551 status = xdr_decode(desc, entry, &stream);
552 if (status != 0) {
553 if (status == -EAGAIN)
554 status = 0;
555 break;
558 count++;
560 if (desc->plus)
561 nfs_prime_dcache(file_dentry(desc->file), entry);
563 status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
564 if (status == -ENOSPC) {
565 desc->pvec.nr++;
566 if (desc->pvec.nr == max_rapages)
567 break;
568 status = nfs_readdir_add_to_array(entry, desc->pvec.pages[desc->pvec.nr]);
570 if (status != 0)
571 break;
572 } while (!entry->eof);
575 * page and desc->pvec.pages[0] are valid, don't need to check
576 * whether or not to be NULL.
578 copy_highpage(page, desc->pvec.pages[0]);
580 out_nopages:
581 if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
582 array = kmap_atomic(desc->pvec.pages[desc->pvec.nr]);
583 array->eof_index = array->size;
584 status = 0;
585 kunmap_atomic(array);
588 put_page(scratch);
591 * desc->pvec.nr > 0 means at least one page was completely filled,
592 * we should return -ENOSPC. Otherwise function
593 * nfs_readdir_xdr_to_array will enter infinite loop.
595 if (desc->pvec.nr > 0)
596 return -ENOSPC;
597 return status;
/* Drop one reference on each of the first @npages pages in @pages. */
static
void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
{
	unsigned int idx = 0;

	while (idx < npages)
		put_page(pages[idx++]);
}
609 * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
610 * to nfs_readdir_free_pages()
612 static
613 int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
615 unsigned int i;
617 for (i = 0; i < npages; i++) {
618 struct page *page = alloc_page(GFP_KERNEL);
619 if (page == NULL)
620 goto out_freepages;
621 pages[i] = page;
623 return 0;
625 out_freepages:
626 nfs_readdir_free_pages(pages, i);
627 return -ENOMEM;
631 * nfs_readdir_rapages_init initialize rapages by nfs_cache_array structure.
633 static
634 void nfs_readdir_rapages_init(nfs_readdir_descriptor_t *desc)
636 struct nfs_cache_array *array;
637 int max_rapages = NFS_MAX_READDIR_RAPAGES;
638 int index;
640 for (index = 0; index < max_rapages; index++) {
641 array = kmap_atomic(desc->pvec.pages[index]);
642 memset(array, 0, sizeof(struct nfs_cache_array));
643 array->eof_index = -1;
644 kunmap_atomic(array);
648 static
649 int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
651 struct page *pages[NFS_MAX_READDIR_PAGES];
652 struct nfs_entry entry;
653 struct file *file = desc->file;
654 struct nfs_cache_array *array;
655 int status = -ENOMEM;
656 unsigned int array_size = ARRAY_SIZE(pages);
659 * This means we hit readdir rdpages miss, the preallocated rdpages
660 * are useless, the preallocate rdpages should be reinitialized.
662 nfs_readdir_rapages_init(desc);
664 entry.prev_cookie = 0;
665 entry.cookie = desc->last_cookie;
666 entry.eof = 0;
667 entry.fh = nfs_alloc_fhandle();
668 entry.fattr = nfs_alloc_fattr();
669 entry.server = NFS_SERVER(inode);
670 if (entry.fh == NULL || entry.fattr == NULL)
671 goto out;
673 entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
674 if (IS_ERR(entry.label)) {
675 status = PTR_ERR(entry.label);
676 goto out;
679 array = kmap(page);
680 memset(array, 0, sizeof(struct nfs_cache_array));
681 array->eof_index = -1;
683 status = nfs_readdir_alloc_pages(pages, array_size);
684 if (status < 0)
685 goto out_release_array;
686 do {
687 unsigned int pglen;
688 status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
690 if (status < 0)
691 break;
692 pglen = status;
693 status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
694 if (status < 0) {
695 if (status == -ENOSPC)
696 status = 0;
697 break;
699 } while (array->eof_index < 0);
701 nfs_readdir_free_pages(pages, array_size);
702 out_release_array:
703 kunmap(page);
704 nfs4_label_free(entry.label);
705 out:
706 nfs_free_fattr(entry.fattr);
707 nfs_free_fhandle(entry.fh);
708 return status;
712 * Now we cache directories properly, by converting xdr information
713 * to an array that can be used for lookups later. This results in
714 * fewer cache pages, since we can store more information on each page.
715 * We only need to convert from xdr once so future lookups are much simpler
717 static
718 int nfs_readdir_filler(void *data, struct page* page)
720 nfs_readdir_descriptor_t *desc = data;
721 struct inode *inode = file_inode(desc->file);
722 int ret;
725 * If desc->page_index in range desc->pvec.index and
726 * desc->pvec.index + desc->pvec.nr, we get readdir cache hit.
728 if (desc->page_index >= desc->pvec.index &&
729 desc->page_index < (desc->pvec.index + desc->pvec.nr)) {
731 * page and desc->pvec.pages[x] are valid, don't need to check
732 * whether or not to be NULL.
734 copy_highpage(page, desc->pvec.pages[desc->page_index - desc->pvec.index]);
735 ret = 0;
736 } else {
737 ret = nfs_readdir_xdr_to_array(desc, page, inode);
738 if (ret < 0)
739 goto error;
742 SetPageUptodate(page);
744 if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
745 /* Should never happen */
746 nfs_zap_mapping(inode, inode->i_mapping);
748 unlock_page(page);
749 return 0;
750 error:
751 unlock_page(page);
752 return ret;
755 static
756 void cache_page_release(nfs_readdir_descriptor_t *desc)
758 if (!desc->page->mapping)
759 nfs_readdir_clear_array(desc->page);
760 put_page(desc->page);
761 desc->page = NULL;
764 static
765 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
767 return read_cache_page(desc->file->f_mapping, desc->page_index,
768 nfs_readdir_filler, desc);
772 * Returns 0 if desc->dir_cookie was found on page desc->page_index
774 static
775 int find_cache_page(nfs_readdir_descriptor_t *desc)
777 int res;
779 desc->page = get_cache_page(desc);
780 if (IS_ERR(desc->page))
781 return PTR_ERR(desc->page);
783 res = nfs_readdir_search_array(desc);
784 if (res != 0)
785 cache_page_release(desc);
786 return res;
789 /* Search for desc->dir_cookie from the beginning of the page cache */
790 static inline
791 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
793 int res;
795 if (desc->page_index == 0) {
796 desc->current_index = 0;
797 desc->last_cookie = 0;
799 do {
800 res = find_cache_page(desc);
801 } while (res == -EAGAIN);
802 return res;
806 * Once we've found the start of the dirent within a page: fill 'er up...
808 static
809 int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
811 struct file *file = desc->file;
812 int i = 0;
813 int res = 0;
814 struct nfs_cache_array *array = NULL;
815 struct nfs_open_dir_context *ctx = file->private_data;
817 array = kmap(desc->page);
818 for (i = desc->cache_entry_index; i < array->size; i++) {
819 struct nfs_cache_array_entry *ent;
821 ent = &array->array[i];
822 if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
823 nfs_compat_user_ino64(ent->ino), ent->d_type)) {
824 desc->eof = true;
825 break;
827 desc->ctx->pos++;
828 if (i < (array->size-1))
829 *desc->dir_cookie = array->array[i+1].cookie;
830 else
831 *desc->dir_cookie = array->last_cookie;
832 if (ctx->duped != 0)
833 ctx->duped = 1;
835 if (array->eof_index >= 0)
836 desc->eof = true;
838 kunmap(desc->page);
839 cache_page_release(desc);
840 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
841 (unsigned long long)*desc->dir_cookie, res);
842 return res;
846 * If we cannot find a cookie in our cache, we suspect that this is
847 * because it points to a deleted file, so we ask the server to return
848 * whatever it thinks is the next entry. We then feed this to filldir.
849 * If all goes well, we should then be able to find our way round the
850 * cache on the next call to readdir_search_pagecache();
852 * NOTE: we cannot add the anonymous page to the pagecache because
853 * the data it contains might not be page aligned. Besides,
854 * we should already have a complete representation of the
855 * directory in the page cache by the time we get here.
857 static inline
858 int uncached_readdir(nfs_readdir_descriptor_t *desc)
860 struct page *page = NULL;
861 int status;
862 struct inode *inode = file_inode(desc->file);
863 struct nfs_open_dir_context *ctx = desc->file->private_data;
865 dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
866 (unsigned long long)*desc->dir_cookie);
868 page = alloc_page(GFP_HIGHUSER);
869 if (!page) {
870 status = -ENOMEM;
871 goto out;
874 desc->page_index = 0;
875 desc->last_cookie = *desc->dir_cookie;
876 desc->page = page;
877 ctx->duped = 0;
879 status = nfs_readdir_xdr_to_array(desc, page, inode);
880 if (status < 0)
881 goto out_release;
883 status = nfs_do_filldir(desc);
885 out:
886 dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
887 __func__, status);
888 return status;
889 out_release:
890 cache_page_release(desc);
891 goto out;
894 /* The file offset position represents the dirent entry number. A
895 last cookie cache takes care of the common case of reading the
896 whole directory.
898 static int nfs_readdir(struct file *file, struct dir_context *ctx)
900 struct dentry *dentry = file_dentry(file);
901 struct inode *inode = d_inode(dentry);
902 nfs_readdir_descriptor_t my_desc,
903 *desc = &my_desc;
904 struct nfs_open_dir_context *dir_ctx = file->private_data;
905 int res = 0;
906 int max_rapages = NFS_MAX_READDIR_RAPAGES;
908 dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
909 file, (long long)ctx->pos);
910 nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
913 * ctx->pos points to the dirent entry number.
914 * *desc->dir_cookie has the cookie for the next entry. We have
915 * to either find the entry with the appropriate number or
916 * revalidate the cookie.
918 memset(desc, 0, sizeof(*desc));
920 desc->file = file;
921 desc->ctx = ctx;
922 desc->dir_cookie = &dir_ctx->dir_cookie;
923 desc->decode = NFS_PROTO(inode)->decode_dirent;
924 desc->plus = nfs_use_readdirplus(inode, ctx);
926 res = nfs_readdir_alloc_pages(desc->pvec.pages, max_rapages);
927 if (res < 0)
928 return -ENOMEM;
930 nfs_readdir_rapages_init(desc);
932 if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
933 res = nfs_revalidate_mapping(inode, file->f_mapping);
934 if (res < 0)
935 goto out;
937 do {
938 res = readdir_search_pagecache(desc);
940 if (res == -EBADCOOKIE) {
941 res = 0;
942 /* This means either end of directory */
943 if (*desc->dir_cookie && !desc->eof) {
944 /* Or that the server has 'lost' a cookie */
945 res = uncached_readdir(desc);
946 if (res == 0)
947 continue;
949 break;
951 if (res == -ETOOSMALL && desc->plus) {
952 clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
953 nfs_zap_caches(inode);
954 desc->page_index = 0;
955 desc->plus = false;
956 desc->eof = false;
957 continue;
959 if (res < 0)
960 break;
962 res = nfs_do_filldir(desc);
963 if (res < 0)
964 break;
965 } while (!desc->eof);
966 out:
967 nfs_readdir_free_pages(desc->pvec.pages, max_rapages);
968 if (res > 0)
969 res = 0;
970 dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
971 return res;
974 static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
976 struct inode *inode = file_inode(filp);
977 struct nfs_open_dir_context *dir_ctx = filp->private_data;
979 dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
980 filp, offset, whence);
982 switch (whence) {
983 default:
984 return -EINVAL;
985 case SEEK_SET:
986 if (offset < 0)
987 return -EINVAL;
988 inode_lock(inode);
989 break;
990 case SEEK_CUR:
991 if (offset == 0)
992 return filp->f_pos;
993 inode_lock(inode);
994 offset += filp->f_pos;
995 if (offset < 0) {
996 inode_unlock(inode);
997 return -EINVAL;
1000 if (offset != filp->f_pos) {
1001 filp->f_pos = offset;
1002 dir_ctx->dir_cookie = 0;
1003 dir_ctx->duped = 0;
1005 inode_unlock(inode);
1006 return offset;
1010 * All directory operations under NFS are synchronous, so fsync()
1011 * is a dummy operation.
1013 static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
1014 int datasync)
1016 struct inode *inode = file_inode(filp);
1018 dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
1020 inode_lock(inode);
1021 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
1022 inode_unlock(inode);
1023 return 0;
1027 * nfs_force_lookup_revalidate - Mark the directory as having changed
1028 * @dir: pointer to directory inode
1030 * This forces the revalidation code in nfs_lookup_revalidate() to do a
1031 * full lookup on all child dentries of 'dir' whenever a change occurs
1032 * on the server that might have invalidated our dcache.
1034 * The caller should be holding dir->i_lock
1036 void nfs_force_lookup_revalidate(struct inode *dir)
1038 NFS_I(dir)->cache_change_attribute++;
1040 EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
1043 * A check for whether or not the parent directory has changed.
1044 * In the case it has, we assume that the dentries are untrustworthy
1045 * and may need to be looked up again.
1046 * If rcu_walk prevents us from performing a full check, return 0.
1048 static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
1049 int rcu_walk)
1051 if (IS_ROOT(dentry))
1052 return 1;
1053 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
1054 return 0;
1055 if (!nfs_verify_change_attribute(dir, dentry->d_time))
1056 return 0;
1057 /* Revalidate nfsi->cache_change_attribute before we declare a match */
1058 if (nfs_mapping_need_revalidate_inode(dir)) {
1059 if (rcu_walk)
1060 return 0;
1061 if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
1062 return 0;
1064 if (!nfs_verify_change_attribute(dir, dentry->d_time))
1065 return 0;
1066 return 1;
1070 * Use intent information to check whether or not we're going to do
1071 * an O_EXCL create using this path component.
1073 static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
1075 if (NFS_PROTO(dir)->version == 2)
1076 return 0;
1077 return flags & LOOKUP_EXCL;
1081 * Inode and filehandle revalidation for lookups.
1083 * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
1084 * or if the intent information indicates that we're about to open this
1085 * particular file and the "nocto" mount flag is not set.
1088 static
1089 int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1091 struct nfs_server *server = NFS_SERVER(inode);
1092 int ret;
1094 if (IS_AUTOMOUNT(inode))
1095 return 0;
1097 if (flags & LOOKUP_OPEN) {
1098 switch (inode->i_mode & S_IFMT) {
1099 case S_IFREG:
1100 /* A NFSv4 OPEN will revalidate later */
1101 if (server->caps & NFS_CAP_ATOMIC_OPEN)
1102 goto out;
1103 /* Fallthrough */
1104 case S_IFDIR:
1105 if (server->flags & NFS_MOUNT_NOCTO)
1106 break;
1107 /* NFS close-to-open cache consistency validation */
1108 goto out_force;
1112 /* VFS wants an on-the-wire revalidation */
1113 if (flags & LOOKUP_REVAL)
1114 goto out_force;
1115 out:
1116 return (inode->i_nlink == 0) ? -ESTALE : 0;
1117 out_force:
1118 if (flags & LOOKUP_RCU)
1119 return -ECHILD;
1120 ret = __nfs_revalidate_inode(server, inode);
1121 if (ret != 0)
1122 return ret;
1123 goto out;
1127 * We judge how long we want to trust negative
1128 * dentries by looking at the parent inode mtime.
1130 * If parent mtime has changed, we revalidate, else we wait for a
1131 * period corresponding to the parent's attribute cache timeout value.
1133 * If LOOKUP_RCU prevents us from performing a full check, return 1
1134 * suggesting a reval is needed.
1136 * Note that when creating a new file, or looking up a rename target,
1137 * then it shouldn't be necessary to revalidate a negative dentry.
1139 static inline
1140 int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1141 unsigned int flags)
1143 if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1144 return 0;
1145 if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1146 return 1;
1147 return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1150 static int
1151 nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
1152 struct inode *inode, int error)
1154 switch (error) {
1155 case 1:
1156 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
1157 __func__, dentry);
1158 return 1;
1159 case 0:
1160 nfs_mark_for_revalidate(dir);
1161 if (inode && S_ISDIR(inode->i_mode)) {
1162 /* Purge readdir caches. */
1163 nfs_zap_caches(inode);
1165 * We can't d_drop the root of a disconnected tree:
1166 * its d_hash is on the s_anon list and d_drop() would hide
1167 * it from shrink_dcache_for_unmount(), leading to busy
1168 * inodes on unmount and further oopses.
1170 if (IS_ROOT(dentry))
1171 return 1;
1173 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
1174 __func__, dentry);
1175 return 0;
1177 dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
1178 __func__, dentry, error);
1179 return error;
1182 static int
1183 nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1184 unsigned int flags)
1186 int ret = 1;
1187 if (nfs_neg_need_reval(dir, dentry, flags)) {
1188 if (flags & LOOKUP_RCU)
1189 return -ECHILD;
1190 ret = 0;
1192 return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
/*
 * Revalidation shortcut used when the caller found a delegation on
 * @inode: refresh the dentry's verifier from @dir and report it as valid.
 */
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
				struct inode *inode)
{
	unsigned long verifier = nfs_save_change_attribute(dir);

	nfs_set_verifier(dentry, verifier);
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
1203 static int
1204 nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
1205 struct inode *inode)
1207 struct nfs_fh *fhandle;
1208 struct nfs_fattr *fattr;
1209 struct nfs4_label *label;
1210 int ret;
1212 ret = -ENOMEM;
1213 fhandle = nfs_alloc_fhandle();
1214 fattr = nfs_alloc_fattr();
1215 label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
1216 if (fhandle == NULL || fattr == NULL || IS_ERR(label))
1217 goto out;
1219 ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1220 if (ret < 0) {
1221 if (ret == -ESTALE || ret == -ENOENT)
1222 ret = 0;
1223 goto out;
1225 ret = 0;
1226 if (nfs_compare_fh(NFS_FH(inode), fhandle))
1227 goto out;
1228 if (nfs_refresh_inode(inode, fattr) < 0)
1229 goto out;
1231 nfs_setsecurity(inode, fattr, label);
1232 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1234 /* set a readdirplus hint that we had a cache miss */
1235 nfs_force_use_readdirplus(dir);
1236 ret = 1;
1237 out:
1238 nfs_free_fattr(fattr);
1239 nfs_free_fhandle(fhandle);
1240 nfs4_label_free(label);
1241 return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1245 * This is called every time the dcache has a lookup hit,
1246 * and we should check whether we can really trust that
1247 * lookup.
1249 * NOTE! The hit can be a negative hit too, don't assume
1250 * we have an inode!
1252 * If the parent directory is seen to have changed, we throw out the
1253 * cached dentry and do a new lookup.
1255 static int
1256 nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1257 unsigned int flags)
1259 struct inode *inode;
1260 int error;
1262 nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1263 inode = d_inode(dentry);
1265 if (!inode)
1266 return nfs_lookup_revalidate_negative(dir, dentry, flags);
1268 if (is_bad_inode(inode)) {
1269 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1270 __func__, dentry);
1271 goto out_bad;
1274 if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
1275 return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1277 /* Force a full look up iff the parent directory has changed */
1278 if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1279 nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1280 error = nfs_lookup_verify_inode(inode, flags);
1281 if (error) {
1282 if (error == -ESTALE)
1283 nfs_zap_caches(dir);
1284 goto out_bad;
1286 nfs_advise_use_readdirplus(dir);
1287 goto out_valid;
1290 if (flags & LOOKUP_RCU)
1291 return -ECHILD;
1293 if (NFS_STALE(inode))
1294 goto out_bad;
1296 trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1297 error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
1298 trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
1299 return error;
1300 out_valid:
1301 return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1302 out_bad:
1303 if (flags & LOOKUP_RCU)
1304 return -ECHILD;
1305 return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
1308 static int
1309 __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
1310 int (*reval)(struct inode *, struct dentry *, unsigned int))
1312 struct dentry *parent;
1313 struct inode *dir;
1314 int ret;
1316 if (flags & LOOKUP_RCU) {
1317 parent = READ_ONCE(dentry->d_parent);
1318 dir = d_inode_rcu(parent);
1319 if (!dir)
1320 return -ECHILD;
1321 ret = reval(dir, dentry, flags);
1322 if (parent != READ_ONCE(dentry->d_parent))
1323 return -ECHILD;
1324 } else {
1325 parent = dget_parent(dentry);
1326 ret = reval(d_inode(parent), dentry, flags);
1327 dput(parent);
1329 return ret;
1332 static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1334 return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
1338 * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
1339 * when we don't really care about the dentry name. This is called when a
1340 * pathwalk ends on a dentry that was not found via a normal lookup in the
1341 * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1343 * In this situation, we just want to verify that the inode itself is OK
1344 * since the dentry might have changed on the server.
1346 static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1348 struct inode *inode = d_inode(dentry);
1349 int error = 0;
1352 * I believe we can only get a negative dentry here in the case of a
1353 * procfs-style symlink. Just assume it's correct for now, but we may
1354 * eventually need to do something more here.
1356 if (!inode) {
1357 dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
1358 __func__, dentry);
1359 return 1;
1362 if (is_bad_inode(inode)) {
1363 dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1364 __func__, dentry);
1365 return 0;
1368 error = nfs_lookup_verify_inode(inode, flags);
1369 dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1370 __func__, inode->i_ino, error ? "invalid" : "valid");
1371 return !error;
1375 * This is called from dput() when d_count is going to 0.
1377 static int nfs_dentry_delete(const struct dentry *dentry)
1379 dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
1380 dentry, dentry->d_flags);
1382 /* Unhash any dentry with a stale inode */
1383 if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
1384 return 1;
1386 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1387 /* Unhash it, so that ->d_iput() would be called */
1388 return 1;
1390 if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
1391 /* Unhash it, so that ancestors of killed async unlink
1392 * files will be cleaned up during umount */
1393 return 1;
1395 return 0;
1399 /* Ensure that we revalidate inode->i_nlink */
1400 static void nfs_drop_nlink(struct inode *inode)
1402 spin_lock(&inode->i_lock);
1403 /* drop the inode if we're reasonably sure this is the last link */
1404 if (inode->i_nlink > 0)
1405 drop_nlink(inode);
1406 NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
1407 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
1408 | NFS_INO_INVALID_CTIME
1409 | NFS_INO_INVALID_OTHER
1410 | NFS_INO_REVAL_FORCED;
1411 spin_unlock(&inode->i_lock);
1415 * Called when the dentry loses inode.
1416 * We use it to clean up silly-renamed files.
1418 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1420 if (S_ISDIR(inode->i_mode))
1421 /* drop any readdir cache as it could easily be old */
1422 NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
1424 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1425 nfs_complete_unlink(dentry, inode);
1426 nfs_drop_nlink(inode);
1428 iput(inode);
1431 static void nfs_d_release(struct dentry *dentry)
1433 /* free cached devname value, if it survived that far */
1434 if (unlikely(dentry->d_fsdata)) {
1435 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1436 WARN_ON(1);
1437 else
1438 kfree(dentry->d_fsdata);
1442 const struct dentry_operations nfs_dentry_operations = {
1443 .d_revalidate = nfs_lookup_revalidate,
1444 .d_weak_revalidate = nfs_weak_revalidate,
1445 .d_delete = nfs_dentry_delete,
1446 .d_iput = nfs_dentry_iput,
1447 .d_automount = nfs_d_automount,
1448 .d_release = nfs_d_release,
1450 EXPORT_SYMBOL_GPL(nfs_dentry_operations);
1452 struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
1454 struct dentry *res;
1455 struct inode *inode = NULL;
1456 struct nfs_fh *fhandle = NULL;
1457 struct nfs_fattr *fattr = NULL;
1458 struct nfs4_label *label = NULL;
1459 int error;
1461 dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1462 nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1464 if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
1465 return ERR_PTR(-ENAMETOOLONG);
1468 * If we're doing an exclusive create, optimize away the lookup
1469 * but don't hash the dentry.
1471 if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
1472 return NULL;
1474 res = ERR_PTR(-ENOMEM);
1475 fhandle = nfs_alloc_fhandle();
1476 fattr = nfs_alloc_fattr();
1477 if (fhandle == NULL || fattr == NULL)
1478 goto out;
1480 label = nfs4_label_alloc(NFS_SERVER(dir), GFP_NOWAIT);
1481 if (IS_ERR(label))
1482 goto out;
1484 trace_nfs_lookup_enter(dir, dentry, flags);
1485 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
1486 if (error == -ENOENT)
1487 goto no_entry;
1488 if (error < 0) {
1489 res = ERR_PTR(error);
1490 goto out_label;
1492 inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
1493 res = ERR_CAST(inode);
1494 if (IS_ERR(res))
1495 goto out_label;
1497 /* Notify readdir to use READDIRPLUS */
1498 nfs_force_use_readdirplus(dir);
1500 no_entry:
1501 res = d_splice_alias(inode, dentry);
1502 if (res != NULL) {
1503 if (IS_ERR(res))
1504 goto out_label;
1505 dentry = res;
1507 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1508 out_label:
1509 trace_nfs_lookup_exit(dir, dentry, flags, error);
1510 nfs4_label_free(label);
1511 out:
1512 nfs_free_fattr(fattr);
1513 nfs_free_fhandle(fhandle);
1514 return res;
1516 EXPORT_SYMBOL_GPL(nfs_lookup);
1518 #if IS_ENABLED(CONFIG_NFS_V4)
1519 static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
1521 const struct dentry_operations nfs4_dentry_operations = {
1522 .d_revalidate = nfs4_lookup_revalidate,
1523 .d_weak_revalidate = nfs_weak_revalidate,
1524 .d_delete = nfs_dentry_delete,
1525 .d_iput = nfs_dentry_iput,
1526 .d_automount = nfs_d_automount,
1527 .d_release = nfs_d_release,
1529 EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
1531 static fmode_t flags_to_mode(int flags)
1533 fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
1534 if ((flags & O_ACCMODE) != O_WRONLY)
1535 res |= FMODE_READ;
1536 if ((flags & O_ACCMODE) != O_RDONLY)
1537 res |= FMODE_WRITE;
1538 return res;
1541 static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
1543 return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
/* Open callback handed to finish_open(); always returns 0. */
static int do_open(struct inode *inode, struct file *filp)
{
	nfs_fscache_open_file(inode, filp);

	return 0;
}
1552 static int nfs_finish_open(struct nfs_open_context *ctx,
1553 struct dentry *dentry,
1554 struct file *file, unsigned open_flags)
1556 int err;
1558 err = finish_open(file, dentry, do_open);
1559 if (err)
1560 goto out;
1561 if (S_ISREG(file->f_path.dentry->d_inode->i_mode))
1562 nfs_file_set_open_context(file, ctx);
1563 else
1564 err = -ESTALE;
1565 out:
1566 return err;
1569 int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
1570 struct file *file, unsigned open_flags,
1571 umode_t mode)
1573 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
1574 struct nfs_open_context *ctx;
1575 struct dentry *res;
1576 struct iattr attr = { .ia_valid = ATTR_OPEN };
1577 struct inode *inode;
1578 unsigned int lookup_flags = 0;
1579 bool switched = false;
1580 int created = 0;
1581 int err;
1583 /* Expect a negative dentry */
1584 BUG_ON(d_inode(dentry));
1586 dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
1587 dir->i_sb->s_id, dir->i_ino, dentry);
1589 err = nfs_check_flags(open_flags);
1590 if (err)
1591 return err;
1593 /* NFS only supports OPEN on regular files */
1594 if ((open_flags & O_DIRECTORY)) {
1595 if (!d_in_lookup(dentry)) {
1597 * Hashed negative dentry with O_DIRECTORY: dentry was
1598 * revalidated and is fine, no need to perform lookup
1599 * again
1601 return -ENOENT;
1603 lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
1604 goto no_open;
1607 if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
1608 return -ENAMETOOLONG;
1610 if (open_flags & O_CREAT) {
1611 struct nfs_server *server = NFS_SERVER(dir);
1613 if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
1614 mode &= ~current_umask();
1616 attr.ia_valid |= ATTR_MODE;
1617 attr.ia_mode = mode;
1619 if (open_flags & O_TRUNC) {
1620 attr.ia_valid |= ATTR_SIZE;
1621 attr.ia_size = 0;
1624 if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
1625 d_drop(dentry);
1626 switched = true;
1627 dentry = d_alloc_parallel(dentry->d_parent,
1628 &dentry->d_name, &wq);
1629 if (IS_ERR(dentry))
1630 return PTR_ERR(dentry);
1631 if (unlikely(!d_in_lookup(dentry)))
1632 return finish_no_open(file, dentry);
1635 ctx = create_nfs_open_context(dentry, open_flags, file);
1636 err = PTR_ERR(ctx);
1637 if (IS_ERR(ctx))
1638 goto out;
1640 trace_nfs_atomic_open_enter(dir, ctx, open_flags);
1641 inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
1642 if (created)
1643 file->f_mode |= FMODE_CREATED;
1644 if (IS_ERR(inode)) {
1645 err = PTR_ERR(inode);
1646 trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
1647 put_nfs_open_context(ctx);
1648 d_drop(dentry);
1649 switch (err) {
1650 case -ENOENT:
1651 d_splice_alias(NULL, dentry);
1652 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1653 break;
1654 case -EISDIR:
1655 case -ENOTDIR:
1656 goto no_open;
1657 case -ELOOP:
1658 if (!(open_flags & O_NOFOLLOW))
1659 goto no_open;
1660 break;
1661 /* case -EINVAL: */
1662 default:
1663 break;
1665 goto out;
1668 err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
1669 trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
1670 put_nfs_open_context(ctx);
1671 out:
1672 if (unlikely(switched)) {
1673 d_lookup_done(dentry);
1674 dput(dentry);
1676 return err;
1678 no_open:
1679 res = nfs_lookup(dir, dentry, lookup_flags);
1680 if (switched) {
1681 d_lookup_done(dentry);
1682 if (!res)
1683 res = dentry;
1684 else
1685 dput(dentry);
1687 if (IS_ERR(res))
1688 return PTR_ERR(res);
1689 return finish_no_open(file, res);
1691 EXPORT_SYMBOL_GPL(nfs_atomic_open);
1693 static int
1694 nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1695 unsigned int flags)
1697 struct inode *inode;
1699 if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
1700 goto full_reval;
1701 if (d_mountpoint(dentry))
1702 goto full_reval;
1704 inode = d_inode(dentry);
1706 /* We can't create new files in nfs_open_revalidate(), so we
1707 * optimize away revalidation of negative dentries.
1709 if (inode == NULL)
1710 goto full_reval;
1712 if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
1713 return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1715 /* NFS only supports OPEN on regular files */
1716 if (!S_ISREG(inode->i_mode))
1717 goto full_reval;
1719 /* We cannot do exclusive creation on a positive dentry */
1720 if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
1721 goto reval_dentry;
1723 /* Check if the directory changed */
1724 if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
1725 goto reval_dentry;
1727 /* Let f_op->open() actually open (and revalidate) the file */
1728 return 1;
1729 reval_dentry:
1730 if (flags & LOOKUP_RCU)
1731 return -ECHILD;
1732 return nfs_lookup_revalidate_dentry(dir, dentry, inode);
1734 full_reval:
1735 return nfs_do_lookup_revalidate(dir, dentry, flags);
1738 static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1740 return __nfs_lookup_revalidate(dentry, flags,
1741 nfs4_do_lookup_revalidate);
1744 #endif /* CONFIG_NFS_V4 */
1747 * Code common to create, mkdir, and mknod.
1749 int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1750 struct nfs_fattr *fattr,
1751 struct nfs4_label *label)
1753 struct dentry *parent = dget_parent(dentry);
1754 struct inode *dir = d_inode(parent);
1755 struct inode *inode;
1756 struct dentry *d;
1757 int error = -EACCES;
1759 d_drop(dentry);
1761 /* We may have been initialized further down */
1762 if (d_really_is_positive(dentry))
1763 goto out;
1764 if (fhandle->size == 0) {
1765 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, NULL);
1766 if (error)
1767 goto out_error;
1769 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1770 if (!(fattr->valid & NFS_ATTR_FATTR)) {
1771 struct nfs_server *server = NFS_SB(dentry->d_sb);
1772 error = server->nfs_client->rpc_ops->getattr(server, fhandle,
1773 fattr, NULL, NULL);
1774 if (error < 0)
1775 goto out_error;
1777 inode = nfs_fhget(dentry->d_sb, fhandle, fattr, label);
1778 d = d_splice_alias(inode, dentry);
1779 if (IS_ERR(d)) {
1780 error = PTR_ERR(d);
1781 goto out_error;
1783 dput(d);
1784 out:
1785 dput(parent);
1786 return 0;
1787 out_error:
1788 nfs_mark_for_revalidate(dir);
1789 dput(parent);
1790 return error;
1792 EXPORT_SYMBOL_GPL(nfs_instantiate);
1795 * Following a failed create operation, we drop the dentry rather
1796 * than retain a negative dentry. This avoids a problem in the event
1797 * that the operation succeeded on the server, but an error in the
1798 * reply path made it appear to have failed.
1800 int nfs_create(struct inode *dir, struct dentry *dentry,
1801 umode_t mode, bool excl)
1803 struct iattr attr;
1804 int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
1805 int error;
1807 dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
1808 dir->i_sb->s_id, dir->i_ino, dentry);
1810 attr.ia_mode = mode;
1811 attr.ia_valid = ATTR_MODE;
1813 trace_nfs_create_enter(dir, dentry, open_flags);
1814 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
1815 trace_nfs_create_exit(dir, dentry, open_flags, error);
1816 if (error != 0)
1817 goto out_err;
1818 return 0;
1819 out_err:
1820 d_drop(dentry);
1821 return error;
1823 EXPORT_SYMBOL_GPL(nfs_create);
1826 * See comments for nfs_proc_create regarding failed operations.
1829 nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
1831 struct iattr attr;
1832 int status;
1834 dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
1835 dir->i_sb->s_id, dir->i_ino, dentry);
1837 attr.ia_mode = mode;
1838 attr.ia_valid = ATTR_MODE;
1840 trace_nfs_mknod_enter(dir, dentry);
1841 status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
1842 trace_nfs_mknod_exit(dir, dentry, status);
1843 if (status != 0)
1844 goto out_err;
1845 return 0;
1846 out_err:
1847 d_drop(dentry);
1848 return status;
1850 EXPORT_SYMBOL_GPL(nfs_mknod);
1853 * See comments for nfs_proc_create regarding failed operations.
1855 int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1857 struct iattr attr;
1858 int error;
1860 dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
1861 dir->i_sb->s_id, dir->i_ino, dentry);
1863 attr.ia_valid = ATTR_MODE;
1864 attr.ia_mode = mode | S_IFDIR;
1866 trace_nfs_mkdir_enter(dir, dentry);
1867 error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
1868 trace_nfs_mkdir_exit(dir, dentry, error);
1869 if (error != 0)
1870 goto out_err;
1871 return 0;
1872 out_err:
1873 d_drop(dentry);
1874 return error;
1876 EXPORT_SYMBOL_GPL(nfs_mkdir);
/* React to -ENOENT from the server: d_delete() a simple_positive() dentry. */
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
	if (!simple_positive(dentry))
		return;
	d_delete(dentry);
}
1884 int nfs_rmdir(struct inode *dir, struct dentry *dentry)
1886 int error;
1888 dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
1889 dir->i_sb->s_id, dir->i_ino, dentry);
1891 trace_nfs_rmdir_enter(dir, dentry);
1892 if (d_really_is_positive(dentry)) {
1893 down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
1894 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
1895 /* Ensure the VFS deletes this inode */
1896 switch (error) {
1897 case 0:
1898 clear_nlink(d_inode(dentry));
1899 break;
1900 case -ENOENT:
1901 nfs_dentry_handle_enoent(dentry);
1903 up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
1904 } else
1905 error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
1906 trace_nfs_rmdir_exit(dir, dentry, error);
1908 return error;
1910 EXPORT_SYMBOL_GPL(nfs_rmdir);
1913 * Remove a file after making sure there are no pending writes,
1914 * and after checking that the file has only one user.
1916 * We invalidate the attribute cache and free the inode prior to the operation
1917 * to avoid possible races if the server reuses the inode.
1919 static int nfs_safe_remove(struct dentry *dentry)
1921 struct inode *dir = d_inode(dentry->d_parent);
1922 struct inode *inode = d_inode(dentry);
1923 int error = -EBUSY;
1925 dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
1927 /* If the dentry was sillyrenamed, we simply call d_delete() */
1928 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1929 error = 0;
1930 goto out;
1933 trace_nfs_remove_enter(dir, dentry);
1934 if (inode != NULL) {
1935 error = NFS_PROTO(dir)->remove(dir, dentry);
1936 if (error == 0)
1937 nfs_drop_nlink(inode);
1938 } else
1939 error = NFS_PROTO(dir)->remove(dir, dentry);
1940 if (error == -ENOENT)
1941 nfs_dentry_handle_enoent(dentry);
1942 trace_nfs_remove_exit(dir, dentry, error);
1943 out:
1944 return error;
1947 /* We do silly rename. In case sillyrename() returns -EBUSY, the inode
1948 * belongs to an active ".nfs..." file and we return -EBUSY.
1950 * If sillyrename() returns 0, we do nothing, otherwise we unlink.
1952 int nfs_unlink(struct inode *dir, struct dentry *dentry)
1954 int error;
1955 int need_rehash = 0;
1957 dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
1958 dir->i_ino, dentry);
1960 trace_nfs_unlink_enter(dir, dentry);
1961 spin_lock(&dentry->d_lock);
1962 if (d_count(dentry) > 1) {
1963 spin_unlock(&dentry->d_lock);
1964 /* Start asynchronous writeout of the inode */
1965 write_inode_now(d_inode(dentry), 0);
1966 error = nfs_sillyrename(dir, dentry);
1967 goto out;
1969 if (!d_unhashed(dentry)) {
1970 __d_drop(dentry);
1971 need_rehash = 1;
1973 spin_unlock(&dentry->d_lock);
1974 error = nfs_safe_remove(dentry);
1975 if (!error || error == -ENOENT) {
1976 nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
1977 } else if (need_rehash)
1978 d_rehash(dentry);
1979 out:
1980 trace_nfs_unlink_exit(dir, dentry, error);
1981 return error;
1983 EXPORT_SYMBOL_GPL(nfs_unlink);
1986 * To create a symbolic link, most file systems instantiate a new inode,
1987 * add a page to it containing the path, then write it out to the disk
1988 * using prepare_write/commit_write.
1990 * Unfortunately the NFS client can't create the in-core inode first
1991 * because it needs a file handle to create an in-core inode (see
1992 * fs/nfs/inode.c:nfs_fhget). We only have a file handle *after* the
1993 * symlink request has completed on the server.
1995 * So instead we allocate a raw page, copy the symname into it, then do
1996 * the SYMLINK request with the page as the buffer. If it succeeds, we
1997 * now have a new file handle and can instantiate an in-core NFS inode
1998 * and move the raw page into its mapping.
2000 int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
2002 struct page *page;
2003 char *kaddr;
2004 struct iattr attr;
2005 unsigned int pathlen = strlen(symname);
2006 int error;
2008 dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
2009 dir->i_ino, dentry, symname);
2011 if (pathlen > PAGE_SIZE)
2012 return -ENAMETOOLONG;
2014 attr.ia_mode = S_IFLNK | S_IRWXUGO;
2015 attr.ia_valid = ATTR_MODE;
2017 page = alloc_page(GFP_USER);
2018 if (!page)
2019 return -ENOMEM;
2021 kaddr = page_address(page);
2022 memcpy(kaddr, symname, pathlen);
2023 if (pathlen < PAGE_SIZE)
2024 memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
2026 trace_nfs_symlink_enter(dir, dentry);
2027 error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
2028 trace_nfs_symlink_exit(dir, dentry, error);
2029 if (error != 0) {
2030 dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
2031 dir->i_sb->s_id, dir->i_ino,
2032 dentry, symname, error);
2033 d_drop(dentry);
2034 __free_page(page);
2035 return error;
2039 * No big deal if we can't add this page to the page cache here.
2040 * READLINK will get the missing page from the server if needed.
2042 if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
2043 GFP_KERNEL)) {
2044 SetPageUptodate(page);
2045 unlock_page(page);
2047 * add_to_page_cache_lru() grabs an extra page refcount.
2048 * Drop it here to avoid leaking this page later.
2050 put_page(page);
2051 } else
2052 __free_page(page);
2054 return 0;
2056 EXPORT_SYMBOL_GPL(nfs_symlink);
2059 nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2061 struct inode *inode = d_inode(old_dentry);
2062 int error;
2064 dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
2065 old_dentry, dentry);
2067 trace_nfs_link_enter(inode, dir, dentry);
2068 d_drop(dentry);
2069 error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
2070 if (error == 0) {
2071 ihold(inode);
2072 d_add(dentry, inode);
2074 trace_nfs_link_exit(inode, dir, dentry, error);
2075 return error;
2077 EXPORT_SYMBOL_GPL(nfs_link);
2080 * RENAME
2081 * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
2082 * different file handle for the same inode after a rename (e.g. when
2083 * moving to a different directory). A fail-safe method to do so would
2084 * be to look up old_dir/old_name, create a link to new_dir/new_name and
2085 * rename the old file using the sillyrename stuff. This way, the original
2086 * file in old_dir will go away when the last process iput()s the inode.
2088 * FIXED.
2090 * It actually works quite well. One needs to have the possibility for
2091 * at least one ".nfs..." file in each directory the file ever gets
2092 * moved or linked to which happens automagically with the new
2093 * implementation that only depends on the dcache stuff instead of
2094 * using the inode layer
2096 * Unfortunately, things are a little more complicated than indicated
2097 * above. For a cross-directory move, we want to make sure we can get
2098 * rid of the old inode after the operation. This means there must be
2099 * no pending writes (if it's a file), and the use count must be 1.
2100 * If these conditions are met, we can drop the dentries before doing
2101 * the rename.
2103 int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2104 struct inode *new_dir, struct dentry *new_dentry,
2105 unsigned int flags)
2107 struct inode *old_inode = d_inode(old_dentry);
2108 struct inode *new_inode = d_inode(new_dentry);
2109 struct dentry *dentry = NULL, *rehash = NULL;
2110 struct rpc_task *task;
2111 int error = -EBUSY;
2113 if (flags)
2114 return -EINVAL;
2116 dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
2117 old_dentry, new_dentry,
2118 d_count(new_dentry));
2120 trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
2122 * For non-directories, check whether the target is busy and if so,
2123 * make a copy of the dentry and then do a silly-rename. If the
2124 * silly-rename succeeds, the copied dentry is hashed and becomes
2125 * the new target.
2127 if (new_inode && !S_ISDIR(new_inode->i_mode)) {
2129 * To prevent any new references to the target during the
2130 * rename, we unhash the dentry in advance.
2132 if (!d_unhashed(new_dentry)) {
2133 d_drop(new_dentry);
2134 rehash = new_dentry;
2137 if (d_count(new_dentry) > 2) {
2138 int err;
2140 /* copy the target dentry's name */
2141 dentry = d_alloc(new_dentry->d_parent,
2142 &new_dentry->d_name);
2143 if (!dentry)
2144 goto out;
2146 /* silly-rename the existing target ... */
2147 err = nfs_sillyrename(new_dir, new_dentry);
2148 if (err)
2149 goto out;
2151 new_dentry = dentry;
2152 rehash = NULL;
2153 new_inode = NULL;
2157 task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
2158 if (IS_ERR(task)) {
2159 error = PTR_ERR(task);
2160 goto out;
2163 error = rpc_wait_for_completion_task(task);
2164 if (error != 0) {
2165 ((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
2166 /* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
2167 smp_wmb();
2168 } else
2169 error = task->tk_status;
2170 rpc_put_task(task);
2171 /* Ensure the inode attributes are revalidated */
2172 if (error == 0) {
2173 spin_lock(&old_inode->i_lock);
2174 NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
2175 NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE
2176 | NFS_INO_INVALID_CTIME
2177 | NFS_INO_REVAL_FORCED;
2178 spin_unlock(&old_inode->i_lock);
2180 out:
2181 if (rehash)
2182 d_rehash(rehash);
2183 trace_nfs_rename_exit(old_dir, old_dentry,
2184 new_dir, new_dentry, error);
2185 if (!error) {
2186 if (new_inode != NULL)
2187 nfs_drop_nlink(new_inode);
2189 * The d_move() should be here instead of in an async RPC completion
2190 * handler because we need the proper locks to move the dentry. If
2191 * we're interrupted by a signal, the async RPC completion handler
2192 * should mark the directories for revalidation.
2194 d_move(old_dentry, new_dentry);
2195 nfs_set_verifier(old_dentry,
2196 nfs_save_change_attribute(new_dir));
2197 } else if (error == -ENOENT)
2198 nfs_dentry_handle_enoent(old_dentry);
2200 /* new dentry created? */
2201 if (dentry)
2202 dput(dentry);
2203 return error;
2205 EXPORT_SYMBOL_GPL(nfs_rename);
2207 static DEFINE_SPINLOCK(nfs_access_lru_lock);
2208 static LIST_HEAD(nfs_access_lru_list);
2209 static atomic_long_t nfs_access_nr_entries;
2211 static unsigned long nfs_access_max_cachesize = ULONG_MAX;
2212 module_param(nfs_access_max_cachesize, ulong, 0644);
2213 MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2215 static void nfs_access_free_entry(struct nfs_access_entry *entry)
2217 put_cred(entry->cred);
2218 kfree_rcu(entry, rcu_head);
2219 smp_mb__before_atomic();
2220 atomic_long_dec(&nfs_access_nr_entries);
2221 smp_mb__after_atomic();
2224 static void nfs_access_free_list(struct list_head *head)
2226 struct nfs_access_entry *cache;
2228 while (!list_empty(head)) {
2229 cache = list_entry(head->next, struct nfs_access_entry, lru);
2230 list_del(&cache->lru);
2231 nfs_access_free_entry(cache);
2235 static unsigned long
2236 nfs_do_access_cache_scan(unsigned int nr_to_scan)
2238 LIST_HEAD(head);
2239 struct nfs_inode *nfsi, *next;
2240 struct nfs_access_entry *cache;
2241 long freed = 0;
2243 spin_lock(&nfs_access_lru_lock);
2244 list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
2245 struct inode *inode;
2247 if (nr_to_scan-- == 0)
2248 break;
2249 inode = &nfsi->vfs_inode;
2250 spin_lock(&inode->i_lock);
2251 if (list_empty(&nfsi->access_cache_entry_lru))
2252 goto remove_lru_entry;
2253 cache = list_entry(nfsi->access_cache_entry_lru.next,
2254 struct nfs_access_entry, lru);
2255 list_move(&cache->lru, &head);
2256 rb_erase(&cache->rb_node, &nfsi->access_cache);
2257 freed++;
2258 if (!list_empty(&nfsi->access_cache_entry_lru))
2259 list_move_tail(&nfsi->access_cache_inode_lru,
2260 &nfs_access_lru_list);
2261 else {
2262 remove_lru_entry:
2263 list_del_init(&nfsi->access_cache_inode_lru);
2264 smp_mb__before_atomic();
2265 clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
2266 smp_mb__after_atomic();
2268 spin_unlock(&inode->i_lock);
2270 spin_unlock(&nfs_access_lru_lock);
2271 nfs_access_free_list(&head);
2272 return freed;
2275 unsigned long
2276 nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2278 int nr_to_scan = sc->nr_to_scan;
2279 gfp_t gfp_mask = sc->gfp_mask;
2281 if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2282 return SHRINK_STOP;
2283 return nfs_do_access_cache_scan(nr_to_scan);
2287 unsigned long
2288 nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2290 return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2293 static void
2294 nfs_access_cache_enforce_limit(void)
2296 long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2297 unsigned long diff;
2298 unsigned int nr_to_scan;
2300 if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2301 return;
2302 nr_to_scan = 100;
2303 diff = nr_entries - nfs_access_max_cachesize;
2304 if (diff < nr_to_scan)
2305 nr_to_scan = diff;
2306 nfs_do_access_cache_scan(nr_to_scan);
2309 static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2311 struct rb_root *root_node = &nfsi->access_cache;
2312 struct rb_node *n;
2313 struct nfs_access_entry *entry;
2315 /* Unhook entries from the cache */
2316 while ((n = rb_first(root_node)) != NULL) {
2317 entry = rb_entry(n, struct nfs_access_entry, rb_node);
2318 rb_erase(n, root_node);
2319 list_move(&entry->lru, head);
2321 nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
2324 void nfs_access_zap_cache(struct inode *inode)
2326 LIST_HEAD(head);
2328 if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
2329 return;
2330 /* Remove from global LRU init */
2331 spin_lock(&nfs_access_lru_lock);
2332 if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2333 list_del_init(&NFS_I(inode)->access_cache_inode_lru);
2335 spin_lock(&inode->i_lock);
2336 __nfs_access_zap_cache(NFS_I(inode), &head);
2337 spin_unlock(&inode->i_lock);
2338 spin_unlock(&nfs_access_lru_lock);
2339 nfs_access_free_list(&head);
2341 EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
2343 static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
2345 struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
2347 while (n != NULL) {
2348 struct nfs_access_entry *entry =
2349 rb_entry(n, struct nfs_access_entry, rb_node);
2350 int cmp = cred_fscmp(cred, entry->cred);
2352 if (cmp < 0)
2353 n = n->rb_left;
2354 else if (cmp > 0)
2355 n = n->rb_right;
2356 else
2357 return entry;
2359 return NULL;
2362 static int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
2364 struct nfs_inode *nfsi = NFS_I(inode);
2365 struct nfs_access_entry *cache;
2366 bool retry = true;
2367 int err;
2369 spin_lock(&inode->i_lock);
2370 for(;;) {
2371 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2372 goto out_zap;
2373 cache = nfs_access_search_rbtree(inode, cred);
2374 err = -ENOENT;
2375 if (cache == NULL)
2376 goto out;
2377 /* Found an entry, is our attribute cache valid? */
2378 if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
2379 break;
2380 err = -ECHILD;
2381 if (!may_block)
2382 goto out;
2383 if (!retry)
2384 goto out_zap;
2385 spin_unlock(&inode->i_lock);
2386 err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
2387 if (err)
2388 return err;
2389 spin_lock(&inode->i_lock);
2390 retry = false;
2392 res->cred = cache->cred;
2393 res->mask = cache->mask;
2394 list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
2395 err = 0;
2396 out:
2397 spin_unlock(&inode->i_lock);
2398 return err;
2399 out_zap:
2400 spin_unlock(&inode->i_lock);
2401 nfs_access_zap_cache(inode);
2402 return -ENOENT;
2405 static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
2407 /* Only check the most recently returned cache entry,
2408 * but do it without locking.
2410 struct nfs_inode *nfsi = NFS_I(inode);
2411 struct nfs_access_entry *cache;
2412 int err = -ECHILD;
2413 struct list_head *lh;
2415 rcu_read_lock();
2416 if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2417 goto out;
2418 lh = rcu_dereference(nfsi->access_cache_entry_lru.prev);
2419 cache = list_entry(lh, struct nfs_access_entry, lru);
2420 if (lh == &nfsi->access_cache_entry_lru ||
2421 cred != cache->cred)
2422 cache = NULL;
2423 if (cache == NULL)
2424 goto out;
2425 if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
2426 goto out;
2427 res->cred = cache->cred;
2428 res->mask = cache->mask;
2429 err = 0;
2430 out:
2431 rcu_read_unlock();
2432 return err;
2435 static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
2437 struct nfs_inode *nfsi = NFS_I(inode);
2438 struct rb_root *root_node = &nfsi->access_cache;
2439 struct rb_node **p = &root_node->rb_node;
2440 struct rb_node *parent = NULL;
2441 struct nfs_access_entry *entry;
2442 int cmp;
2444 spin_lock(&inode->i_lock);
2445 while (*p != NULL) {
2446 parent = *p;
2447 entry = rb_entry(parent, struct nfs_access_entry, rb_node);
2448 cmp = cred_fscmp(set->cred, entry->cred);
2450 if (cmp < 0)
2451 p = &parent->rb_left;
2452 else if (cmp > 0)
2453 p = &parent->rb_right;
2454 else
2455 goto found;
2457 rb_link_node(&set->rb_node, parent, p);
2458 rb_insert_color(&set->rb_node, root_node);
2459 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
2460 spin_unlock(&inode->i_lock);
2461 return;
2462 found:
2463 rb_replace_node(parent, &set->rb_node, root_node);
2464 list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
2465 list_del(&entry->lru);
2466 spin_unlock(&inode->i_lock);
2467 nfs_access_free_entry(entry);
2470 void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2472 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
2473 if (cache == NULL)
2474 return;
2475 RB_CLEAR_NODE(&cache->rb_node);
2476 cache->cred = get_cred(set->cred);
2477 cache->mask = set->mask;
2479 /* The above field assignments must be visible
2480 * before this item appears on the lru. We cannot easily
2481 * use rcu_assign_pointer, so just force the memory barrier.
2483 smp_wmb();
2484 nfs_access_add_rbtree(inode, cache);
2486 /* Update accounting */
2487 smp_mb__before_atomic();
2488 atomic_long_inc(&nfs_access_nr_entries);
2489 smp_mb__after_atomic();
2491 /* Add inode to global LRU list */
2492 if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
2493 spin_lock(&nfs_access_lru_lock);
2494 if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2495 list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
2496 &nfs_access_lru_list);
2497 spin_unlock(&nfs_access_lru_lock);
2499 nfs_access_cache_enforce_limit();
2501 EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2503 #define NFS_MAY_READ (NFS_ACCESS_READ)
2504 #define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
2505 NFS_ACCESS_EXTEND | \
2506 NFS_ACCESS_DELETE)
2507 #define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
2508 NFS_ACCESS_EXTEND)
2509 #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
2510 #define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
2511 #define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
2512 static int
2513 nfs_access_calc_mask(u32 access_result, umode_t umode)
2515 int mask = 0;
2517 if (access_result & NFS_MAY_READ)
2518 mask |= MAY_READ;
2519 if (S_ISDIR(umode)) {
2520 if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
2521 mask |= MAY_WRITE;
2522 if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
2523 mask |= MAY_EXEC;
2524 } else if (S_ISREG(umode)) {
2525 if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
2526 mask |= MAY_WRITE;
2527 if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
2528 mask |= MAY_EXEC;
2529 } else if (access_result & NFS_MAY_WRITE)
2530 mask |= MAY_WRITE;
2531 return mask;
2534 void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
2536 entry->mask = access_result;
2538 EXPORT_SYMBOL_GPL(nfs_access_set_mask);
2540 static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
2542 struct nfs_access_entry cache;
2543 bool may_block = (mask & MAY_NOT_BLOCK) == 0;
2544 int cache_mask;
2545 int status;
2547 trace_nfs_access_enter(inode);
2549 status = nfs_access_get_cached_rcu(inode, cred, &cache);
2550 if (status != 0)
2551 status = nfs_access_get_cached(inode, cred, &cache, may_block);
2552 if (status == 0)
2553 goto out_cached;
2555 status = -ECHILD;
2556 if (!may_block)
2557 goto out;
2560 * Determine which access bits we want to ask for...
2562 cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND;
2563 if (S_ISDIR(inode->i_mode))
2564 cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
2565 else
2566 cache.mask |= NFS_ACCESS_EXECUTE;
2567 cache.cred = cred;
2568 status = NFS_PROTO(inode)->access(inode, &cache);
2569 if (status != 0) {
2570 if (status == -ESTALE) {
2571 nfs_zap_caches(inode);
2572 if (!S_ISDIR(inode->i_mode))
2573 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
2575 goto out;
2577 nfs_access_add_cache(inode, &cache);
2578 out_cached:
2579 cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
2580 if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
2581 status = -EACCES;
2582 out:
2583 trace_nfs_access_exit(inode, status);
2584 return status;
2587 static int nfs_open_permission_mask(int openflags)
2589 int mask = 0;
2591 if (openflags & __FMODE_EXEC) {
2592 /* ONLY check exec rights */
2593 mask = MAY_EXEC;
2594 } else {
2595 if ((openflags & O_ACCMODE) != O_WRONLY)
2596 mask |= MAY_READ;
2597 if ((openflags & O_ACCMODE) != O_RDONLY)
2598 mask |= MAY_WRITE;
2601 return mask;
/*
 * Check whether @cred may open @inode with @openflags.
 *
 * The flags are converted to a MAY_* mask and handed to nfs_do_access();
 * its return value is passed through unchanged.
 */
int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
{
	int wanted = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, wanted);
}
EXPORT_SYMBOL_GPL(nfs_may_open);
2610 static int nfs_execute_ok(struct inode *inode, int mask)
2612 struct nfs_server *server = NFS_SERVER(inode);
2613 int ret = 0;
2615 if (S_ISDIR(inode->i_mode))
2616 return 0;
2617 if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
2618 if (mask & MAY_NOT_BLOCK)
2619 return -ECHILD;
2620 ret = __nfs_revalidate_inode(server, inode);
2622 if (ret == 0 && !execute_ok(inode))
2623 ret = -EACCES;
2624 return ret;
2627 int nfs_permission(struct inode *inode, int mask)
2629 const struct cred *cred = current_cred();
2630 int res = 0;
2632 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
2634 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
2635 goto out;
2636 /* Is this sys_access() ? */
2637 if (mask & (MAY_ACCESS | MAY_CHDIR))
2638 goto force_lookup;
2640 switch (inode->i_mode & S_IFMT) {
2641 case S_IFLNK:
2642 goto out;
2643 case S_IFREG:
2644 if ((mask & MAY_OPEN) &&
2645 nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
2646 return 0;
2647 break;
2648 case S_IFDIR:
2650 * Optimize away all write operations, since the server
2651 * will check permissions when we perform the op.
2653 if ((mask & MAY_WRITE) && !(mask & MAY_READ))
2654 goto out;
2657 force_lookup:
2658 if (!NFS_PROTO(inode)->access)
2659 goto out_notsup;
2661 /* Always try fast lookups first */
2662 rcu_read_lock();
2663 res = nfs_do_access(inode, cred, mask|MAY_NOT_BLOCK);
2664 rcu_read_unlock();
2665 if (res == -ECHILD && !(mask & MAY_NOT_BLOCK)) {
2666 /* Fast lookup failed, try the slow way */
2667 res = nfs_do_access(inode, cred, mask);
2669 out:
2670 if (!res && (mask & MAY_EXEC))
2671 res = nfs_execute_ok(inode, mask);
2673 dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
2674 inode->i_sb->s_id, inode->i_ino, mask, res);
2675 return res;
2676 out_notsup:
2677 if (mask & MAY_NOT_BLOCK)
2678 return -ECHILD;
2680 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2681 if (res == 0)
2682 res = generic_permission(inode, mask);
2683 goto out;
2685 EXPORT_SYMBOL_GPL(nfs_permission);
2688 * Local variables:
2689 * version-control: t
2690 * kept-new-versions: 5
2691 * End: