/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
*/
#include "fuse_i.h"
#include <linux/iversion.h>
#include <linux/posix_acl.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
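
/*
 * Readdir handling for FUSE.  Directory entries normally come from the
 * userspace server via FUSE_READDIR or FUSE_READDIRPLUS.  If the server
 * sets FOPEN_CACHE_DIR when the directory is opened, the entries it
 * returns are also copied into the directory inode's page cache so that
 * later getdents() calls can be answered without a round trip to
 * userspace.
 */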
static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct fuse_inode *fi = get_fuse_inode(dir);

	if (!fc->do_readdirplus)
		return false;
	if (!fc->readdirplus_auto)
		return true;
	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
		return true;
	if (ctx->pos == 0)
		return true;
	return false;
}
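
/*
 * Note on fi->rdc: the per-inode readdir cache state used below is defined
 * in fuse_i.h; the field list here is only inferred from its usage in this
 * file:
 *
 *	lock	- spinlock protecting the other fields
 *	cached	- true once a complete copy of the directory is cached
 *	size	- number of dirent bytes stored in the page cache
 *	pos	- ->off of the last dirent added (expected next position)
 *	version	- bumped on reset so that readers notice invalidation
 *	mtime/iversion - snapshot taken when caching starts, used to detect
 *		  directory changes
 */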
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_atomic(page);
	if (!offset)
		clear_page(addr);
	memcpy(addr + offset, dirent, reclen);
	kunmap_atomic(addr);
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
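
/*
 * Called when the server reports end-of-directory on a FOPEN_CACHE_DIR
 * stream: if the stream position still matches the cache tail, mark the
 * cache complete and drop the unused page cache beyond the cached size.
 */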
static void fuse_readdir_cache_end(struct file *file, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	loff_t end;

	spin_lock(&fi->rdc.lock);
	/* does cache end position match current position? */
	if (fi->rdc.pos != pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}

	fi->rdc.cached = true;
	end = ALIGN(fi->rdc.size, PAGE_SIZE);
	spin_unlock(&fi->rdc.lock);

	/* truncate unused tail of cache */
	truncate_inode_pages(file->f_mapping, end);
}
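
/*
 * Emit one dirent to the getdents() buffer and, if the server requested
 * directory caching, mirror it into the readdir cache as well.
 */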
static bool fuse_emit(struct file *file, struct dir_context *ctx,
		      struct fuse_dirent *dirent)
{
	struct fuse_file *ff = file->private_data;

	if (ff->open_flags & FOPEN_CACHE_DIR)
		fuse_add_dirent_to_cache(file, dirent, ctx->pos);

	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
			dirent->type);
}
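
/*
 * Parse a plain FUSE_READDIR reply: walk the packed fuse_dirent records
 * in buf, validate each one, and emit it to the caller.
 */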
static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
			 struct dir_context *ctx)
{
	while (nbytes >= FUSE_NAME_OFFSET) {
		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
		size_t reclen = FUSE_DIRENT_SIZE(dirent);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!fuse_emit(file, ctx, dirent))
			break;

		buf += reclen;
		nbytes -= reclen;
		ctx->pos = dirent->off;
	}

	return 0;
}
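
/*
 * A READDIRPLUS entry also carries lookup data.  Instantiate or revalidate
 * the corresponding dentry/inode so that subsequent lookups can be served
 * from the dcache, much like fuse_lookup() would do.
 */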
static int fuse_direntplus_link(struct file *file,
				struct fuse_direntplus *direntplus,
				u64 attr_version)
{
	struct fuse_entry_out *o = &direntplus->entry_out;
	struct fuse_dirent *dirent = &direntplus->dirent;
	struct dentry *parent = file->f_path.dentry;
	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *dir = d_inode(parent);
	struct fuse_conn *fc;
	struct inode *inode;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);

	if (!o->nodeid) {
		/*
		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
		 * ENOENT. Instead, it only means the userspace filesystem did
		 * not want to return attributes/handle for this entry.
		 *
		 * So do nothing.
		 */
		return 0;
	}

	if (name.name[0] == '.') {
		/*
		 * We could potentially refresh the attributes of the directory
		 * and its parent?
		 */
		if (name.len == 1)
			return 0;
		if (name.name[1] == '.' && name.len == 2)
			return 0;
	}

	if (invalid_nodeid(o->nodeid))
		return -EIO;
	if (fuse_invalid_attr(&o->attr))
		return -EIO;

	fc = get_fuse_conn(dir);

	name.hash = full_name_hash(parent, name.name, name.len);
	dentry = d_lookup(parent, &name);
	if (!dentry) {
retry:
		dentry = d_alloc_parallel(parent, &name, &wq);
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (!d_in_lookup(dentry)) {
		struct fuse_inode *fi;

		inode = d_inode(dentry);
		if (!inode ||
		    get_node_id(inode) != o->nodeid ||
		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
			d_invalidate(dentry);
			dput(dentry);
			goto retry;
		}
		if (is_bad_inode(inode)) {
			dput(dentry);
			return -EIO;
		}

		fi = get_fuse_inode(inode);
		spin_lock(&fi->lock);
		fi->nlookup++;
		spin_unlock(&fi->lock);

		forget_all_cached_acls(inode);
		fuse_change_attributes(inode, &o->attr,
				       entry_attr_timeout(o),
				       attr_version);
		/*
		 * The other branch comes via fuse_iget()
		 * which bumps nlookup inside
		 */
	} else {
		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
				  &o->attr, entry_attr_timeout(o),
				  attr_version);
		if (!inode)
			inode = ERR_PTR(-ENOMEM);

		alias = d_splice_alias(inode, dentry);
		d_lookup_done(dentry);
		if (alias) {
			dput(dentry);
			dentry = alias;
		}
		if (IS_ERR(dentry))
			return PTR_ERR(dentry);
	}
	if (fc->readdirplus_auto)
		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
	fuse_change_entry_timeout(dentry, o);

	dput(dentry);
	return 0;
}
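
/*
 * Ask the server to drop the lookup reference taken for a READDIRPLUS
 * entry that could not be linked; sent as a forced, no-reply request.
 */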
static void fuse_force_forget(struct file *file, u64 nodeid)
{
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_forget_in inarg;
	FUSE_ARGS(args);

	memset(&inarg, 0, sizeof(inarg));
	inarg.nlookup = 1;
	args.opcode = FUSE_FORGET;
	args.nodeid = nodeid;
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.force = true;
	args.noreply = true;

	fuse_simple_request(fc, &args);
	/* ignore errors */
}
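
/*
 * Parse a FUSE_READDIRPLUS reply.  Entries are emitted only while the
 * getdents() buffer has room, but linking continues for the rest of the
 * reply; any entry that fails to link gets a FORGET so the server's
 * lookup count stays balanced.
 */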
static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
			     struct dir_context *ctx, u64 attr_version)
{
	struct fuse_direntplus *direntplus;
	struct fuse_dirent *dirent;
	size_t reclen;
	int over = 0;
	int ret;

	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
		direntplus = (struct fuse_direntplus *) buf;
		dirent = &direntplus->dirent;
		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);

		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
			return -EIO;
		if (reclen > nbytes)
			break;
		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
			return -EIO;

		if (!over) {
			/* We fill entries into dstbuf only as much as
			   it can hold. But we still continue iterating
			   over remaining entries to link them. If not,
			   we need to send a FORGET for each of those
			   which we did not link.
			*/
			over = !fuse_emit(file, ctx, dirent);
			if (!over)
				ctx->pos = dirent->off;
		}

		buf += reclen;
		nbytes -= reclen;

		ret = fuse_direntplus_link(file, direntplus, attr_version);
		if (ret)
			fuse_force_forget(file, direntplus->entry_out.nodeid);
	}

	return 0;
}
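
/*
 * Fetch one page worth of entries from the server at ctx->pos and feed
 * them to the caller.  An empty reply means end-of-directory, which is
 * also the point where a FOPEN_CACHE_DIR cache is marked complete.
 */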
static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
{
	int plus;
	ssize_t res;
	struct page *page;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_io_args ia = {};
	struct fuse_args_pages *ap = &ia.ap;
	struct fuse_page_desc desc = { .length = PAGE_SIZE };
	u64 attr_version = 0;
	bool locked;

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return -ENOMEM;

	plus = fuse_use_readdirplus(inode, ctx);
	ap->args.out_pages = true;
	ap->num_pages = 1;
	ap->pages = &page;
	ap->descs = &desc;
	if (plus) {
		attr_version = fuse_get_attr_version(fc);
		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
				    FUSE_READDIRPLUS);
	} else {
		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
				    FUSE_READDIR);
	}
	locked = fuse_lock_inode(inode);
	res = fuse_simple_request(fc, &ap->args);
	fuse_unlock_inode(inode, locked);
	if (res >= 0) {
		if (!res) {
			struct fuse_file *ff = file->private_data;

			if (ff->open_flags & FOPEN_CACHE_DIR)
				fuse_readdir_cache_end(file, ctx->pos);
		} else if (plus) {
			res = parse_dirplusfile(page_address(page), res,
						file, ctx, attr_version);
		} else {
			res = parse_dirfile(page_address(page), res, file,
					    ctx);
		}
	}

	__free_page(page);
	fuse_invalidate_atime(inode);
	return res;
}
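
/*
 * Result of scanning one page of the readdir cache: the requested position
 * was not found in this page, some entries were emitted, the getdents()
 * buffer filled up, or a corrupt cache entry was hit.
 */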
enum fuse_parse_result {
	FOUND_ERR = -1,
	FOUND_NONE = 0,
	FOUND_SOME,
	FOUND_ALL,
};
static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
					       void *addr, unsigned int size,
					       struct dir_context *ctx)
{
	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
	enum fuse_parse_result res = FOUND_NONE;

	WARN_ON(offset >= size);

	for (;;) {
		struct fuse_dirent *dirent = addr + offset;
		unsigned int nbytes = size - offset;
		size_t reclen;

		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
			break;

		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */

		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
			return FOUND_ERR;
		if (WARN_ON(reclen > nbytes))
			return FOUND_ERR;
		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
			return FOUND_ERR;

		if (ff->readdir.pos == ctx->pos) {
			res = FOUND_SOME;
			if (!dir_emit(ctx, dirent->name, dirent->namelen,
				      dirent->ino, dirent->type))
				return FOUND_ALL;
			ctx->pos = dirent->off;
		}
		ff->readdir.pos = dirent->off;
		ff->readdir.cache_off += reclen;

		offset += reclen;
	}

	return res;
}
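
/* Invalidate the readdir cache and bump its version so readers notice. */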
static void fuse_rdc_reset(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	fi->rdc.cached = false;
	fi->rdc.version++;
	fi->rdc.size = 0;
	fi->rdc.pos = 0;
}

#define UNCACHED 1
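
/*
 * Serve getdents() from the page-cache copy of the directory.  Returns
 * UNCACHED when the cache cannot be used (not yet complete, invalidated,
 * or the requested position is not covered), in which case the caller
 * falls back to fuse_readdir_uncached().
 *
 * The per-open stream state ff->readdir is assumed to carry at least
 * pos (last emitted ->off), cache_off (byte offset into the cache),
 * version (the rdc version this stream was validated against) and lock;
 * its actual definition lives in fuse_i.h.
 */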
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache?  Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode->i_mtime;
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), we need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap(page);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
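
/*
 * Readdir entry point wired up from fuse_dir_operations.  The per-open
 * mutex serializes concurrent getdents() calls on the same struct file,
 * since both paths update the ff->readdir stream state.
 */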
int fuse_readdir(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	int err;

	if (is_bad_inode(inode))
		return -EIO;

	mutex_lock(&ff->readdir.lock);

	err = UNCACHED;
	if (ff->open_flags & FOPEN_CACHE_DIR)
		err = fuse_readdir_cached(file, ctx);
	if (err == UNCACHED)
		err = fuse_readdir_uncached(file, ctx);

	mutex_unlock(&ff->readdir.lock);

	return err;
}