1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 1995 Linus Torvalds
8 #include <linux/stddef.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/time.h>
13 #include <linux/errno.h>
14 #include <linux/stat.h>
15 #include <linux/file.h>
17 #include <linux/fsnotify.h>
18 #include <linux/dirent.h>
19 #include <linux/security.h>
20 #include <linux/syscalls.h>
21 #include <linux/unistd.h>
22 #include <linux/compat.h>
23 #include <linux/uaccess.h>
/*
 * Some filesystems were never converted to '->iterate_shared()'
 * and their directory iterators want the inode lock held for
 * writing. This wrapper allows for converting from the shared
 * semantics to the exclusive inode use.
 */
/*
 * wrap_directory_iterator - run a directory iterator callback with the
 * inode rwsem held for writing instead of reading.
 *
 * @file: open directory; its inode is obtained with file_inode().
 * @ctx:  directory context, passed through to @iter unchanged.
 * @iter: iterator callback, invoked as iter(file, ctx).
 *
 * The visible code releases the read side of inode->i_rwsem with
 * up_read(), takes the write side with down_write(), calls @iter only
 * when IS_DEADDIR(inode) is false (storing its result in ret), and then
 * converts the write lock back with downgrade_write().
 *
 * NOTE(review): in this view the declaration of ret, the value ret holds
 * when the directory is dead, the return statement and the braces are
 * not visible -- confirm against the complete file before building.
 */
31 int wrap_directory_iterator(struct file
*file
,
32 struct dir_context
*ctx
,
33 int (*iter
)(struct file
*, struct dir_context
*))
35 struct inode
*inode
= file_inode(file
);
39 * We'd love to have an 'inode_upgrade_trylock()' operation,
40 * see the comment in mmap_upgrade_trylock() in mm/memory.c.
42 * But considering this is for "filesystems that never got
43 * converted", it really doesn't matter.
45 * Also note that since we have to return with the lock held
46 * for reading, we can't use the "killable()" locking here,
47 * since we do need to get the lock even if we're dying.
49 * We could do the write part killably and then get the read
50 * lock unconditionally if it mattered, but see above on why
51 * this does the very simplistic conversion.
53 up_read(&inode
->i_rwsem
);
54 down_write(&inode
->i_rwsem
);
57 * Since we dropped the inode lock, we should do the
58 * DEADDIR test again. See 'iterate_dir()' below.
60 * Note that we don't need to re-do the f_pos games,
61 * since the file must be locked wrt f_pos anyway.
64 if (!IS_DEADDIR(inode
))
65 ret
= iter(file
, ctx
);
67 downgrade_write(&inode
->i_rwsem
);
70 EXPORT_SYMBOL(wrap_directory_iterator
);
/*
 * Note the "unsafe_put_user()" semantics: we goto a
 * label for errors.
 */
/*
 * unsafe_copy_dirent_name - copy a directory entry name to user space
 * and terminate it with a 0 byte.
 *
 * @_dst:  user-space destination (assigned to a char __user pointer)
 * @_src:  kernel-space source name
 * @_len:  number of name bytes to copy
 * @label: label handed to unsafe_put_user() and unsafe_copy_to_user()
 *
 * Each argument is captured once in a local (dst, src, len), so it is
 * evaluated a single time.  The 0 byte is stored at dst[len] first, then
 * len bytes of the name are copied; the destination therefore needs room
 * for len + 1 bytes.
 *
 * NOTE(review): the closing part of the do-while wrapper is not visible
 * in this view -- confirm against the complete file.
 */
76 #define unsafe_copy_dirent_name(_dst, _src, _len, label) do { \
77 char __user *dst = (_dst); \
78 const char *src = (_src); \
79 size_t len = (_len); \
80 unsafe_put_user(0, dst+len, label); \
81 unsafe_copy_to_user(dst, src, len, label); \
/*
 * iterate_dir - feed the entries of an open directory to a dir_context.
 *
 * @file: open directory; must provide f_op->iterate_shared.
 * @ctx:  directory context whose pos field is synchronised with
 *        file->f_pos around the iteration.
 *
 * Visible sequence: test that ->iterate_shared exists, query
 * security_file_permission() and fsnotify_file_perm() with MAY_READ,
 * take inode->i_rwsem for reading with down_read_killable(), and, if the
 * directory is not IS_DEADDIR, call ->iterate_shared(file, ctx) followed
 * by fsnotify_access(file).  inode_unlock_shared(inode) is called
 * afterwards.
 *
 * NOTE(review): the declaration of res, the handling of each non-zero
 * res, the return statement and the braces are not visible in this
 * view -- confirm against the complete file.
 */
85 int iterate_dir(struct file
*file
, struct dir_context
*ctx
)
87 struct inode
*inode
= file_inode(file
);
/* Check that the file provides an ->iterate_shared operation. */
90 if (!file
->f_op
->iterate_shared
)
/* Permission hooks, both queried with MAY_READ. */
93 res
= security_file_permission(file
, MAY_READ
);
97 res
= fsnotify_file_perm(file
, MAY_READ
);
/* Take the inode rwsem for reading; the killable variant returns a status. */
101 res
= down_read_killable(&inode
->i_rwsem
);
/* Only iterate when the directory is not marked dead. */
106 if (!IS_DEADDIR(inode
)) {
/* Seed the context position from the file position ... */
107 ctx
->pos
= file
->f_pos
;
108 res
= file
->f_op
->iterate_shared(file
, ctx
);
/* ... and store the position reached by the iterator back in the file. */
109 file
->f_pos
= ctx
->pos
;
110 fsnotify_access(file
);
113 inode_unlock_shared(inode
);
117 EXPORT_SYMBOL(iterate_dir
);
/*
 * POSIX says that a dirent name cannot contain a NUL byte or a '/'.
 *
 * It's not 100% clear what we should really do in this case.
 * The filesystem is clearly corrupted, but returning a hard
 * error means that you now don't see any of the other names
 * either, so that isn't a perfect alternative.
 *
 * And if you return an error, what error do you use? Several
 * filesystems seem to have decided on EUCLEAN being the error
 * code for EFSCORRUPTED, and that may be the error to use. Or
 * just EIO, which is perhaps more obvious to users.
 *
 * In order to see the other file names in the directory, the
 * caller might want to make this a "soft" error: skip the
 * entry, and return the error at the end instead.
 *
 * Note that this should likely do a "memchr(name, 0, len)"
 * check too, since that would be filesystem corruption as
 * well. However, that case can't actually confuse user space,
 * which has to do a strlen() on the name anyway to find the
 * filename length, and the above "soft error" worry means
 * that it's probably better left alone until we have that
 * issue clarified.
 *
 * Note the PATH_MAX check - it's arbitrary but the real
 * kernel limit on a possible path component, not NAME_MAX,
 * which is the technical standard limit.
 */
/*
 * verify_dirent_name - sanity-check a directory entry name before it is
 * handed to user space.
 *
 * @name: entry name; exactly @len bytes are examined, so it does not
 *        have to be NUL-terminated.
 * @len:  length of @name in bytes.
 *
 * A name is rejected when its length is outside 1..PATH_MAX-1 or when
 * any of its @len bytes is a '/'.  The length test runs first, so @name
 * is never read for a non-positive or oversized @len.
 *
 * Return: 0 when the name is acceptable, -EIO otherwise.  Callers store
 * the result in their error field and treat any non-zero value as a
 * failure.
 */
static int verify_dirent_name(const char *name, int len)
{
	if (len <= 0 || len >= PATH_MAX)
		return -EIO;
	if (memchr(name, '/', len))
		return -EIO;
	return 0;
}
/*
 * Traditional linux readdir() handling..
 *
 * "count=1" is a special case, meaning that the buffer is one
 * dirent-structure in size and that the code can't handle more
 * anyway. Thus the special "fillonedir()" function for that
 * case (the low-level handlers don't need to care about this).
 */
166 #ifdef __ARCH_WANT_OLD_READDIR
/*
 * struct old_linux_dirent - user-space record filled in by fillonedir().
 *
 * d_offset: receives the offset argument of the actor.
 * d_namlen: receives the name length.
 *
 * NOTE(review): fillonedir() also stores to d_ino and d_name, so those
 * members presumably exist; they and the closing brace are not visible
 * in this view -- confirm against the complete file.
 */
168 struct old_linux_dirent
{
170 unsigned long d_offset
;
171 unsigned short d_namlen
;
/*
 * struct readdir_callback - per-call state of the old_readdir syscall.
 *
 * ctx:    embedded dir_context; fillonedir() recovers the enclosing
 *         structure from it with container_of().
 * dirent: user-space record that fillonedir() writes to.
 *
 * NOTE(review): fillonedir() also uses a result member; it and the
 * closing brace are not visible in this view.
 */
175 struct readdir_callback
{
176 struct dir_context ctx
;
177 struct old_linux_dirent __user
* dirent
;
/*
 * fillonedir - dir_context actor installed by the old_readdir syscall;
 * writes one struct old_linux_dirent to the user record in buf->dirent.
 *
 * @ctx:    embedded in struct readdir_callback; container_of() recovers it.
 * @name:   entry name, checked with verify_dirent_name().
 * @namlen: length of @name; stored to d_namlen.
 * @offset: stored to d_offset.
 * @ino:    inode number; compared with d_ino to detect truncation.
 * @d_type: not referenced in the visible lines.
 *
 * buf->result receives the verify_dirent_name() result, -EOVERFLOW when
 * d_ino is narrower than @ino and differs from it, or -EFAULT on the
 * fault path.
 *
 * NOTE(review): the declaration and assignment of d_ino, the return
 * statements, the fault labels and the braces are not visible in this
 * view -- confirm against the complete file.
 */
181 static bool fillonedir(struct dir_context
*ctx
, const char *name
, int namlen
,
182 loff_t offset
, u64 ino
, unsigned int d_type
)
184 struct readdir_callback
*buf
=
185 container_of(ctx
, struct readdir_callback
, ctx
);
186 struct old_linux_dirent __user
* dirent
;
191 buf
->result
= verify_dirent_name(name
, namlen
);
/* The size test is a compile-time constant; the value test catches truncation. */
195 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
196 buf
->result
= -EOVERFLOW
;
200 dirent
= buf
->dirent
;
/*
 * Open the user range from the start of the record through
 * d_name[namlen], i.e. the name plus the terminating 0 byte.
 */
201 if (!user_write_access_begin(dirent
,
202 (unsigned long)(dirent
->d_name
+ namlen
+ 1) -
203 (unsigned long)dirent
))
205 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
206 unsafe_put_user(offset
, &dirent
->d_offset
, efault_end
);
207 unsafe_put_user(namlen
, &dirent
->d_namlen
, efault_end
);
208 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
209 user_write_access_end();
/* Second user_write_access_end(): presumably the efault_end path -- confirm. */
212 user_write_access_end();
214 buf
->result
= -EFAULT
;
/*
 * old_readdir syscall - read a directory entry into one old_linux_dirent.
 *
 * @fd:     directory file descriptor, resolved through CLASS(fd_pos, f).
 * @dirent: user-space record to fill.
 * @count:  not referenced in the visible lines.
 *
 * Sets up a struct readdir_callback with fillonedir() as actor and runs
 * iterate_dir() on the file behind @fd.
 *
 * NOTE(review): the declaration of error, the remaining initialisers of
 * buf, any validation of f, the use of buf.result and the return are not
 * visible in this view -- confirm against the complete file.
 */
218 SYSCALL_DEFINE3(old_readdir
, unsigned int, fd
,
219 struct old_linux_dirent __user
*, dirent
, unsigned int, count
)
222 CLASS(fd_pos
, f
)(fd
);
223 struct readdir_callback buf
= {
224 .ctx
.actor
= fillonedir
,
231 error
= iterate_dir(fd_file(f
), &buf
.ctx
);
238 #endif /* __ARCH_WANT_OLD_READDIR */
/*
 * New, all-improved, singing, dancing, iBCS2-compliant getdents()
 */
/*
 * struct linux_dirent - user-space record produced by filldir().
 *
 * d_reclen: total length of this record as computed in filldir().
 *
 * NOTE(review): filldir() also stores to d_ino, d_off and d_name, so
 * those members presumably exist; they and the closing brace are not
 * visible in this view.
 */
244 struct linux_dirent
{
247 unsigned short d_reclen
;
/*
 * struct getdents_callback - per-call state of the getdents syscall.
 *
 * ctx:         embedded dir_context; filldir() recovers the enclosing
 *              structure from it with container_of().
 * current_dir: user-space position where the next record is written.
 *
 * NOTE(review): filldir() also uses prev_reclen, count and error
 * members; they and the closing brace are not visible in this view.
 */
251 struct getdents_callback
{
252 struct dir_context ctx
;
253 struct linux_dirent __user
* current_dir
;
/*
 * filldir - dir_context actor installed by the getdents syscall; appends
 * one struct linux_dirent at buf->current_dir.
 *
 * @ctx:    embedded in struct getdents_callback.
 * @name:   entry name, checked with verify_dirent_name().
 * @namlen: length of @name.
 * @offset: stored in the d_off field of the PREVIOUS record.
 * @ino:    inode number; compared with d_ino to detect truncation.
 * @d_type: stored in the last byte of the record (dirent + reclen - 1).
 *
 * The record length is offsetof(d_name) + namlen + 2, rounded up with
 * ALIGN().  buf->error is set to the verify_dirent_name() result, then
 * to -EINVAL before the space check, to -EOVERFLOW when d_ino cannot
 * hold @ino, and to -EFAULT on the fault path.  After a successful write
 * current_dir advances by reclen, prev_reclen becomes reclen and count
 * shrinks by reclen.
 *
 * NOTE(review): the second ALIGN() argument, the declarations of d_ino
 * and prev_reclen, the return statements, the fault labels and the
 * braces are not visible in this view -- confirm against the complete
 * file.
 */
259 static bool filldir(struct dir_context
*ctx
, const char *name
, int namlen
,
260 loff_t offset
, u64 ino
, unsigned int d_type
)
262 struct linux_dirent __user
*dirent
, *prev
;
263 struct getdents_callback
*buf
=
264 container_of(ctx
, struct getdents_callback
, ctx
);
266 int reclen
= ALIGN(offsetof(struct linux_dirent
, d_name
) + namlen
+ 2,
270 buf
->error
= verify_dirent_name(name
, namlen
);
271 if (unlikely(buf
->error
))
273 buf
->error
= -EINVAL
; /* only used if we fail.. */
/* The new record must fit in the remaining user buffer. */
274 if (reclen
> buf
->count
)
277 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
278 buf
->error
= -EOVERFLOW
;
281 prev_reclen
= buf
->prev_reclen
;
/* Pending signals are only checked once a previous record exists. */
282 if (prev_reclen
&& signal_pending(current
))
284 dirent
= buf
->current_dir
;
/* prev addresses the record written by the previous call, if any. */
285 prev
= (void __user
*) dirent
- prev_reclen
;
/* One access window spans the previous record and the new one. */
286 if (!user_write_access_begin(prev
, reclen
+ prev_reclen
))
289 /* This might be 'dirent->d_off', but if so it will get overwritten */
290 unsafe_put_user(offset
, &prev
->d_off
, efault_end
);
291 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
292 unsafe_put_user(reclen
, &dirent
->d_reclen
, efault_end
);
293 unsafe_put_user(d_type
, (char __user
*) dirent
+ reclen
- 1, efault_end
);
294 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
295 user_write_access_end();
297 buf
->current_dir
= (void __user
*)dirent
+ reclen
;
298 buf
->prev_reclen
= reclen
;
299 buf
->count
-= reclen
;
/* Second user_write_access_end(): presumably the efault_end path -- confirm. */
302 user_write_access_end();
304 buf
->error
= -EFAULT
;
/*
 * getdents syscall - fill a user buffer with struct linux_dirent records.
 *
 * @fd:     directory file descriptor, resolved through CLASS(fd_pos, f).
 * @dirent: start of the user buffer; initial value of buf.current_dir.
 * @count:  used with buf.count to compute the result.
 *
 * Runs iterate_dir() with filldir() as actor.  When at least one record
 * was written (buf.prev_reclen is non-zero) the last record is located
 * at current_dir - prev_reclen, its d_off is set to buf.ctx.pos with
 * put_user(), and error becomes count - buf.count.
 *
 * NOTE(review): the declaration of error, the remaining initialisers of
 * buf (including the starting value of buf.count), any validation of f,
 * the handling of a put_user() failure and the return are not visible in
 * this view -- confirm against the complete file.
 */
308 SYSCALL_DEFINE3(getdents
, unsigned int, fd
,
309 struct linux_dirent __user
*, dirent
, unsigned int, count
)
311 CLASS(fd_pos
, f
)(fd
);
312 struct getdents_callback buf
= {
313 .ctx
.actor
= filldir
,
315 .current_dir
= dirent
322 error
= iterate_dir(fd_file(f
), &buf
.ctx
);
325 if (buf
.prev_reclen
) {
326 struct linux_dirent __user
* lastdirent
;
327 lastdirent
= (void __user
*)buf
.current_dir
- buf
.prev_reclen
;
329 if (put_user(buf
.ctx
.pos
, &lastdirent
->d_off
))
332 error
= count
- buf
.count
;
/*
 * struct getdents_callback64 - per-call state of the getdents64 syscall.
 *
 * ctx:         embedded dir_context; filldir64() recovers the enclosing
 *              structure from it with container_of().
 * current_dir: user-space position where the next linux_dirent64 record
 *              is written.
 *
 * NOTE(review): filldir64() also uses prev_reclen, count and error
 * members; they and the closing brace are not visible in this view.
 */
337 struct getdents_callback64
{
338 struct dir_context ctx
;
339 struct linux_dirent64 __user
* current_dir
;
/*
 * filldir64 - dir_context actor installed by the getdents64 syscall;
 * appends one struct linux_dirent64 at buf->current_dir.
 *
 * @ctx:    embedded in struct getdents_callback64.
 * @name:   entry name, checked with verify_dirent_name().
 * @namlen: length of @name.
 * @offset: stored in the d_off field of the PREVIOUS record.
 * @ino:    inode number, stored to d_ino as is (no narrowing test is
 *          visible here).
 * @d_type: stored in the d_type member of the record.
 *
 * The record length is offsetof(d_name) + namlen + 1, rounded up with
 * ALIGN().  buf->error is set to the verify_dirent_name() result, then
 * to -EINVAL before the space check, and to -EFAULT on the fault path.
 * After a successful write prev_reclen becomes reclen, current_dir
 * advances by reclen and count shrinks by reclen.
 *
 * NOTE(review): the second ALIGN() argument, the declaration of
 * prev_reclen, the return statements, the fault labels and the braces
 * are not visible in this view -- confirm against the complete file.
 */
345 static bool filldir64(struct dir_context
*ctx
, const char *name
, int namlen
,
346 loff_t offset
, u64 ino
, unsigned int d_type
)
348 struct linux_dirent64 __user
*dirent
, *prev
;
349 struct getdents_callback64
*buf
=
350 container_of(ctx
, struct getdents_callback64
, ctx
);
351 int reclen
= ALIGN(offsetof(struct linux_dirent64
, d_name
) + namlen
+ 1,
355 buf
->error
= verify_dirent_name(name
, namlen
);
356 if (unlikely(buf
->error
))
358 buf
->error
= -EINVAL
; /* only used if we fail.. */
/* The new record must fit in the remaining user buffer. */
359 if (reclen
> buf
->count
)
361 prev_reclen
= buf
->prev_reclen
;
/* Pending signals are only checked once a previous record exists. */
362 if (prev_reclen
&& signal_pending(current
))
364 dirent
= buf
->current_dir
;
/* prev addresses the record written by the previous call, if any. */
365 prev
= (void __user
*)dirent
- prev_reclen
;
/* One access window spans the previous record and the new one. */
366 if (!user_write_access_begin(prev
, reclen
+ prev_reclen
))
369 /* This might be 'dirent->d_off', but if so it will get overwritten */
370 unsafe_put_user(offset
, &prev
->d_off
, efault_end
);
371 unsafe_put_user(ino
, &dirent
->d_ino
, efault_end
);
372 unsafe_put_user(reclen
, &dirent
->d_reclen
, efault_end
);
373 unsafe_put_user(d_type
, &dirent
->d_type
, efault_end
);
374 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
375 user_write_access_end();
377 buf
->prev_reclen
= reclen
;
378 buf
->current_dir
= (void __user
*)dirent
+ reclen
;
379 buf
->count
-= reclen
;
/* Second user_write_access_end(): presumably the efault_end path -- confirm. */
383 user_write_access_end();
385 buf
->error
= -EFAULT
;
/*
 * getdents64 syscall - fill a user buffer with struct linux_dirent64
 * records.
 *
 * @fd:     directory file descriptor, resolved through CLASS(fd_pos, f).
 * @dirent: start of the user buffer; initial value of buf.current_dir.
 * @count:  used with buf.count to compute the result.
 *
 * Runs iterate_dir() with filldir64() as actor.  When at least one
 * record was written (buf.prev_reclen is non-zero) buf.ctx.pos is first
 * converted to the type of the d_off field, the last record is located
 * at current_dir - prev_reclen, its d_off is stored with put_user(), and
 * error becomes count - buf.count.
 *
 * NOTE(review): the declaration of error, the remaining initialisers of
 * buf (including the starting value of buf.count), any validation of f,
 * the handling of a put_user() failure and the return are not visible in
 * this view -- confirm against the complete file.
 */
389 SYSCALL_DEFINE3(getdents64
, unsigned int, fd
,
390 struct linux_dirent64 __user
*, dirent
, unsigned int, count
)
392 CLASS(fd_pos
, f
)(fd
);
393 struct getdents_callback64 buf
= {
394 .ctx
.actor
= filldir64
,
396 .current_dir
= dirent
403 error
= iterate_dir(fd_file(f
), &buf
.ctx
);
406 if (buf
.prev_reclen
) {
407 struct linux_dirent64 __user
* lastdirent
;
408 typeof(lastdirent
->d_off
) d_off
= buf
.ctx
.pos
;
410 lastdirent
= (void __user
*) buf
.current_dir
- buf
.prev_reclen
;
411 if (put_user(d_off
, &lastdirent
->d_off
))
414 error
= count
- buf
.count
;
/*
 * struct compat_old_linux_dirent - compat layout of the record filled in
 * by compat_fillonedir().
 *
 * d_ino:    inode number, stored as compat_ulong_t.
 * d_offset: receives the offset argument of the actor.
 * d_namlen: receives the name length.
 *
 * NOTE(review): compat_fillonedir() also stores to d_name; that member
 * and the closing brace are not visible in this view.
 */
420 struct compat_old_linux_dirent
{
421 compat_ulong_t d_ino
;
422 compat_ulong_t d_offset
;
423 unsigned short d_namlen
;
/*
 * struct compat_readdir_callback - per-call state of the compat
 * old_readdir syscall.
 *
 * ctx:    embedded dir_context; compat_fillonedir() recovers the
 *         enclosing structure from it with container_of().
 * dirent: user-space record that compat_fillonedir() writes to.
 *
 * NOTE(review): compat_fillonedir() also uses a result member; it and
 * the closing brace are not visible in this view.
 */
427 struct compat_readdir_callback
{
428 struct dir_context ctx
;
429 struct compat_old_linux_dirent __user
*dirent
;
/*
 * compat_fillonedir - dir_context actor installed by the compat
 * old_readdir syscall; writes one struct compat_old_linux_dirent to the
 * user record in buf->dirent.
 *
 * @ctx:    embedded in struct compat_readdir_callback.
 * @name:   entry name, checked with verify_dirent_name().
 * @namlen: length of @name; stored to d_namlen.
 * @offset: stored to d_offset.
 * @ino:    inode number; compared with the compat_ulong_t d_ino to
 *          detect truncation.
 *
 * buf->result receives the verify_dirent_name() result, -EOVERFLOW when
 * d_ino is narrower than @ino and differs from it, or -EFAULT on the
 * fault path.
 *
 * NOTE(review): the end of the parameter list, the assignment of d_ino,
 * the return statements, the fault labels and the braces are not visible
 * in this view -- confirm against the complete file.
 */
433 static bool compat_fillonedir(struct dir_context
*ctx
, const char *name
,
434 int namlen
, loff_t offset
, u64 ino
,
437 struct compat_readdir_callback
*buf
=
438 container_of(ctx
, struct compat_readdir_callback
, ctx
);
439 struct compat_old_linux_dirent __user
*dirent
;
440 compat_ulong_t d_ino
;
444 buf
->result
= verify_dirent_name(name
, namlen
);
/* The size test is a compile-time constant; the value test catches truncation. */
448 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
449 buf
->result
= -EOVERFLOW
;
453 dirent
= buf
->dirent
;
/*
 * Open the user range from the start of the record through
 * d_name[namlen], i.e. the name plus the terminating 0 byte.
 */
454 if (!user_write_access_begin(dirent
,
455 (unsigned long)(dirent
->d_name
+ namlen
+ 1) -
456 (unsigned long)dirent
))
458 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
459 unsafe_put_user(offset
, &dirent
->d_offset
, efault_end
);
460 unsafe_put_user(namlen
, &dirent
->d_namlen
, efault_end
);
461 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
462 user_write_access_end();
/* Second user_write_access_end(): presumably the efault_end path -- confirm. */
465 user_write_access_end();
467 buf
->result
= -EFAULT
;
/*
 * compat old_readdir syscall - read a directory entry into one
 * compat_old_linux_dirent.
 *
 * @fd:     directory file descriptor, resolved through CLASS(fd_pos, f).
 * @dirent: user-space record to fill.
 * @count:  not referenced in the visible lines.
 *
 * Sets up a struct compat_readdir_callback with compat_fillonedir() as
 * actor and runs iterate_dir() on the file behind @fd.
 *
 * NOTE(review): the declaration of error, the remaining initialisers of
 * buf, any validation of f, the use of buf.result and the return are not
 * visible in this view -- confirm against the complete file.
 */
471 COMPAT_SYSCALL_DEFINE3(old_readdir
, unsigned int, fd
,
472 struct compat_old_linux_dirent __user
*, dirent
, unsigned int, count
)
475 CLASS(fd_pos
, f
)(fd
);
476 struct compat_readdir_callback buf
= {
477 .ctx
.actor
= compat_fillonedir
,
484 error
= iterate_dir(fd_file(f
), &buf
.ctx
);
/*
 * struct compat_linux_dirent - compat layout of the record produced by
 * compat_filldir().
 *
 * d_ino:    inode number, stored as compat_ulong_t.
 * d_off:    written when the FOLLOWING record is added (via the prev
 *           pointer) and by the syscall for the last record.
 * d_reclen: total length of this record.
 *
 * NOTE(review): compat_filldir() also stores to d_name; that member and
 * the closing brace are not visible in this view.
 */
491 struct compat_linux_dirent
{
492 compat_ulong_t d_ino
;
493 compat_ulong_t d_off
;
494 unsigned short d_reclen
;
/*
 * struct compat_getdents_callback - per-call state of the compat
 * getdents syscall.
 *
 * ctx:         embedded dir_context; compat_filldir() recovers the
 *              enclosing structure from it with container_of().
 * current_dir: user-space position where the next record is written.
 *
 * NOTE(review): compat_filldir() also uses prev_reclen, count and error
 * members; they and the closing brace are not visible in this view.
 */
498 struct compat_getdents_callback
{
499 struct dir_context ctx
;
500 struct compat_linux_dirent __user
*current_dir
;
/*
 * compat_filldir - dir_context actor installed by the compat getdents
 * syscall; appends one struct compat_linux_dirent at buf->current_dir.
 *
 * @ctx:    embedded in struct compat_getdents_callback.
 * @name:   entry name, checked with verify_dirent_name().
 * @namlen: length of @name.
 * @offset: stored in the d_off field of the PREVIOUS record.
 * @ino:    inode number; compared with the compat_ulong_t d_ino to
 *          detect truncation.
 * @d_type: stored in the last byte of the record (dirent + reclen - 1).
 *
 * The record length is offsetof(d_name) + namlen + 2, rounded up to a
 * multiple of sizeof(compat_long_t).  buf->error is set to the
 * verify_dirent_name() result, then to -EINVAL before the space check,
 * to -EOVERFLOW when d_ino cannot hold @ino, and to -EFAULT on the fault
 * path.  After a successful write prev_reclen becomes reclen,
 * current_dir advances by reclen and count shrinks by reclen.
 *
 * NOTE(review): the assignment of d_ino, the declaration of prev_reclen,
 * the return statements, the fault labels and the braces are not visible
 * in this view -- confirm against the complete file.
 */
506 static bool compat_filldir(struct dir_context
*ctx
, const char *name
, int namlen
,
507 loff_t offset
, u64 ino
, unsigned int d_type
)
509 struct compat_linux_dirent __user
*dirent
, *prev
;
510 struct compat_getdents_callback
*buf
=
511 container_of(ctx
, struct compat_getdents_callback
, ctx
);
512 compat_ulong_t d_ino
;
513 int reclen
= ALIGN(offsetof(struct compat_linux_dirent
, d_name
) +
514 namlen
+ 2, sizeof(compat_long_t
));
517 buf
->error
= verify_dirent_name(name
, namlen
);
518 if (unlikely(buf
->error
))
520 buf
->error
= -EINVAL
; /* only used if we fail.. */
/* The new record must fit in the remaining user buffer. */
521 if (reclen
> buf
->count
)
524 if (sizeof(d_ino
) < sizeof(ino
) && d_ino
!= ino
) {
525 buf
->error
= -EOVERFLOW
;
528 prev_reclen
= buf
->prev_reclen
;
/* Pending signals are only checked once a previous record exists. */
529 if (prev_reclen
&& signal_pending(current
))
531 dirent
= buf
->current_dir
;
/* prev addresses the record written by the previous call, if any. */
532 prev
= (void __user
*) dirent
- prev_reclen
;
/* One access window spans the previous record and the new one. */
533 if (!user_write_access_begin(prev
, reclen
+ prev_reclen
))
536 unsafe_put_user(offset
, &prev
->d_off
, efault_end
);
537 unsafe_put_user(d_ino
, &dirent
->d_ino
, efault_end
);
538 unsafe_put_user(reclen
, &dirent
->d_reclen
, efault_end
);
539 unsafe_put_user(d_type
, (char __user
*) dirent
+ reclen
- 1, efault_end
);
540 unsafe_copy_dirent_name(dirent
->d_name
, name
, namlen
, efault_end
);
541 user_write_access_end();
543 buf
->prev_reclen
= reclen
;
544 buf
->current_dir
= (void __user
*)dirent
+ reclen
;
545 buf
->count
-= reclen
;
/* Second user_write_access_end(): presumably the efault_end path -- confirm. */
548 user_write_access_end();
550 buf
->error
= -EFAULT
;
554 COMPAT_SYSCALL_DEFINE3(getdents
, unsigned int, fd
,
555 struct compat_linux_dirent __user
*, dirent
, unsigned int, count
)
557 CLASS(fd_pos
, f
)(fd
);
558 struct compat_getdents_callback buf
= {
559 .ctx
.actor
= compat_filldir
,
560 .current_dir
= dirent
,
568 error
= iterate_dir(fd_file(f
), &buf
.ctx
);
571 if (buf
.prev_reclen
) {
572 struct compat_linux_dirent __user
* lastdirent
;
573 lastdirent
= (void __user
*)buf
.current_dir
- buf
.prev_reclen
;
575 if (put_user(buf
.ctx
.pos
, &lastdirent
->d_off
))
578 error
= count
- buf
.count
;