1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright (C) 1991, 1992 Linus Torvalds
7 * proc base directory handling functions
9 * 1999, Al Viro. Rewritten. Now it covers the whole per-process part.
10 * Instead of using magical inumbers to determine the kind of object
11 * we allocate and fill in-core inodes upon lookup. They don't even
12 * go into icache. We cache the reference to task_struct upon lookup too.
13 * Eventually it should become a filesystem in its own. We don't use the
14 * rest of procfs anymore.
20 * Bruna Moreira <bruna.moreira@indt.org.br>
21 * Edjard Mota <edjard.mota@indt.org.br>
22 * Ilias Biris <ilias.biris@indt.org.br>
23 * Mauricio Lin <mauricio.lin@indt.org.br>
25 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
27 * A new process specific entry (smaps) included in /proc. It shows the
28 * size of rss for each memory area. The maps entry lacks information
29 * about physical memory size (rss) for each mapped file, i.e.,
30 * rss information for executables and library files.
31 * This additional information is useful for any tools that need to know
32 * about physical memory consumption for a process specific library.
36 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
37 * Pud inclusion in the page table walking.
41 * 10LE Instituto Nokia de Tecnologia - INdT:
42 * A better way to walk through the page table as suggested by Hugh Dickins.
44 * Simo Piiroinen <simo.piiroinen@nokia.com>:
45 * Smaps information related to shared, private, clean and dirty pages.
47 * Paul Mundt <paul.mundt@nokia.com>:
48 * Overall revision about smaps.
51 #include <linux/uaccess.h>
53 #include <linux/errno.h>
54 #include <linux/time.h>
55 #include <linux/proc_fs.h>
56 #include <linux/stat.h>
57 #include <linux/task_io_accounting_ops.h>
58 #include <linux/init.h>
59 #include <linux/capability.h>
60 #include <linux/file.h>
61 #include <linux/generic-radix-tree.h>
62 #include <linux/string.h>
63 #include <linux/seq_file.h>
64 #include <linux/namei.h>
65 #include <linux/mnt_namespace.h>
67 #include <linux/swap.h>
68 #include <linux/rcupdate.h>
69 #include <linux/kallsyms.h>
70 #include <linux/stacktrace.h>
71 #include <linux/resource.h>
72 #include <linux/module.h>
73 #include <linux/mount.h>
74 #include <linux/security.h>
75 #include <linux/ptrace.h>
76 #include <linux/printk.h>
77 #include <linux/cache.h>
78 #include <linux/cgroup.h>
79 #include <linux/cpuset.h>
80 #include <linux/audit.h>
81 #include <linux/poll.h>
82 #include <linux/nsproxy.h>
83 #include <linux/oom.h>
84 #include <linux/elf.h>
85 #include <linux/pid_namespace.h>
86 #include <linux/user_namespace.h>
87 #include <linux/fs_parser.h>
88 #include <linux/fs_struct.h>
89 #include <linux/slab.h>
90 #include <linux/sched/autogroup.h>
91 #include <linux/sched/mm.h>
92 #include <linux/sched/coredump.h>
93 #include <linux/sched/debug.h>
94 #include <linux/sched/stat.h>
95 #include <linux/posix-timers.h>
96 #include <linux/time_namespace.h>
97 #include <linux/resctrl.h>
98 #include <linux/cn_proc.h>
99 #include <linux/ksm.h>
100 #include <uapi/linux/lsm.h>
101 #include <trace/events/oom.h>
102 #include "internal.h"
105 #include "../../lib/kstrtox.h"
108 * Implementing inode permission operations in /proc is almost
109 * certainly an error. Permission checks need to happen during
110 * each system call not at open time. The reason is that most of
111 * what we wish to check for permissions in /proc varies at runtime.
113 * The classic example of a problem is opening file descriptors
114 * in /proc for a task before it execs a suid executable.
117 static u8 nlink_tid __ro_after_init
;
118 static u8 nlink_tgid __ro_after_init
;
120 enum proc_mem_force
{
121 PROC_MEM_FORCE_ALWAYS
,
122 PROC_MEM_FORCE_PTRACE
,
126 static enum proc_mem_force proc_mem_force_override __ro_after_init
=
127 IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE
) ? PROC_MEM_FORCE_NEVER
:
128 IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE
) ? PROC_MEM_FORCE_PTRACE
:
129 PROC_MEM_FORCE_ALWAYS
;
131 static const struct constant_table proc_mem_force_table
[] __initconst
= {
132 { "always", PROC_MEM_FORCE_ALWAYS
},
133 { "ptrace", PROC_MEM_FORCE_PTRACE
},
134 { "never", PROC_MEM_FORCE_NEVER
},
138 static int __init
early_proc_mem_force_override(char *buf
)
144 * lookup_constant() defaults to proc_mem_force_override to preserve
145 * the initial Kconfig choice in case an invalid param gets passed.
147 proc_mem_force_override
= lookup_constant(proc_mem_force_table
,
148 buf
, proc_mem_force_override
);
152 early_param("proc_mem.force_override", early_proc_mem_force_override
);
158 const struct inode_operations
*iop
;
159 const struct file_operations
*fop
;
163 #define NOD(NAME, MODE, IOP, FOP, OP) { \
165 .len = sizeof(NAME) - 1, \
172 #define DIR(NAME, MODE, iops, fops) \
173 NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} )
174 #define LNK(NAME, get_link) \
175 NOD(NAME, (S_IFLNK|S_IRWXUGO), \
176 &proc_pid_link_inode_operations, NULL, \
177 { .proc_get_link = get_link } )
178 #define REG(NAME, MODE, fops) \
179 NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {})
180 #define ONE(NAME, MODE, show) \
181 NOD(NAME, (S_IFREG|(MODE)), \
182 NULL, &proc_single_file_operations, \
183 { .proc_show = show } )
184 #define ATTR(LSMID, NAME, MODE) \
185 NOD(NAME, (S_IFREG|(MODE)), \
186 NULL, &proc_pid_attr_operations, \
190 * Count the number of hardlinks for the pid_entry table, excluding the .
193 static unsigned int __init
pid_entry_nlink(const struct pid_entry
*entries
,
200 for (i
= 0; i
< n
; ++i
) {
201 if (S_ISDIR(entries
[i
].mode
))
208 static int get_task_root(struct task_struct
*task
, struct path
*root
)
210 int result
= -ENOENT
;
214 get_fs_root(task
->fs
, root
);
221 static int proc_cwd_link(struct dentry
*dentry
, struct path
*path
)
223 struct task_struct
*task
= get_proc_task(d_inode(dentry
));
224 int result
= -ENOENT
;
229 get_fs_pwd(task
->fs
, path
);
233 put_task_struct(task
);
238 static int proc_root_link(struct dentry
*dentry
, struct path
*path
)
240 struct task_struct
*task
= get_proc_task(d_inode(dentry
));
241 int result
= -ENOENT
;
244 result
= get_task_root(task
, path
);
245 put_task_struct(task
);
251 * If the user used setproctitle(), we just get the string from
252 * user space at arg_start, and limit it to a maximum of one page.
254 static ssize_t
get_mm_proctitle(struct mm_struct
*mm
, char __user
*buf
,
255 size_t count
, unsigned long pos
,
256 unsigned long arg_start
)
261 if (pos
>= PAGE_SIZE
)
264 page
= (char *)__get_free_page(GFP_KERNEL
);
269 got
= access_remote_vm(mm
, arg_start
, page
, PAGE_SIZE
, FOLL_ANON
);
271 int len
= strnlen(page
, got
);
273 /* Include the NUL character if it was found */
281 len
-= copy_to_user(buf
, page
+pos
, len
);
287 free_page((unsigned long)page
);
291 static ssize_t
get_mm_cmdline(struct mm_struct
*mm
, char __user
*buf
,
292 size_t count
, loff_t
*ppos
)
294 unsigned long arg_start
, arg_end
, env_start
, env_end
;
295 unsigned long pos
, len
;
298 /* Check if process spawned far enough to have cmdline. */
302 spin_lock(&mm
->arg_lock
);
303 arg_start
= mm
->arg_start
;
304 arg_end
= mm
->arg_end
;
305 env_start
= mm
->env_start
;
306 env_end
= mm
->env_end
;
307 spin_unlock(&mm
->arg_lock
);
309 if (arg_start
>= arg_end
)
313 * We allow setproctitle() to overwrite the argument
314 * strings, and overflow past the original end. But
315 * only when it overflows into the environment area.
317 if (env_start
!= arg_end
|| env_end
< env_start
)
318 env_start
= env_end
= arg_end
;
319 len
= env_end
- arg_start
;
321 /* We're not going to care if "*ppos" has high bits set */
325 if (count
> len
- pos
)
331 * Magical special case: if the argv[] end byte is not
332 * zero, the user has overwritten it with setproctitle(3).
334 * Possible future enhancement: do this only once when
335 * pos is 0, and set a flag in the 'struct file'.
337 if (access_remote_vm(mm
, arg_end
-1, &c
, 1, FOLL_ANON
) == 1 && c
)
338 return get_mm_proctitle(mm
, buf
, count
, pos
, arg_start
);
341 * For the non-setproctitle() case we limit things strictly
342 * to the [arg_start, arg_end[ range.
345 if (pos
< arg_start
|| pos
>= arg_end
)
347 if (count
> arg_end
- pos
)
348 count
= arg_end
- pos
;
350 page
= (char *)__get_free_page(GFP_KERNEL
);
357 size_t size
= min_t(size_t, PAGE_SIZE
, count
);
359 got
= access_remote_vm(mm
, pos
, page
, size
, FOLL_ANON
);
362 got
-= copy_to_user(buf
, page
, got
);
363 if (unlikely(!got
)) {
374 free_page((unsigned long)page
);
378 static ssize_t
get_task_cmdline(struct task_struct
*tsk
, char __user
*buf
,
379 size_t count
, loff_t
*pos
)
381 struct mm_struct
*mm
;
384 mm
= get_task_mm(tsk
);
388 ret
= get_mm_cmdline(mm
, buf
, count
, pos
);
393 static ssize_t
proc_pid_cmdline_read(struct file
*file
, char __user
*buf
,
394 size_t count
, loff_t
*pos
)
396 struct task_struct
*tsk
;
401 tsk
= get_proc_task(file_inode(file
));
404 ret
= get_task_cmdline(tsk
, buf
, count
, pos
);
405 put_task_struct(tsk
);
411 static const struct file_operations proc_pid_cmdline_ops
= {
412 .read
= proc_pid_cmdline_read
,
413 .llseek
= generic_file_llseek
,
416 #ifdef CONFIG_KALLSYMS
418 * Provides a wchan file via kallsyms in a proper one-value-per-file format.
419 * Returns the resolved symbol. If that fails, simply return the address.
421 static int proc_pid_wchan(struct seq_file
*m
, struct pid_namespace
*ns
,
422 struct pid
*pid
, struct task_struct
*task
)
425 char symname
[KSYM_NAME_LEN
];
427 if (!ptrace_may_access(task
, PTRACE_MODE_READ_FSCREDS
))
430 wchan
= get_wchan(task
);
431 if (wchan
&& !lookup_symbol_name(wchan
, symname
)) {
432 seq_puts(m
, symname
);
440 #endif /* CONFIG_KALLSYMS */
442 static int lock_trace(struct task_struct
*task
)
444 int err
= down_read_killable(&task
->signal
->exec_update_lock
);
447 if (!ptrace_may_access(task
, PTRACE_MODE_ATTACH_FSCREDS
)) {
448 up_read(&task
->signal
->exec_update_lock
);
454 static void unlock_trace(struct task_struct
*task
)
456 up_read(&task
->signal
->exec_update_lock
);
459 #ifdef CONFIG_STACKTRACE
461 #define MAX_STACK_TRACE_DEPTH 64
463 static int proc_pid_stack(struct seq_file
*m
, struct pid_namespace
*ns
,
464 struct pid
*pid
, struct task_struct
*task
)
466 unsigned long *entries
;
470 * The ability to racily run the kernel stack unwinder on a running task
471 * and then observe the unwinder output is scary; while it is useful for
472 * debugging kernel issues, it can also allow an attacker to leak kernel
474 * Doing this in a manner that is at least safe from races would require
475 * some work to ensure that the remote task can not be scheduled; and
476 * even then, this would still expose the unwinder as local attack
478 * Therefore, this interface is restricted to root.
480 if (!file_ns_capable(m
->file
, &init_user_ns
, CAP_SYS_ADMIN
))
483 entries
= kmalloc_array(MAX_STACK_TRACE_DEPTH
, sizeof(*entries
),
488 err
= lock_trace(task
);
490 unsigned int i
, nr_entries
;
492 nr_entries
= stack_trace_save_tsk(task
, entries
,
493 MAX_STACK_TRACE_DEPTH
, 0);
495 for (i
= 0; i
< nr_entries
; i
++) {
496 seq_printf(m
, "[<0>] %pB\n", (void *)entries
[i
]);
507 #ifdef CONFIG_SCHED_INFO
509 * Provides /proc/PID/schedstat
511 static int proc_pid_schedstat(struct seq_file
*m
, struct pid_namespace
*ns
,
512 struct pid
*pid
, struct task_struct
*task
)
514 if (unlikely(!sched_info_on()))
515 seq_puts(m
, "0 0 0\n");
517 seq_printf(m
, "%llu %llu %lu\n",
518 (unsigned long long)task
->se
.sum_exec_runtime
,
519 (unsigned long long)task
->sched_info
.run_delay
,
520 task
->sched_info
.pcount
);
526 #ifdef CONFIG_LATENCYTOP
527 static int lstats_show_proc(struct seq_file
*m
, void *v
)
530 struct inode
*inode
= m
->private;
531 struct task_struct
*task
= get_proc_task(inode
);
535 seq_puts(m
, "Latency Top version : v0.1\n");
536 for (i
= 0; i
< LT_SAVECOUNT
; i
++) {
537 struct latency_record
*lr
= &task
->latency_record
[i
];
538 if (lr
->backtrace
[0]) {
540 seq_printf(m
, "%i %li %li",
541 lr
->count
, lr
->time
, lr
->max
);
542 for (q
= 0; q
< LT_BACKTRACEDEPTH
; q
++) {
543 unsigned long bt
= lr
->backtrace
[q
];
547 seq_printf(m
, " %ps", (void *)bt
);
553 put_task_struct(task
);
557 static int lstats_open(struct inode
*inode
, struct file
*file
)
559 return single_open(file
, lstats_show_proc
, inode
);
562 static ssize_t
lstats_write(struct file
*file
, const char __user
*buf
,
563 size_t count
, loff_t
*offs
)
565 struct task_struct
*task
= get_proc_task(file_inode(file
));
569 clear_tsk_latency_tracing(task
);
570 put_task_struct(task
);
575 static const struct file_operations proc_lstats_operations
= {
578 .write
= lstats_write
,
580 .release
= single_release
,
585 static int proc_oom_score(struct seq_file
*m
, struct pid_namespace
*ns
,
586 struct pid
*pid
, struct task_struct
*task
)
588 unsigned long totalpages
= totalram_pages() + total_swap_pages
;
589 unsigned long points
= 0;
592 badness
= oom_badness(task
, totalpages
);
594 * Special case OOM_SCORE_ADJ_MIN for all others scale the
595 * badness value into [0, 2000] range which we have been
596 * exporting for a long time so userspace might depend on it.
598 if (badness
!= LONG_MIN
)
599 points
= (1000 + badness
* 1000 / (long)totalpages
) * 2 / 3;
601 seq_printf(m
, "%lu\n", points
);
611 static const struct limit_names lnames
[RLIM_NLIMITS
] = {
612 [RLIMIT_CPU
] = {"Max cpu time", "seconds"},
613 [RLIMIT_FSIZE
] = {"Max file size", "bytes"},
614 [RLIMIT_DATA
] = {"Max data size", "bytes"},
615 [RLIMIT_STACK
] = {"Max stack size", "bytes"},
616 [RLIMIT_CORE
] = {"Max core file size", "bytes"},
617 [RLIMIT_RSS
] = {"Max resident set", "bytes"},
618 [RLIMIT_NPROC
] = {"Max processes", "processes"},
619 [RLIMIT_NOFILE
] = {"Max open files", "files"},
620 [RLIMIT_MEMLOCK
] = {"Max locked memory", "bytes"},
621 [RLIMIT_AS
] = {"Max address space", "bytes"},
622 [RLIMIT_LOCKS
] = {"Max file locks", "locks"},
623 [RLIMIT_SIGPENDING
] = {"Max pending signals", "signals"},
624 [RLIMIT_MSGQUEUE
] = {"Max msgqueue size", "bytes"},
625 [RLIMIT_NICE
] = {"Max nice priority", NULL
},
626 [RLIMIT_RTPRIO
] = {"Max realtime priority", NULL
},
627 [RLIMIT_RTTIME
] = {"Max realtime timeout", "us"},
630 /* Display limits for a process */
631 static int proc_pid_limits(struct seq_file
*m
, struct pid_namespace
*ns
,
632 struct pid
*pid
, struct task_struct
*task
)
637 struct rlimit rlim
[RLIM_NLIMITS
];
639 if (!lock_task_sighand(task
, &flags
))
641 memcpy(rlim
, task
->signal
->rlim
, sizeof(struct rlimit
) * RLIM_NLIMITS
);
642 unlock_task_sighand(task
, &flags
);
645 * print the file header
652 for (i
= 0; i
< RLIM_NLIMITS
; i
++) {
653 if (rlim
[i
].rlim_cur
== RLIM_INFINITY
)
654 seq_printf(m
, "%-25s %-20s ",
655 lnames
[i
].name
, "unlimited");
657 seq_printf(m
, "%-25s %-20lu ",
658 lnames
[i
].name
, rlim
[i
].rlim_cur
);
660 if (rlim
[i
].rlim_max
== RLIM_INFINITY
)
661 seq_printf(m
, "%-20s ", "unlimited");
663 seq_printf(m
, "%-20lu ", rlim
[i
].rlim_max
);
666 seq_printf(m
, "%-10s\n", lnames
[i
].unit
);
674 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
675 static int proc_pid_syscall(struct seq_file
*m
, struct pid_namespace
*ns
,
676 struct pid
*pid
, struct task_struct
*task
)
678 struct syscall_info info
;
679 u64
*args
= &info
.data
.args
[0];
682 res
= lock_trace(task
);
686 if (task_current_syscall(task
, &info
))
687 seq_puts(m
, "running\n");
688 else if (info
.data
.nr
< 0)
689 seq_printf(m
, "%d 0x%llx 0x%llx\n",
690 info
.data
.nr
, info
.sp
, info
.data
.instruction_pointer
);
693 "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
695 args
[0], args
[1], args
[2], args
[3], args
[4], args
[5],
696 info
.sp
, info
.data
.instruction_pointer
);
701 #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
703 /************************************************************************/
704 /* Here the fs part begins */
705 /************************************************************************/
707 /* permission checks */
708 static bool proc_fd_access_allowed(struct inode
*inode
)
710 struct task_struct
*task
;
711 bool allowed
= false;
712 /* Allow access to a task's file descriptors if it is us or we
713 * may use ptrace attach to the process and find out that
716 task
= get_proc_task(inode
);
718 allowed
= ptrace_may_access(task
, PTRACE_MODE_READ_FSCREDS
);
719 put_task_struct(task
);
724 int proc_setattr(struct mnt_idmap
*idmap
, struct dentry
*dentry
,
728 struct inode
*inode
= d_inode(dentry
);
730 if (attr
->ia_valid
& ATTR_MODE
)
733 error
= setattr_prepare(&nop_mnt_idmap
, dentry
, attr
);
737 setattr_copy(&nop_mnt_idmap
, inode
, attr
);
742 * May current process learn task's sched/cmdline info (for hide_pid_min=1)
743 * or euid/egid (for hide_pid_min=2)?
745 static bool has_pid_permissions(struct proc_fs_info
*fs_info
,
746 struct task_struct
*task
,
747 enum proc_hidepid hide_pid_min
)
750 * If 'hidepid' mount option is set force a ptrace check,
751 * we indicate that we are using a filesystem syscall
752 * by passing PTRACE_MODE_READ_FSCREDS
754 if (fs_info
->hide_pid
== HIDEPID_NOT_PTRACEABLE
)
755 return ptrace_may_access(task
, PTRACE_MODE_READ_FSCREDS
);
757 if (fs_info
->hide_pid
< hide_pid_min
)
759 if (in_group_p(fs_info
->pid_gid
))
761 return ptrace_may_access(task
, PTRACE_MODE_READ_FSCREDS
);
765 static int proc_pid_permission(struct mnt_idmap
*idmap
,
766 struct inode
*inode
, int mask
)
768 struct proc_fs_info
*fs_info
= proc_sb_info(inode
->i_sb
);
769 struct task_struct
*task
;
772 task
= get_proc_task(inode
);
775 has_perms
= has_pid_permissions(fs_info
, task
, HIDEPID_NO_ACCESS
);
776 put_task_struct(task
);
779 if (fs_info
->hide_pid
== HIDEPID_INVISIBLE
) {
781 * Let's make getdents(), stat(), and open()
782 * consistent with each other. If a process
783 * may not stat() a file, it shouldn't be seen
791 return generic_permission(&nop_mnt_idmap
, inode
, mask
);
796 static const struct inode_operations proc_def_inode_operations
= {
797 .setattr
= proc_setattr
,
800 static int proc_single_show(struct seq_file
*m
, void *v
)
802 struct inode
*inode
= m
->private;
803 struct pid_namespace
*ns
= proc_pid_ns(inode
->i_sb
);
804 struct pid
*pid
= proc_pid(inode
);
805 struct task_struct
*task
;
808 task
= get_pid_task(pid
, PIDTYPE_PID
);
812 ret
= PROC_I(inode
)->op
.proc_show(m
, ns
, pid
, task
);
814 put_task_struct(task
);
818 static int proc_single_open(struct inode
*inode
, struct file
*filp
)
820 return single_open(filp
, proc_single_show
, inode
);
823 static const struct file_operations proc_single_file_operations
= {
824 .open
= proc_single_open
,
827 .release
= single_release
,
831 struct mm_struct
*proc_mem_open(struct inode
*inode
, unsigned int mode
)
833 struct task_struct
*task
= get_proc_task(inode
);
834 struct mm_struct
*mm
;
837 return ERR_PTR(-ESRCH
);
839 mm
= mm_access(task
, mode
| PTRACE_MODE_FSCREDS
);
840 put_task_struct(task
);
843 return mm
== ERR_PTR(-ESRCH
) ? NULL
: mm
;
845 /* ensure this mm_struct can't be freed */
847 /* but do not pin its memory */
853 static int __mem_open(struct inode
*inode
, struct file
*file
, unsigned int mode
)
855 struct mm_struct
*mm
= proc_mem_open(inode
, mode
);
860 file
->private_data
= mm
;
864 static int mem_open(struct inode
*inode
, struct file
*file
)
866 if (WARN_ON_ONCE(!(file
->f_op
->fop_flags
& FOP_UNSIGNED_OFFSET
)))
868 return __mem_open(inode
, file
, PTRACE_MODE_ATTACH
);
871 static bool proc_mem_foll_force(struct file
*file
, struct mm_struct
*mm
)
873 struct task_struct
*task
;
874 bool ptrace_active
= false;
876 switch (proc_mem_force_override
) {
877 case PROC_MEM_FORCE_NEVER
:
879 case PROC_MEM_FORCE_PTRACE
:
880 task
= get_proc_task(file_inode(file
));
882 ptrace_active
= READ_ONCE(task
->ptrace
) &&
883 READ_ONCE(task
->mm
) == mm
&&
884 READ_ONCE(task
->parent
) == current
;
885 put_task_struct(task
);
887 return ptrace_active
;
893 static ssize_t
mem_rw(struct file
*file
, char __user
*buf
,
894 size_t count
, loff_t
*ppos
, int write
)
896 struct mm_struct
*mm
= file
->private_data
;
897 unsigned long addr
= *ppos
;
905 page
= (char *)__get_free_page(GFP_KERNEL
);
910 if (!mmget_not_zero(mm
))
913 flags
= write
? FOLL_WRITE
: 0;
914 if (proc_mem_foll_force(file
, mm
))
918 size_t this_len
= min_t(size_t, count
, PAGE_SIZE
);
920 if (write
&& copy_from_user(page
, buf
, this_len
)) {
925 this_len
= access_remote_vm(mm
, addr
, page
, this_len
, flags
);
932 if (!write
&& copy_to_user(buf
, page
, this_len
)) {
946 free_page((unsigned long) page
);
950 static ssize_t
mem_read(struct file
*file
, char __user
*buf
,
951 size_t count
, loff_t
*ppos
)
953 return mem_rw(file
, buf
, count
, ppos
, 0);
956 static ssize_t
mem_write(struct file
*file
, const char __user
*buf
,
957 size_t count
, loff_t
*ppos
)
959 return mem_rw(file
, (char __user
*)buf
, count
, ppos
, 1);
962 loff_t
mem_lseek(struct file
*file
, loff_t offset
, int orig
)
966 file
->f_pos
= offset
;
969 file
->f_pos
+= offset
;
974 force_successful_syscall_return();
978 static int mem_release(struct inode
*inode
, struct file
*file
)
980 struct mm_struct
*mm
= file
->private_data
;
986 static const struct file_operations proc_mem_operations
= {
991 .release
= mem_release
,
992 .fop_flags
= FOP_UNSIGNED_OFFSET
,
995 static int environ_open(struct inode
*inode
, struct file
*file
)
997 return __mem_open(inode
, file
, PTRACE_MODE_READ
);
1000 static ssize_t
environ_read(struct file
*file
, char __user
*buf
,
1001 size_t count
, loff_t
*ppos
)
1004 unsigned long src
= *ppos
;
1006 struct mm_struct
*mm
= file
->private_data
;
1007 unsigned long env_start
, env_end
;
1009 /* Ensure the process spawned far enough to have an environment. */
1010 if (!mm
|| !mm
->env_end
)
1013 page
= (char *)__get_free_page(GFP_KERNEL
);
1018 if (!mmget_not_zero(mm
))
1021 spin_lock(&mm
->arg_lock
);
1022 env_start
= mm
->env_start
;
1023 env_end
= mm
->env_end
;
1024 spin_unlock(&mm
->arg_lock
);
1027 size_t this_len
, max_len
;
1030 if (src
>= (env_end
- env_start
))
1033 this_len
= env_end
- (env_start
+ src
);
1035 max_len
= min_t(size_t, PAGE_SIZE
, count
);
1036 this_len
= min(max_len
, this_len
);
1038 retval
= access_remote_vm(mm
, (env_start
+ src
), page
, this_len
, FOLL_ANON
);
1045 if (copy_to_user(buf
, page
, retval
)) {
1059 free_page((unsigned long) page
);
1063 static const struct file_operations proc_environ_operations
= {
1064 .open
= environ_open
,
1065 .read
= environ_read
,
1066 .llseek
= generic_file_llseek
,
1067 .release
= mem_release
,
1070 static int auxv_open(struct inode
*inode
, struct file
*file
)
1072 return __mem_open(inode
, file
, PTRACE_MODE_READ_FSCREDS
);
1075 static ssize_t
auxv_read(struct file
*file
, char __user
*buf
,
1076 size_t count
, loff_t
*ppos
)
1078 struct mm_struct
*mm
= file
->private_data
;
1079 unsigned int nwords
= 0;
1085 } while (mm
->saved_auxv
[nwords
- 2] != 0); /* AT_NULL */
1086 return simple_read_from_buffer(buf
, count
, ppos
, mm
->saved_auxv
,
1087 nwords
* sizeof(mm
->saved_auxv
[0]));
1090 static const struct file_operations proc_auxv_operations
= {
1093 .llseek
= generic_file_llseek
,
1094 .release
= mem_release
,
1097 static ssize_t
oom_adj_read(struct file
*file
, char __user
*buf
, size_t count
,
1100 struct task_struct
*task
= get_proc_task(file_inode(file
));
1101 char buffer
[PROC_NUMBUF
];
1102 int oom_adj
= OOM_ADJUST_MIN
;
1107 if (task
->signal
->oom_score_adj
== OOM_SCORE_ADJ_MAX
)
1108 oom_adj
= OOM_ADJUST_MAX
;
1110 oom_adj
= (task
->signal
->oom_score_adj
* -OOM_DISABLE
) /
1112 put_task_struct(task
);
1113 if (oom_adj
> OOM_ADJUST_MAX
)
1114 oom_adj
= OOM_ADJUST_MAX
;
1115 len
= snprintf(buffer
, sizeof(buffer
), "%d\n", oom_adj
);
1116 return simple_read_from_buffer(buf
, count
, ppos
, buffer
, len
);
1119 static int __set_oom_adj(struct file
*file
, int oom_adj
, bool legacy
)
1121 struct mm_struct
*mm
= NULL
;
1122 struct task_struct
*task
;
1125 task
= get_proc_task(file_inode(file
));
1129 mutex_lock(&oom_adj_mutex
);
1131 if (oom_adj
< task
->signal
->oom_score_adj
&&
1132 !capable(CAP_SYS_RESOURCE
)) {
1137 * /proc/pid/oom_adj is provided for legacy purposes, ask users to use
1138 * /proc/pid/oom_score_adj instead.
1140 pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n",
1141 current
->comm
, task_pid_nr(current
), task_pid_nr(task
),
1144 if ((short)oom_adj
< task
->signal
->oom_score_adj_min
&&
1145 !capable(CAP_SYS_RESOURCE
)) {
1152 * Make sure we will check other processes sharing the mm if this is
1153 * not vfork which wants its own oom_score_adj.
1154 * pin the mm so it doesn't go away and get reused after task_unlock
1156 if (!task
->vfork_done
) {
1157 struct task_struct
*p
= find_lock_task_mm(task
);
1160 if (test_bit(MMF_MULTIPROCESS
, &p
->mm
->flags
)) {
1168 task
->signal
->oom_score_adj
= oom_adj
;
1169 if (!legacy
&& has_capability_noaudit(current
, CAP_SYS_RESOURCE
))
1170 task
->signal
->oom_score_adj_min
= (short)oom_adj
;
1171 trace_oom_score_adj_update(task
);
1174 struct task_struct
*p
;
1177 for_each_process(p
) {
1178 if (same_thread_group(task
, p
))
1181 /* do not touch kernel threads or the global init */
1182 if (p
->flags
& PF_KTHREAD
|| is_global_init(p
))
1186 if (!p
->vfork_done
&& process_shares_mm(p
, mm
)) {
1187 p
->signal
->oom_score_adj
= oom_adj
;
1188 if (!legacy
&& has_capability_noaudit(current
, CAP_SYS_RESOURCE
))
1189 p
->signal
->oom_score_adj_min
= (short)oom_adj
;
1197 mutex_unlock(&oom_adj_mutex
);
1198 put_task_struct(task
);
1203 * /proc/pid/oom_adj exists solely for backwards compatibility with previous
1204 * kernels. The effective policy is defined by oom_score_adj, which has a
1205 * different scale: oom_adj grew exponentially and oom_score_adj grows linearly.
1206 * Values written to oom_adj are simply mapped linearly to oom_score_adj.
1207 * Processes that become oom disabled via oom_adj will still be oom disabled
1208 * with this implementation.
1210 * oom_adj cannot be removed since existing userspace binaries use it.
1212 static ssize_t
oom_adj_write(struct file
*file
, const char __user
*buf
,
1213 size_t count
, loff_t
*ppos
)
1215 char buffer
[PROC_NUMBUF
] = {};
1219 if (count
> sizeof(buffer
) - 1)
1220 count
= sizeof(buffer
) - 1;
1221 if (copy_from_user(buffer
, buf
, count
)) {
1226 err
= kstrtoint(strstrip(buffer
), 0, &oom_adj
);
1229 if ((oom_adj
< OOM_ADJUST_MIN
|| oom_adj
> OOM_ADJUST_MAX
) &&
1230 oom_adj
!= OOM_DISABLE
) {
1236 * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum
1237 * value is always attainable.
1239 if (oom_adj
== OOM_ADJUST_MAX
)
1240 oom_adj
= OOM_SCORE_ADJ_MAX
;
1242 oom_adj
= (oom_adj
* OOM_SCORE_ADJ_MAX
) / -OOM_DISABLE
;
1244 err
= __set_oom_adj(file
, oom_adj
, true);
1246 return err
< 0 ? err
: count
;
1249 static const struct file_operations proc_oom_adj_operations
= {
1250 .read
= oom_adj_read
,
1251 .write
= oom_adj_write
,
1252 .llseek
= generic_file_llseek
,
1255 static ssize_t
oom_score_adj_read(struct file
*file
, char __user
*buf
,
1256 size_t count
, loff_t
*ppos
)
1258 struct task_struct
*task
= get_proc_task(file_inode(file
));
1259 char buffer
[PROC_NUMBUF
];
1260 short oom_score_adj
= OOM_SCORE_ADJ_MIN
;
1265 oom_score_adj
= task
->signal
->oom_score_adj
;
1266 put_task_struct(task
);
1267 len
= snprintf(buffer
, sizeof(buffer
), "%hd\n", oom_score_adj
);
1268 return simple_read_from_buffer(buf
, count
, ppos
, buffer
, len
);
1271 static ssize_t
oom_score_adj_write(struct file
*file
, const char __user
*buf
,
1272 size_t count
, loff_t
*ppos
)
1274 char buffer
[PROC_NUMBUF
] = {};
1278 if (count
> sizeof(buffer
) - 1)
1279 count
= sizeof(buffer
) - 1;
1280 if (copy_from_user(buffer
, buf
, count
)) {
1285 err
= kstrtoint(strstrip(buffer
), 0, &oom_score_adj
);
1288 if (oom_score_adj
< OOM_SCORE_ADJ_MIN
||
1289 oom_score_adj
> OOM_SCORE_ADJ_MAX
) {
1294 err
= __set_oom_adj(file
, oom_score_adj
, false);
1296 return err
< 0 ? err
: count
;
1299 static const struct file_operations proc_oom_score_adj_operations
= {
1300 .read
= oom_score_adj_read
,
1301 .write
= oom_score_adj_write
,
1302 .llseek
= default_llseek
,
1306 #define TMPBUFLEN 11
1307 static ssize_t
proc_loginuid_read(struct file
* file
, char __user
* buf
,
1308 size_t count
, loff_t
*ppos
)
1310 struct inode
* inode
= file_inode(file
);
1311 struct task_struct
*task
= get_proc_task(inode
);
1313 char tmpbuf
[TMPBUFLEN
];
1317 length
= scnprintf(tmpbuf
, TMPBUFLEN
, "%u",
1318 from_kuid(file
->f_cred
->user_ns
,
1319 audit_get_loginuid(task
)));
1320 put_task_struct(task
);
1321 return simple_read_from_buffer(buf
, count
, ppos
, tmpbuf
, length
);
1324 static ssize_t
proc_loginuid_write(struct file
* file
, const char __user
* buf
,
1325 size_t count
, loff_t
*ppos
)
1327 struct inode
* inode
= file_inode(file
);
1332 /* Don't let kthreads write their own loginuid */
1333 if (current
->flags
& PF_KTHREAD
)
1337 if (current
!= pid_task(proc_pid(inode
), PIDTYPE_PID
)) {
1344 /* No partial writes. */
1348 rv
= kstrtou32_from_user(buf
, count
, 10, &loginuid
);
1352 /* is userspace trying to explicitly UNSET the loginuid? */
1353 if (loginuid
== AUDIT_UID_UNSET
) {
1354 kloginuid
= INVALID_UID
;
1356 kloginuid
= make_kuid(file
->f_cred
->user_ns
, loginuid
);
1357 if (!uid_valid(kloginuid
))
1361 rv
= audit_set_loginuid(kloginuid
);
1367 static const struct file_operations proc_loginuid_operations
= {
1368 .read
= proc_loginuid_read
,
1369 .write
= proc_loginuid_write
,
1370 .llseek
= generic_file_llseek
,
1373 static ssize_t
proc_sessionid_read(struct file
* file
, char __user
* buf
,
1374 size_t count
, loff_t
*ppos
)
1376 struct inode
* inode
= file_inode(file
);
1377 struct task_struct
*task
= get_proc_task(inode
);
1379 char tmpbuf
[TMPBUFLEN
];
1383 length
= scnprintf(tmpbuf
, TMPBUFLEN
, "%u",
1384 audit_get_sessionid(task
));
1385 put_task_struct(task
);
1386 return simple_read_from_buffer(buf
, count
, ppos
, tmpbuf
, length
);
1389 static const struct file_operations proc_sessionid_operations
= {
1390 .read
= proc_sessionid_read
,
1391 .llseek
= generic_file_llseek
,
1395 #ifdef CONFIG_FAULT_INJECTION
1396 static ssize_t
proc_fault_inject_read(struct file
* file
, char __user
* buf
,
1397 size_t count
, loff_t
*ppos
)
1399 struct task_struct
*task
= get_proc_task(file_inode(file
));
1400 char buffer
[PROC_NUMBUF
];
1406 make_it_fail
= task
->make_it_fail
;
1407 put_task_struct(task
);
1409 len
= snprintf(buffer
, sizeof(buffer
), "%i\n", make_it_fail
);
1411 return simple_read_from_buffer(buf
, count
, ppos
, buffer
, len
);
1414 static ssize_t
proc_fault_inject_write(struct file
* file
,
1415 const char __user
* buf
, size_t count
, loff_t
*ppos
)
1417 struct task_struct
*task
;
1418 char buffer
[PROC_NUMBUF
] = {};
1422 if (!capable(CAP_SYS_RESOURCE
))
1425 if (count
> sizeof(buffer
) - 1)
1426 count
= sizeof(buffer
) - 1;
1427 if (copy_from_user(buffer
, buf
, count
))
1429 rv
= kstrtoint(strstrip(buffer
), 0, &make_it_fail
);
1432 if (make_it_fail
< 0 || make_it_fail
> 1)
1435 task
= get_proc_task(file_inode(file
));
1438 task
->make_it_fail
= make_it_fail
;
1439 put_task_struct(task
);
1444 static const struct file_operations proc_fault_inject_operations
= {
1445 .read
= proc_fault_inject_read
,
1446 .write
= proc_fault_inject_write
,
1447 .llseek
= generic_file_llseek
,
1450 static ssize_t
proc_fail_nth_write(struct file
*file
, const char __user
*buf
,
1451 size_t count
, loff_t
*ppos
)
1453 struct task_struct
*task
;
1457 err
= kstrtouint_from_user(buf
, count
, 0, &n
);
1461 task
= get_proc_task(file_inode(file
));
1465 put_task_struct(task
);
1470 static ssize_t
proc_fail_nth_read(struct file
*file
, char __user
*buf
,
1471 size_t count
, loff_t
*ppos
)
1473 struct task_struct
*task
;
1474 char numbuf
[PROC_NUMBUF
];
1477 task
= get_proc_task(file_inode(file
));
1480 len
= snprintf(numbuf
, sizeof(numbuf
), "%u\n", task
->fail_nth
);
1481 put_task_struct(task
);
1482 return simple_read_from_buffer(buf
, count
, ppos
, numbuf
, len
);
1485 static const struct file_operations proc_fail_nth_operations
= {
1486 .read
= proc_fail_nth_read
,
1487 .write
= proc_fail_nth_write
,
1492 #ifdef CONFIG_SCHED_DEBUG
1494 * Print out various scheduling related per-task fields:
1496 static int sched_show(struct seq_file
*m
, void *v
)
1498 struct inode
*inode
= m
->private;
1499 struct pid_namespace
*ns
= proc_pid_ns(inode
->i_sb
);
1500 struct task_struct
*p
;
1502 p
= get_proc_task(inode
);
1505 proc_sched_show_task(p
, ns
, m
);
1513 sched_write(struct file
*file
, const char __user
*buf
,
1514 size_t count
, loff_t
*offset
)
1516 struct inode
*inode
= file_inode(file
);
1517 struct task_struct
*p
;
1519 p
= get_proc_task(inode
);
1522 proc_sched_set_task(p
);
1529 static int sched_open(struct inode
*inode
, struct file
*filp
)
1531 return single_open(filp
, sched_show
, inode
);
1534 static const struct file_operations proc_pid_sched_operations
= {
1537 .write
= sched_write
,
1538 .llseek
= seq_lseek
,
1539 .release
= single_release
,
1544 #ifdef CONFIG_SCHED_AUTOGROUP
1546 * Print out autogroup related information:
1548 static int sched_autogroup_show(struct seq_file
*m
, void *v
)
1550 struct inode
*inode
= m
->private;
1551 struct task_struct
*p
;
1553 p
= get_proc_task(inode
);
1556 proc_sched_autogroup_show_task(p
, m
);
1564 sched_autogroup_write(struct file
*file
, const char __user
*buf
,
1565 size_t count
, loff_t
*offset
)
1567 struct inode
*inode
= file_inode(file
);
1568 struct task_struct
*p
;
1569 char buffer
[PROC_NUMBUF
] = {};
1573 if (count
> sizeof(buffer
) - 1)
1574 count
= sizeof(buffer
) - 1;
1575 if (copy_from_user(buffer
, buf
, count
))
1578 err
= kstrtoint(strstrip(buffer
), 0, &nice
);
1582 p
= get_proc_task(inode
);
1586 err
= proc_sched_autogroup_set_nice(p
, nice
);
1595 static int sched_autogroup_open(struct inode
*inode
, struct file
*filp
)
1599 ret
= single_open(filp
, sched_autogroup_show
, NULL
);
1601 struct seq_file
*m
= filp
->private_data
;
1608 static const struct file_operations proc_pid_sched_autogroup_operations
= {
1609 .open
= sched_autogroup_open
,
1611 .write
= sched_autogroup_write
,
1612 .llseek
= seq_lseek
,
1613 .release
= single_release
,
1616 #endif /* CONFIG_SCHED_AUTOGROUP */
1618 #ifdef CONFIG_TIME_NS
1619 static int timens_offsets_show(struct seq_file
*m
, void *v
)
1621 struct task_struct
*p
;
1623 p
= get_proc_task(file_inode(m
->file
));
1626 proc_timens_show_offsets(p
, m
);
1633 static ssize_t
timens_offsets_write(struct file
*file
, const char __user
*buf
,
1634 size_t count
, loff_t
*ppos
)
1636 struct inode
*inode
= file_inode(file
);
1637 struct proc_timens_offset offsets
[2];
1638 char *kbuf
= NULL
, *pos
, *next_line
;
1639 struct task_struct
*p
;
1642 /* Only allow < page size writes at the beginning of the file */
1643 if ((*ppos
!= 0) || (count
>= PAGE_SIZE
))
1646 /* Slurp in the user data */
1647 kbuf
= memdup_user_nul(buf
, count
);
1649 return PTR_ERR(kbuf
);
1651 /* Parse the user data */
1654 for (pos
= kbuf
; pos
; pos
= next_line
) {
1655 struct proc_timens_offset
*off
= &offsets
[noffsets
];
1659 /* Find the end of line and ensure we don't look past it */
1660 next_line
= strchr(pos
, '\n');
1664 if (*next_line
== '\0')
1668 err
= sscanf(pos
, "%9s %lld %lu", clock
,
1669 &off
->val
.tv_sec
, &off
->val
.tv_nsec
);
1670 if (err
!= 3 || off
->val
.tv_nsec
>= NSEC_PER_SEC
)
1673 clock
[sizeof(clock
) - 1] = 0;
1674 if (strcmp(clock
, "monotonic") == 0 ||
1675 strcmp(clock
, __stringify(CLOCK_MONOTONIC
)) == 0)
1676 off
->clockid
= CLOCK_MONOTONIC
;
1677 else if (strcmp(clock
, "boottime") == 0 ||
1678 strcmp(clock
, __stringify(CLOCK_BOOTTIME
)) == 0)
1679 off
->clockid
= CLOCK_BOOTTIME
;
1684 if (noffsets
== ARRAY_SIZE(offsets
)) {
1686 count
= next_line
- kbuf
;
1692 p
= get_proc_task(inode
);
1695 ret
= proc_timens_set_offset(file
, p
, offsets
, noffsets
);
1706 static int timens_offsets_open(struct inode
*inode
, struct file
*filp
)
1708 return single_open(filp
, timens_offsets_show
, inode
);
1711 static const struct file_operations proc_timens_offsets_operations
= {
1712 .open
= timens_offsets_open
,
1714 .write
= timens_offsets_write
,
1715 .llseek
= seq_lseek
,
1716 .release
= single_release
,
1718 #endif /* CONFIG_TIME_NS */
1720 static ssize_t
comm_write(struct file
*file
, const char __user
*buf
,
1721 size_t count
, loff_t
*offset
)
1723 struct inode
*inode
= file_inode(file
);
1724 struct task_struct
*p
;
1725 char buffer
[TASK_COMM_LEN
] = {};
1726 const size_t maxlen
= sizeof(buffer
) - 1;
1728 if (copy_from_user(buffer
, buf
, count
> maxlen
? maxlen
: count
))
1731 p
= get_proc_task(inode
);
1735 if (same_thread_group(current
, p
)) {
1736 set_task_comm(p
, buffer
);
1737 proc_comm_connector(p
);
1747 static int comm_show(struct seq_file
*m
, void *v
)
1749 struct inode
*inode
= m
->private;
1750 struct task_struct
*p
;
1752 p
= get_proc_task(inode
);
1756 proc_task_name(m
, p
, false);
1764 static int comm_open(struct inode
*inode
, struct file
*filp
)
1766 return single_open(filp
, comm_show
, inode
);
1769 static const struct file_operations proc_pid_set_comm_operations
= {
1772 .write
= comm_write
,
1773 .llseek
= seq_lseek
,
1774 .release
= single_release
,
1777 static int proc_exe_link(struct dentry
*dentry
, struct path
*exe_path
)
1779 struct task_struct
*task
;
1780 struct file
*exe_file
;
1782 task
= get_proc_task(d_inode(dentry
));
1785 exe_file
= get_task_exe_file(task
);
1786 put_task_struct(task
);
1788 *exe_path
= exe_file
->f_path
;
1789 path_get(&exe_file
->f_path
);
1796 static const char *proc_pid_get_link(struct dentry
*dentry
,
1797 struct inode
*inode
,
1798 struct delayed_call
*done
)
1801 int error
= -EACCES
;
1804 return ERR_PTR(-ECHILD
);
1806 /* Are we allowed to snoop on the tasks file descriptors? */
1807 if (!proc_fd_access_allowed(inode
))
1810 error
= PROC_I(inode
)->op
.proc_get_link(dentry
, &path
);
1814 error
= nd_jump_link(&path
);
1816 return ERR_PTR(error
);
1819 static int do_proc_readlink(const struct path
*path
, char __user
*buffer
, int buflen
)
1821 char *tmp
= kmalloc(PATH_MAX
, GFP_KERNEL
);
1828 pathname
= d_path(path
, tmp
, PATH_MAX
);
1829 len
= PTR_ERR(pathname
);
1830 if (IS_ERR(pathname
))
1832 len
= tmp
+ PATH_MAX
- 1 - pathname
;
1836 if (copy_to_user(buffer
, pathname
, len
))
1843 static int proc_pid_readlink(struct dentry
* dentry
, char __user
* buffer
, int buflen
)
1845 int error
= -EACCES
;
1846 struct inode
*inode
= d_inode(dentry
);
1849 /* Are we allowed to snoop on the tasks file descriptors? */
1850 if (!proc_fd_access_allowed(inode
))
1853 error
= PROC_I(inode
)->op
.proc_get_link(dentry
, &path
);
1857 error
= do_proc_readlink(&path
, buffer
, buflen
);
1863 const struct inode_operations proc_pid_link_inode_operations
= {
1864 .readlink
= proc_pid_readlink
,
1865 .get_link
= proc_pid_get_link
,
1866 .setattr
= proc_setattr
,
1870 /* building an inode */
1872 void task_dump_owner(struct task_struct
*task
, umode_t mode
,
1873 kuid_t
*ruid
, kgid_t
*rgid
)
1875 /* Depending on the state of dumpable compute who should own a
1876 * proc file for a task.
1878 const struct cred
*cred
;
1882 if (unlikely(task
->flags
& PF_KTHREAD
)) {
1883 *ruid
= GLOBAL_ROOT_UID
;
1884 *rgid
= GLOBAL_ROOT_GID
;
1888 /* Default to the tasks effective ownership */
1890 cred
= __task_cred(task
);
1896 * Before the /proc/pid/status file was created the only way to read
1897 * the effective uid of a /process was to stat /proc/pid. Reading
1898 * /proc/pid/status is slow enough that procps and other packages
1899 * kept stating /proc/pid. To keep the rules in /proc simple I have
1900 * made this apply to all per process world readable and executable
1903 if (mode
!= (S_IFDIR
|S_IRUGO
|S_IXUGO
)) {
1904 struct mm_struct
*mm
;
1907 /* Make non-dumpable tasks owned by some root */
1909 if (get_dumpable(mm
) != SUID_DUMP_USER
) {
1910 struct user_namespace
*user_ns
= mm
->user_ns
;
1912 uid
= make_kuid(user_ns
, 0);
1913 if (!uid_valid(uid
))
1914 uid
= GLOBAL_ROOT_UID
;
1916 gid
= make_kgid(user_ns
, 0);
1917 if (!gid_valid(gid
))
1918 gid
= GLOBAL_ROOT_GID
;
1921 uid
= GLOBAL_ROOT_UID
;
1922 gid
= GLOBAL_ROOT_GID
;
1930 void proc_pid_evict_inode(struct proc_inode
*ei
)
1932 struct pid
*pid
= ei
->pid
;
1934 if (S_ISDIR(ei
->vfs_inode
.i_mode
)) {
1935 spin_lock(&pid
->lock
);
1936 hlist_del_init_rcu(&ei
->sibling_inodes
);
1937 spin_unlock(&pid
->lock
);
1941 struct inode
*proc_pid_make_inode(struct super_block
*sb
,
1942 struct task_struct
*task
, umode_t mode
)
1944 struct inode
* inode
;
1945 struct proc_inode
*ei
;
1948 /* We need a new inode */
1950 inode
= new_inode(sb
);
1956 inode
->i_mode
= mode
;
1957 inode
->i_ino
= get_next_ino();
1958 simple_inode_init_ts(inode
);
1959 inode
->i_op
= &proc_def_inode_operations
;
1962 * grab the reference to task.
1964 pid
= get_task_pid(task
, PIDTYPE_PID
);
1968 /* Let the pid remember us for quick removal */
1971 task_dump_owner(task
, 0, &inode
->i_uid
, &inode
->i_gid
);
1972 security_task_to_inode(task
, inode
);
1983 * Generating an inode and adding it into @pid->inodes, so that task will
1984 * invalidate inode's dentry before being released.
1986 * This helper is used for creating dir-type entries under '/proc' and
1987 * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>'
1988 * can be released by invalidating '/proc/<tgid>' dentry.
1989 * In theory, dentries under '/proc/<tgid>/task' can also be released by
1990 * invalidating '/proc/<tgid>' dentry, we reserve it to handle single
1991 * thread exiting situation: Any one of threads should invalidate its
1992 * '/proc/<tgid>/task/<pid>' dentry before released.
1994 static struct inode
*proc_pid_make_base_inode(struct super_block
*sb
,
1995 struct task_struct
*task
, umode_t mode
)
1997 struct inode
*inode
;
1998 struct proc_inode
*ei
;
2001 inode
= proc_pid_make_inode(sb
, task
, mode
);
2005 /* Let proc_flush_pid find this directory inode */
2008 spin_lock(&pid
->lock
);
2009 hlist_add_head_rcu(&ei
->sibling_inodes
, &pid
->inodes
);
2010 spin_unlock(&pid
->lock
);
2015 int pid_getattr(struct mnt_idmap
*idmap
, const struct path
*path
,
2016 struct kstat
*stat
, u32 request_mask
, unsigned int query_flags
)
2018 struct inode
*inode
= d_inode(path
->dentry
);
2019 struct proc_fs_info
*fs_info
= proc_sb_info(inode
->i_sb
);
2020 struct task_struct
*task
;
2022 generic_fillattr(&nop_mnt_idmap
, request_mask
, inode
, stat
);
2024 stat
->uid
= GLOBAL_ROOT_UID
;
2025 stat
->gid
= GLOBAL_ROOT_GID
;
2027 task
= pid_task(proc_pid(inode
), PIDTYPE_PID
);
2029 if (!has_pid_permissions(fs_info
, task
, HIDEPID_INVISIBLE
)) {
2032 * This doesn't prevent learning whether PID exists,
2033 * it only makes getattr() consistent with readdir().
2037 task_dump_owner(task
, inode
->i_mode
, &stat
->uid
, &stat
->gid
);
2046 * Set <pid>/... inode ownership (can change due to setuid(), etc.)
2048 void pid_update_inode(struct task_struct
*task
, struct inode
*inode
)
2050 task_dump_owner(task
, inode
->i_mode
, &inode
->i_uid
, &inode
->i_gid
);
2052 inode
->i_mode
&= ~(S_ISUID
| S_ISGID
);
2053 security_task_to_inode(task
, inode
);
2057 * Rewrite the inode's ownerships here because the owning task may have
2058 * performed a setuid(), etc.
2061 static int pid_revalidate(struct dentry
*dentry
, unsigned int flags
)
2063 struct inode
*inode
;
2064 struct task_struct
*task
;
2068 inode
= d_inode_rcu(dentry
);
2071 task
= pid_task(proc_pid(inode
), PIDTYPE_PID
);
2074 pid_update_inode(task
, inode
);
2082 static inline bool proc_inode_is_dead(struct inode
*inode
)
2084 return !proc_pid(inode
)->tasks
[PIDTYPE_PID
].first
;
2087 int pid_delete_dentry(const struct dentry
*dentry
)
2089 /* Is the task we represent dead?
2090 * If so, then don't put the dentry on the lru list,
2091 * kill it immediately.
2093 return proc_inode_is_dead(d_inode(dentry
));
2096 const struct dentry_operations pid_dentry_operations
=
2098 .d_revalidate
= pid_revalidate
,
2099 .d_delete
= pid_delete_dentry
,
2105 * Fill a directory entry.
2107 * If possible create the dcache entry and derive our inode number and
2108 * file type from dcache entry.
2110 * Since all of the proc inode numbers are dynamically generated, the inode
2111 * numbers do not exist until the inode is cache. This means creating
2112 * the dcache entry in readdir is necessary to keep the inode numbers
2113 * reported by readdir in sync with the inode numbers reported
2116 bool proc_fill_cache(struct file
*file
, struct dir_context
*ctx
,
2117 const char *name
, unsigned int len
,
2118 instantiate_t instantiate
, struct task_struct
*task
, const void *ptr
)
2120 struct dentry
*child
, *dir
= file
->f_path
.dentry
;
2121 struct qstr qname
= QSTR_INIT(name
, len
);
2122 struct inode
*inode
;
2123 unsigned type
= DT_UNKNOWN
;
2126 child
= d_hash_and_lookup(dir
, &qname
);
2128 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq
);
2129 child
= d_alloc_parallel(dir
, &qname
, &wq
);
2131 goto end_instantiate
;
2132 if (d_in_lookup(child
)) {
2134 res
= instantiate(child
, task
, ptr
);
2135 d_lookup_done(child
);
2136 if (unlikely(res
)) {
2140 goto end_instantiate
;
2144 inode
= d_inode(child
);
2146 type
= inode
->i_mode
>> 12;
2149 return dir_emit(ctx
, name
, len
, ino
, type
);
2153 * dname_to_vma_addr - maps a dentry name into two unsigned longs
2154 * which represent vma start and end addresses.
2156 static int dname_to_vma_addr(struct dentry
*dentry
,
2157 unsigned long *start
, unsigned long *end
)
2159 const char *str
= dentry
->d_name
.name
;
2160 unsigned long long sval
, eval
;
2163 if (str
[0] == '0' && str
[1] != '-')
2165 len
= _parse_integer(str
, 16, &sval
);
2166 if (len
& KSTRTOX_OVERFLOW
)
2168 if (sval
!= (unsigned long)sval
)
2176 if (str
[0] == '0' && str
[1])
2178 len
= _parse_integer(str
, 16, &eval
);
2179 if (len
& KSTRTOX_OVERFLOW
)
2181 if (eval
!= (unsigned long)eval
)
2194 static int map_files_d_revalidate(struct dentry
*dentry
, unsigned int flags
)
2196 unsigned long vm_start
, vm_end
;
2197 bool exact_vma_exists
= false;
2198 struct mm_struct
*mm
= NULL
;
2199 struct task_struct
*task
;
2200 struct inode
*inode
;
2203 if (flags
& LOOKUP_RCU
)
2206 inode
= d_inode(dentry
);
2207 task
= get_proc_task(inode
);
2211 mm
= mm_access(task
, PTRACE_MODE_READ_FSCREDS
);
2215 if (!dname_to_vma_addr(dentry
, &vm_start
, &vm_end
)) {
2216 status
= mmap_read_lock_killable(mm
);
2218 exact_vma_exists
= !!find_exact_vma(mm
, vm_start
,
2220 mmap_read_unlock(mm
);
2226 if (exact_vma_exists
) {
2227 task_dump_owner(task
, 0, &inode
->i_uid
, &inode
->i_gid
);
2229 security_task_to_inode(task
, inode
);
2234 put_task_struct(task
);
2240 static const struct dentry_operations tid_map_files_dentry_operations
= {
2241 .d_revalidate
= map_files_d_revalidate
,
2242 .d_delete
= pid_delete_dentry
,
2245 static int map_files_get_link(struct dentry
*dentry
, struct path
*path
)
2247 unsigned long vm_start
, vm_end
;
2248 struct vm_area_struct
*vma
;
2249 struct task_struct
*task
;
2250 struct mm_struct
*mm
;
2254 task
= get_proc_task(d_inode(dentry
));
2258 mm
= get_task_mm(task
);
2259 put_task_struct(task
);
2263 rc
= dname_to_vma_addr(dentry
, &vm_start
, &vm_end
);
2267 rc
= mmap_read_lock_killable(mm
);
2272 vma
= find_exact_vma(mm
, vm_start
, vm_end
);
2273 if (vma
&& vma
->vm_file
) {
2274 *path
= *file_user_path(vma
->vm_file
);
2278 mmap_read_unlock(mm
);
2286 struct map_files_info
{
2287 unsigned long start
;
2293 * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due
2294 * to concerns about how the symlinks may be used to bypass permissions on
2295 * ancestor directories in the path to the file in question.
2298 proc_map_files_get_link(struct dentry
*dentry
,
2299 struct inode
*inode
,
2300 struct delayed_call
*done
)
2302 if (!checkpoint_restore_ns_capable(&init_user_ns
))
2303 return ERR_PTR(-EPERM
);
2305 return proc_pid_get_link(dentry
, inode
, done
);
2309 * Identical to proc_pid_link_inode_operations except for get_link()
2311 static const struct inode_operations proc_map_files_link_inode_operations
= {
2312 .readlink
= proc_pid_readlink
,
2313 .get_link
= proc_map_files_get_link
,
2314 .setattr
= proc_setattr
,
2317 static struct dentry
*
2318 proc_map_files_instantiate(struct dentry
*dentry
,
2319 struct task_struct
*task
, const void *ptr
)
2321 fmode_t mode
= (fmode_t
)(unsigned long)ptr
;
2322 struct proc_inode
*ei
;
2323 struct inode
*inode
;
2325 inode
= proc_pid_make_inode(dentry
->d_sb
, task
, S_IFLNK
|
2326 ((mode
& FMODE_READ
) ? S_IRUSR
: 0) |
2327 ((mode
& FMODE_WRITE
) ? S_IWUSR
: 0));
2329 return ERR_PTR(-ENOENT
);
2332 ei
->op
.proc_get_link
= map_files_get_link
;
2334 inode
->i_op
= &proc_map_files_link_inode_operations
;
2337 return proc_splice_unmountable(inode
, dentry
,
2338 &tid_map_files_dentry_operations
);
2341 static struct dentry
*proc_map_files_lookup(struct inode
*dir
,
2342 struct dentry
*dentry
, unsigned int flags
)
2344 unsigned long vm_start
, vm_end
;
2345 struct vm_area_struct
*vma
;
2346 struct task_struct
*task
;
2347 struct dentry
*result
;
2348 struct mm_struct
*mm
;
2350 result
= ERR_PTR(-ENOENT
);
2351 task
= get_proc_task(dir
);
2355 result
= ERR_PTR(-EACCES
);
2356 if (!ptrace_may_access(task
, PTRACE_MODE_READ_FSCREDS
))
2359 result
= ERR_PTR(-ENOENT
);
2360 if (dname_to_vma_addr(dentry
, &vm_start
, &vm_end
))
2363 mm
= get_task_mm(task
);
2367 result
= ERR_PTR(-EINTR
);
2368 if (mmap_read_lock_killable(mm
))
2371 result
= ERR_PTR(-ENOENT
);
2372 vma
= find_exact_vma(mm
, vm_start
, vm_end
);
2377 result
= proc_map_files_instantiate(dentry
, task
,
2378 (void *)(unsigned long)vma
->vm_file
->f_mode
);
2381 mmap_read_unlock(mm
);
2385 put_task_struct(task
);
2390 static const struct inode_operations proc_map_files_inode_operations
= {
2391 .lookup
= proc_map_files_lookup
,
2392 .permission
= proc_fd_permission
,
2393 .setattr
= proc_setattr
,
2397 proc_map_files_readdir(struct file
*file
, struct dir_context
*ctx
)
2399 struct vm_area_struct
*vma
;
2400 struct task_struct
*task
;
2401 struct mm_struct
*mm
;
2402 unsigned long nr_files
, pos
, i
;
2403 GENRADIX(struct map_files_info
) fa
;
2404 struct map_files_info
*p
;
2406 struct vma_iterator vmi
;
2411 task
= get_proc_task(file_inode(file
));
2416 if (!ptrace_may_access(task
, PTRACE_MODE_READ_FSCREDS
))
2420 if (!dir_emit_dots(file
, ctx
))
2423 mm
= get_task_mm(task
);
2427 ret
= mmap_read_lock_killable(mm
);
2436 * We need two passes here:
2438 * 1) Collect vmas of mapped files with mmap_lock taken
2439 * 2) Release mmap_lock and instantiate entries
2441 * otherwise we get lockdep complained, since filldir()
2442 * routine might require mmap_lock taken in might_fault().
2446 vma_iter_init(&vmi
, mm
, 0);
2447 for_each_vma(vmi
, vma
) {
2450 if (++pos
<= ctx
->pos
)
2453 p
= genradix_ptr_alloc(&fa
, nr_files
++, GFP_KERNEL
);
2456 mmap_read_unlock(mm
);
2461 p
->start
= vma
->vm_start
;
2462 p
->end
= vma
->vm_end
;
2463 p
->mode
= vma
->vm_file
->f_mode
;
2465 mmap_read_unlock(mm
);
2468 for (i
= 0; i
< nr_files
; i
++) {
2469 char buf
[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */
2472 p
= genradix_ptr(&fa
, i
);
2473 len
= snprintf(buf
, sizeof(buf
), "%lx-%lx", p
->start
, p
->end
);
2474 if (!proc_fill_cache(file
, ctx
,
2476 proc_map_files_instantiate
,
2478 (void *)(unsigned long)p
->mode
))
2484 put_task_struct(task
);
2490 static const struct file_operations proc_map_files_operations
= {
2491 .read
= generic_read_dir
,
2492 .iterate_shared
= proc_map_files_readdir
,
2493 .llseek
= generic_file_llseek
,
2496 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
2497 struct timers_private
{
2499 struct task_struct
*task
;
2500 struct sighand_struct
*sighand
;
2501 struct pid_namespace
*ns
;
2502 unsigned long flags
;
2505 static void *timers_start(struct seq_file
*m
, loff_t
*pos
)
2507 struct timers_private
*tp
= m
->private;
2509 tp
->task
= get_pid_task(tp
->pid
, PIDTYPE_PID
);
2511 return ERR_PTR(-ESRCH
);
2513 tp
->sighand
= lock_task_sighand(tp
->task
, &tp
->flags
);
2515 return ERR_PTR(-ESRCH
);
2517 return seq_hlist_start(&tp
->task
->signal
->posix_timers
, *pos
);
2520 static void *timers_next(struct seq_file
*m
, void *v
, loff_t
*pos
)
2522 struct timers_private
*tp
= m
->private;
2523 return seq_hlist_next(v
, &tp
->task
->signal
->posix_timers
, pos
);
2526 static void timers_stop(struct seq_file
*m
, void *v
)
2528 struct timers_private
*tp
= m
->private;
2531 unlock_task_sighand(tp
->task
, &tp
->flags
);
2536 put_task_struct(tp
->task
);
2541 static int show_timer(struct seq_file
*m
, void *v
)
2543 struct k_itimer
*timer
;
2544 struct timers_private
*tp
= m
->private;
2546 static const char * const nstr
[] = {
2547 [SIGEV_SIGNAL
] = "signal",
2548 [SIGEV_NONE
] = "none",
2549 [SIGEV_THREAD
] = "thread",
2552 timer
= hlist_entry((struct hlist_node
*)v
, struct k_itimer
, list
);
2553 notify
= timer
->it_sigev_notify
;
2555 seq_printf(m
, "ID: %d\n", timer
->it_id
);
2556 seq_printf(m
, "signal: %d/%px\n",
2557 timer
->sigq
.info
.si_signo
,
2558 timer
->sigq
.info
.si_value
.sival_ptr
);
2559 seq_printf(m
, "notify: %s/%s.%d\n",
2560 nstr
[notify
& ~SIGEV_THREAD_ID
],
2561 (notify
& SIGEV_THREAD_ID
) ? "tid" : "pid",
2562 pid_nr_ns(timer
->it_pid
, tp
->ns
));
2563 seq_printf(m
, "ClockID: %d\n", timer
->it_clock
);
2568 static const struct seq_operations proc_timers_seq_ops
= {
2569 .start
= timers_start
,
2570 .next
= timers_next
,
2571 .stop
= timers_stop
,
2575 static int proc_timers_open(struct inode
*inode
, struct file
*file
)
2577 struct timers_private
*tp
;
2579 tp
= __seq_open_private(file
, &proc_timers_seq_ops
,
2580 sizeof(struct timers_private
));
2584 tp
->pid
= proc_pid(inode
);
2585 tp
->ns
= proc_pid_ns(inode
->i_sb
);
2589 static const struct file_operations proc_timers_operations
= {
2590 .open
= proc_timers_open
,
2592 .llseek
= seq_lseek
,
2593 .release
= seq_release_private
,
2597 static ssize_t
timerslack_ns_write(struct file
*file
, const char __user
*buf
,
2598 size_t count
, loff_t
*offset
)
2600 struct inode
*inode
= file_inode(file
);
2601 struct task_struct
*p
;
2605 err
= kstrtoull_from_user(buf
, count
, 10, &slack_ns
);
2609 p
= get_proc_task(inode
);
2615 if (!ns_capable(__task_cred(p
)->user_ns
, CAP_SYS_NICE
)) {
2622 err
= security_task_setscheduler(p
);
2630 if (rt_or_dl_task_policy(p
))
2632 else if (slack_ns
== 0)
2633 slack_ns
= p
->default_timer_slack_ns
;
2634 p
->timer_slack_ns
= slack_ns
;
2643 static int timerslack_ns_show(struct seq_file
*m
, void *v
)
2645 struct inode
*inode
= m
->private;
2646 struct task_struct
*p
;
2649 p
= get_proc_task(inode
);
2655 if (!ns_capable(__task_cred(p
)->user_ns
, CAP_SYS_NICE
)) {
2662 err
= security_task_getscheduler(p
);
2668 seq_printf(m
, "%llu\n", p
->timer_slack_ns
);
2677 static int timerslack_ns_open(struct inode
*inode
, struct file
*filp
)
2679 return single_open(filp
, timerslack_ns_show
, inode
);
2682 static const struct file_operations proc_pid_set_timerslack_ns_operations
= {
2683 .open
= timerslack_ns_open
,
2685 .write
= timerslack_ns_write
,
2686 .llseek
= seq_lseek
,
2687 .release
= single_release
,
2690 static struct dentry
*proc_pident_instantiate(struct dentry
*dentry
,
2691 struct task_struct
*task
, const void *ptr
)
2693 const struct pid_entry
*p
= ptr
;
2694 struct inode
*inode
;
2695 struct proc_inode
*ei
;
2697 inode
= proc_pid_make_inode(dentry
->d_sb
, task
, p
->mode
);
2699 return ERR_PTR(-ENOENT
);
2702 if (S_ISDIR(inode
->i_mode
))
2703 set_nlink(inode
, 2); /* Use getattr to fix if necessary */
2705 inode
->i_op
= p
->iop
;
2707 inode
->i_fop
= p
->fop
;
2709 pid_update_inode(task
, inode
);
2710 d_set_d_op(dentry
, &pid_dentry_operations
);
2711 return d_splice_alias(inode
, dentry
);
2714 static struct dentry
*proc_pident_lookup(struct inode
*dir
,
2715 struct dentry
*dentry
,
2716 const struct pid_entry
*p
,
2717 const struct pid_entry
*end
)
2719 struct task_struct
*task
= get_proc_task(dir
);
2720 struct dentry
*res
= ERR_PTR(-ENOENT
);
2726 * Yes, it does not scale. And it should not. Don't add
2727 * new entries into /proc/<tgid>/ without very good reasons.
2729 for (; p
< end
; p
++) {
2730 if (p
->len
!= dentry
->d_name
.len
)
2732 if (!memcmp(dentry
->d_name
.name
, p
->name
, p
->len
)) {
2733 res
= proc_pident_instantiate(dentry
, task
, p
);
2737 put_task_struct(task
);
2742 static int proc_pident_readdir(struct file
*file
, struct dir_context
*ctx
,
2743 const struct pid_entry
*ents
, unsigned int nents
)
2745 struct task_struct
*task
= get_proc_task(file_inode(file
));
2746 const struct pid_entry
*p
;
2751 if (!dir_emit_dots(file
, ctx
))
2754 if (ctx
->pos
>= nents
+ 2)
2757 for (p
= ents
+ (ctx
->pos
- 2); p
< ents
+ nents
; p
++) {
2758 if (!proc_fill_cache(file
, ctx
, p
->name
, p
->len
,
2759 proc_pident_instantiate
, task
, p
))
2764 put_task_struct(task
);
2768 #ifdef CONFIG_SECURITY
2769 static int proc_pid_attr_open(struct inode
*inode
, struct file
*file
)
2771 file
->private_data
= NULL
;
2772 __mem_open(inode
, file
, PTRACE_MODE_READ_FSCREDS
);
2776 static ssize_t
proc_pid_attr_read(struct file
* file
, char __user
* buf
,
2777 size_t count
, loff_t
*ppos
)
2779 struct inode
* inode
= file_inode(file
);
2782 struct task_struct
*task
= get_proc_task(inode
);
2787 length
= security_getprocattr(task
, PROC_I(inode
)->op
.lsmid
,
2788 file
->f_path
.dentry
->d_name
.name
,
2790 put_task_struct(task
);
2792 length
= simple_read_from_buffer(buf
, count
, ppos
, p
, length
);
2797 static ssize_t
proc_pid_attr_write(struct file
* file
, const char __user
* buf
,
2798 size_t count
, loff_t
*ppos
)
2800 struct inode
* inode
= file_inode(file
);
2801 struct task_struct
*task
;
2805 /* A task may only write when it was the opener. */
2806 if (file
->private_data
!= current
->mm
)
2810 task
= pid_task(proc_pid(inode
), PIDTYPE_PID
);
2815 /* A task may only write its own attributes. */
2816 if (current
!= task
) {
2820 /* Prevent changes to overridden credentials. */
2821 if (current_cred() != current_real_cred()) {
2827 if (count
> PAGE_SIZE
)
2830 /* No partial writes. */
2834 page
= memdup_user(buf
, count
);
2840 /* Guard against adverse ptrace interaction */
2841 rv
= mutex_lock_interruptible(¤t
->signal
->cred_guard_mutex
);
2845 rv
= security_setprocattr(PROC_I(inode
)->op
.lsmid
,
2846 file
->f_path
.dentry
->d_name
.name
, page
,
2848 mutex_unlock(¤t
->signal
->cred_guard_mutex
);
2855 static const struct file_operations proc_pid_attr_operations
= {
2856 .open
= proc_pid_attr_open
,
2857 .read
= proc_pid_attr_read
,
2858 .write
= proc_pid_attr_write
,
2859 .llseek
= generic_file_llseek
,
2860 .release
= mem_release
,
2863 #define LSM_DIR_OPS(LSM) \
2864 static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
2865 struct dir_context *ctx) \
2867 return proc_pident_readdir(filp, ctx, \
2868 LSM##_attr_dir_stuff, \
2869 ARRAY_SIZE(LSM##_attr_dir_stuff)); \
2872 static const struct file_operations proc_##LSM##_attr_dir_ops = { \
2873 .read = generic_read_dir, \
2874 .iterate_shared = proc_##LSM##_attr_dir_iterate, \
2875 .llseek = default_llseek, \
2878 static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \
2879 struct dentry *dentry, unsigned int flags) \
2881 return proc_pident_lookup(dir, dentry, \
2882 LSM##_attr_dir_stuff, \
2883 LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \
2886 static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \
2887 .lookup = proc_##LSM##_attr_dir_lookup, \
2888 .getattr = pid_getattr, \
2889 .setattr = proc_setattr, \
2892 #ifdef CONFIG_SECURITY_SMACK
2893 static const struct pid_entry smack_attr_dir_stuff
[] = {
2894 ATTR(LSM_ID_SMACK
, "current", 0666),
2899 #ifdef CONFIG_SECURITY_APPARMOR
2900 static const struct pid_entry apparmor_attr_dir_stuff
[] = {
2901 ATTR(LSM_ID_APPARMOR
, "current", 0666),
2902 ATTR(LSM_ID_APPARMOR
, "prev", 0444),
2903 ATTR(LSM_ID_APPARMOR
, "exec", 0666),
2905 LSM_DIR_OPS(apparmor
);
2908 static const struct pid_entry attr_dir_stuff
[] = {
2909 ATTR(LSM_ID_UNDEF
, "current", 0666),
2910 ATTR(LSM_ID_UNDEF
, "prev", 0444),
2911 ATTR(LSM_ID_UNDEF
, "exec", 0666),
2912 ATTR(LSM_ID_UNDEF
, "fscreate", 0666),
2913 ATTR(LSM_ID_UNDEF
, "keycreate", 0666),
2914 ATTR(LSM_ID_UNDEF
, "sockcreate", 0666),
2915 #ifdef CONFIG_SECURITY_SMACK
2917 proc_smack_attr_dir_inode_ops
, proc_smack_attr_dir_ops
),
2919 #ifdef CONFIG_SECURITY_APPARMOR
2920 DIR("apparmor", 0555,
2921 proc_apparmor_attr_dir_inode_ops
, proc_apparmor_attr_dir_ops
),
2925 static int proc_attr_dir_readdir(struct file
*file
, struct dir_context
*ctx
)
2927 return proc_pident_readdir(file
, ctx
,
2928 attr_dir_stuff
, ARRAY_SIZE(attr_dir_stuff
));
2931 static const struct file_operations proc_attr_dir_operations
= {
2932 .read
= generic_read_dir
,
2933 .iterate_shared
= proc_attr_dir_readdir
,
2934 .llseek
= generic_file_llseek
,
2937 static struct dentry
*proc_attr_dir_lookup(struct inode
*dir
,
2938 struct dentry
*dentry
, unsigned int flags
)
2940 return proc_pident_lookup(dir
, dentry
,
2942 attr_dir_stuff
+ ARRAY_SIZE(attr_dir_stuff
));
2945 static const struct inode_operations proc_attr_dir_inode_operations
= {
2946 .lookup
= proc_attr_dir_lookup
,
2947 .getattr
= pid_getattr
,
2948 .setattr
= proc_setattr
,
2953 #ifdef CONFIG_ELF_CORE
2954 static ssize_t
proc_coredump_filter_read(struct file
*file
, char __user
*buf
,
2955 size_t count
, loff_t
*ppos
)
2957 struct task_struct
*task
= get_proc_task(file_inode(file
));
2958 struct mm_struct
*mm
;
2959 char buffer
[PROC_NUMBUF
];
2967 mm
= get_task_mm(task
);
2969 len
= snprintf(buffer
, sizeof(buffer
), "%08lx\n",
2970 ((mm
->flags
& MMF_DUMP_FILTER_MASK
) >>
2971 MMF_DUMP_FILTER_SHIFT
));
2973 ret
= simple_read_from_buffer(buf
, count
, ppos
, buffer
, len
);
2976 put_task_struct(task
);
2981 static ssize_t
proc_coredump_filter_write(struct file
*file
,
2982 const char __user
*buf
,
2986 struct task_struct
*task
;
2987 struct mm_struct
*mm
;
2993 ret
= kstrtouint_from_user(buf
, count
, 0, &val
);
2998 task
= get_proc_task(file_inode(file
));
3002 mm
= get_task_mm(task
);
3007 for (i
= 0, mask
= 1; i
< MMF_DUMP_FILTER_BITS
; i
++, mask
<<= 1) {
3009 set_bit(i
+ MMF_DUMP_FILTER_SHIFT
, &mm
->flags
);
3011 clear_bit(i
+ MMF_DUMP_FILTER_SHIFT
, &mm
->flags
);
3016 put_task_struct(task
);
3023 static const struct file_operations proc_coredump_filter_operations
= {
3024 .read
= proc_coredump_filter_read
,
3025 .write
= proc_coredump_filter_write
,
3026 .llseek
= generic_file_llseek
,
3030 #ifdef CONFIG_TASK_IO_ACCOUNTING
3031 static int do_io_accounting(struct task_struct
*task
, struct seq_file
*m
, int whole
)
3033 struct task_io_accounting acct
;
3036 result
= down_read_killable(&task
->signal
->exec_update_lock
);
3040 if (!ptrace_may_access(task
, PTRACE_MODE_READ_FSCREDS
)) {
3046 struct signal_struct
*sig
= task
->signal
;
3047 struct task_struct
*t
;
3048 unsigned int seq
= 1;
3049 unsigned long flags
;
3053 seq
++; /* 2 on the 1st/lockless path, otherwise odd */
3054 flags
= read_seqbegin_or_lock_irqsave(&sig
->stats_lock
, &seq
);
3057 __for_each_thread(sig
, t
)
3058 task_io_accounting_add(&acct
, &t
->ioac
);
3060 } while (need_seqretry(&sig
->stats_lock
, seq
));
3061 done_seqretry_irqrestore(&sig
->stats_lock
, seq
, flags
);
3072 "read_bytes: %llu\n"
3073 "write_bytes: %llu\n"
3074 "cancelled_write_bytes: %llu\n",
3075 (unsigned long long)acct
.rchar
,
3076 (unsigned long long)acct
.wchar
,
3077 (unsigned long long)acct
.syscr
,
3078 (unsigned long long)acct
.syscw
,
3079 (unsigned long long)acct
.read_bytes
,
3080 (unsigned long long)acct
.write_bytes
,
3081 (unsigned long long)acct
.cancelled_write_bytes
);
3085 up_read(&task
->signal
->exec_update_lock
);
3089 static int proc_tid_io_accounting(struct seq_file
*m
, struct pid_namespace
*ns
,
3090 struct pid
*pid
, struct task_struct
*task
)
3092 return do_io_accounting(task
, m
, 0);
3095 static int proc_tgid_io_accounting(struct seq_file
*m
, struct pid_namespace
*ns
,
3096 struct pid
*pid
, struct task_struct
*task
)
3098 return do_io_accounting(task
, m
, 1);
3100 #endif /* CONFIG_TASK_IO_ACCOUNTING */
3102 #ifdef CONFIG_USER_NS
3103 static int proc_id_map_open(struct inode
*inode
, struct file
*file
,
3104 const struct seq_operations
*seq_ops
)
3106 struct user_namespace
*ns
= NULL
;
3107 struct task_struct
*task
;
3108 struct seq_file
*seq
;
3111 task
= get_proc_task(inode
);
3114 ns
= get_user_ns(task_cred_xxx(task
, user_ns
));
3116 put_task_struct(task
);
3121 ret
= seq_open(file
, seq_ops
);
3125 seq
= file
->private_data
;
3135 static int proc_id_map_release(struct inode
*inode
, struct file
*file
)
3137 struct seq_file
*seq
= file
->private_data
;
3138 struct user_namespace
*ns
= seq
->private;
3140 return seq_release(inode
, file
);
3143 static int proc_uid_map_open(struct inode
*inode
, struct file
*file
)
3145 return proc_id_map_open(inode
, file
, &proc_uid_seq_operations
);
3148 static int proc_gid_map_open(struct inode
*inode
, struct file
*file
)
3150 return proc_id_map_open(inode
, file
, &proc_gid_seq_operations
);
3153 static int proc_projid_map_open(struct inode
*inode
, struct file
*file
)
3155 return proc_id_map_open(inode
, file
, &proc_projid_seq_operations
);
3158 static const struct file_operations proc_uid_map_operations
= {
3159 .open
= proc_uid_map_open
,
3160 .write
= proc_uid_map_write
,
3162 .llseek
= seq_lseek
,
3163 .release
= proc_id_map_release
,
3166 static const struct file_operations proc_gid_map_operations
= {
3167 .open
= proc_gid_map_open
,
3168 .write
= proc_gid_map_write
,
3170 .llseek
= seq_lseek
,
3171 .release
= proc_id_map_release
,
3174 static const struct file_operations proc_projid_map_operations
= {
3175 .open
= proc_projid_map_open
,
3176 .write
= proc_projid_map_write
,
3178 .llseek
= seq_lseek
,
3179 .release
= proc_id_map_release
,
3182 static int proc_setgroups_open(struct inode
*inode
, struct file
*file
)
3184 struct user_namespace
*ns
= NULL
;
3185 struct task_struct
*task
;
3189 task
= get_proc_task(inode
);
3192 ns
= get_user_ns(task_cred_xxx(task
, user_ns
));
3194 put_task_struct(task
);
3199 if (file
->f_mode
& FMODE_WRITE
) {
3201 if (!ns_capable(ns
, CAP_SYS_ADMIN
))
3205 ret
= single_open(file
, &proc_setgroups_show
, ns
);
3216 static int proc_setgroups_release(struct inode
*inode
, struct file
*file
)
3218 struct seq_file
*seq
= file
->private_data
;
3219 struct user_namespace
*ns
= seq
->private;
3220 int ret
= single_release(inode
, file
);
3225 static const struct file_operations proc_setgroups_operations
= {
3226 .open
= proc_setgroups_open
,
3227 .write
= proc_setgroups_write
,
3229 .llseek
= seq_lseek
,
3230 .release
= proc_setgroups_release
,
3232 #endif /* CONFIG_USER_NS */
3234 static int proc_pid_personality(struct seq_file
*m
, struct pid_namespace
*ns
,
3235 struct pid
*pid
, struct task_struct
*task
)
3237 int err
= lock_trace(task
);
3239 seq_printf(m
, "%08x\n", task
->personality
);
3245 #ifdef CONFIG_LIVEPATCH
3246 static int proc_pid_patch_state(struct seq_file
*m
, struct pid_namespace
*ns
,
3247 struct pid
*pid
, struct task_struct
*task
)
3249 seq_printf(m
, "%d\n", task
->patch_state
);
3252 #endif /* CONFIG_LIVEPATCH */
3255 static int proc_pid_ksm_merging_pages(struct seq_file
*m
, struct pid_namespace
*ns
,
3256 struct pid
*pid
, struct task_struct
*task
)
3258 struct mm_struct
*mm
;
3260 mm
= get_task_mm(task
);
3262 seq_printf(m
, "%lu\n", mm
->ksm_merging_pages
);
3268 static int proc_pid_ksm_stat(struct seq_file
*m
, struct pid_namespace
*ns
,
3269 struct pid
*pid
, struct task_struct
*task
)
3271 struct mm_struct
*mm
;
3273 mm
= get_task_mm(task
);
3275 seq_printf(m
, "ksm_rmap_items %lu\n", mm
->ksm_rmap_items
);
3276 seq_printf(m
, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm
));
3277 seq_printf(m
, "ksm_merging_pages %lu\n", mm
->ksm_merging_pages
);
3278 seq_printf(m
, "ksm_process_profit %ld\n", ksm_process_profit(mm
));
3284 #endif /* CONFIG_KSM */
3286 #ifdef CONFIG_STACKLEAK_METRICS
3287 static int proc_stack_depth(struct seq_file
*m
, struct pid_namespace
*ns
,
3288 struct pid
*pid
, struct task_struct
*task
)
3290 unsigned long prev_depth
= THREAD_SIZE
-
3291 (task
->prev_lowest_stack
& (THREAD_SIZE
- 1));
3292 unsigned long depth
= THREAD_SIZE
-
3293 (task
->lowest_stack
& (THREAD_SIZE
- 1));
3295 seq_printf(m
, "previous stack depth: %lu\nstack depth: %lu\n",
3299 #endif /* CONFIG_STACKLEAK_METRICS */
3304 static const struct file_operations proc_task_operations
;
3305 static const struct inode_operations proc_task_inode_operations
;
3307 static const struct pid_entry tgid_base_stuff
[] = {
3308 DIR("task", S_IRUGO
|S_IXUGO
, proc_task_inode_operations
, proc_task_operations
),
3309 DIR("fd", S_IRUSR
|S_IXUSR
, proc_fd_inode_operations
, proc_fd_operations
),
3310 DIR("map_files", S_IRUSR
|S_IXUSR
, proc_map_files_inode_operations
, proc_map_files_operations
),
3311 DIR("fdinfo", S_IRUGO
|S_IXUGO
, proc_fdinfo_inode_operations
, proc_fdinfo_operations
),
3312 DIR("ns", S_IRUSR
|S_IXUGO
, proc_ns_dir_inode_operations
, proc_ns_dir_operations
),
3314 DIR("net", S_IRUGO
|S_IXUGO
, proc_net_inode_operations
, proc_net_operations
),
3316 REG("environ", S_IRUSR
, proc_environ_operations
),
3317 REG("auxv", S_IRUSR
, proc_auxv_operations
),
3318 ONE("status", S_IRUGO
, proc_pid_status
),
3319 ONE("personality", S_IRUSR
, proc_pid_personality
),
3320 ONE("limits", S_IRUGO
, proc_pid_limits
),
3321 #ifdef CONFIG_SCHED_DEBUG
3322 REG("sched", S_IRUGO
|S_IWUSR
, proc_pid_sched_operations
),
3324 #ifdef CONFIG_SCHED_AUTOGROUP
3325 REG("autogroup", S_IRUGO
|S_IWUSR
, proc_pid_sched_autogroup_operations
),
3327 #ifdef CONFIG_TIME_NS
3328 REG("timens_offsets", S_IRUGO
|S_IWUSR
, proc_timens_offsets_operations
),
3330 REG("comm", S_IRUGO
|S_IWUSR
, proc_pid_set_comm_operations
),
3331 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3332 ONE("syscall", S_IRUSR
, proc_pid_syscall
),
3334 REG("cmdline", S_IRUGO
, proc_pid_cmdline_ops
),
3335 ONE("stat", S_IRUGO
, proc_tgid_stat
),
3336 ONE("statm", S_IRUGO
, proc_pid_statm
),
3337 REG("maps", S_IRUGO
, proc_pid_maps_operations
),
3339 REG("numa_maps", S_IRUGO
, proc_pid_numa_maps_operations
),
3341 REG("mem", S_IRUSR
|S_IWUSR
, proc_mem_operations
),
3342 LNK("cwd", proc_cwd_link
),
3343 LNK("root", proc_root_link
),
3344 LNK("exe", proc_exe_link
),
3345 REG("mounts", S_IRUGO
, proc_mounts_operations
),
3346 REG("mountinfo", S_IRUGO
, proc_mountinfo_operations
),
3347 REG("mountstats", S_IRUSR
, proc_mountstats_operations
),
3348 #ifdef CONFIG_PROC_PAGE_MONITOR
3349 REG("clear_refs", S_IWUSR
, proc_clear_refs_operations
),
3350 REG("smaps", S_IRUGO
, proc_pid_smaps_operations
),
3351 REG("smaps_rollup", S_IRUGO
, proc_pid_smaps_rollup_operations
),
3352 REG("pagemap", S_IRUSR
, proc_pagemap_operations
),
3354 #ifdef CONFIG_SECURITY
3355 DIR("attr", S_IRUGO
|S_IXUGO
, proc_attr_dir_inode_operations
, proc_attr_dir_operations
),
3357 #ifdef CONFIG_KALLSYMS
3358 ONE("wchan", S_IRUGO
, proc_pid_wchan
),
3360 #ifdef CONFIG_STACKTRACE
3361 ONE("stack", S_IRUSR
, proc_pid_stack
),
3363 #ifdef CONFIG_SCHED_INFO
3364 ONE("schedstat", S_IRUGO
, proc_pid_schedstat
),
3366 #ifdef CONFIG_LATENCYTOP
3367 REG("latency", S_IRUGO
, proc_lstats_operations
),
3369 #ifdef CONFIG_PROC_PID_CPUSET
3370 ONE("cpuset", S_IRUGO
, proc_cpuset_show
),
3372 #ifdef CONFIG_CGROUPS
3373 ONE("cgroup", S_IRUGO
, proc_cgroup_show
),
3375 #ifdef CONFIG_PROC_CPU_RESCTRL
3376 ONE("cpu_resctrl_groups", S_IRUGO
, proc_resctrl_show
),
3378 ONE("oom_score", S_IRUGO
, proc_oom_score
),
3379 REG("oom_adj", S_IRUGO
|S_IWUSR
, proc_oom_adj_operations
),
3380 REG("oom_score_adj", S_IRUGO
|S_IWUSR
, proc_oom_score_adj_operations
),
3382 REG("loginuid", S_IWUSR
|S_IRUGO
, proc_loginuid_operations
),
3383 REG("sessionid", S_IRUGO
, proc_sessionid_operations
),
3385 #ifdef CONFIG_FAULT_INJECTION
3386 REG("make-it-fail", S_IRUGO
|S_IWUSR
, proc_fault_inject_operations
),
3387 REG("fail-nth", 0644, proc_fail_nth_operations
),
3389 #ifdef CONFIG_ELF_CORE
3390 REG("coredump_filter", S_IRUGO
|S_IWUSR
, proc_coredump_filter_operations
),
3392 #ifdef CONFIG_TASK_IO_ACCOUNTING
3393 ONE("io", S_IRUSR
, proc_tgid_io_accounting
),
3395 #ifdef CONFIG_USER_NS
3396 REG("uid_map", S_IRUGO
|S_IWUSR
, proc_uid_map_operations
),
3397 REG("gid_map", S_IRUGO
|S_IWUSR
, proc_gid_map_operations
),
3398 REG("projid_map", S_IRUGO
|S_IWUSR
, proc_projid_map_operations
),
3399 REG("setgroups", S_IRUGO
|S_IWUSR
, proc_setgroups_operations
),
3401 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
3402 REG("timers", S_IRUGO
, proc_timers_operations
),
3404 REG("timerslack_ns", S_IRUGO
|S_IWUGO
, proc_pid_set_timerslack_ns_operations
),
3405 #ifdef CONFIG_LIVEPATCH
3406 ONE("patch_state", S_IRUSR
, proc_pid_patch_state
),
3408 #ifdef CONFIG_STACKLEAK_METRICS
3409 ONE("stack_depth", S_IRUGO
, proc_stack_depth
),
3411 #ifdef CONFIG_PROC_PID_ARCH_STATUS
3412 ONE("arch_status", S_IRUGO
, proc_pid_arch_status
),
3414 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
3415 ONE("seccomp_cache", S_IRUSR
, proc_pid_seccomp_cache
),
3418 ONE("ksm_merging_pages", S_IRUSR
, proc_pid_ksm_merging_pages
),
3419 ONE("ksm_stat", S_IRUSR
, proc_pid_ksm_stat
),
3423 static int proc_tgid_base_readdir(struct file
*file
, struct dir_context
*ctx
)
3425 return proc_pident_readdir(file
, ctx
,
3426 tgid_base_stuff
, ARRAY_SIZE(tgid_base_stuff
));
3429 static const struct file_operations proc_tgid_base_operations
= {
3430 .read
= generic_read_dir
,
3431 .iterate_shared
= proc_tgid_base_readdir
,
3432 .llseek
= generic_file_llseek
,
3435 struct pid
*tgid_pidfd_to_pid(const struct file
*file
)
3437 if (file
->f_op
!= &proc_tgid_base_operations
)
3438 return ERR_PTR(-EBADF
);
3440 return proc_pid(file_inode(file
));
3443 static struct dentry
*proc_tgid_base_lookup(struct inode
*dir
, struct dentry
*dentry
, unsigned int flags
)
3445 return proc_pident_lookup(dir
, dentry
,
3447 tgid_base_stuff
+ ARRAY_SIZE(tgid_base_stuff
));
3450 static const struct inode_operations proc_tgid_base_inode_operations
= {
3451 .lookup
= proc_tgid_base_lookup
,
3452 .getattr
= pid_getattr
,
3453 .setattr
= proc_setattr
,
3454 .permission
= proc_pid_permission
,
3458 * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache.
3459 * @pid: pid that should be flushed.
3461 * This function walks a list of inodes (that belong to any proc
3462 * filesystem) that are attached to the pid and flushes them from
3465 * It is safe and reasonable to cache /proc entries for a task until
3466 * that task exits. After that they just clog up the dcache with
3467 * useless entries, possibly causing useful dcache entries to be
3468 * flushed instead. This routine is provided to flush those useless
3469 * dcache entries when a process is reaped.
3471 * NOTE: This routine is just an optimization so it does not guarantee
3472 * that no dcache entries will exist after a process is reaped
3473 * it just makes it very unlikely that any will persist.
3476 void proc_flush_pid(struct pid
*pid
)
3478 proc_invalidate_siblings_dcache(&pid
->inodes
, &pid
->lock
);
3481 static struct dentry
*proc_pid_instantiate(struct dentry
* dentry
,
3482 struct task_struct
*task
, const void *ptr
)
3484 struct inode
*inode
;
3486 inode
= proc_pid_make_base_inode(dentry
->d_sb
, task
,
3487 S_IFDIR
| S_IRUGO
| S_IXUGO
);
3489 return ERR_PTR(-ENOENT
);
3491 inode
->i_op
= &proc_tgid_base_inode_operations
;
3492 inode
->i_fop
= &proc_tgid_base_operations
;
3493 inode
->i_flags
|=S_IMMUTABLE
;
3495 set_nlink(inode
, nlink_tgid
);
3496 pid_update_inode(task
, inode
);
3498 d_set_d_op(dentry
, &pid_dentry_operations
);
3499 return d_splice_alias(inode
, dentry
);
3502 struct dentry
*proc_pid_lookup(struct dentry
*dentry
, unsigned int flags
)
3504 struct task_struct
*task
;
3506 struct proc_fs_info
*fs_info
;
3507 struct pid_namespace
*ns
;
3508 struct dentry
*result
= ERR_PTR(-ENOENT
);
3510 tgid
= name_to_int(&dentry
->d_name
);
3514 fs_info
= proc_sb_info(dentry
->d_sb
);
3515 ns
= fs_info
->pid_ns
;
3517 task
= find_task_by_pid_ns(tgid
, ns
);
3519 get_task_struct(task
);
3524 /* Limit procfs to only ptraceable tasks */
3525 if (fs_info
->hide_pid
== HIDEPID_NOT_PTRACEABLE
) {
3526 if (!has_pid_permissions(fs_info
, task
, HIDEPID_NO_ACCESS
))
3530 result
= proc_pid_instantiate(dentry
, task
, NULL
);
3532 put_task_struct(task
);
3538 * Find the first task with tgid >= tgid
3543 struct task_struct
*task
;
3545 static struct tgid_iter
next_tgid(struct pid_namespace
*ns
, struct tgid_iter iter
)
3550 put_task_struct(iter
.task
);
3554 pid
= find_ge_pid(iter
.tgid
, ns
);
3556 iter
.tgid
= pid_nr_ns(pid
, ns
);
3557 iter
.task
= pid_task(pid
, PIDTYPE_TGID
);
3562 get_task_struct(iter
.task
);
3568 #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
3570 /* for the /proc/ directory itself, after non-process stuff has been done */
3571 int proc_pid_readdir(struct file
*file
, struct dir_context
*ctx
)
3573 struct tgid_iter iter
;
3574 struct proc_fs_info
*fs_info
= proc_sb_info(file_inode(file
)->i_sb
);
3575 struct pid_namespace
*ns
= proc_pid_ns(file_inode(file
)->i_sb
);
3576 loff_t pos
= ctx
->pos
;
3578 if (pos
>= PID_MAX_LIMIT
+ TGID_OFFSET
)
3581 if (pos
== TGID_OFFSET
- 2) {
3582 struct inode
*inode
= d_inode(fs_info
->proc_self
);
3583 if (!dir_emit(ctx
, "self", 4, inode
->i_ino
, DT_LNK
))
3585 ctx
->pos
= pos
= pos
+ 1;
3587 if (pos
== TGID_OFFSET
- 1) {
3588 struct inode
*inode
= d_inode(fs_info
->proc_thread_self
);
3589 if (!dir_emit(ctx
, "thread-self", 11, inode
->i_ino
, DT_LNK
))
3591 ctx
->pos
= pos
= pos
+ 1;
3593 iter
.tgid
= pos
- TGID_OFFSET
;
3595 for (iter
= next_tgid(ns
, iter
);
3597 iter
.tgid
+= 1, iter
= next_tgid(ns
, iter
)) {
3602 if (!has_pid_permissions(fs_info
, iter
.task
, HIDEPID_INVISIBLE
))
3605 len
= snprintf(name
, sizeof(name
), "%u", iter
.tgid
);
3606 ctx
->pos
= iter
.tgid
+ TGID_OFFSET
;
3607 if (!proc_fill_cache(file
, ctx
, name
, len
,
3608 proc_pid_instantiate
, iter
.task
, NULL
)) {
3609 put_task_struct(iter
.task
);
3613 ctx
->pos
= PID_MAX_LIMIT
+ TGID_OFFSET
;
3618 * proc_tid_comm_permission is a special permission function exclusively
3619 * used for the node /proc/<pid>/task/<tid>/comm.
3620 * It bypasses generic permission checks in the case where a task of the same
3621 * task group attempts to access the node.
3622 * The rationale behind this is that glibc and bionic access this node for
3623 * cross thread naming (pthread_set/getname_np(!self)). However, if
3624 * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0,
3625 * which locks out the cross thread naming implementation.
3626 * This function makes sure that the node is always accessible for members of
3627 * same thread group.
3629 static int proc_tid_comm_permission(struct mnt_idmap
*idmap
,
3630 struct inode
*inode
, int mask
)
3632 bool is_same_tgroup
;
3633 struct task_struct
*task
;
3635 task
= get_proc_task(inode
);
3638 is_same_tgroup
= same_thread_group(current
, task
);
3639 put_task_struct(task
);
3641 if (likely(is_same_tgroup
&& !(mask
& MAY_EXEC
))) {
3642 /* This file (/proc/<pid>/task/<tid>/comm) can always be
3643 * read or written by the members of the corresponding
3649 return generic_permission(&nop_mnt_idmap
, inode
, mask
);
3652 static const struct inode_operations proc_tid_comm_inode_operations
= {
3653 .setattr
= proc_setattr
,
3654 .permission
= proc_tid_comm_permission
,
3660 static const struct pid_entry tid_base_stuff
[] = {
3661 DIR("fd", S_IRUSR
|S_IXUSR
, proc_fd_inode_operations
, proc_fd_operations
),
3662 DIR("fdinfo", S_IRUGO
|S_IXUGO
, proc_fdinfo_inode_operations
, proc_fdinfo_operations
),
3663 DIR("ns", S_IRUSR
|S_IXUGO
, proc_ns_dir_inode_operations
, proc_ns_dir_operations
),
3665 DIR("net", S_IRUGO
|S_IXUGO
, proc_net_inode_operations
, proc_net_operations
),
3667 REG("environ", S_IRUSR
, proc_environ_operations
),
3668 REG("auxv", S_IRUSR
, proc_auxv_operations
),
3669 ONE("status", S_IRUGO
, proc_pid_status
),
3670 ONE("personality", S_IRUSR
, proc_pid_personality
),
3671 ONE("limits", S_IRUGO
, proc_pid_limits
),
3672 #ifdef CONFIG_SCHED_DEBUG
3673 REG("sched", S_IRUGO
|S_IWUSR
, proc_pid_sched_operations
),
3675 NOD("comm", S_IFREG
|S_IRUGO
|S_IWUSR
,
3676 &proc_tid_comm_inode_operations
,
3677 &proc_pid_set_comm_operations
, {}),
3678 #ifdef CONFIG_HAVE_ARCH_TRACEHOOK
3679 ONE("syscall", S_IRUSR
, proc_pid_syscall
),
3681 REG("cmdline", S_IRUGO
, proc_pid_cmdline_ops
),
3682 ONE("stat", S_IRUGO
, proc_tid_stat
),
3683 ONE("statm", S_IRUGO
, proc_pid_statm
),
3684 REG("maps", S_IRUGO
, proc_pid_maps_operations
),
3685 #ifdef CONFIG_PROC_CHILDREN
3686 REG("children", S_IRUGO
, proc_tid_children_operations
),
3689 REG("numa_maps", S_IRUGO
, proc_pid_numa_maps_operations
),
3691 REG("mem", S_IRUSR
|S_IWUSR
, proc_mem_operations
),
3692 LNK("cwd", proc_cwd_link
),
3693 LNK("root", proc_root_link
),
3694 LNK("exe", proc_exe_link
),
3695 REG("mounts", S_IRUGO
, proc_mounts_operations
),
3696 REG("mountinfo", S_IRUGO
, proc_mountinfo_operations
),
3697 #ifdef CONFIG_PROC_PAGE_MONITOR
3698 REG("clear_refs", S_IWUSR
, proc_clear_refs_operations
),
3699 REG("smaps", S_IRUGO
, proc_pid_smaps_operations
),
3700 REG("smaps_rollup", S_IRUGO
, proc_pid_smaps_rollup_operations
),
3701 REG("pagemap", S_IRUSR
, proc_pagemap_operations
),
3703 #ifdef CONFIG_SECURITY
3704 DIR("attr", S_IRUGO
|S_IXUGO
, proc_attr_dir_inode_operations
, proc_attr_dir_operations
),
3706 #ifdef CONFIG_KALLSYMS
3707 ONE("wchan", S_IRUGO
, proc_pid_wchan
),
3709 #ifdef CONFIG_STACKTRACE
3710 ONE("stack", S_IRUSR
, proc_pid_stack
),
3712 #ifdef CONFIG_SCHED_INFO
3713 ONE("schedstat", S_IRUGO
, proc_pid_schedstat
),
3715 #ifdef CONFIG_LATENCYTOP
3716 REG("latency", S_IRUGO
, proc_lstats_operations
),
3718 #ifdef CONFIG_PROC_PID_CPUSET
3719 ONE("cpuset", S_IRUGO
, proc_cpuset_show
),
3721 #ifdef CONFIG_CGROUPS
3722 ONE("cgroup", S_IRUGO
, proc_cgroup_show
),
3724 #ifdef CONFIG_PROC_CPU_RESCTRL
3725 ONE("cpu_resctrl_groups", S_IRUGO
, proc_resctrl_show
),
3727 ONE("oom_score", S_IRUGO
, proc_oom_score
),
3728 REG("oom_adj", S_IRUGO
|S_IWUSR
, proc_oom_adj_operations
),
3729 REG("oom_score_adj", S_IRUGO
|S_IWUSR
, proc_oom_score_adj_operations
),
3731 REG("loginuid", S_IWUSR
|S_IRUGO
, proc_loginuid_operations
),
3732 REG("sessionid", S_IRUGO
, proc_sessionid_operations
),
3734 #ifdef CONFIG_FAULT_INJECTION
3735 REG("make-it-fail", S_IRUGO
|S_IWUSR
, proc_fault_inject_operations
),
3736 REG("fail-nth", 0644, proc_fail_nth_operations
),
3738 #ifdef CONFIG_TASK_IO_ACCOUNTING
3739 ONE("io", S_IRUSR
, proc_tid_io_accounting
),
3741 #ifdef CONFIG_USER_NS
3742 REG("uid_map", S_IRUGO
|S_IWUSR
, proc_uid_map_operations
),
3743 REG("gid_map", S_IRUGO
|S_IWUSR
, proc_gid_map_operations
),
3744 REG("projid_map", S_IRUGO
|S_IWUSR
, proc_projid_map_operations
),
3745 REG("setgroups", S_IRUGO
|S_IWUSR
, proc_setgroups_operations
),
3747 #ifdef CONFIG_LIVEPATCH
3748 ONE("patch_state", S_IRUSR
, proc_pid_patch_state
),
3750 #ifdef CONFIG_PROC_PID_ARCH_STATUS
3751 ONE("arch_status", S_IRUGO
, proc_pid_arch_status
),
3753 #ifdef CONFIG_SECCOMP_CACHE_DEBUG
3754 ONE("seccomp_cache", S_IRUSR
, proc_pid_seccomp_cache
),
3757 ONE("ksm_merging_pages", S_IRUSR
, proc_pid_ksm_merging_pages
),
3758 ONE("ksm_stat", S_IRUSR
, proc_pid_ksm_stat
),
3762 static int proc_tid_base_readdir(struct file
*file
, struct dir_context
*ctx
)
3764 return proc_pident_readdir(file
, ctx
,
3765 tid_base_stuff
, ARRAY_SIZE(tid_base_stuff
));
3768 static struct dentry
*proc_tid_base_lookup(struct inode
*dir
, struct dentry
*dentry
, unsigned int flags
)
3770 return proc_pident_lookup(dir
, dentry
,
3772 tid_base_stuff
+ ARRAY_SIZE(tid_base_stuff
));
3775 static const struct file_operations proc_tid_base_operations
= {
3776 .read
= generic_read_dir
,
3777 .iterate_shared
= proc_tid_base_readdir
,
3778 .llseek
= generic_file_llseek
,
3781 static const struct inode_operations proc_tid_base_inode_operations
= {
3782 .lookup
= proc_tid_base_lookup
,
3783 .getattr
= pid_getattr
,
3784 .setattr
= proc_setattr
,
3787 static struct dentry
*proc_task_instantiate(struct dentry
*dentry
,
3788 struct task_struct
*task
, const void *ptr
)
3790 struct inode
*inode
;
3791 inode
= proc_pid_make_base_inode(dentry
->d_sb
, task
,
3792 S_IFDIR
| S_IRUGO
| S_IXUGO
);
3794 return ERR_PTR(-ENOENT
);
3796 inode
->i_op
= &proc_tid_base_inode_operations
;
3797 inode
->i_fop
= &proc_tid_base_operations
;
3798 inode
->i_flags
|= S_IMMUTABLE
;
3800 set_nlink(inode
, nlink_tid
);
3801 pid_update_inode(task
, inode
);
3803 d_set_d_op(dentry
, &pid_dentry_operations
);
3804 return d_splice_alias(inode
, dentry
);
3807 static struct dentry
*proc_task_lookup(struct inode
*dir
, struct dentry
* dentry
, unsigned int flags
)
3809 struct task_struct
*task
;
3810 struct task_struct
*leader
= get_proc_task(dir
);
3812 struct proc_fs_info
*fs_info
;
3813 struct pid_namespace
*ns
;
3814 struct dentry
*result
= ERR_PTR(-ENOENT
);
3819 tid
= name_to_int(&dentry
->d_name
);
3823 fs_info
= proc_sb_info(dentry
->d_sb
);
3824 ns
= fs_info
->pid_ns
;
3826 task
= find_task_by_pid_ns(tid
, ns
);
3828 get_task_struct(task
);
3832 if (!same_thread_group(leader
, task
))
3835 result
= proc_task_instantiate(dentry
, task
, NULL
);
3837 put_task_struct(task
);
3839 put_task_struct(leader
);
3845 * Find the first tid of a thread group to return to user space.
3847 * Usually this is just the thread group leader, but if the users
3848 * buffer was too small or there was a seek into the middle of the
3849 * directory we have more work todo.
3851 * In the case of a short read we start with find_task_by_pid.
3853 * In the case of a seek we start with the leader and walk nr
3856 static struct task_struct
*first_tid(struct pid
*pid
, int tid
, loff_t f_pos
,
3857 struct pid_namespace
*ns
)
3859 struct task_struct
*pos
, *task
;
3860 unsigned long nr
= f_pos
;
3862 if (nr
!= f_pos
) /* 32bit overflow? */
3866 task
= pid_task(pid
, PIDTYPE_PID
);
3870 /* Attempt to start with the tid of a thread */
3872 pos
= find_task_by_pid_ns(tid
, ns
);
3873 if (pos
&& same_thread_group(pos
, task
))
3877 /* If nr exceeds the number of threads there is nothing todo */
3878 if (nr
>= get_nr_threads(task
))
3881 /* If we haven't found our starting place yet start
3882 * with the leader and walk nr threads forward.
3884 for_each_thread(task
, pos
) {
3892 get_task_struct(pos
);
3899 * Find the next thread in the thread list.
3900 * Return NULL if there is an error or no next thread.
3902 * The reference to the input task_struct is released.
3904 static struct task_struct
*next_tid(struct task_struct
*start
)
3906 struct task_struct
*pos
= NULL
;
3908 if (pid_alive(start
)) {
3909 pos
= __next_thread(start
);
3911 get_task_struct(pos
);
3914 put_task_struct(start
);
3918 /* for the /proc/TGID/task/ directories */
3919 static int proc_task_readdir(struct file
*file
, struct dir_context
*ctx
)
3921 struct inode
*inode
= file_inode(file
);
3922 struct task_struct
*task
;
3923 struct pid_namespace
*ns
;
3926 if (proc_inode_is_dead(inode
))
3929 if (!dir_emit_dots(file
, ctx
))
3932 /* We cache the tgid value that the last readdir call couldn't
3933 * return and lseek resets it to 0.
3935 ns
= proc_pid_ns(inode
->i_sb
);
3936 tid
= (int)(intptr_t)file
->private_data
;
3937 file
->private_data
= NULL
;
3938 for (task
= first_tid(proc_pid(inode
), tid
, ctx
->pos
- 2, ns
);
3940 task
= next_tid(task
), ctx
->pos
++) {
3944 tid
= task_pid_nr_ns(task
, ns
);
3946 continue; /* The task has just exited. */
3947 len
= snprintf(name
, sizeof(name
), "%u", tid
);
3948 if (!proc_fill_cache(file
, ctx
, name
, len
,
3949 proc_task_instantiate
, task
, NULL
)) {
3950 /* returning this tgid failed, save it as the first
3951 * pid for the next readir call */
3952 file
->private_data
= (void *)(intptr_t)tid
;
3953 put_task_struct(task
);
3961 static int proc_task_getattr(struct mnt_idmap
*idmap
,
3962 const struct path
*path
, struct kstat
*stat
,
3963 u32 request_mask
, unsigned int query_flags
)
3965 struct inode
*inode
= d_inode(path
->dentry
);
3966 struct task_struct
*p
= get_proc_task(inode
);
3967 generic_fillattr(&nop_mnt_idmap
, request_mask
, inode
, stat
);
3970 stat
->nlink
+= get_nr_threads(p
);
3978 * proc_task_readdir() set @file->private_data to a positive integer
3979 * value, so casting that to u64 is safe. generic_llseek_cookie() will
3980 * set @cookie to 0, so casting to an int is safe. The WARN_ON_ONCE() is
3981 * here to catch any unexpected change in behavior either in
3982 * proc_task_readdir() or generic_llseek_cookie().
3984 static loff_t
proc_dir_llseek(struct file
*file
, loff_t offset
, int whence
)
3986 u64 cookie
= (u64
)(intptr_t)file
->private_data
;
3989 off
= generic_llseek_cookie(file
, offset
, whence
, &cookie
);
3990 WARN_ON_ONCE(cookie
> INT_MAX
);
3991 file
->private_data
= (void *)(intptr_t)cookie
; /* serialized by f_pos_lock */
3995 static const struct inode_operations proc_task_inode_operations
= {
3996 .lookup
= proc_task_lookup
,
3997 .getattr
= proc_task_getattr
,
3998 .setattr
= proc_setattr
,
3999 .permission
= proc_pid_permission
,
4002 static const struct file_operations proc_task_operations
= {
4003 .read
= generic_read_dir
,
4004 .iterate_shared
= proc_task_readdir
,
4005 .llseek
= proc_dir_llseek
,
4008 void __init
set_proc_pid_nlink(void)
4010 nlink_tid
= pid_entry_nlink(tid_base_stuff
, ARRAY_SIZE(tid_base_stuff
));
4011 nlink_tgid
= pid_entry_nlink(tgid_base_stuff
, ARRAY_SIZE(tgid_base_stuff
));