/* MIB service - proc.c - functionality based on service process tables */
/* Eventually, the CTL_PROC subtree might end up here as well. */

#include <minix/sysinfo.h>

#include <machine/archtypes.h>
#include "kernel/proc.h"
#include "servers/pm/mproc.h"
#include "servers/vfs/const.h"
#include "servers/vfs/fproc.h"
typedef struct proc ixfer_proc_t;
typedef struct mproc ixfer_mproc_t;

static ixfer_proc_t proc_tab[NR_TASKS + NR_PROCS];
static ixfer_mproc_t mproc_tab[NR_PROCS];
static struct fproc_light fproc_tab[NR_PROCS];
/*
 * The number of processes added to the current number of processes when doing
 * a size estimation, so that the actual data retrieval does not end up with
 * too little space if new processes have forked between the two calls. We do
 * a process table update only once per clock tick, which means that typically
 * no update will take place between the user process's size estimation request
 * and its subsequent data retrieval request. On the other hand, if we do
 * update process tables in between, quite a bit might have changed.
 */
#define EXTRA_PROCS	8

#define HASH_SLOTS (NR_PROCS / 4)	/* expected nr. of processes in use */
#define NO_SLOT (-1)			/* hash entry for "no slot" */

static int hash_tab[HASH_SLOTS];	/* hash table mapping from PID.. */
static int hnext_tab[NR_PROCS];		/* ..to PM process slot */
static clock_t tabs_updated = 0;	/* when the tables were last updated */
static int tabs_valid = TRUE;		/* FALSE if obtaining tables failed */
/*
 * Update the process tables by pulling in new copies from the kernel, PM, and
 * VFS, but only every so often and only if it has not failed before. Return
 * TRUE iff the tables are now valid.
 */
static int
update_tables(void)
{
	clock_t now;
	pid_t pid;
	int r, kslot, mslot, hslot;
	/*
	 * If retrieving the tables failed at some point, do not keep trying
	 * all the time. Such a failure is very unlikely to be transient.
	 */
	if (tabs_valid == FALSE)
		return FALSE;

	/*
	 * Update the tables once per clock tick at most. The update operation
	 * is rather heavy, transferring several hundreds of kilobytes between
	 * servers. Userland should be able to live with information that is
	 * outdated by at most one clock tick.
	 */
	now = getticks();
	if (tabs_updated != 0 && tabs_updated == now)
		return TRUE;
	/* Perform an actual update now. */
	tabs_valid = FALSE;

	/* Retrieve and check the kernel process table. */
	if ((r = sys_getproctab(proc_tab)) != OK) {
		printf("MIB: unable to obtain kernel process table (%d)\n", r);
		return FALSE;
	}

	for (kslot = 0; kslot < NR_TASKS + NR_PROCS; kslot++) {
		if (proc_tab[kslot].p_magic != PMAGIC) {
			printf("MIB: kernel process table mismatch\n");
			return FALSE;
		}
	}
	/* Retrieve and check the PM process table. */
	r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc_tab, sizeof(mproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain PM process table (%d)\n", r);
		return FALSE;
	}

	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_magic != MP_MAGIC) {
			printf("MIB: PM process table mismatch\n");
			return FALSE;
		}
	}
	/* Retrieve an extract of the VFS process table. */
	r = getsysinfo(VFS_PROC_NR, SI_PROCLIGHT_TAB, fproc_tab,
	    sizeof(fproc_tab));
	if (r != OK) {
		printf("MIB: unable to obtain VFS process table (%d)\n", r);
		return FALSE;
	}
	/*
	 * Build a hash table mapping from process IDs to slot numbers, for
	 * fast access. TODO: decide if this is better done on demand only.
	 */
	for (hslot = 0; hslot < HASH_SLOTS; hslot++)
		hash_tab[hslot] = NO_SLOT;
	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		if (mproc_tab[mslot].mp_flags & IN_USE) {
			if ((pid = mproc_tab[mslot].mp_pid) <= 0)
				continue;

			hslot = mproc_tab[mslot].mp_pid % HASH_SLOTS;

			hnext_tab[mslot] = hash_tab[hslot];
			hash_tab[hslot] = mslot;
		}
	}

	tabs_valid = TRUE;
	tabs_updated = now;

	return TRUE;
}
/*
 * Return the PM slot number for the given PID, or NO_SLOT if the PID is not in
 * use by any user process.
 */
static int
get_mslot(pid_t pid)
{
	int mslot;

	/* PID 0 identifies the kernel; checking this is up to the caller. */
	for (mslot = hash_tab[pid % HASH_SLOTS]; mslot != NO_SLOT;
	    mslot = hnext_tab[mslot])
		if (mproc_tab[mslot].mp_pid == pid)
			break;

	return mslot;
}
/*
 * Store the given number of clock ticks as a timeval structure.
 */
static void
ticks_to_timeval(struct timeval * tv, clock_t ticks)
{
	clock_t hz;

	hz = sys_hz();

	tv->tv_sec = ticks / hz;
	tv->tv_usec = (long)((ticks % hz) * 1000000ULL / hz);
}
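/*
 * As a worked example: with a system clock frequency of 100Hz, 250 ticks are
 * stored as tv_sec = 250 / 100 = 2 and tv_usec = (250 % 100) * 1000000 / 100
 * = 500000, i.e., 2.5 seconds.
 */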
/*
 * Generate a wchan message text for the cases that the process is blocked on
 * IPC with another process, of which the endpoint is given as 'endpt' here.
 * The name of the other process is to be stored in 'wmesg', which is a buffer
 * of size 'wmsz'. The result should be null terminated. If 'ipc' is set, the
 * process is blocked on a direct IPC call, in which case the name of the other
 * process is enclosed in parentheses. If 'ipc' is not set, the call is made
 * indirectly through VFS, and the name of the other process should not be
 * enclosed in parentheses. If no name can be obtained, we use the endpoint of
 * the other process instead.
 */
static void
fill_wmesg(char * wmesg, size_t wmsz, endpoint_t endpt, int ipc)
{
	const char *name;
	int mslot;

	mslot = _ENDPOINT_P(endpt);
	if (mslot >= -NR_TASKS && mslot < NR_PROCS &&
	    (mslot < 0 || (mproc_tab[mslot].mp_flags & IN_USE)))
		name = proc_tab[NR_TASKS + mslot].p_name;
	else
		name = NULL;

	if (name != NULL)
		snprintf(wmesg, wmsz, "%s%s%s",
		    ipc ? "(" : "", name, ipc ? ")" : "");
	else
		snprintf(wmesg, wmsz, "%s%d%s",
		    ipc ? "(" : "", endpt, ipc ? ")" : "");
}
/*
 * Return the LWP status of a process, along with additional information in
 * case the process is sleeping (LSSLEEP): a wchan value and text to indicate
 * what the process is sleeping on, and possibly a flag field modification to
 * indicate that the sleep is interruptible.
 */
static int
get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
	int32_t * flag)
{
	struct mproc *mp;
	struct fproc_light *fp;
	struct proc *kp;
	const char *wmesg;
	uint64_t wchan;
	endpoint_t endpt;

	mp = &mproc_tab[mslot];
	fp = &fproc_tab[mslot];
	kp = &proc_tab[NR_TASKS + mslot];
	/*
	 * First cover all the cases that the process is not sleeping. In
	 * those cases, we need not return additional sleep information either.
	 */
	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
		return LSZOMB;
	if (mp->mp_flags & EXITING)
		return LSDEAD;
	if ((mp->mp_flags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
		return LSSTOP;
	if (proc_is_runnable(kp))
		return LSRUN;
	/*
	 * The process is sleeping. In that case, we must also figure out why,
	 * and return an appropriate wchan value and human-readable wmesg text.
	 *
	 * The process can be blocked on either a known sleep state in PM or
	 * VFS, or otherwise on IPC communication with another process, or
	 * otherwise on a kernel RTS flag. In each case, decide what to use as
	 * wchan value and wmesg text, and whether the sleep is interruptible.
	 *
	 * The wchan value should be unique for the sleep reason. We use its
	 * lower eight bits to indicate a class:
	 *	0x00 = kernel task
	 *	0x01 = kernel RTS block
	 *	0x02 = PM call
	 *	0x03 = VFS call
	 *	0xff = blocked on process
	 * The upper bits are used for class-specific information. The actual
	 * value does not really matter, as long as it is nonzero and there is
	 * no overlap between the different values.
	 */
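	/*
	 * For example, a process blocked in VFS on a pipe gets the wchan
	 * value (FP_BLOCKED_ON_PIPE << 8) | 0x03, while a process blocked
	 * sending to the process with endpoint 'e' gets
	 * ((uint64_t)e << 8) | 0xff.
	 */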
	/*
	 * First see if the process is marked as blocked in the tables of PM or
	 * VFS. Such a block reason is always an interruptible sleep. Note
	 * that we do not use the kernel table at all in this case: each of the
	 * three tables is consistent within itself, but not necessarily
	 * consistent with any of the other tables, so we avoid internal
	 * mismatches if we can.
	 */
	wchan = 0;
	wmesg = NULL;
	if (mp->mp_flags & WAITING) {
		wchan = 0x102;		/* wait(), class 0x02 (PM call) */
		wmesg = "wait";
	} else if (mp->mp_flags & SIGSUSPENDED) {
		wchan = 0x202;		/* sigsuspend(), class 0x02 (PM call) */
		wmesg = "pause";
	} else if (fp->fpl_blocked_on != FP_BLOCKED_ON_NONE) {
		wchan = (fp->fpl_blocked_on << 8) | 0x03;
		switch (fp->fpl_blocked_on) {
		case FP_BLOCKED_ON_PIPE:
			wmesg = "pipe";
			break;
		case FP_BLOCKED_ON_FLOCK:
			wmesg = "flock";
			break;
		case FP_BLOCKED_ON_POPEN:
			wmesg = "popen";
			break;
		case FP_BLOCKED_ON_SELECT:
			wmesg = "select";
			break;
		case FP_BLOCKED_ON_CDEV:
		case FP_BLOCKED_ON_SDEV:
			/*
			 * Add the task (= character or socket driver) endpoint
			 * to the wchan value, and use the driver's process
			 * name, without parentheses, as wmesg text.
			 */
			wchan |= (uint64_t)fp->fpl_task << 16;
			fill_wmesg(wmptr, wmsz, fp->fpl_task, FALSE /*ipc*/);
			break;
		default:
			/* A newly added flag we don't yet know about? */
			wmesg = "???";
			break;
		}
	}
	if (wchan != 0) {
		*wcptr = wchan;
		if (wmesg != NULL)	/* NULL means "already set" here */
			strlcpy(wmptr, wmesg, wmsz);
		*flag |= L_SINTR;
		return LSSLEEP;
	}
	/*
	 * See if the process is blocked on sending or receiving. If not, then
	 * use one of the kernel RTS flags as reason.
	 */
	endpt = P_BLOCKEDON(kp);

	switch (endpt) {
	case MIB_PROC_NR:
		/* This is really just aesthetics. */
		wmesg = "sysctl";
		break;
	case NONE:
		/*
		 * The process is not running, but also not blocked on IPC with
		 * another process. This means it must be stopped on a kernel
		 * RTS flag.
		 */
		wchan = ((uint64_t)kp->p_rts_flags << 8) | 0x01;
		if (RTS_ISSET(kp, RTS_PROC_STOP))
			wmesg = "kstop";
		else if (RTS_ISSET(kp, RTS_SIGNALED) ||
		    RTS_ISSET(kp, RTS_SIG_PENDING))
			wmesg = "ksignal";
		else if (RTS_ISSET(kp, RTS_NO_PRIV))
			wmesg = "knopriv";
		else if (RTS_ISSET(kp, RTS_PAGEFAULT) ||
		    RTS_ISSET(kp, RTS_VMREQTARGET))
			wmesg = "fault";
		else if (RTS_ISSET(kp, RTS_NO_QUANTUM))
			wmesg = "sched";
		else
			wmesg = "kflag";
		break;
	case ANY:
		/*
		 * If the process is blocked receiving from ANY, mark it as
		 * being in an interruptible sleep. This looks nicer, even
		 * though "interruptible" is not applicable to services at all.
		 */
		*flag |= L_SINTR;
		break;
	}
	/*
	 * If at this point wchan is still zero, the process is blocked sending
	 * or receiving. Use a wchan value based on the target endpoint, and
	 * use "(procname)" as wmesg text.
	 */
	if (wchan == 0) {
		*wcptr = ((uint64_t)endpt << 8) | 0xff;
		fill_wmesg(wmptr, wmsz, endpt, TRUE /*ipc*/);
	} else {
		*wcptr = wchan;
		if (wmesg != NULL)	/* NULL means "already set" here */
			strlcpy(wmptr, wmesg, wmsz);
	}

	return LSSLEEP;
}
/*
 * Fill the part of a LWP structure that is common between kernel tasks and
 * user processes. Also return a CPU estimate in 'estcpu', because we generate
 * the value as a side effect here, and the LWP structure has no estcpu field.
 */
static void
fill_lwp_common(struct kinfo_lwp * l, int kslot, uint32_t * estcpu)
{
	struct proc *kp;
	struct timeval tv;
	clock_t uptime, hz;

	kp = &proc_tab[kslot];

	uptime = getticks();
	hz = sys_hz();

	/*
	 * We use the process endpoint as the LWP ID. Not only does this allow
	 * users to obtain process endpoints with "ps -s" (thus replacing the
	 * MINIX3 ps(1)'s "ps -E"), but if we ever do implement kernel threads,
	 * this is probably still going to be accurate.
	 */
	l->l_lid = kp->p_endpoint;
	/*
	 * The time during which the process has not been swapped in or out is
	 * not applicable for us, and thus, we set it to the time the process
	 * has been running (in seconds). This value is relevant mostly for
	 * ps(1)'s CPU usage correction for processes that have just started.
	 */
	if (kslot >= NR_TASKS)
		l->l_swtime = uptime - mproc_tab[kslot - NR_TASKS].mp_started;
	else
		l->l_swtime = uptime;
	l->l_swtime /= hz;
	/*
	 * Sleep (dequeue) times are not maintained for kernel tasks, so
	 * pretend they are never asleep (which is pretty accurate).
	 */
	if (kslot < NR_TASKS)
		l->l_slptime = 0;
	else
		l->l_slptime = (uptime - kp->p_dequeued) / hz;
	l->l_priority = kp->p_priority;
	l->l_usrpri = kp->p_priority;
	l->l_cpuid = kp->p_cpu;
	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
	l->l_rtime_sec = tv.tv_sec;
	l->l_rtime_usec = tv.tv_usec;

	/*
	 * Obtain CPU usage percentages and estimates through library code
	 * shared between the kernel and this service; see its source for
	 * details. We note that the produced estcpu value is rather different
	 * from the one produced by NetBSD, but this should not be a problem.
	 */
	l->l_pctcpu = cpuavg_getstats(&kp->p_cpuavg, &l->l_cpticks, estcpu,
	    uptime, hz);
}
/*
 * Fill a LWP structure for a kernel task. Each kernel task has its own LWP,
 * and all of them have negative PIDs.
 */
static void
fill_lwp_kern(struct kinfo_lwp * l, int kslot)
{
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	l->l_flag = L_INMEM | L_SINTR | L_SYSTEM;
	l->l_stat = LSSLEEP;
	l->l_pid = kslot - NR_TASKS;

	/*
	 * When showing LWP entries, ps(1) uses the process name rather than
	 * the LWP name. All kernel tasks are therefore shown as "[kernel]"
	 * anyway. We use the wmesg field to show the actual kernel task name.
	 */
	l->l_wchan = ((uint64_t)(l->l_pid) << 8) | 0x00;
	strlcpy(l->l_wmesg, proc_tab[kslot].p_name, sizeof(l->l_wmesg));
	strlcpy(l->l_name, "kernel", sizeof(l->l_name));

	fill_lwp_common(l, kslot, &estcpu);
}
/*
 * Fill a LWP structure for a user process.
 */
static void
fill_lwp_user(struct kinfo_lwp * l, int mslot)
{
	struct mproc *mp;
	uint32_t estcpu;

	memset(l, 0, sizeof(*l));

	mp = &mproc_tab[mslot];

	l->l_flag = L_INMEM;
	l->l_stat = get_lwp_stat(mslot, &l->l_wchan, l->l_wmesg,
	    sizeof(l->l_wmesg), &l->l_flag);
	l->l_pid = mp->mp_pid;
	strlcpy(l->l_name, mp->mp_name, sizeof(l->l_name));

	fill_lwp_common(l, NR_TASKS + mslot, &estcpu);
}
/*
 * Implementation of CTL_KERN KERN_LWP.
 */
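/*
 * A sketch of a hypothetical userland caller, requesting a full LWP listing.
 * The three trailing name components are the PID filter (-1 for all), the
 * per-element size, and the maximum element count (0 here, as no data is
 * retrieved yet):
 *
 *	int mib[5] = { CTL_KERN, KERN_LWP, -1, sizeof(struct kinfo_lwp), 0 };
 *	size_t len;
 *	if (sysctl(mib, 5, NULL, &len, NULL, 0) == 0)
 *		... 'len' now holds an estimate of the required buffer size ...
 */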
static ssize_t
mib_kern_lwp(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct kinfo_lwp lwp;
	struct mproc *mp;
	size_t copysz;
	ssize_t off;
	pid_t pid;
	int r, elsz, elmax, kslot, mslot, last_mslot;
	if (call->call_namelen != 3)
		return EINVAL;

	pid = (pid_t)call->call_name[0];
	elsz = call->call_name[1];
	elmax = call->call_name[2];	/* redundant with the given oldlen.. */

	if (pid < -1 || elsz <= 0 || elmax < 0)
		return EINVAL;

	if (!update_tables())
		return EINVAL;

	off = 0;
	copysz = MIN((size_t)elsz, sizeof(lwp));

	/*
	 * We model kernel tasks as LWP threads of the kernel (with PID 0).
	 * Modeling the kernel tasks as processes with negative PIDs, like
	 * ProcFS does, conflicts with the KERN_LWP API here: a PID of -1
	 * indicates that the caller wants a full listing of LWPs.
	 */
	if (pid <= 0) {
		for (kslot = 0; kslot < NR_TASKS; kslot++) {
			if (mib_inrange(oldp, off) && elmax > 0) {
				fill_lwp_kern(&lwp, kslot);
				if ((r = mib_copyout(oldp, off, &lwp,
				    copysz)) < 0)
					return r;
				elmax--;
			}
			off += elsz;
		}

		/* No need to add extra space here: NR_TASKS is static. */

		if (pid == 0)
			return off;
	}
	/*
	 * With PID 0 out of the way: the user requested the LWP for either a
	 * specific user process (pid > 0), or for all processes (pid < 0).
	 */
	if (pid > 0) {
		if ((mslot = get_mslot(pid)) == NO_SLOT ||
		    (mproc_tab[mslot].mp_flags & (TRACE_ZOMBIE | ZOMBIE)))
			return ESRCH;
		last_mslot = mslot;
	} else {
		mslot = 0;
		last_mslot = NR_PROCS - 1;
	}
	for (; mslot <= last_mslot; mslot++) {
		mp = &mproc_tab[mslot];

		if ((mp->mp_flags & (IN_USE | TRACE_ZOMBIE | ZOMBIE)) !=
		    IN_USE)
			continue;

		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_lwp_user(&lwp, mslot);
			if ((r = mib_copyout(oldp, off, &lwp, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}

	if (oldp == NULL && pid < 0)
		off += EXTRA_PROCS * elsz;

	return off;
}
/*
 * Fill the part of a process structure that is common between kernel tasks and
 * user processes.
 */
static void
fill_proc2_common(struct kinfo_proc2 * p, int kslot)
{
	struct vm_usage_info vui;
	struct kinfo_lwp l;
	struct timeval tv;
	struct proc *kp;

	kp = &proc_tab[kslot];

	/*
	 * Much of the information in the LWP structure also ends up in the
	 * process structure. In order to avoid duplication of some important
	 * code, first generate LWP values and then copy them into the
	 * process structure.
	 */
	memset(&l, 0, sizeof(l));
	fill_lwp_common(&l, kslot, &p->p_estcpu);
	/* Obtain memory usage information from VM. Ignore failures. */
	memset(&vui, 0, sizeof(vui));
	(void)vm_info_usage(kp->p_endpoint, &vui);

	ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
	p->p_rtime_sec = l.l_rtime_sec;
	p->p_rtime_usec = l.l_rtime_usec;
	p->p_cpticks = l.l_cpticks;
	p->p_pctcpu = l.l_pctcpu;
	p->p_swtime = l.l_swtime;
	p->p_slptime = l.l_slptime;
	p->p_uticks = kp->p_user_time;
	p->p_sticks = kp->p_sys_time;
	/* TODO: p->p_iticks */
	ticks_to_timeval(&tv, kp->p_user_time);
	p->p_uutime_sec = tv.tv_sec;
	p->p_uutime_usec = tv.tv_usec;
	ticks_to_timeval(&tv, kp->p_sys_time);
	p->p_ustime_sec = tv.tv_sec;
	p->p_ustime_usec = tv.tv_usec;

	p->p_priority = l.l_priority;
	p->p_usrpri = l.l_usrpri;

	p->p_vm_rssize = howmany(vui.vui_total, PAGE_SIZE);
	p->p_vm_vsize = howmany(vui.vui_virtual, PAGE_SIZE);
	p->p_vm_msize = howmany(vui.vui_mvirtual, PAGE_SIZE);

	p->p_uru_maxrss = vui.vui_maxrss;
	p->p_uru_minflt = vui.vui_minflt;
	p->p_uru_majflt = vui.vui_majflt;

	p->p_cpuid = l.l_cpuid;
}
/*
 * Fill a process structure for the kernel pseudo-process (with PID 0).
 */
static void
fill_proc2_kern(struct kinfo_proc2 * p)
{
	memset(p, 0, sizeof(*p));

	p->p_flag = L_INMEM | L_SYSTEM | L_SINTR;
	p->p_pid = 0;
	p->p_stat = LSSLEEP;

	/* Use the KERNEL task wchan, for consistency between ps and top. */
	p->p_wchan = ((uint64_t)KERNEL << 8) | 0x00;
	strlcpy(p->p_wmesg, "kernel", sizeof(p->p_wmesg));

	strlcpy(p->p_comm, "kernel", sizeof(p->p_comm));
	p->p_realflag = P_INMEM | P_SYSTEM | P_SINTR;
	p->p_realstat = SACTIVE;
	p->p_nlwps = NR_TASKS;

	/*
	 * By using the KERNEL slot here, the kernel process will get a proper
	 * CPU usage average.
	 */
	fill_proc2_common(p, KERNEL + NR_TASKS);
}
/*
 * Fill a process structure for a user process.
 */
static void
fill_proc2_user(struct kinfo_proc2 * p, int mslot)
{
	struct mproc *mp;
	struct fproc_light *fp;
	struct timeval tv;
	time_t boottime;
	dev_t tty;
	int i, r, kslot, zombie;

	memset(p, 0, sizeof(*p));

	if ((r = getuptime(NULL, NULL, &boottime)) != OK)
		panic("getuptime failed: %d", r);

	kslot = NR_TASKS + mslot;
	mp = &mproc_tab[mslot];
	fp = &fproc_tab[mslot];

	zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
	tty = (!zombie) ? fp->fpl_tty : NO_DEV;
	if (tty != NO_DEV)
		p->p_eflag |= EPROC_CTTY;
	if (mp->mp_pid == mp->mp_procgrp)	/* TODO: job control support */
		p->p_eflag |= EPROC_SLEADER;

	p->p_exitsig = SIGCHLD;	/* TODO */
	p->p_flag = L_INMEM;
	if (mp->mp_flags & TAINTED)
		p->p_flag |= P_SUGID;
	if (mp->mp_tracer != NO_TRACER)
		p->p_flag |= P_TRACED;
	if (tty != NO_DEV)
		p->p_flag |= P_CONTROLT;
	p->p_pid = mp->mp_pid;
	if (mp->mp_parent >= 0 && mp->mp_parent < NR_PROCS)
		p->p_ppid = mproc_tab[mp->mp_parent].mp_pid;
	p->p_sid = mp->mp_procgrp;	/* TODO: job control support */
	p->p__pgid = mp->mp_procgrp;
	p->p_tpgid = (tty != NO_DEV) ? mp->mp_procgrp : 0;
	p->p_uid = mp->mp_effuid;
	p->p_ruid = mp->mp_realuid;
	p->p_gid = mp->mp_effgid;
	p->p_rgid = mp->mp_realgid;
	p->p_ngroups = MIN(mp->mp_ngroups, KI_NGROUPS);
	for (i = 0; i < p->p_ngroups; i++)
		p->p_groups[i] = mp->mp_sgroups[i];
	memcpy(&p->p_siglist, &mp->mp_sigpending, sizeof(p->p_siglist));
	memcpy(&p->p_sigmask, &mp->mp_sigmask, sizeof(p->p_sigmask));
	memcpy(&p->p_sigcatch, &mp->mp_catch, sizeof(p->p_sigcatch));
	memcpy(&p->p_sigignore, &mp->mp_ignore, sizeof(p->p_sigignore));
	p->p_nice = mp->mp_nice + NZERO;
	strlcpy(p->p_comm, mp->mp_name, sizeof(p->p_comm));

	ticks_to_timeval(&tv, mp->mp_started);
	p->p_ustart_sec = boottime + tv.tv_sec;
	p->p_ustart_usec = tv.tv_usec;
	/* TODO: other rusage fields */
	ticks_to_timeval(&tv, mp->mp_child_utime + mp->mp_child_stime);
	p->p_uctime_sec = tv.tv_sec;
	p->p_uctime_usec = tv.tv_usec;
	p->p_realflag = p->p_flag;
	p->p_nlwps = (zombie) ? 0 : 1;
	p->p_svuid = mp->mp_svuid;
	p->p_svgid = mp->mp_svgid;
	p->p_stat = get_lwp_stat(mslot, &p->p_wchan, p->p_wmesg,
	    sizeof(p->p_wmesg), &p->p_flag);

	switch (p->p_stat) {
	case LSRUN:
		p->p_realstat = SACTIVE;
		break;
	case LSSLEEP:
		p->p_realstat = SACTIVE;
		if (p->p_flag & L_SINTR)
			p->p_realflag |= P_SINTR;
		break;
	case LSSTOP:
		p->p_realstat = SSTOP;
		break;
	case LSZOMB:
		p->p_realstat = SZOMB;
		break;
	case LSDEAD:
		p->p_stat = LSZOMB;	/* ps(1) STAT does not know LSDEAD */
		p->p_realstat = SDEAD;
		break;
	}

	fill_proc2_common(p, kslot);
}
/*
 * Implementation of CTL_KERN KERN_PROC2.
 */
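/*
 * A sketch of a hypothetical userland caller, requesting all processes with a
 * size estimation followed by the actual retrieval:
 *
 *	int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_ALL, 0,
 *	    sizeof(struct kinfo_proc2), 0 };
 *	size_t len;
 *	sysctl(mib, 6, NULL, &len, NULL, 0);
 *	mib[5] = len / sizeof(struct kinfo_proc2);
 *	sysctl(mib, 6, buf, &len, NULL, 0);
 */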
static ssize_t
mib_kern_proc2(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct kinfo_proc2 proc2;
	struct mproc *mp;
	size_t copysz;
	ssize_t off;
	dev_t tty;
	int r, req, arg, elsz, elmax, kmatch, zombie, mslot;
	if (call->call_namelen != 4)
		return EINVAL;

	req = call->call_name[0];
	arg = call->call_name[1];
	elsz = call->call_name[2];
	elmax = call->call_name[3];	/* redundant with the given oldlen.. */

	/*
	 * The kernel is special, in that it does not have a slot in the PM or
	 * VFS tables. As such, it is dealt with separately. While checking
	 * arguments, we might as well check whether the kernel is matched.
	 */
	switch (req) {
	case KERN_PROC_ALL:
		kmatch = TRUE;
		break;
	case KERN_PROC_PID:
	case KERN_PROC_SESSION:
	case KERN_PROC_PGRP:
	case KERN_PROC_UID:
	case KERN_PROC_RUID:
	case KERN_PROC_GID:
	case KERN_PROC_RGID:
		kmatch = FALSE;
		break;
	case KERN_PROC_TTY:
		kmatch = ((dev_t)arg == KERN_PROC_TTY_NODEV);
		break;
	default:
		return EINVAL;
	}
	if (elsz <= 0 || elmax < 0)
		return EINVAL;

	if (!update_tables())
		return EINVAL;

	off = 0;
	copysz = MIN((size_t)elsz, sizeof(proc2));
	if (kmatch) {
		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_proc2_kern(&proc2);
			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}
	for (mslot = 0; mslot < NR_PROCS; mslot++) {
		mp = &mproc_tab[mslot];

		if (!(mp->mp_flags & IN_USE))
			continue;

		switch (req) {
		case KERN_PROC_PID:
			if ((pid_t)arg != mp->mp_pid)
				continue;
			break;
		case KERN_PROC_SESSION:	/* TODO: job control support */
		case KERN_PROC_PGRP:
			if ((pid_t)arg != mp->mp_procgrp)
				continue;
			break;
		case KERN_PROC_TTY:
			if ((dev_t)arg == KERN_PROC_TTY_REVOKE)
				continue;	/* TODO: revoke(2) support */
			/* Do not access the fproc_tab slot of zombies. */
			zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
			tty = (!zombie) ? fproc_tab[mslot].fpl_tty : NO_DEV;
			if ((dev_t)arg == KERN_PROC_TTY_NODEV) {
				if (tty != NO_DEV)
					continue;
			} else if ((dev_t)arg == NO_DEV || (dev_t)arg != tty)
				continue;
			break;
		case KERN_PROC_UID:
			if ((uid_t)arg != mp->mp_effuid)
				continue;
			break;
		case KERN_PROC_RUID:
			if ((uid_t)arg != mp->mp_realuid)
				continue;
			break;
		case KERN_PROC_GID:
			if ((gid_t)arg != mp->mp_effgid)
				continue;
			break;
		case KERN_PROC_RGID:
			if ((gid_t)arg != mp->mp_realgid)
				continue;
			break;
		}

		if (mib_inrange(oldp, off) && elmax > 0) {
			fill_proc2_user(&proc2, mslot);
			if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
				return r;
			elmax--;
		}
		off += elsz;
	}
	if (oldp == NULL && req != KERN_PROC_PID)
		off += EXTRA_PROCS * elsz;

	return off;
}
/*
 * Implementation of CTL_KERN KERN_PROC_ARGS.
 */
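/*
 * A sketch of a hypothetical userland caller, retrieving the argument count
 * of one process:
 *
 *	int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_NARGV };
 *	int count;
 *	size_t len = sizeof(count);
 *	sysctl(mib, 4, &count, &len, NULL, 0);
 */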
static ssize_t
mib_kern_proc_args(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	char vbuf[PAGE_SIZE], sbuf[PAGE_SIZE], obuf[PAGE_SIZE];
	struct ps_strings pss;
	struct mproc *mp;
	char *buf, *p, *q, *pptr;
	vir_bytes vaddr, vpage, spage, paddr, ppage;
	size_t max, off, olen, oleft, oldlen, bytes, pleft;
	unsigned int copybudget;
	pid_t pid;
	int r, req, mslot, count, aborted, ended;
	if (call->call_namelen != 2)
		return EINVAL;

	pid = call->call_name[0];
	req = call->call_name[1];

	switch (req) {
	case KERN_PROC_ARGV:
	case KERN_PROC_ENV:
	case KERN_PROC_NARGV:
	case KERN_PROC_NENV:
		break;
	default:
		return EINVAL;
	}
	if (!update_tables())
		return EINVAL;

	if ((mslot = get_mslot(pid)) == NO_SLOT)
		return ESRCH;

	mp = &mproc_tab[mslot];
	if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
		return ESRCH;
	/* We can return the count field size without copying in any data. */
	if (oldp == NULL && (req == KERN_PROC_NARGV || req == KERN_PROC_NENV))
		return sizeof(count);

	if (sys_datacopy(mp->mp_endpoint,
	    mp->mp_frame_addr + mp->mp_frame_len - sizeof(pss),
	    SELF, (vir_bytes)&pss, sizeof(pss)) != OK)
		return EINVAL;
	/*
	 * Determine the upper size limit of the requested data. Not only may
	 * the size never exceed ARG_MAX, it may also not exceed the frame
	 * length as given in its original exec call. In fact, the frame
	 * length should be substantially larger: all strings for both the
	 * arguments and the environment are in there, along with other stuff,
	 * and there must be no overlap between strings. It is possible that
	 * the application called setproctitle(3), in which case the ps_strings
	 * pointers refer to data outside the frame altogether. However, this
	 * data should not exceed 2048 bytes, and we cover this by rounding up
	 * the frame length to a multiple of the page size. Anyhow, NetBSD
	 * blindly returns ARG_MAX when asked for a size estimate, so with this
	 * maximum we are already quite a bit more accurate.
	 */
	max = roundup(MIN(mp->mp_frame_len, ARG_MAX), PAGE_SIZE);
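	/*
	 * To illustrate with assumed numbers: for a frame length of 5000
	 * bytes and 4096-byte pages, this yields max = roundup(5000, 4096) =
	 * 8192.
	 */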
	switch (req) {
	case KERN_PROC_NARGV:
		count = pss.ps_nargvstr;
		return mib_copyout(oldp, 0, &count, sizeof(count));
	case KERN_PROC_NENV:
		count = pss.ps_nenvstr;
		return mib_copyout(oldp, 0, &count, sizeof(count));
	case KERN_PROC_ARGV:
		vaddr = (vir_bytes)pss.ps_argvstr;
		count = pss.ps_nargvstr;
		break;
	case KERN_PROC_ENV:
		vaddr = (vir_bytes)pss.ps_envstr;
		count = pss.ps_nenvstr;
		break;
	}
	/*
	 * Go through the strings. Copy in entire, machine-aligned pages at
	 * once, in the hope that all data is stored consecutively, which it
	 * should be: we expect that the vector is followed by the strings, and
	 * that the strings are stored in order of vector reference. We keep
	 * up to two pages with copied-in data: one for the vector, and
	 * optionally one for string data. In addition, we keep one page with
	 * data to be copied out, so that we do not cause a lot of copy
	 * overhead for short strings.
	 *
	 * We stop whenever any of the following conditions are met:
	 * - copying in data from the target process fails for any reason;
	 * - we have processed the last index ('count') into the vector;
	 * - the current vector element is a NULL pointer;
	 * - the requested number of output bytes ('oldlen') has been reached;
	 * - the maximum number of output bytes ('max') has been reached;
	 * - the number of page copy-ins exceeds an estimated threshold;
	 * - copying out data fails for any reason (we then return the error).
	 *
	 * We limit the number of page copy-ins because otherwise a rogue
	 * process could create an argument vector consisting of only two-byte
	 * strings that all span two pages, causing us to copy up to 1GB of
	 * data with the current ARG_MAX value of 256K. No reasonable vector
	 * should cause more than (ARG_MAX / PAGE_SIZE) page copies for
	 * strings; we are nice enough to allow twice that. Vector copies do
	 * not count, as they are linear anyway.
	 *
	 * Unlike every other sysctl(2) call, we are supposed to truncate the
	 * resulting size (the returned 'oldlen') to the requested size (the
	 * given 'oldlen') *and* return the resulting size, rather than ENOMEM
	 * and the real size. Unfortunately, libkvm actually relies on this.
	 *
	 * Generally speaking, upon failure we just return a truncated result.
	 * In case of truncation, the data we copy out need not be null
	 * terminated. It is up to userland to process the data correctly.
	 */
	if (trunc_page(vaddr) == 0 || vaddr % sizeof(char *) != 0)
		return 0;
	off = 0;
	olen = 0;
	oldlen = mib_getoldlen(oldp);
	if (oldlen > max)
		oldlen = max;

	vpage = 0;
	spage = 0;
	aborted = FALSE;

	copybudget = (ARG_MAX / PAGE_SIZE) * 2;
	while (count > 0 && off + olen < oldlen && !aborted) {
		/*
		 * Start by fetching the page containing the current vector
		 * element, if needed. We could limit the fetch to the vector
		 * size, but our hope is that for the simple cases, the strings
		 * are on the remainder of the same page, so we save a copy
		 * call. TODO: since the strings should follow the vector, we
		 * could start the copy at the base of the vector.
		 */
		if (trunc_page(vaddr) != vpage) {
			vpage = trunc_page(vaddr);
			if (sys_datacopy(mp->mp_endpoint, vpage, SELF,
			    (vir_bytes)vbuf, PAGE_SIZE) != OK)
				break;
		}

		/* Get the current vector element, pointing to a string. */
		memcpy(&pptr, &vbuf[vaddr - vpage], sizeof(pptr));
		paddr = (vir_bytes)pptr;
		ppage = trunc_page(paddr);
		if (pptr == NULL)
			break;
		/* Fetch the string itself, one page at a time at most. */
		do {
			/*
			 * See if the string pointer falls inside either the
			 * vector page or the previously fetched string page
			 * (if any). If not, fetch a string page.
			 */
			if (ppage == vpage) {
				buf = vbuf;
			} else if (ppage == spage) {
				buf = sbuf;
			} else {
				if (--copybudget == 0) {
					aborted = TRUE;
					break;
				}
				spage = ppage;
				if (sys_datacopy(mp->mp_endpoint, spage, SELF,
				    (vir_bytes)sbuf, PAGE_SIZE) != OK) {
					aborted = TRUE;
					break;
				}
				buf = sbuf;
			}
			/*
			 * We now have a string fragment in a buffer. See if
			 * the string is null terminated. If not, all the data
			 * up to the buffer end is part of the string, and the
			 * string continues on the next page.
			 */
			p = &buf[paddr - ppage];
			pleft = PAGE_SIZE - (paddr - ppage);
			ended = FALSE;

			if ((q = memchr(p, '\0', pleft)) != NULL) {
				bytes = (size_t)(q - p + 1);
				assert(bytes <= pleft);
				ended = TRUE;
			} else
				bytes = pleft;
			/* Limit the result to the requested length. */
			if (off + olen + bytes > oldlen)
				bytes = oldlen - off - olen;
			/*
			 * Add 'bytes' bytes from string pointer 'p' to the
			 * output buffer, copying out its contents to userland
			 * if it has filled up.
			 */
			if (olen + bytes > sizeof(obuf)) {
				oleft = sizeof(obuf) - olen;
				memcpy(&obuf[olen], p, oleft);

				if ((r = mib_copyout(oldp, off, obuf,
				    sizeof(obuf))) < 0)
					return r;
				off += sizeof(obuf);
				olen = 0;

				p += oleft;
				bytes -= oleft;
			}
			if (bytes > 0) {
				memcpy(&obuf[olen], p, bytes);
				olen += bytes;
			}
			/*
			 * Continue as long as we have not yet found the string
			 * end, and we have not yet filled the output buffer.
			 */
			if (!ended && off + olen < oldlen) {
				paddr += pleft;
				assert(trunc_page(paddr) == paddr);
				ppage = paddr;
			}
		} while (!ended && off + olen < oldlen);

		vaddr += sizeof(char *);
		count--;
	}
	/* Copy out any remainder of the output buffer. */
	if (olen > 0) {
		if ((r = mib_copyout(oldp, off, obuf, olen)) < 0)
			return r;
		off += olen;
	}

	assert(off <= oldlen);

	return off;
}
/*
 * Implementation of CTL_MINIX MINIX_PROC PROC_LIST.
 */
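/*
 * A sketch of a hypothetical userland caller; the result is a fixed-size
 * array indexed by PM slot number:
 *
 *	struct minix_proc_list mpl[NR_PROCS];
 *	int mib[3] = { CTL_MINIX, MINIX_PROC, PROC_LIST };
 *	size_t len = sizeof(mpl);
 *	sysctl(mib, 3, mpl, &len, NULL, 0);
 */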
static ssize_t
mib_minix_proc_list(struct mib_call * call __unused,
	struct mib_node * node __unused, struct mib_oldp * oldp,
	struct mib_newp * newp __unused)
{
	struct minix_proc_list mpl[NR_PROCS];
	struct minix_proc_list *mplp;
	struct mproc *mp;
	int mslot;

	if (!update_tables())
		return EINVAL;

	memset(&mpl, 0, sizeof(mpl));
	mplp = mpl;
	mp = mproc_tab;

	for (mslot = 0; mslot < NR_PROCS; mslot++, mplp++, mp++) {
		if (!(mp->mp_flags & IN_USE) || mp->mp_pid <= 0)
			continue;

		mplp->mpl_flags = MPLF_IN_USE;
		if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
			mplp->mpl_flags |= MPLF_ZOMBIE;
		mplp->mpl_pid = mp->mp_pid;
		mplp->mpl_uid = mp->mp_effuid;
		mplp->mpl_gid = mp->mp_effgid;
	}
	return mib_copyout(oldp, 0, &mpl, sizeof(mpl));
}
/*
 * Implementation of CTL_MINIX MINIX_PROC PROC_DATA.
 */
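/*
 * A sketch of a hypothetical userland caller, fetching the data for one user
 * process or kernel task (negative PID):
 *
 *	struct minix_proc_data mpd;
 *	int mib[4] = { CTL_MINIX, MINIX_PROC, PROC_DATA, pid };
 *	size_t len = sizeof(mpd);
 *	sysctl(mib, 4, &mpd, &len, NULL, 0);
 */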
static ssize_t
mib_minix_proc_data(struct mib_call * call, struct mib_node * node __unused,
	struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
	struct minix_proc_data mpd;
	struct proc *kp;
	pid_t pid;
	int kslot, mslot = 0;
	unsigned int mflags;
	/*
	 * It is currently only possible to retrieve the process data for a
	 * particular PID, which must be given as the last name component.
	 */
	if (call->call_namelen != 1)
		return EINVAL;

	pid = (pid_t)call->call_name[0];

	if (!update_tables())
		return EINVAL;

	/*
	 * Unlike the CTL_KERN nodes, we use the ProcFS semantics here: if the
	 * given PID is negative, it is a kernel task; otherwise, it identifies
	 * a user process. A request for PID 0 will result in ESRCH.
	 */
	if (pid < 0) {
		if (pid < -NR_TASKS)
			return ESRCH;

		kslot = pid + NR_TASKS;
		assert(kslot < NR_TASKS);
	} else {
		if ((mslot = get_mslot(pid)) == NO_SLOT)
			return ESRCH;

		kslot = NR_TASKS + mslot;
	}
	kp = &proc_tab[kslot];

	mflags = (pid > 0) ? mproc_tab[mslot].mp_flags : 0;

	memset(&mpd, 0, sizeof(mpd));
	mpd.mpd_endpoint = kp->p_endpoint;
	if (mflags & PRIV_PROC)
		mpd.mpd_flags |= MPDF_SYSTEM;
	if (mflags & (TRACE_ZOMBIE | ZOMBIE))
		mpd.mpd_flags |= MPDF_ZOMBIE;
	else if ((mflags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
		mpd.mpd_flags |= MPDF_STOPPED;
	else if (proc_is_runnable(kp))
		mpd.mpd_flags |= MPDF_RUNNABLE;
	mpd.mpd_blocked_on = P_BLOCKEDON(kp);
	mpd.mpd_priority = kp->p_priority;
	mpd.mpd_user_time = kp->p_user_time;
	mpd.mpd_sys_time = kp->p_sys_time;
	mpd.mpd_cycles = kp->p_cycles;
	mpd.mpd_kipc_cycles = kp->p_kipc_cycles;
	mpd.mpd_kcall_cycles = kp->p_kcall_cycles;
	if (kslot >= NR_TASKS) {
		mpd.mpd_nice = mproc_tab[mslot].mp_nice;
		strlcpy(mpd.mpd_name, mproc_tab[mslot].mp_name,
		    sizeof(mpd.mpd_name));
	} else
		strlcpy(mpd.mpd_name, kp->p_name, sizeof(mpd.mpd_name));

	return mib_copyout(oldp, 0, &mpd, sizeof(mpd));
}