/* MIB service - proc.c - functionality based on service process tables */
/* Eventually, the CTL_PROC subtree might end up here as well. */
#include <minix/sysinfo.h>

#include <machine/archtypes.h>
#include "kernel/proc.h"
#include "servers/pm/mproc.h"
#include "servers/vfs/const.h"
#include "servers/vfs/fproc.h"
typedef struct proc ixfer_proc_t;
typedef struct mproc ixfer_mproc_t;
static ixfer_proc_t proc_tab[NR_TASKS + NR_PROCS];
static ixfer_mproc_t mproc_tab[NR_PROCS];
static struct fproc_light fproc_tab[NR_PROCS];
/*
 * The number of processes added to the current number of processes when doing
 * a size estimation, so that the actual data retrieval does not end up with
 * too little space if new processes have forked between the two calls.  We do
 * a process table update only once per clock tick, which means that typically
 * no update will take place between the user process's size estimation request
 * and its subsequent data retrieval request.  On the other hand, if we do
 * update process tables in between, quite a bit might have changed.
 */
#define EXTRA_PROCS     8
#define HASH_SLOTS      (NR_PROCS / 4)  /* expected nr. of processes in use */
#define NO_SLOT         (-1)            /* no slot found during hash lookup */

static int hash_tab[HASH_SLOTS];        /* hash table mapping from PID.. */
static int hnext_tab[NR_PROCS];         /* ..to PM process slot */
static clock_t tabs_updated = 0;        /* when the tables were last updated */
static int tabs_valid = TRUE;           /* FALSE if obtaining tables failed */
/*
 * Update the process tables by pulling in new copies from the kernel, PM, and
 * VFS, but only every so often and only if it has not failed before.  Return
 * TRUE iff the tables are now valid.
 */
static int
update_tables(void)
{
        clock_t now;
        pid_t pid;
        int r, kslot, mslot, hslot;
        /*
         * If retrieving the tables failed at some point, do not keep trying
         * all the time.  Such a failure is very unlikely to be transient.
         */
        if (tabs_valid == FALSE)
                return FALSE;
        /*
         * Update the tables once per clock tick at most.  The update operation
         * is rather heavy, transferring several hundreds of kilobytes between
         * servers.  Userland should be able to live with information that is
         * outdated by at most one clock tick.
         */
        now = getticks();

        if (tabs_updated != 0 && tabs_updated == now)
                return TRUE;
        /* Perform an actual update now. */
        tabs_valid = FALSE;
        /* Retrieve and check the kernel process table. */
        if ((r = sys_getproctab(proc_tab)) != OK) {
                printf("MIB: unable to obtain kernel process table (%d)\n", r);

                return FALSE;
        }
        for (kslot = 0; kslot < NR_TASKS + NR_PROCS; kslot++) {
                if (proc_tab[kslot].p_magic != PMAGIC) {
                        printf("MIB: kernel process table mismatch\n");

                        return FALSE;
                }
        }
        /* Retrieve and check the PM process table. */
        r = getsysinfo(PM_PROC_NR, SI_PROC_TAB, mproc_tab, sizeof(mproc_tab));
        if (r != OK) {
                printf("MIB: unable to obtain PM process table (%d)\n", r);

                return FALSE;
        }
        for (mslot = 0; mslot < NR_PROCS; mslot++) {
                if (mproc_tab[mslot].mp_magic != MP_MAGIC) {
                        printf("MIB: PM process table mismatch\n");

                        return FALSE;
                }
        }
        /* Retrieve an extract of the VFS process table. */
        r = getsysinfo(VFS_PROC_NR, SI_PROCLIGHT_TAB, fproc_tab,
            sizeof(fproc_tab));
        if (r != OK) {
                printf("MIB: unable to obtain VFS process table (%d)\n", r);

                return FALSE;
        }
        tabs_valid = TRUE;
        tabs_updated = now;

        /*
         * Build a hash table mapping from process IDs to slot numbers, for
         * fast access.  TODO: decide if this is better done on demand only.
         */
        for (hslot = 0; hslot < HASH_SLOTS; hslot++)
                hash_tab[hslot] = NO_SLOT;
        for (mslot = 0; mslot < NR_PROCS; mslot++) {
                if (mproc_tab[mslot].mp_flags & IN_USE) {
                        if ((pid = mproc_tab[mslot].mp_pid) <= 0)
                                continue;

                        hslot = mproc_tab[mslot].mp_pid % HASH_SLOTS;

                        hnext_tab[mslot] = hash_tab[hslot];
                        hash_tab[hslot] = mslot;
                }
        }

        return TRUE;
}
/*
 * Return the PM slot number for the given PID, or NO_SLOT if the PID is not in
 * use.
 */
static int
get_mslot(pid_t pid)
{
        int mslot;

        /* PID 0 identifies the kernel; checking this is up to the caller. */
        for (mslot = hash_tab[pid % HASH_SLOTS]; mslot != NO_SLOT;
            mslot = hnext_tab[mslot])
                if (mproc_tab[mslot].mp_pid == pid)
                        break;

        return mslot;
}
/*
 * Store the given number of clock ticks as a timeval structure.
 */
static void
ticks_to_timeval(struct timeval * tv, clock_t ticks)
{
        clock_t hz;

        hz = sys_hz();
        tv->tv_sec = ticks / hz;
        tv->tv_usec = (long)((ticks % hz) * 1000000ULL / hz);
}
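/*
 * Worked example (illustrative): with hz = 100 and ticks = 250, the result is
 * tv_sec = 250 / 100 = 2 and tv_usec = (250 % 100) * 1000000 / 100 = 500000,
 * i.e. 2.5 seconds.  The 1000000ULL constant forces 64-bit arithmetic so the
 * intermediate product cannot overflow for any reasonable hz value.
 */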
/*
 * Generate a wchan message text for the cases that the process is blocked on
 * IPC with another process, of which the endpoint is given as 'endpt' here.
 * The name of the other process is to be stored in 'wmesg', which is a buffer
 * of size 'wmsz'.  The result should be null terminated.  If 'ipc' is set, the
 * process is blocked on a direct IPC call, in which case the name of the other
 * process is enclosed in parentheses.  If 'ipc' is not set, the call is made
 * indirectly through VFS, and the name of the other process should not be
 * enclosed in parentheses.  If no name can be obtained, we use the endpoint of
 * the other process instead.
 */
static void
fill_wmesg(char * wmesg, size_t wmsz, endpoint_t endpt, int ipc)
{
        const char *name;
        int mslot;
        mslot = _ENDPOINT_P(endpt);
        if (mslot >= -NR_TASKS && mslot < NR_PROCS &&
            (mslot < 0 || (mproc_tab[mslot].mp_flags & IN_USE)))
                name = proc_tab[NR_TASKS + mslot].p_name;
        else
                name = NULL;
        if (name != NULL)
                snprintf(wmesg, wmsz, "%s%s%s",
                    ipc ? "(" : "", name, ipc ? ")" : "");
        else
                snprintf(wmesg, wmsz, "%s%d%s",
                    ipc ? "(" : "", endpt, ipc ? ")" : "");
}
/*
 * Return the LWP status of a process, along with additional information in
 * case the process is sleeping (LSSLEEP): a wchan value and text to indicate
 * what the process is sleeping on, and possibly a flag field modification to
 * indicate that the sleep is interruptible.
 */
static int
get_lwp_stat(int mslot, uint64_t * wcptr, char * wmptr, size_t wmsz,
    int32_t * flag)
{
        struct mproc *mp;
        struct fproc_light *fp;
        struct proc *kp;
        const char *wmesg;
        uint64_t wchan;
        endpoint_t endpt;
        mp = &mproc_tab[mslot];
        fp = &fproc_tab[mslot];
        kp = &proc_tab[NR_TASKS + mslot];
        /*
         * First cover all the cases that the process is not sleeping.  In
         * those cases, we need not return additional sleep information either.
         */
        if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
                return LSZOMB;
        if (mp->mp_flags & EXITING)
                return LSDEAD;
        if ((mp->mp_flags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
                return LSSTOP;
        if (proc_is_runnable(kp))
                return LSRUN;
        /*
         * The process is sleeping.  In that case, we must also figure out why,
         * and return an appropriate wchan value and human-readable wmesg text.
         *
         * The process can be blocked on either a known sleep state in PM or
         * VFS, or otherwise on IPC communication with another process, or
         * otherwise on a kernel RTS flag.  In each case, decide what to use as
         * wchan value and wmesg text, and whether the sleep is interruptible.
         *
         * The wchan value should be unique for the sleep reason.  We use its
         * lower eight bits to indicate a class:
         *   0x01 = kernel RTS block
         *   0xff = blocked on process
         * The upper bits are used for class-specific information.  The actual
         * value does not really matter, as long as it is nonzero and there is
         * no overlap between the different values.
         */
        wchan = 0;
        wmesg = NULL;

        /*
         * First see if the process is marked as blocked in the tables of PM or
         * VFS.  Such a block reason is always an interruptible sleep.  Note
         * that we do not use the kernel table at all in this case: each of the
         * three tables is consistent within itself, but not necessarily
         * consistent with any of the other tables, so we avoid internal
         * mismatches if we can.
         */
        if (mp->mp_flags & WAITING) {
                wchan = 0x102;
                wmesg = "wait";
        } else if (mp->mp_flags & SIGSUSPENDED) {
                wchan = 0x202;
                wmesg = "pause";
        } else if (fp->fpl_blocked_on != FP_BLOCKED_ON_NONE) {
                wchan = (fp->fpl_blocked_on << 8) | 0x03;

                switch (fp->fpl_blocked_on) {
                case FP_BLOCKED_ON_PIPE:
                        wmesg = "pipe";
                        break;
                case FP_BLOCKED_ON_FLOCK:
                        wmesg = "flock";
                        break;
                case FP_BLOCKED_ON_POPEN:
                        wmesg = "popen";
                        break;
                case FP_BLOCKED_ON_SELECT:
                        wmesg = "select";
                        break;
                case FP_BLOCKED_ON_CDEV:
                        /*
                         * Add the task (= character driver) endpoint to the
                         * wchan value, and use the driver's process name,
                         * without parentheses, as wmesg text.
                         */
                        wchan |= (uint64_t)fp->fpl_task << 16;
                        fill_wmesg(wmptr, wmsz, fp->fpl_task, FALSE /*ipc*/);
                        break;
                default:
                        /* A newly added flag we don't yet know about? */
                        wmesg = "???";
                        break;
                }
        }

        if (wchan != 0) {
                *flag |= L_SINTR;

                *wcptr = wchan;
                if (wmesg != NULL)      /* NULL means "already set" here */
                        strlcpy(wmptr, wmesg, wmsz);

                return LSSLEEP;
        }
        /*
         * See if the process is blocked on sending or receiving.  If not, then
         * use one of the kernel RTS flags as reason.
         */
        endpt = P_BLOCKEDON(kp);
        switch (endpt) {
        case MIB_PROC_NR:
                /* This is really just aesthetics. */
                wchan = 0x04;
                wmesg = "sysctl";
                break;
        case NONE:
                /*
                 * The process is not running, but also not blocked on IPC with
                 * another process.  This means it must be stopped on a kernel
                 * RTS flag.
                 */
                wchan = ((uint64_t)kp->p_rts_flags << 8) | 0x01;
                if (RTS_ISSET(kp, RTS_PROC_STOP))
                        wmesg = "kstop";
                else if (RTS_ISSET(kp, RTS_SIGNALED) ||
                    RTS_ISSET(kp, RTS_SIG_PENDING))
                        wmesg = "ksignal";
                else if (RTS_ISSET(kp, RTS_NO_PRIV))
                        wmesg = "knopriv";
                else if (RTS_ISSET(kp, RTS_PAGEFAULT) ||
                    RTS_ISSET(kp, RTS_VMREQTARGET))
                        wmesg = "fault";
                else if (RTS_ISSET(kp, RTS_NO_QUANTUM))
                        wmesg = "sched";
                else
                        wmesg = "kflag";
                break;
        case ANY:
                /*
                 * If the process is blocked receiving from ANY, mark it as
                 * being in an interruptible sleep.  This looks nicer, even
                 * though "interruptible" is not applicable to services at all.
                 */
                *flag |= L_SINTR;
                break;
        }
        /*
         * If at this point wchan is still zero, the process is blocked sending
         * or receiving.  Use a wchan value based on the target endpoint, and
         * use "(procname)" as wmesg text.
         */
        if (wchan == 0) {
                *wcptr = ((uint64_t)endpt << 8) | 0xff;
                fill_wmesg(wmptr, wmsz, endpt, TRUE /*ipc*/);
        } else {
                *wcptr = wchan;
                if (wmesg != NULL)      /* NULL means "already set" here */
                        strlcpy(wmptr, wmesg, wmsz);
        }

        return LSSLEEP;
}
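/*
 * Example decoding (illustrative): a process blocked in VFS on select gets
 * wchan = (FP_BLOCKED_ON_SELECT << 8) | 0x03 and wmesg "select"; a process
 * blocked sending to endpoint 7 gets wchan = (7 << 8) | 0xff and wmesg
 * "(name)".  Tools can thus recover the block class from (wchan & 0xff).
 */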
/*
 * Fill the part of a LWP structure that is common between kernel tasks and
 * user processes.  Also return a CPU estimate in 'estcpu', because we generate
 * the value as a side effect here, and the LWP structure has no estcpu field.
 */
static void
fill_lwp_common(struct kinfo_lwp * l, int kslot, uint32_t * estcpu)
{
        struct timeval tv;
        struct proc *kp;
        clock_t uptime;
        uint32_t hz;

        kp = &proc_tab[kslot];

        uptime = getticks();
        hz = sys_hz();
        /*
         * We use the process endpoint as the LWP ID.  Not only does this allow
         * users to obtain process endpoints with "ps -s" (thus replacing the
         * MINIX3 ps(1)'s "ps -E"), but if we ever do implement kernel threads,
         * this is probably still going to be accurate.
         */
        l->l_lid = kp->p_endpoint;
        /*
         * The time during which the process has not been swapped in or out is
         * not applicable for us, and thus, we set it to the time the process
         * has been running (in seconds).  This value is relevant mostly for
         * ps(1)'s CPU usage correction for processes that have just started.
         */
        if (kslot >= NR_TASKS)
                l->l_swtime = uptime - mproc_tab[kslot - NR_TASKS].mp_started;
        else
                l->l_swtime = uptime;
        l->l_swtime /= hz;
        /*
         * Sleep (dequeue) times are not maintained for kernel tasks, so
         * pretend they are never asleep (which is pretty accurate).
         */
        if (kslot < NR_TASKS)
                l->l_slptime = 0;
        else
                l->l_slptime = (uptime - kp->p_dequeued) / hz;
        l->l_priority = kp->p_priority;
        l->l_usrpri = kp->p_priority;
        l->l_cpuid = kp->p_cpu;
        ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
        l->l_rtime_sec = tv.tv_sec;
        l->l_rtime_usec = tv.tv_usec;
        /*
         * Obtain CPU usage percentages and estimates through library code
         * shared between the kernel and this service; see its source for
         * details.  We note that the produced estcpu value is rather different
         * from the one produced by NetBSD, but this should not be a problem.
         */
        l->l_pctcpu = cpuavg_getstats(&kp->p_cpuavg, &l->l_cpticks, estcpu,
            uptime, hz);
}
/*
 * Fill a LWP structure for a kernel task.  Each kernel task has its own LWP,
 * and all of them have negative PIDs.
 */
static void
fill_lwp_kern(struct kinfo_lwp * l, int kslot)
{
        uint32_t estcpu;
        memset(l, 0, sizeof(*l));

        l->l_flag = L_INMEM | L_SINTR | L_SYSTEM;
        l->l_stat = LSSLEEP;
        l->l_pid = kslot - NR_TASKS;

        /*
         * When showing LWP entries, ps(1) uses the process name rather than
         * the LWP name.  All kernel tasks are therefore shown as "[kernel]"
         * anyway.  We use the wmesg field to show the actual kernel task name.
         */
        l->l_wchan = ((uint64_t)(l->l_pid) << 8) | 0x00;
        strlcpy(l->l_wmesg, proc_tab[kslot].p_name, sizeof(l->l_wmesg));
        strlcpy(l->l_name, "kernel", sizeof(l->l_name));
        fill_lwp_common(l, kslot, &estcpu);
}
/*
 * Fill a LWP structure for a user process.
 */
static void
fill_lwp_user(struct kinfo_lwp * l, int mslot)
{
        struct mproc *mp;
        uint32_t estcpu;
        memset(l, 0, sizeof(*l));

        mp = &mproc_tab[mslot];

        l->l_flag = L_INMEM;
        l->l_stat = get_lwp_stat(mslot, &l->l_wchan, l->l_wmesg,
            sizeof(l->l_wmesg), &l->l_flag);
        l->l_pid = mp->mp_pid;
        strlcpy(l->l_name, mp->mp_name, sizeof(l->l_name));

        fill_lwp_common(l, NR_TASKS + mslot, &estcpu);
}
/*
 * Implementation of CTL_KERN KERN_LWP.
 */
ssize_t
mib_kern_lwp(struct mib_call * call, struct mib_node * node __unused,
    struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
        struct kinfo_lwp lwp;
        struct mproc *mp;
        size_t copysz;
        ssize_t off;
        pid_t pid;
        int r, elsz, elmax, kslot, mslot, last_mslot;
        if (call->call_namelen != 3)
                return EINVAL;

        pid = (pid_t)call->call_name[0];
        elsz = call->call_name[1];
        elmax = call->call_name[2];     /* redundant with the given oldlen.. */

        if (pid < -1 || elsz <= 0 || elmax < 0)
                return EINVAL;
        if (!update_tables())
                return EINVAL;
        off = 0;

        copysz = MIN((size_t)elsz, sizeof(lwp));
        /*
         * We model kernel tasks as LWP threads of the kernel (with PID 0).
         * Modeling the kernel tasks as processes with negative PIDs, like
         * ProcFS does, conflicts with the KERN_LWP API here: a PID of -1
         * indicates that the caller wants a full listing of LWPs.
         */
        if (pid <= 0) {
                for (kslot = 0; kslot < NR_TASKS; kslot++) {
                        if (mib_inrange(oldp, off) && elmax > 0) {
                                fill_lwp_kern(&lwp, kslot);
                                if ((r = mib_copyout(oldp, off, &lwp,
                                    copysz)) < 0)
                                        return r;
                                elmax--;
                        }
                        off += elsz;
                }

                /* No need to add extra space here: NR_TASKS is static. */
                if (pid == 0)
                        return off;
        }
        /*
         * With PID 0 out of the way: the user requested the LWP for either a
         * specific user process (pid > 0), or for all processes (pid < 0).
         */
        if (pid > 0) {
                if ((mslot = get_mslot(pid)) == NO_SLOT ||
                    (mproc_tab[mslot].mp_flags & (TRACE_ZOMBIE | ZOMBIE)))
                        return ESRCH;

                last_mslot = mslot;
        } else {
                mslot = 0;
                last_mslot = NR_PROCS - 1;
        }
        for (; mslot <= last_mslot; mslot++) {
                mp = &mproc_tab[mslot];

                if ((mp->mp_flags & (IN_USE | TRACE_ZOMBIE | ZOMBIE)) !=
                    IN_USE)
                        continue;

                if (mib_inrange(oldp, off) && elmax > 0) {
                        fill_lwp_user(&lwp, mslot);
                        if ((r = mib_copyout(oldp, off, &lwp, copysz)) < 0)
                                return r;
                        elmax--;
                }
                off += elsz;
        }

        if (oldp == NULL && pid < 0)
                off += EXTRA_PROCS * elsz;

        return off;
}
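/*
 * Illustrative userland usage (a sketch, not part of this service): the three
 * name components checked above are the PID filter, the element size, and the
 * element count, following CTL_KERN and KERN_LWP.
 *
 *      struct kinfo_lwp lwp[64];
 *      size_t len = sizeof(lwp);
 *      int mib[5] = { CTL_KERN, KERN_LWP, -1, sizeof(lwp[0]), 64 };
 *      if (sysctl(mib, 5, lwp, &len, NULL, 0) == 0)
 *              printf("%zu LWPs\n", len / sizeof(lwp[0]));
 */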
/*
 * Fill the part of a process structure that is common between kernel tasks and
 * user processes.
 */
static void
fill_proc2_common(struct kinfo_proc2 * p, int kslot)
{
        struct vm_usage_info vui;
        struct kinfo_lwp l;
        struct timeval tv;
        struct proc *kp;

        kp = &proc_tab[kslot];

        /*
         * Much of the information in the LWP structure also ends up in the
         * process structure.  In order to avoid duplication of some important
         * code, first generate LWP values and then copy them into the process
         * structure.
         */
        memset(&l, 0, sizeof(l));
        fill_lwp_common(&l, kslot, &p->p_estcpu);
        /* Obtain memory usage information from VM.  Ignore failures. */
        memset(&vui, 0, sizeof(vui));
        (void)vm_info_usage(kp->p_endpoint, &vui);
        ticks_to_timeval(&tv, kp->p_user_time + kp->p_sys_time);
        p->p_rtime_sec = l.l_rtime_sec;
        p->p_rtime_usec = l.l_rtime_usec;
        p->p_cpticks = l.l_cpticks;
        p->p_pctcpu = l.l_pctcpu;
        p->p_swtime = l.l_swtime;
        p->p_slptime = l.l_slptime;
        p->p_uticks = kp->p_user_time;
        p->p_sticks = kp->p_sys_time;
        /* TODO: p->p_iticks */
        ticks_to_timeval(&tv, kp->p_user_time);
        p->p_uutime_sec = tv.tv_sec;
        p->p_uutime_usec = tv.tv_usec;
        ticks_to_timeval(&tv, kp->p_sys_time);
        p->p_ustime_sec = tv.tv_sec;
        p->p_ustime_usec = tv.tv_usec;
        p->p_priority = l.l_priority;
        p->p_usrpri = l.l_usrpri;
        p->p_vm_rssize = howmany(vui.vui_total, PAGE_SIZE);
        p->p_vm_vsize = howmany(vui.vui_virtual, PAGE_SIZE);
        p->p_vm_msize = howmany(vui.vui_mvirtual, PAGE_SIZE);
        p->p_uru_maxrss = vui.vui_maxrss;
        p->p_uru_minflt = vui.vui_minflt;
        p->p_uru_majflt = vui.vui_majflt;
        p->p_cpuid = l.l_cpuid;
}
/*
 * Fill a process structure for the kernel pseudo-process (with PID 0).
 */
static void
fill_proc2_kern(struct kinfo_proc2 * p)
{
        memset(p, 0, sizeof(*p));

        p->p_flag = L_INMEM | L_SYSTEM | L_SINTR;
        p->p_pid = 0;
        p->p_stat = LSSLEEP;

        /* Use the KERNEL task wchan, for consistency between ps and top. */
        p->p_wchan = ((uint64_t)KERNEL << 8) | 0x00;
        strlcpy(p->p_wmesg, "kernel", sizeof(p->p_wmesg));

        strlcpy(p->p_comm, "kernel", sizeof(p->p_comm));
        p->p_realflag = P_INMEM | P_SYSTEM | P_SINTR;
        p->p_realstat = SACTIVE;
        p->p_nlwps = NR_TASKS;

        /*
         * By using the KERNEL slot here, the kernel process will get a proper
         * CPU usage average.
         */
        fill_proc2_common(p, KERNEL + NR_TASKS);
}
/*
 * Fill a process structure for a user process.
 */
static void
fill_proc2_user(struct kinfo_proc2 * p, int mslot)
{
        struct mproc *mp;
        struct fproc_light *fp;
        struct timeval tv;
        time_t boottime;
        dev_t tty;
        int i, r, kslot, zombie;
        memset(p, 0, sizeof(*p));

        if ((r = getuptime(NULL, NULL, &boottime)) != OK)
                panic("getuptime failed: %d", r);
        kslot = NR_TASKS + mslot;
        mp = &mproc_tab[mslot];
        fp = &fproc_tab[mslot];

        zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
        tty = (!zombie) ? fp->fpl_tty : NO_DEV;
        if (tty != NO_DEV)
                p->p_eflag |= EPROC_CTTY;
        if (mp->mp_pid == mp->mp_procgrp)       /* TODO: job control support */
                p->p_eflag |= EPROC_SLEADER;

        p->p_exitsig = SIGCHLD; /* TODO */
        p->p_flag = L_INMEM;
        if (mp->mp_flags & TAINTED)
                p->p_flag |= P_SUGID;
        if (mp->mp_tracer != NO_TRACER)
                p->p_flag |= P_TRACED;
        if (tty != NO_DEV)
                p->p_flag |= P_CONTROLT;
        p->p_pid = mp->mp_pid;
        if (mp->mp_parent >= 0 && mp->mp_parent < NR_PROCS)
                p->p_ppid = mproc_tab[mp->mp_parent].mp_pid;
        p->p_sid = mp->mp_procgrp;      /* TODO: job control support */
        p->p__pgid = mp->mp_procgrp;
        p->p_tpgid = (tty != NO_DEV) ? mp->mp_procgrp : 0;
        p->p_uid = mp->mp_effuid;
        p->p_ruid = mp->mp_realuid;
        p->p_gid = mp->mp_effgid;
        p->p_rgid = mp->mp_realgid;
        p->p_ngroups = MIN(mp->mp_ngroups, KI_NGROUPS);
        for (i = 0; i < p->p_ngroups; i++)
                p->p_groups[i] = mp->mp_sgroups[i];
        memcpy(&p->p_siglist, &mp->mp_sigpending, sizeof(p->p_siglist));
        memcpy(&p->p_sigmask, &mp->mp_sigmask, sizeof(p->p_sigmask));
        memcpy(&p->p_sigcatch, &mp->mp_catch, sizeof(p->p_sigcatch));
        memcpy(&p->p_sigignore, &mp->mp_ignore, sizeof(p->p_sigignore));
        p->p_nice = mp->mp_nice + NZERO;
        strlcpy(p->p_comm, mp->mp_name, sizeof(p->p_comm));
        ticks_to_timeval(&tv, mp->mp_started);
        p->p_ustart_sec = boottime + tv.tv_sec;
        p->p_ustart_usec = tv.tv_usec;

        /* TODO: other rusage fields */
        ticks_to_timeval(&tv, mp->mp_child_utime + mp->mp_child_stime);
        p->p_uctime_sec = tv.tv_sec;
        p->p_uctime_usec = tv.tv_usec;

        p->p_realflag = p->p_flag;
        p->p_nlwps = (zombie) ? 0 : 1;
        p->p_svuid = mp->mp_svuid;
        p->p_svgid = mp->mp_svgid;
        p->p_stat = get_lwp_stat(mslot, &p->p_wchan, p->p_wmesg,
            sizeof(p->p_wmesg), &p->p_flag);
        switch (p->p_stat) {
        case LSRUN:
                p->p_realstat = SACTIVE;
                p->p_nrlwps = 1;
                break;
        case LSSLEEP:
                p->p_realstat = SACTIVE;
                if (p->p_flag & L_SINTR)
                        p->p_realflag |= P_SINTR;
                break;
        case LSSTOP:
                p->p_realstat = SSTOP;
                break;
        case LSZOMB:
                p->p_realstat = SZOMB;
                break;
        case LSDEAD:
                p->p_stat = LSZOMB;     /* ps(1) STAT does not know LSDEAD */
                p->p_realstat = SDEAD;
                break;
        }
        fill_proc2_common(p, kslot);
}
/*
 * Implementation of CTL_KERN KERN_PROC2.
 */
ssize_t
mib_kern_proc2(struct mib_call * call, struct mib_node * node __unused,
    struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
        struct kinfo_proc2 proc2;
        struct mproc *mp;
        size_t copysz;
        ssize_t off;
        dev_t tty;
        int r, req, arg, elsz, elmax, kmatch, zombie, mslot;
        if (call->call_namelen != 4)
                return EINVAL;

        req = call->call_name[0];
        arg = call->call_name[1];
        elsz = call->call_name[2];
        elmax = call->call_name[3];     /* redundant with the given oldlen.. */
        /*
         * The kernel is special, in that it does not have a slot in the PM or
         * VFS tables.  As such, it is dealt with separately.  While checking
         * arguments, we might as well check whether the kernel is matched.
         */
        switch (req) {
        case KERN_PROC_ALL:
                kmatch = TRUE;
                break;
        case KERN_PROC_PID:
        case KERN_PROC_SESSION:
        case KERN_PROC_PGRP:
        case KERN_PROC_UID:
        case KERN_PROC_RUID:
        case KERN_PROC_GID:
        case KERN_PROC_RGID:
                kmatch = FALSE;
                break;
        case KERN_PROC_TTY:
                kmatch = ((dev_t)arg == KERN_PROC_TTY_NODEV);
                break;
        default:
                return EINVAL;
        }
        if (elsz <= 0 || elmax < 0)
                return EINVAL;

        if (!update_tables())
                return EINVAL;

        off = 0;

        copysz = MIN((size_t)elsz, sizeof(proc2));
        if (kmatch) {
                if (mib_inrange(oldp, off) && elmax > 0) {
                        fill_proc2_kern(&proc2);
                        if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
                                return r;
                        elmax--;
                }
                off += elsz;
        }
        for (mslot = 0; mslot < NR_PROCS; mslot++) {
                mp = &mproc_tab[mslot];

                if (!(mp->mp_flags & IN_USE))
                        continue;
                switch (req) {
                case KERN_PROC_PID:
                        if ((pid_t)arg != mp->mp_pid)
                                continue;
                        break;
                case KERN_PROC_SESSION: /* TODO: job control support */
                case KERN_PROC_PGRP:
                        if ((pid_t)arg != mp->mp_procgrp)
                                continue;
                        break;
                case KERN_PROC_TTY:
                        if ((dev_t)arg == KERN_PROC_TTY_REVOKE)
                                continue;       /* TODO: revoke(2) support */
                        /* Do not access the fproc_tab slot of zombies. */
                        zombie = (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE));
                        tty = (!zombie) ? fproc_tab[mslot].fpl_tty : NO_DEV;
                        if ((dev_t)arg == KERN_PROC_TTY_NODEV) {
                                if (tty != NO_DEV)
                                        continue;
                        } else if ((dev_t)arg == NO_DEV || (dev_t)arg != tty)
                                continue;
                        break;
                case KERN_PROC_UID:
                        if ((uid_t)arg != mp->mp_effuid)
                                continue;
                        break;
                case KERN_PROC_RUID:
                        if ((uid_t)arg != mp->mp_realuid)
                                continue;
                        break;
                case KERN_PROC_GID:
                        if ((gid_t)arg != mp->mp_effgid)
                                continue;
                        break;
                case KERN_PROC_RGID:
                        if ((gid_t)arg != mp->mp_realgid)
                                continue;
                        break;
                }
                if (mib_inrange(oldp, off) && elmax > 0) {
                        fill_proc2_user(&proc2, mslot);
                        if ((r = mib_copyout(oldp, off, &proc2, copysz)) < 0)
                                return r;
                        elmax--;
                }
                off += elsz;
        }

        if (oldp == NULL && req != KERN_PROC_PID)
                off += EXTRA_PROCS * elsz;

        return off;
}
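/*
 * Illustrative userland usage (a sketch): retrieve all processes, with the
 * four name components checked above: the filter request, its argument, the
 * element size, and the element count.
 *
 *      struct kinfo_proc2 procs[128];
 *      size_t len = sizeof(procs);
 *      int mib[6] = { CTL_KERN, KERN_PROC2, KERN_PROC_ALL, 0,
 *          sizeof(procs[0]), 128 };
 *      if (sysctl(mib, 6, procs, &len, NULL, 0) == 0)
 *              printf("%zu processes\n", len / sizeof(procs[0]));
 */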
/*
 * Implementation of CTL_KERN KERN_PROC_ARGS.
 */
ssize_t
mib_kern_proc_args(struct mib_call * call, struct mib_node * node __unused,
    struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
        char vbuf[PAGE_SIZE], sbuf[PAGE_SIZE], obuf[PAGE_SIZE];
        struct ps_strings pss;
        struct mproc *mp;
        char *buf, *p, *q, *pptr;
        vir_bytes vaddr, vpage, spage, paddr, ppage;
        size_t max, off, olen, oleft, oldlen, bytes, pleft;
        unsigned int copybudget;
        pid_t pid;
        int r, req, mslot, count, aborted, ended;
        if (call->call_namelen != 2)
                return EINVAL;

        pid = call->call_name[0];
        req = call->call_name[1];
        switch (req) {
        case KERN_PROC_ARGV:
        case KERN_PROC_ENV:
        case KERN_PROC_NARGV:
        case KERN_PROC_NENV:
                break;
        default:
                return EOPNOTSUPP;
        }
        if (!update_tables())
                return EINVAL;

        if ((mslot = get_mslot(pid)) == NO_SLOT)
                return ESRCH;

        mp = &mproc_tab[mslot];
        if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
                return ESRCH;
        /* We can return the count field size without copying in any data. */
        if (oldp == NULL && (req == KERN_PROC_NARGV || req == KERN_PROC_NENV))
                return sizeof(count);
        if (sys_datacopy(mp->mp_endpoint,
            mp->mp_frame_addr + mp->mp_frame_len - sizeof(pss),
            SELF, (vir_bytes)&pss, sizeof(pss)) != OK)
                return EINVAL;
        /*
         * Determine the upper size limit of the requested data.  Not only may
         * the size never exceed ARG_MAX, it may also not exceed the frame
         * length as given in its original exec call.  In fact, the frame
         * length should be substantially larger: all strings for both the
         * arguments and the environment are in there, along with other stuff,
         * and there must be no overlap between strings.  It is possible that
         * the application called setproctitle(3), in which case the ps_strings
         * pointers refer to data outside the frame altogether.  However, this
         * data should not exceed 2048 bytes, and we cover this by rounding up
         * the frame length to a multiple of the page size.  Anyhow, NetBSD
         * blindly returns ARG_MAX when asked for a size estimate, so with this
         * maximum we are already quite a bit more accurate.
         */
        max = roundup(MIN(mp->mp_frame_len, ARG_MAX), PAGE_SIZE);
        switch (req) {
        case KERN_PROC_NARGV:
                count = pss.ps_nargvstr;
                return mib_copyout(oldp, 0, &count, sizeof(count));
        case KERN_PROC_NENV:
                count = pss.ps_nenvstr;
                return mib_copyout(oldp, 0, &count, sizeof(count));
        case KERN_PROC_ARGV:
                vaddr = (vir_bytes)pss.ps_argvstr;
                count = pss.ps_nargvstr;
                break;
        case KERN_PROC_ENV:
                vaddr = (vir_bytes)pss.ps_envstr;
                count = pss.ps_nenvstr;
                break;
        }
        /*
         * Go through the strings.  Copy in entire, machine-aligned pages at
         * once, in the hope that all data is stored consecutively, which it
         * should be: we expect that the vector is followed by the strings, and
         * that the strings are stored in order of vector reference.  We keep
         * up to two pages with copied-in data: one for the vector, and
         * optionally one for string data.  In addition, we keep one page with
         * data to be copied out, so that we do not cause a lot of copy
         * overhead for short strings.
         *
         * We stop whenever any of the following conditions are met:
         * - copying in data from the target process fails for any reason;
         * - we have processed the last index ('count') into the vector;
         * - the current vector element is a NULL pointer;
         * - the requested number of output bytes ('oldlen') has been reached;
         * - the maximum number of output bytes ('max') has been reached;
         * - the number of page copy-ins exceeds an estimated threshold;
         * - copying out data fails for any reason (we then return the error).
         *
         * We limit the number of page copy-ins because otherwise a rogue
         * process could create an argument vector consisting of only two-byte
         * strings that all span two pages, causing us to copy up to 1GB of
         * data with the current ARG_MAX value of 256K.  No reasonable vector
         * should cause more than (ARG_MAX / PAGE_SIZE) page copies for
         * strings; we are nice enough to allow twice that.  Vector copies do
         * not count, as they are linear anyway.
         *
         * Unlike every other sysctl(2) call, we are supposed to truncate the
         * resulting size (the returned 'oldlen') to the requested size (the
         * given 'oldlen') *and* return the resulting size, rather than ENOMEM
         * and the real size.  Unfortunately, libkvm actually relies on this.
         *
         * Generally speaking, upon failure we just return a truncated result.
         * In case of truncation, the data we copy out need not be null
         * terminated.  It is up to userland to process the data correctly.
         */
        if (trunc_page(vaddr) == 0 || vaddr % sizeof(char *) != 0)
                return EINVAL;
        off = 0;
        olen = 0;
        aborted = FALSE;
        vpage = 0;
        spage = 0;

        oldlen = mib_getoldlen(oldp);
        if (oldlen > max)
                oldlen = max;

        copybudget = (ARG_MAX / PAGE_SIZE) * 2;
        while (count > 0 && off + olen < oldlen && !aborted) {
                /*
                 * Start by fetching the page containing the current vector
                 * element, if needed.  We could limit the fetch to the vector
                 * size, but our hope is that for the simple cases, the strings
                 * are on the remainder of the same page, so we save a copy
                 * call.  TODO: since the strings should follow the vector, we
                 * could start the copy at the base of the vector.
                 */
                if (trunc_page(vaddr) != vpage) {
                        vpage = trunc_page(vaddr);
                        if (sys_datacopy(mp->mp_endpoint, vpage, SELF,
                            (vir_bytes)vbuf, PAGE_SIZE) != OK)
                                break;
                }
                /* Get the current vector element, pointing to a string. */
                memcpy(&pptr, &vbuf[vaddr - vpage], sizeof(pptr));
                paddr = (vir_bytes)pptr;
                ppage = trunc_page(paddr);
                if (ppage == 0)
                        break;
                /* Fetch the string itself, one page at a time at most. */
                do {
                        /*
                         * See if the string pointer falls inside either the
                         * vector page or the previously fetched string page
                         * (if any).  If not, fetch a string page.
                         */
                        if (ppage == vpage) {
                                buf = vbuf;
                        } else if (ppage == spage) {
                                buf = sbuf;
                        } else {
                                if (--copybudget == 0) {
                                        aborted = TRUE;
                                        break;
                                }
                                spage = ppage;
                                if (sys_datacopy(mp->mp_endpoint, spage, SELF,
                                    (vir_bytes)sbuf, PAGE_SIZE) != OK) {
                                        aborted = TRUE;
                                        break;
                                }
                                buf = sbuf;
                        }
                        /*
                         * We now have a string fragment in a buffer.  See if
                         * the string is null terminated.  If not, all the data
                         * up to the buffer end is part of the string, and the
                         * string continues on the next page.
                         */
                        p = &buf[paddr - ppage];
                        pleft = PAGE_SIZE - (paddr - ppage);
                        if ((q = memchr(p, '\0', pleft)) != NULL) {
                                bytes = (size_t)(q - p + 1);
                                assert(bytes <= pleft);
                                ended = TRUE;
                        } else {
                                bytes = pleft;
                                ended = FALSE;
                        }
                        /* Limit the result to the requested length. */
                        if (off + olen + bytes > oldlen)
                                bytes = oldlen - off - olen;
                        /*
                         * Add 'bytes' bytes from string pointer 'p' to the
                         * output buffer, copying out its contents to userland
                         * if it has filled up.
                         */
                        if (olen + bytes > sizeof(obuf)) {
                                oleft = sizeof(obuf) - olen;
                                memcpy(&obuf[olen], p, oleft);

                                if ((r = mib_copyout(oldp, off, obuf,
                                    sizeof(obuf))) < 0)
                                        return r;
                                off += sizeof(obuf);
                                olen = 0;

                                p += oleft;
                                bytes -= oleft;
                        }
                        memcpy(&obuf[olen], p, bytes);
                        olen += bytes;
                        /*
                         * Continue as long as we have not yet found the string
                         * end, and we have not yet filled the output buffer.
                         */
                        if (!ended) {
                                paddr += bytes;
                                assert(trunc_page(paddr) == paddr);
                        }
                } while (!ended && off + olen < oldlen);
                vaddr += sizeof(char *);
                count--;
        }
        /* Copy out any remainder of the output buffer. */
        if (olen > 0) {
                if ((r = mib_copyout(oldp, off, obuf, olen)) < 0)
                        return r;

                off += olen;
        }
        assert(off <= oldlen);

        return off;
}
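/*
 * Illustrative userland usage (a sketch): the argument vector of a PID is
 * returned as consecutive null-terminated strings, truncated to the given
 * length as described above.
 *
 *      char args[4096];
 *      size_t len = sizeof(args);
 *      int mib[4] = { CTL_KERN, KERN_PROC_ARGS, pid, KERN_PROC_ARGV };
 *      if (sysctl(mib, 4, args, &len, NULL, 0) == 0)
 *              printf("first argument: %s\n", args);
 */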
/*
 * Implementation of CTL_MINIX MINIX_PROC PROC_LIST.
 */
ssize_t
mib_minix_proc_list(struct mib_call * call __unused,
    struct mib_node * node __unused, struct mib_oldp * oldp,
    struct mib_newp * newp __unused)
{
        struct minix_proc_list mpl[NR_PROCS];
        struct minix_proc_list *mplp;
        struct mproc *mp;
        int mslot;
        if (!update_tables())
                return EINVAL;

        memset(&mpl, 0, sizeof(mpl));
        mplp = mpl;
        mp = mproc_tab;

        for (mslot = 0; mslot < NR_PROCS; mslot++, mplp++, mp++) {
                if (!(mp->mp_flags & IN_USE) || mp->mp_pid <= 0)
                        continue;
                mplp->mpl_flags = MPLF_IN_USE;
                if (mp->mp_flags & (TRACE_ZOMBIE | ZOMBIE))
                        mplp->mpl_flags |= MPLF_ZOMBIE;
                mplp->mpl_pid = mp->mp_pid;
                mplp->mpl_uid = mp->mp_effuid;
                mplp->mpl_gid = mp->mp_effgid;
        }
        return mib_copyout(oldp, 0, &mpl, sizeof(mpl));
}
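/*
 * Illustrative userland usage (a sketch): the full slot table is copied out
 * in one go, so the caller provides room for NR_PROCS entries and skips the
 * ones without MPLF_IN_USE set.
 *
 *      struct minix_proc_list mpl[NR_PROCS];
 *      size_t len = sizeof(mpl);
 *      int i, mib[3] = { CTL_MINIX, MINIX_PROC, PROC_LIST };
 *      if (sysctl(mib, 3, mpl, &len, NULL, 0) == 0)
 *              for (i = 0; i < NR_PROCS; i++)
 *                      if (mpl[i].mpl_flags & MPLF_IN_USE)
 *                              printf("pid %d\n", mpl[i].mpl_pid);
 */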
/*
 * Implementation of CTL_MINIX MINIX_PROC PROC_DATA.
 */
ssize_t
mib_minix_proc_data(struct mib_call * call, struct mib_node * node __unused,
    struct mib_oldp * oldp, struct mib_newp * newp __unused)
{
        struct minix_proc_data mpd;
        struct proc *kp;
        int kslot, mslot = 0;
        unsigned int mflags;
        pid_t pid;
        /*
         * It is currently only possible to retrieve the process data for a
         * particular PID, which must be given as the last name component.
         */
        if (call->call_namelen != 1)
                return EINVAL;

        pid = (pid_t)call->call_name[0];
        if (!update_tables())
                return EINVAL;
        /*
         * Unlike the CTL_KERN nodes, we use the ProcFS semantics here: if the
         * given PID is negative, it is a kernel task; otherwise, it identifies
         * a user process.  A request for PID 0 will result in ESRCH.
         */
        if (pid < 0) {
                if (pid < -NR_TASKS)
                        return ESRCH;

                kslot = pid + NR_TASKS;
                assert(kslot < NR_TASKS);
        } else {
                if ((mslot = get_mslot(pid)) == NO_SLOT)
                        return ESRCH;

                kslot = NR_TASKS + mslot;
        }
        kp = &proc_tab[kslot];

        mflags = (pid > 0) ? mproc_tab[mslot].mp_flags : 0;
        memset(&mpd, 0, sizeof(mpd));
        mpd.mpd_endpoint = kp->p_endpoint;
        if (mflags & PRIV_PROC)
                mpd.mpd_flags |= MPDF_SYSTEM;
        if (mflags & (TRACE_ZOMBIE | ZOMBIE))
                mpd.mpd_flags |= MPDF_ZOMBIE;
        else if ((mflags & TRACE_STOPPED) || RTS_ISSET(kp, RTS_P_STOP))
                mpd.mpd_flags |= MPDF_STOPPED;
        else if (proc_is_runnable(kp))
                mpd.mpd_flags |= MPDF_RUNNABLE;
        mpd.mpd_blocked_on = P_BLOCKEDON(kp);
        mpd.mpd_priority = kp->p_priority;
        mpd.mpd_user_time = kp->p_user_time;
        mpd.mpd_sys_time = kp->p_sys_time;
        mpd.mpd_cycles = kp->p_cycles;
        mpd.mpd_kipc_cycles = kp->p_kipc_cycles;
        mpd.mpd_kcall_cycles = kp->p_kcall_cycles;
        if (kslot >= NR_TASKS) {
                mpd.mpd_nice = mproc_tab[mslot].mp_nice;
                strlcpy(mpd.mpd_name, mproc_tab[mslot].mp_name,
                    sizeof(mpd.mpd_name));
        } else
                strlcpy(mpd.mpd_name, kp->p_name, sizeof(mpd.mpd_name));
        return mib_copyout(oldp, 0, &mpd, sizeof(mpd));
}
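/*
 * Illustrative userland usage (a sketch): per the ProcFS semantics above, a
 * negative PID names a kernel task and a positive PID a user process.
 *
 *      struct minix_proc_data mpd;
 *      size_t len = sizeof(mpd);
 *      int mib[4] = { CTL_MINIX, MINIX_PROC, PROC_DATA, pid };
 *      if (sysctl(mib, 4, &mpd, &len, NULL, 0) == 0)
 *              printf("endpoint %d\n", mpd.mpd_endpoint);
 */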