2 * Copyright (c) 1984 through 2008, William LeFebvre
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
16 * * Neither the name of William LeFebvre nor the names of other
17 * contributors may be used to endorse or promote products derived from
18 * this software without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 * top - a top users display for Unix
36 * SYNOPSIS: PowerPC running AIX 5.1 or higher
39 * This is the machine-dependent module for AIX 5.1 and higher (may work on
40 * older releases too). It is currently only tested on PowerPC
45 * CFLAGS: -DORDER -DHAVE_GETOPT -DHAVE_STRERROR -DMAXPROCS=10240
49 * AUTHOR: Joep Vesseur <joep@fwi.uva.nl>
51 * PATCHES: Antoine Tabary <tabary@bruyeres.cea.fr>, Dan Nelson <dnelson@allantgroup.com>
54 #define MAXPROCS 10240
65 #include <sys/types.h>
67 #include <sys/sysinfo.h>
68 #include <sys/sysconfig.h>
71 #include <libperfstat.h>
/*
 * Per-process accessors.  Each takes a pointer to a process entry.
 *
 *   PROCRESS(p)  - (pi_trss + pi_drss) * 4
 *   PROCSIZE(p)  - (pi_tsize/1024 + pi_dvm) * 4
 *   PROCTIME(pi) - user plus system CPU time from pi_ru, whole seconds only
 *
 * format_next_process treats the two size expressions as kilobytes.
 * Every use of the argument is parenthesized so that any pointer
 * expression (e.g. "base + i" or "&entry") may be passed safely.
 */
#define PROCRESS(p) (((p)->pi_trss + (p)->pi_drss)*4)
#define PROCSIZE(p) (((p)->pi_tsize/1024+(p)->pi_dvm)*4)
#define PROCTIME(pi) ((pi)->pi_ru.ru_utime.tv_sec + (pi)->pi_ru.ru_stime.tv_sec)
83 * structure definition taken from 'monitor' by Jussi Maki (jmaki@hut.fi)
86 uint n0
,n1
,n2
,n3
,n4
,n5
,n6
,n7
,n8
;
88 uint badmem
; /* this is used in RS/6000 model 220 */
91 uint numperm
; /* this seems to keep other than text and data segment
92 usage; name taken from /usr/lpp/bos/samples/vmtune.c */
93 uint totalvmem
,freevmem
;
94 uint n15
, n16
, n17
, n18
, n19
;
97 #define KMEM "/dev/kmem"
99 /* Indices in the nlist array */
105 static struct nlist nlst
[] = {
106 { "avenrun", 0, 0, 0, 0, 0 }, /* 0 */
107 { "sysinfo", 0, 0, 0, 0, 0 }, /* 1 */
108 { "vmker", 0, 0, 0, 0, 0 }, /* 2 */
109 { "v", 0, 0, 0, 0, 0 }, /* 3 */
110 { NULL
, 0, 0, 0, 0, 0 }
115 /* get_process_info returns handle. definition is here */
118 struct procentry64
**next_proc
;
123 * These definitions control the format of the per-process area
125 static char header
[] =
126 " PID X PRI NICE SIZE RES STATE TIME WCPU CPU COMMAND";
127 /* 01234567 -- field to fill in starts at header+7 */
128 #define UNAME_START 7
130 #define Proc_format \
131 "%6d %-8.8s %3d %4d %5d%c %4d%c %-5s %6s %5.2f%% %5.2f%% %.14s%s"
134 /* these are for detailing the process states */
135 int process_states
[9];
136 char *procstatenames
[] = {
137 " none, ", " sleeping, ", " state2, ", " runnable, ",
138 " idle, ", " zombie, ", " stopped, ", " running, ", " swapped, ",
142 /* these are for detailing the cpu states */
143 int cpu_states
[CPU_NTIMES
];
144 char *cpustatenames
[] = {
145 "idle", "user", "kernel", "wait",
149 /* these are for detailing the memory statistics */
150 long memory_stats
[7];
151 char *memorynames
[] = {
152 "K total, ", "K buf, ", "K sys, ", "K free", NULL
160 char *swapnames
[] = {
161 "K total, ", "K free", NULL
166 char *state_abbrev
[] = {
167 NULL
, NULL
, NULL
, NULL
, "idle", "zomb", "stop", "run", "swap"
170 /* sorting orders. first is default */
171 char *ordernames
[] = {
172 "cpu", "size", "res", "time", "pri", NULL
175 /* compare routines */
176 int compare_cpu(), compare_size(), compare_res(), compare_time(),
179 int (*proc_compares
[])() = {
188 /* useful externals */
189 long percentages(int cnt
, int *out
, long *new, long *old
, long *diffs
);
190 char *format_time(long seconds
);
194 int kmem
; /* file descriptor */
196 /* offsets in kernel */
197 static unsigned long avenrun_offset
;
198 static unsigned long sysinfo_offset
;
199 static unsigned long vmker_offset
;
200 static unsigned long v_offset
;
203 /* used for calculating cpu state percentages */
204 static long cp_time
[CPU_NTIMES
];
205 static long cp_old
[CPU_NTIMES
];
206 static long cp_diff
[CPU_NTIMES
];
208 /* the runqueue length is a cumulative value. keep old value */
212 struct kernvars v_info
; /* to determine nprocs */
213 int nprocs
; /* maximum nr of procs in proctab */
214 int ncpus
; /* nr of cpus installed */
216 struct procentry64
*p_info
; /* needed for vm and ru info */
217 struct procentry64
**pref
; /* processes selected for display */
218 struct timeval64
*cpu_proc
, *old_cpu_proc
; /* total cpu used by each process */
219 int pref_len
; /* number of processes selected */
221 /* needed to calculate WCPU */
222 unsigned long curtime
;
224 /* needed to calculate CPU */
225 struct timeval curtimeval
;
226 struct timeval lasttimeval
;
229 int getkval(unsigned long offset
, caddr_t ptr
, int size
, char *refstr
);
232 void *xmalloc(long size
)
234 void *p
= malloc(size
);
237 fprintf(stderr
,"Could not allocate %ld bytes: %s\n", size
, strerror(errno
));
244 * Initialize globals, get kernel offsets and stuff...
246 int machine_init(statics
)
247 struct statics
*statics
;
250 if ((kmem
= open(KMEM
, O_RDONLY
)) == -1) {
255 /* get kernel symbol offsets */
256 if (knlist(nlst
, 4, sizeof(struct nlist
)) != 0) {
260 avenrun_offset
= nlst
[X_AVENRUN
].n_value
;
261 sysinfo_offset
= nlst
[X_SYSINFO
].n_value
;
262 vmker_offset
= nlst
[X_VMKER
].n_value
;
263 v_offset
= nlst
[X_V
].n_value
;
265 getkval(v_offset
, (caddr_t
)&v_info
, sizeof v_info
, "v");
267 sysconfig(SYS_GETPARMS
, &v_info
, sizeof v_info
);
269 ncpus
= v_info
.v_ncpus
; /* number of cpus */
271 /* procentry64 is 4912 bytes, and PROCMASK(PIDMAX) is 262144. That'd
272 require 1.2gb for the p_info array, which is way overkill. Raise
273 MAXPROCS if you have more than 10240 active processes in the system.
277 nprocs
= PROCMASK(PIDMAX
);
282 cpu_proc
= (struct timeval64
*)xmalloc(PROCMASK(PIDMAX
) * sizeof (struct timeval64
));
283 old_cpu_proc
= (struct timeval64
*)xmalloc(PROCMASK(PIDMAX
) * sizeof (struct timeval64
));
284 p_info
= (struct procentry64
*)xmalloc(nprocs
* sizeof (struct procentry64
));
285 pref
= (struct procentry64
**)xmalloc(nprocs
* sizeof (struct procentry64
*));
287 statics
->procstate_names
= procstatenames
;
288 statics
->cpustate_names
= cpustatenames
;
289 statics
->memory_names
= memorynames
;
290 statics
->swap_names
= swapnames
;
291 statics
->order_names
= ordernames
;
296 char *format_header(uname_field
)
297 register char *uname_field
;
301 ptr
= header
+ UNAME_START
;
302 while (*uname_field
!= '\0')
304 *ptr
++ = *uname_field
++;
313 void get_system_info(si
)
314 struct system_info
*si
;
317 long long load_avg
[3];
318 struct sysinfo64 s_info
;
321 perfstat_memory_total_t m_info1
;
322 perfstat_cpu_total_t s_info1
;
328 /* get the load average array */
329 getkval(avenrun_offset
, (caddr_t
)load_avg
, sizeof load_avg
, "avenrun");
331 /* get the sysinfo structure */
332 getkval(sysinfo_offset
, (caddr_t
)&s_info
, sizeof s_info
, "sysinfo64");
334 /* get vmker structure */
335 getkval(vmker_offset
, (caddr_t
)&m_info
, sizeof m_info
, "vmker");
338 perfstat_cpu_total(NULL
, &s_info1
, sizeof s_info1
, 1);
341 perfstat_memory_total(NULL
, &m_info1
, sizeof m_info1
, 1);
346 /* convert load averages to doubles */
347 for (i
= 0; i
< 3; i
++)
348 si
->load_avg
[i
] = (double)load_avg
[i
]/65536.0;
350 /* calculate cpu state in percentages */
351 for (i
= 0; i
< CPU_NTIMES
; i
++) {
352 cp_old
[i
] = cp_time
[i
];
353 cp_time
[i
] = s_info
.cpu
[i
];
354 cp_diff
[i
] = cp_time
[i
] - cp_old
[i
];
359 /* convert load averages to doubles */
360 for (i
= 0; i
< 3; i
++)
361 si
->load_avg
[i
] = (double)s_info1
.loadavg
[i
]/(1<<SBITS
);
363 /* calculate cpu state in percentages */
364 for (i
= 0; i
< CPU_NTIMES
; i
++) {
365 cp_old
[i
] = cp_time
[i
];
366 cp_time
[i
] = ( i
==CPU_IDLE
?s_info1
.idle
:
367 i
==CPU_USER
?s_info1
.user
:
368 i
==CPU_KERNEL
?s_info1
.sys
:
369 i
==CPU_WAIT
?s_info1
.wait
:0);
370 cp_diff
[i
] = cp_time
[i
] - cp_old
[i
];
374 for (i
= 0; i
< CPU_NTIMES
; i
++) {
375 cpu_states
[i
] = 1000 * cp_diff
[i
] / total
;
378 /* calculate memory statistics, scale 4K pages */
380 #define PAGE_TO_MB(a) ((a)*4/1024)
381 memory_stats
[M_TOTAL
] = PAGE_TO_MB(m_info
.totalmem
+m_info
.totalvmem
);
382 memory_stats
[M_REAL
] = PAGE_TO_MB(m_info
.totalmem
);
383 memory_stats
[M_REALFREE
] = PAGE_TO_MB(m_info
.freemem
);
384 memory_stats
[M_BUFFERS
] = PAGE_TO_MB(m_info
.numperm
);
385 swap_stats
[M_VIRTUAL
] = PAGE_TO_MB(m_info
.totalvmem
);
386 swap_stats
[M_VIRTFREE
] = PAGE_TO_MB(m_info
.freevmem
);
388 #define PAGE_TO_KB(a) ((a)*4)
389 memory_stats
[M_REAL
] = PAGE_TO_KB(m_info1
.real_total
);
390 memory_stats
[M_BUFFERS
] = PAGE_TO_KB(m_info1
.numperm
);
391 #ifdef _AIXVERSION_520
392 memory_stats
[M_SYSTEM
] = PAGE_TO_KB(m_info1
.real_system
);
394 memory_stats
[M_REALFREE
] = PAGE_TO_KB(m_info1
.real_free
);
395 swap_stats
[M_VIRTUAL
] = PAGE_TO_KB(m_info1
.pgsp_total
);
396 swap_stats
[M_VIRTFREE
] = PAGE_TO_KB(m_info1
.pgsp_free
);
399 /* runnable processes */
401 process_states
[0] = s_info
.runque
- old_runque
;
402 old_runque
= s_info
.runque
;
404 process_states
[0] = s_info1
.runque
- old_runque
;
405 old_runque
= s_info1
.runque
;
408 si
->cpustates
= cpu_states
;
409 si
->memory
= memory_stats
;
410 si
->swap
= swap_stats
;
413 static struct handle handle
;
415 caddr_t
get_process_info(si
, sel
, compare_index
)
416 struct system_info
*si
;
417 struct process_select
*sel
;
421 int active_procs
= 0, total_procs
= 0;
422 struct procentry64
*pp
, **p_pref
= pref
;
423 struct timeval64
*cpu_proc_temp
;
425 pid_t procsindex
= 0;
427 si
->procstates
= process_states
;
430 lasttimeval
= curtimeval
;
431 gettimeofday(&curtimeval
, NULL
);
433 /* get the procentry64 structures of all running processes */
434 nproc
= getprocs64(p_info
, sizeof (struct procentry64
), NULL
, 0,
435 &procsindex
, nprocs
);
437 perror("getprocs64");
441 /* the swapper has no cmd-line attached */
442 strcpy(p_info
[0].pi_comm
, "swapper");
444 if (lasttimeval
.tv_sec
)
446 timediff
= (curtimeval
.tv_sec
- lasttimeval
.tv_sec
) +
447 1.0*(curtimeval
.tv_usec
- lasttimeval
.tv_usec
) / uS_PER_SECOND
;
450 /* The pi_cpu value is wildly inaccurate. The maximum value is 120, but
451 when the scheduling timer fires, the field is zeroed for all
452 processes and ramps up over a short period of time. Instead of using
453 this weird number, manually calculate an accurate value from the
454 rusage data. Store this run's rusage in cpu_proc[pid], and subtract
457 for (pp
= p_info
, i
= 0; i
< nproc
; pp
++, i
++) {
458 pid_t pid
= PROCMASK(pp
->pi_pid
);
460 /* total system and user time into cpu_proc */
461 cpu_proc
[pid
] = pp
->pi_ru
.ru_utime
;
462 cpu_proc
[pid
].tv_sec
+= pp
->pi_ru
.ru_stime
.tv_sec
;
463 cpu_proc
[pid
].tv_usec
+= pp
->pi_ru
.ru_stime
.tv_usec
;
464 if (cpu_proc
[pid
].tv_usec
> NS_PER_SEC
) {
465 cpu_proc
[pid
].tv_sec
++;
466 cpu_proc
[pid
].tv_usec
-= NS_PER_SEC
;
469 /* If this process was around during the previous update, calculate
470 a true %CPU. If not, convert the kernel's cpu value from its
471 120-max value to a 10000-max one.
473 if (old_cpu_proc
[pid
].tv_sec
== 0 && old_cpu_proc
[pid
].tv_usec
== 0)
474 pp
->pi_cpu
= pp
->pi_cpu
* 10000 / 120;
476 pp
->pi_cpu
= ((cpu_proc
[pid
].tv_sec
- old_cpu_proc
[pid
].tv_sec
) +
477 1.0*(cpu_proc
[pid
].tv_usec
- old_cpu_proc
[pid
].tv_usec
) / NS_PER_SEC
) / timediff
* 10000;
480 /* remember our current values as old_cpu_proc, and zero out cpu_proc
481 for the next update cycle */
482 memset(old_cpu_proc
, 0, sizeof(struct timeval64
) * nprocs
);
483 cpu_proc_temp
= cpu_proc
;
484 cpu_proc
= old_cpu_proc
;
485 old_cpu_proc
= cpu_proc_temp
;
487 memset(process_states
, 0, sizeof process_states
);
489 /* build a list of pointers to processes to show. */
490 for (pp
= p_info
, i
= 0; i
< nproc
; pp
++, i
++) {
492 /* AIX marks all runnable processes as ACTIVE. We want to know
493 which processes are sleeping, so check used cpu and adjust status
496 if (pp
->pi_state
== SACTIVE
&& pp
->pi_cpu
== 0)
499 if (pp
->pi_state
&& (sel
->system
|| ((pp
->pi_flags
& SKPROC
) == 0))) {
501 process_states
[pp
->pi_state
]++;
502 if ( (pp
->pi_state
!= SZOMB
) &&
503 (sel
->idle
|| pp
->pi_cpu
!= 0 || (pp
->pi_state
== SACTIVE
))
504 && (sel
->uid
== -1 || pp
->pi_uid
== (uid_t
)sel
->uid
)) {
511 /* the pref array now holds pointers to the procentry64 structures in
512 * the p_info array that were selected for display
515 /* sort if requested */
516 if ( proc_compares
[compare_index
] != NULL
)
517 qsort((char *)pref
, active_procs
, sizeof (struct procentry64
*),
518 proc_compares
[compare_index
]);
520 si
->last_pid
= -1; /* no way to figure out last used pid */
521 si
->p_total
= total_procs
;
522 si
->p_active
= pref_len
= active_procs
;
524 handle
.next_proc
= pref
;
525 handle
.remaining
= active_procs
;
527 return((caddr_t
)&handle
);
530 char fmt
[128]; /* static area where result is built */
/*
 * weighted_cpu(pp) - value for the WCPU column, using the definition of
 * %CPU from ps(1): CPU seconds consumed (PROCTIME) expressed as a
 * percentage of the time elapsed since the process started
 * (curtime - pi_start).  Evaluates to 0.0 when PROCTIME is zero.
 *
 * Only the macro argument is dereferenced, so the expansion is correct
 * whatever name the caller gives its process-entry pointer.
 */
#define weighted_cpu(pp) (PROCTIME(pp) == 0 ? 0.0 : \
    (((PROCTIME(pp)*100.0)/(curtime-(pp)->pi_start))))
536 char *format_next_process(handle
, get_userid
)
538 char *(*get_userid
)();
540 register struct handle
*hp
;
541 register struct procentry64
*pi
;
543 int proc_size
, proc_ress
;
544 char size_unit
= 'K';
545 char ress_unit
= 'K';
547 hp
= (struct handle
*)handle
;
548 if (hp
->remaining
== 0) { /* safe guard */
552 pi
= *(hp
->next_proc
++);
555 cpu_time
= PROCTIME(pi
);
557 /* we display sizes up to 10M in KiloBytes, beyond 10M in MegaBytes */
558 if ((proc_size
= (pi
->pi_tsize
/1024+pi
->pi_dvm
)*4) > 10240) {
562 if ((proc_ress
= (pi
->pi_trss
+ pi
->pi_drss
)*4) > 10240) {
567 sprintf(fmt
, Proc_format
,
568 pi
->pi_pid
, /* PID */
569 (*get_userid
)(pi
->pi_uid
), /* login name */
570 pi
->pi_nice
, /* fixed or vari */
571 getpriority(PRIO_PROCESS
, pi
->pi_pid
),
572 proc_size
, /* size */
573 size_unit
, /* K or M */
574 proc_ress
, /* resident */
575 ress_unit
, /* K or M */
576 state_abbrev
[pi
->pi_state
], /* process state */
577 format_time(cpu_time
), /* time used */
578 weighted_cpu(pi
), /* WCPU */
579 pi
->pi_cpu
/ 100.0, /* CPU */
580 printable(pi
->pi_comm
), /* COMM */
581 (pi
->pi_flags
& SKPROC
) == 0 ? "" : " (sys)" /* kernel process? */
588 * getkval(offset, ptr, size, refstr) - get a value out of the kernel.
589 * "offset" is the byte offset into the kernel for the desired value,
590 * "ptr" points to a buffer into which the value is retrieved,
591 * "size" is the size of the buffer (and the object to retrieve),
592 * "refstr" is a reference string used when printing error messages,
593 * if "refstr" starts with a '!', then a failure on read will not
594 * be fatal (this may seem like a silly way to do things, but I
595 * really didn't want the overhead of another argument).
598 int getkval(offset
, ptr
, size
, refstr
)
599 unsigned long offset
;
606 /* reads above 2Gb are done by seeking to offset%2Gb, and supplying
607 * 1 (opposed to 0) as fourth parameter to readx (see 'man kmem')
609 if (offset
> 1<<31) {
611 offset
&= 0x7fffffff;
614 if (lseek(kmem
, offset
, SEEK_SET
) != offset
) {
615 fprintf(stderr
, "top: lseek failed\n");
619 if (readx(kmem
, ptr
, size
, upper_2gb
) != size
) {
623 fprintf(stderr
, "top: kvm_read for %s: %s\n", refstr
,
633 /* comparison routine for qsort */
635 * The following code is taken from the solaris module and adjusted
/*
 * Sort-key fragments for the qsort comparison routines.
 *
 * Each fragment assigns `result` for one key and is an `if` without a
 * body: the statement that follows it at the point of use runs only when
 * that key compares equal, so fragments can be chained to form a
 * multi-key comparison.  `result`, `pi1` and `pi2` must be in scope where
 * a fragment is expanded.
 *
 * The CPU-time and size keys use a three-way comparison (-1, 0 or 1)
 * instead of storing a raw difference.  NOTE(review): `result` is declared
 * by the callers, outside this block; if it is an int, a raw difference of
 * wide values could be truncated and wrongly reported as a tie.
 */
#define ORDERKEY_PCTCPU \
    if ((result = pi2->pi_cpu - pi1->pi_cpu) == 0)

#define ORDERKEY_CPTICKS \
    if ((result = (PROCTIME(pi2) > PROCTIME(pi1)) - \
                  (PROCTIME(pi2) < PROCTIME(pi1))) == 0)

#define ORDERKEY_STATE \
    if ((result = sorted_state[pi2->pi_state] \
                - sorted_state[pi1->pi_state]) == 0)

/* Nice values directly reflect the process' priority, and are always >0 ;-) */
#define ORDERKEY_PRIO \
    if ((result = pi1->pi_nice - pi2->pi_nice) == 0)
#define ORDERKEY_RSSIZE \
    if ((result = (PROCRESS(pi2) > PROCRESS(pi1)) - \
                  (PROCRESS(pi2) < PROCRESS(pi1))) == 0)
#define ORDERKEY_MEM \
    if ((result = (PROCSIZE(pi2) > PROCSIZE(pi1)) - \
                  (PROCSIZE(pi2) < PROCSIZE(pi1))) == 0)
657 static unsigned char sorted_state
[] =
670 /* compare_cpu - the comparison function for sorting by cpu percentage */
673 compare_cpu(ppi1
, ppi2
)
674 struct procentry64
**ppi1
;
675 struct procentry64
**ppi2
;
677 register struct procentry64
*pi1
= *ppi1
, *pi2
= *ppi2
;
692 /* compare_size - the comparison function for sorting by total memory usage */
695 compare_size(ppi1
, ppi2
)
696 struct procentry64
**ppi1
;
697 struct procentry64
**ppi2
;
699 register struct procentry64
*pi1
= *ppi1
, *pi2
= *ppi2
;
714 /* compare_res - the comparison function for sorting by resident set size */
717 compare_res(ppi1
, ppi2
)
718 struct procentry64
**ppi1
;
719 struct procentry64
**ppi2
;
721 register struct procentry64
*pi1
= *ppi1
, *pi2
= *ppi2
;
736 /* compare_time - the comparison function for sorting by total cpu time */
739 compare_time(ppi1
, ppi2
)
740 struct procentry64
**ppi1
;
741 struct procentry64
**ppi2
;
743 register struct procentry64
*pi1
= *ppi1
, *pi2
= *ppi2
;
758 /* compare_prio - the comparison function for sorting by priority */
761 compare_prio(ppi1
, ppi2
)
762 struct procentry64
**ppi1
;
763 struct procentry64
**ppi2
;
765 register struct procentry64
*pi1
= *ppi1
, *pi2
= *ppi2
;
783 register struct procentry64
**prefp
= pref
;
784 register int cnt
= pref_len
;
787 if ((*prefp
)->pi_pid
== pid
)
788 return (*prefp
)->pi_uid
;