Merge remote-tracking branch 'origin/master'
[unleashed/lotheac.git] / usr / src / lib / madv / common / madv.c
blobd61bcbfb39630c727fc33e64225ca326ea5a07ed
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <strings.h>
29 #include <sys/shm.h>
30 #include <sys/mman.h>
31 #include <fcntl.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <errno.h>
35 #include <sys/types.h>
36 #include <sys/stat.h>
37 #include <sys/auxv.h>
38 #include <stdarg.h>
39 #include <syslog.h>
40 #include <sys/param.h>
41 #include <sys/sysmacros.h>
42 #include <procfs.h>
43 #include <dlfcn.h>
44 #include <assert.h>
45 #include <libintl.h>
46 #include <locale.h>
/*
 * Pattern matcher applied to exec names and arguments.  Defined outside
 * this file (presumably libgen's gmatch - confirm against the link line).
 */
extern int gmatch(const char *s, const char *p);

/*
 * NOTE(review): presumably asks the toolchain to run __madvmain() as an
 * initializer when this object is loaded - confirm for the compiler in use.
 */
#pragma init(__madvmain)

/* Stream for diagnostics; while NULL, madverr() sends them to syslog. */
static FILE *errfp = NULL;
/* Prefix placed at the start of every diagnostic message. */
static const char *madvident = "madv.so.1";
/* Filled in from sysconf(_SC_PAGESIZE) by __madvmain(). */
static int pagesize;

/*
 * Advice chosen for each option; -1 means "not set".  A non-negative
 * value is an index into legal_madvice[] and is what gets handed to
 * memcntl(MC_ADVISE).
 */
static int advice_all = -1;
static int advice_heap = -1;
static int advice_shm = -1;
static int advice_ism = -1;
static int advice_dism = -1;
static int advice_map = -1;
static int advice_mapshared = -1;
static int advice_mapprivate = -1;
static int advice_mapanon = -1;

/* environment variables */

#define	ENV_MADV		"MADV"
#define	ENV_MADVCFGFILE		"MADVCFGFILE"
#define	ENV_MADVERRFILE		"MADVERRFILE"

/* config file */

#define	DEF_MADVCFGFILE		"/etc/madv.conf"
/*
 * Longest config line accepted: an exec path plus room for the argument
 * pattern and advice options.  Parenthesized so the macro stays a single
 * operand wherever it is used in an expression.
 */
#define	MAXLINELEN		(MAXPATHLEN + 64)
#define	CFGDELIMITER		':'
#define	ARGDELIMITER		' '

/*
 * avoid malloc which causes certain applications to crash
 *
 * lbuf holds the config line being parsed; pbuf holds the current
 * working directory used to resolve relative exec names.
 */
static char		lbuf[MAXLINELEN];
static char		pbuf[MAXPATHLEN];

#ifdef MADVDEBUG
#define	ENV_MADVDEBUG		"MADVDEBUG"
/*
 * Debug trace: x is a bit mask tested against madvdebug, y is a complete
 * parenthesized fprintf() argument list, e.g.
 *	MADVPRINT(2, (stderr, "value %d\n", v));
 * Wrapped in do/while (0) so the macro behaves as one statement, even as
 * the body of an unbraced if/else.  Callers supply the trailing ';'.
 */
#define	MADVPRINT(x, y)	\
	do { \
		if (madvdebug & (x)) \
			(void) fprintf y; \
	} while (0)

/* Bit mask of enabled trace levels; set from $MADVDEBUG in __madvmain(). */
static int madvdebug = 0;
#else
/* Tracing compiled out: the macro expands to nothing. */
#define	MADVPRINT(x, y)
#endif

/*
 * advice options
 *
 * optenum names the slots of legal_optstr[].  getsubopt() reports a
 * matched keyword by its position in the table, and advice_opts()
 * switches on that position using these constants, so each keyword is
 * tied to its constant explicitly below.
 */
enum optenum {
	OPT_MADV,
	OPT_HEAP,
	OPT_SHM,
	OPT_ISM,
	OPT_DISM,
	OPT_MAP,
	OPT_MAPSHARED,
	OPT_MAPPRIVATE,
	OPT_MAPANON
};

static char *legal_optstr[] = {
	[OPT_MADV]		= "madv",
	[OPT_HEAP]		= "heap",
	[OPT_SHM]		= "shm",
	[OPT_ISM]		= "ism",
	[OPT_DISM]		= "dism",
	[OPT_MAP]		= "map",
	[OPT_MAPSHARED]		= "mapshared",
	[OPT_MAPPRIVATE]	= "mapprivate",
	[OPT_MAPANON]		= "mapanon",
	NULL			/* terminator expected by getsubopt() */
};

/*
 * Advice values
 * These need to correspond to the order of the MADV_ flags in mman.h
 * since the position infers the value for the flag.
 *
 * strtoadv() returns the index of the entry that matched, and that
 * index is what is later passed as the MC_ADVISE argument to memcntl().
 * The "..._NOT_SUPPORTED!" entries appear to exist only to occupy their
 * slots so that the entries after them keep the right index.
 */
static char *legal_madvice[] = {
	"normal",
	"random",
	"sequential",
	"willneed_NOT_SUPPORTED!",
	"dontneed_NOT_SUPPORTED!",
	"free_NOT_SUPPORTED!",
	"access_default",
	"access_lwp",
	"access_many",
	NULL
};

/*
 * Message domain passed to dgettext(); this fallback applies only when
 * the build has not already defined TEXT_DOMAIN.
 */
#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN	"SYS_TEST"
#endif

/*
 * Emit one printf-style diagnostic.  With a stream the message is written
 * there; with a NULL stream it is logged through syslog at LOG_ERR.
 */
/*PRINTFLIKE2*/
static void
madverr(FILE *fp, char *fmt, ...)
{
	va_list		args;

	va_start(args, fmt);
	if (fp == NULL) {
		vsyslog(LOG_ERR, fmt, args);
	} else {
		(void) vfprintf(fp, fmt, args);
	}
	va_end(args);
}

157 * Return the pointer to the fully-resolved path name of the process's
158 * executable file obtained from the AT_SUN_EXECNAME aux vector entry.
160 static const char *
161 mygetexecname(void)
163 const char *execname = NULL;
164 static auxv_t auxb;
167 * The first time through, read the initial aux vector that was
168 * passed to the process at exec(2). Only do this once.
170 int fd = open("/proc/self/auxv", O_RDONLY);
172 if (fd >= 0) {
173 while (read(fd, &auxb, sizeof (auxv_t)) == sizeof (auxv_t)) {
174 if (auxb.a_type == AT_SUN_EXECNAME) {
175 execname = auxb.a_un.a_ptr;
176 break;
179 (void) close(fd);
181 return (execname);
/*
 * Return the process's current brk base and size.
 *
 * Reads a pstatus_t from /proc/self/status and stores pr_brkbase and
 * pr_brksize through base and size.
 *
 * Returns 0 on success, otherwise a non-zero errno value; on failure
 * *base and *size are not written.
 */
static int
mygetbrk(uintptr_t *base, size_t *size)
{
	pstatus_t	ps;
	ssize_t		nread;
	int		fd;
	int		rc;

	fd = open("/proc/self/status", O_RDONLY);
	if (fd < 0)
		return (errno);

	nread = read(fd, &ps, sizeof (ps));
	if (nread == (ssize_t)sizeof (ps)) {
		*base = ps.pr_brkbase;
		*size = ps.pr_brksize;
		rc = 0;
	} else if (nread < 0) {
		rc = errno;
	} else {
		/*
		 * A short read does not set errno.  Returning it here could
		 * yield 0 and make the caller use outputs that were never
		 * filled in, so report a definite error instead.
		 */
		rc = EIO;
	}

	(void) close(fd);
	return (rc);
}

/*
 * Check if exec name matches cfgname found in madv cfg file.
 *
 * execname	exec name of this process; may be NULL when it could not
 *		be determined, in which case nothing matches.
 * cfgname	pattern from the config file.  A pattern starting with '/'
 *		is matched against the full path, anything else against
 *		the base name of execname.
 * cwd		current directory with a trailing '/'.  A relative
 *		execname is appended to it in place, so it must point to
 *		a buffer of at least MAXPATHLEN bytes (the caller passes
 *		pbuf) and the caller must truncate it again afterwards.
 *
 * Returns the gmatch() result (non-zero on match), or 0 when no
 * comparison can be made.
 */
static int
fnmatch(const char *execname, char *cfgname, char *cwd)
{
	const char	*ename;
	int		rc;

	if (execname == NULL)
		return (0);

	/* cfgname should not have a '/' unless it begins with one */
	if (cfgname[0] == '/') {
		/*
		 * if execname does not begin with a '/', prepend the
		 * current directory.
		 */
		if (execname[0] != '/') {
			/* refuse to run past the end of the cwd buffer */
			if (strlen(cwd) + strlen(execname) >= MAXPATHLEN)
				return (0);
			ename = (const char *)strcat(cwd, execname);
		} else {
			ename = execname;
		}
	} else {	/* simple cfg name */
		ename = strrchr(execname, '/');
		if (ename != NULL) {
			/* execname is a path name - get the base name */
			ename++;
		} else {
			ename = execname;
		}
	}
	rc = gmatch(ename, cfgname);
	MADVPRINT(2, (stderr, "gmatch: %s %s %s %d\n",
	    cfgname, ename, execname, rc));

	return (rc);
}

/*
 * Check if string matches any of exec arguments.
 *
 * Reads this process's psinfo_t from /proc/self/psinfo and runs gmatch()
 * with str as the pattern against each argument after the first one
 * (index 0 is skipped).  pr_argv is used directly as a char ** in this
 * process.
 *
 * Returns the first non-zero gmatch() result, or 0 when nothing matches
 * or psinfo cannot be opened or read (both failures are reported through
 * madverr()).
 */
static int
argmatch(char *str)
{
	psinfo_t	info;
	char		**args;
	int		matched = 0;
	int		idx;
	int		procfd;

	procfd = open("/proc/self/psinfo", O_RDONLY);
	if (procfd < 0) {
		madverr(errfp, dgettext(TEXT_DOMAIN,
		    "%s: /proc/self/psinfo open failed [%s]\n"),
		    madvident, strerror(errno));
		return (matched);
	}

	if (read(procfd, &info, sizeof (info)) != sizeof (info)) {
		madverr(errfp, dgettext(TEXT_DOMAIN,
		    "%s: /proc/self/psinfo read failed [%s]\n"),
		    madvident, strerror(errno));
	} else {
		args = (char **)info.pr_argv;
		MADVPRINT(2, (stderr, "argmatch: %s ", str));
		for (idx = 1; idx < info.pr_argc; idx++) {
			matched = gmatch(args[idx], str);
			if (matched) {
				MADVPRINT(2, (stderr, "%s ", args[idx]));
				break;
			}
		}
		MADVPRINT(2, (stderr, "%d\n", matched));
	}

	(void) close(procfd);
	return (matched);
}

/*
 * Return non-zero when str consists solely of newlines, spaces and tabs
 * (including the case of a zero-length string), 0 otherwise.
 */
static int
empty(char *str)
{
	const char	*p;

	for (p = str; *p == '\n' || *p == ' ' || *p == '\t'; p++)
		;
	return (*p == '\0');
}

294 static int
295 strtoadv(char *advstr)
297 char *dummy, *locstr = advstr;
299 return (getsubopt(&locstr, legal_madvice, &dummy));
302 static void
303 advice_opts(char *optstr, const char *execname, char *cfgfile, int lineno)
305 char *value;
306 int opt;
307 int advice = 0;
309 while (*optstr != '\0') {
310 opt = getsubopt(&optstr, legal_optstr, &value);
311 if (opt < 0) {
312 madverr(errfp, dgettext(TEXT_DOMAIN,
313 "%s: invalid advice option (%s)"
314 " for %s - cfgfile: %s, line: %d\n"),
315 madvident, value, execname, cfgfile, lineno);
316 break;
317 } else if (!value) {
318 madverr(errfp, dgettext(TEXT_DOMAIN,
319 "%s: option missing advice"
320 " for %s - cfgfile: %s, line: %d\n"),
321 madvident, execname, cfgfile, lineno);
322 break;
324 advice = strtoadv(value);
325 if (advice < 0) {
326 madverr(errfp, dgettext(TEXT_DOMAIN,
327 "%s: invalid advice specified (%s)"
328 " for %s - cfgfile: %s, line: %d\n"),
329 madvident, value, execname, cfgfile, lineno);
330 break;
332 switch (opt) {
333 case OPT_MADV:
334 advice_all = advice;
335 break;
336 case OPT_HEAP:
337 if (advice_heap < 0) {
338 advice_heap = advice;
339 } else {
340 madverr(errfp, dgettext(TEXT_DOMAIN,
341 "%s: duplicate advice specified "
342 "(%s) for %s - cfgfile: %s, line: %d\n"),
343 madvident, value, execname, cfgfile,
344 lineno);
346 break;
347 case OPT_SHM:
348 if (advice_shm < 0) {
349 advice_shm = advice;
350 } else {
351 madverr(errfp, dgettext(TEXT_DOMAIN,
352 "%s: duplicate advice specified "
353 "(%s) for %s - cfgfile: %s, line: %d\n"),
354 madvident, value, execname, cfgfile,
355 lineno);
357 break;
358 case OPT_ISM:
359 if (advice_ism < 0) {
360 advice_ism = advice;
361 } else {
362 madverr(errfp, dgettext(TEXT_DOMAIN,
363 "%s: duplicate advice specified "
364 "(%s) for %s - cfgfile: %s, line: %d\n"),
365 madvident, value, execname, cfgfile,
366 lineno);
368 break;
369 case OPT_DISM:
370 if (advice_dism < 0) {
371 advice_dism = advice;
372 } else {
373 madverr(errfp, dgettext(TEXT_DOMAIN,
374 "%s: duplicate advice specified "
375 "(%s) for %s - cfgfile: %s, line: %d\n"),
376 madvident, value, execname, cfgfile,
377 lineno);
379 break;
380 case OPT_MAP:
381 if (advice_map < 0) {
382 advice_map = advice;
383 } else {
384 madverr(errfp, dgettext(TEXT_DOMAIN,
385 "%s: duplicate advice specified "
386 "(%s) for %s - cfgfile: %s, line: %d\n"),
387 madvident, value, execname, cfgfile,
388 lineno);
390 break;
391 case OPT_MAPSHARED:
392 if (advice_mapshared < 0) {
393 advice_mapshared = advice;
394 } else {
395 madverr(errfp, dgettext(TEXT_DOMAIN,
396 "%s: duplicate advice specified "
397 "(%s) for %s - cfgfile: %s, line: %d\n"),
398 madvident, value, execname, cfgfile,
399 lineno);
401 break;
402 case OPT_MAPPRIVATE:
403 if (advice_mapprivate < 0) {
404 advice_mapprivate = advice;
405 } else {
406 madverr(errfp, dgettext(TEXT_DOMAIN,
407 "%s: duplicate advice specified "
408 "(%s) for %s - cfgfile: %s, line: %d\n"),
409 madvident, value, execname, cfgfile,
410 lineno);
412 break;
413 case OPT_MAPANON:
414 if (advice_mapanon < 0) {
415 advice_mapanon = advice;
416 } else {
417 madverr(errfp, dgettext(TEXT_DOMAIN,
418 "%s: duplicate advice specified "
419 "(%s) for %s - cfgfile: %s, line: %d\n"),
420 madvident, value, execname, cfgfile,
421 lineno);
423 break;
424 default:
425 madverr(errfp, dgettext(TEXT_DOMAIN,
426 "%s: invalid advice option (%s)"
427 " for %s - cfgfile: %s, line: %d\n"),
428 madvident, value, execname, cfgfile, lineno);
429 break;
434 static void
435 __madvmain()
437 char *cfgfile, *errfile;
438 FILE *fp = NULL;
439 const char *execname;
440 char *cwd;
441 int cwdlen;
442 char *tok, *tokadv, *tokarg;
443 char *str, *envadv;
444 int lineno = 0;
445 int advice;
446 uintptr_t brkbase, brkend;
447 size_t brksize;
448 int rc;
449 char *locale;
452 * If a private error file is indicated then set the locale
453 * for error messages for the duration of this routine.
454 * Error messages destined for syslog should not be translated
455 * and thus come from the default C locale.
457 if ((errfile = getenv(ENV_MADVERRFILE)) != NULL) {
458 errfp = fopen(errfile, "aF");
459 if (errfp) {
460 locale = setlocale(LC_MESSAGES, "");
461 } else {
462 madverr(NULL, dgettext(TEXT_DOMAIN,
463 "%s: cannot open error file: %s [%s]\n"),
464 madvident, errfile, strerror(errno));
468 #ifdef MADVDEBUG
469 if (str = getenv(ENV_MADVDEBUG))
470 madvdebug = atoi(str);
471 #endif
473 if (envadv = getenv(ENV_MADV)) {
474 if ((advice = strtoadv(envadv)) >= 0)
475 advice_all = advice;
476 else
477 madverr(errfp, dgettext(TEXT_DOMAIN,
478 "%s: invalid advice specified: MADV=%s\n"),
479 madvident, envadv);
483 * Open specified cfg file or default one.
485 if (cfgfile = getenv(ENV_MADVCFGFILE)) {
486 fp = fopen(cfgfile, "rF");
487 if (!fp) {
488 madverr(errfp, dgettext(TEXT_DOMAIN,
489 "%s: cannot open configuration file: %s [%s]\n"),
490 madvident, cfgfile, strerror(errno));
492 } else {
493 cfgfile = DEF_MADVCFGFILE;
494 fp = fopen(cfgfile, "rF");
497 if (fp) {
498 execname = mygetexecname();
500 cwd = getcwd(pbuf, MAXPATHLEN);
501 if (!cwd)
502 return;
504 cwd = strcat(cwd, "/");
505 cwdlen = strlen(cwd);
507 while (fgets(lbuf, MAXLINELEN, fp)) {
508 lineno++;
511 * Make sure line wasn't truncated.
513 if (strlen(lbuf) >= MAXLINELEN - 1) {
514 madverr(errfp, dgettext(TEXT_DOMAIN,
515 "%s: invalid entry, "
516 "line too long - cfgfile:"
517 " %s, line: %d\n"),
518 madvident, cfgfile, lineno);
519 continue;
522 if (empty(lbuf))
523 continue;
526 * Get advice options.
527 * Parse right to left in case delimiter is in name.
529 if (!(tokadv = strrchr(lbuf, CFGDELIMITER))) {
530 madverr(errfp, dgettext(TEXT_DOMAIN,
531 "%s: no delimiter specified - cfgfile:"
532 " %s, line: %d\n"),
533 madvident, cfgfile, lineno);
534 continue;
536 *tokadv++ = '\0';
539 * Remove newline from end of advice options.
541 if (str = strrchr(tokadv, '\n'))
542 *str = '\0';
545 * Get optional argument string.
547 if (tokarg = strrchr(lbuf, ARGDELIMITER)) {
548 *tokarg++ = '\0';
552 * Compare exec name.
554 tok = lbuf;
555 if (!fnmatch(execname, tok, cwd)) {
556 tokadv = tokarg = NULL;
557 cwd[cwdlen] = '\0';
558 continue;
562 * Compare arguments if argument string specified.
564 if (tokarg &&
565 !empty(tokarg) &&
566 !argmatch(tokarg)) {
567 tokadv = tokarg = NULL;
568 cwd[cwdlen] = '\0';
569 continue;
573 * Parse advice options.
574 * If empty, any advice from ENV_MADV is reset.
576 if (empty(tokadv)) {
577 advice_all = -1;
578 } else {
579 advice_opts(tokadv, execname, cfgfile, lineno);
581 break;
583 (void) fclose(fp);
587 * Pagesize needed for proper aligning by brk interpose.
589 pagesize = sysconf(_SC_PAGESIZE);
592 * Apply global advice if set.
593 * Specific options in the cfgfile take precedence.
595 if (advice_all >= 0) {
596 if (advice_heap < 0)
597 advice_heap = advice_all;
598 if (advice_shm < 0)
599 advice_shm = advice_all;
600 if (advice_map < 0)
601 advice_map = advice_all;
604 MADVPRINT(2, (stderr, "advice_all %d\n", advice_all));
605 MADVPRINT(2, (stderr, "advice_heap %d\n", advice_heap));
606 MADVPRINT(2, (stderr, "advice_shm %d\n", advice_shm));
607 MADVPRINT(2, (stderr, "advice_ism %d\n", advice_ism));
608 MADVPRINT(2, (stderr, "advice_dism %d\n", advice_dism));
609 MADVPRINT(2, (stderr, "advice_map %d\n", advice_map));
610 MADVPRINT(2, (stderr, "advice_mapshared %d\n", advice_mapshared));
611 MADVPRINT(2, (stderr, "advice_mapprivate %d\n", advice_mapprivate));
612 MADVPRINT(2, (stderr, "advice_mapanon %d\n", advice_mapanon));
615 * If heap advice is specified, apply it to the existing heap.
616 * As the heap grows the kernel applies the advice automatically
617 * to new portions of the heap.
619 if (advice_heap >= 0) {
620 if (rc = mygetbrk(&brkbase, &brksize)) {
621 madverr(errfp, dgettext(TEXT_DOMAIN,
622 "%s: /proc/self/status read failed [%s]\n"),
623 madvident, strerror(rc));
624 } else {
625 MADVPRINT(4, (stderr, "brkbase 0x%x brksize 0x%x\n",
626 brkbase, brksize));
628 * Align start address for memcntl and apply advice
629 * on full pages of heap. Create a page of heap if
630 * it does not already exist.
632 brkend = roundup(brkbase+brksize, pagesize);
633 brkbase = roundup(brkbase, pagesize);
634 brksize = brkend - brkbase;
635 if (brksize < pagesize) {
636 if (sbrk(pagesize) == (void *)-1) {
637 madverr(errfp, dgettext(TEXT_DOMAIN,
638 "%s: sbrk failed [%s]\n"),
639 madvident, strerror(errno));
640 goto out;
642 brksize = pagesize;
644 MADVPRINT(1, (stderr, "heap advice: 0x%x 0x%x %d\n",
645 brkbase, brksize, advice_heap));
646 if (memcntl((caddr_t)brkbase, brksize, MC_ADVISE,
647 (caddr_t)(intptr_t)advice_heap, 0, 0) < 0) {
648 madverr(errfp, dgettext(TEXT_DOMAIN,
649 "%s: memcntl() failed [%s]: heap advice\n"),
650 madvident, strerror(errno));
654 out:
655 if (errfp) {
656 (void) fclose(errfp);
657 (void) setlocale(LC_MESSAGES, locale);
658 } else {
659 /* close log file: no-op if nothing logged to syslog */
660 closelog();
666 * shmat interpose
668 void *
669 shmat(int shmid, const void *shmaddr, int shmflag)
671 static caddr_t (*shmatfunc)() = NULL;
672 void *result;
673 int advice = -1;
674 struct shmid_ds mds;
675 #ifdef MADVDEBUG
676 int rc;
677 #endif
679 if (!shmatfunc) {
680 shmatfunc = (caddr_t (*)()) dlsym(RTLD_NEXT, "shmat");
681 assert(shmatfunc);
684 result = shmatfunc(shmid, shmaddr, shmflag);
687 * Options ism, dism take precedence over option shm.
689 if (advice_ism >= 0 && (shmflag & SHM_SHARE_MMU)) {
690 advice = advice_ism;
691 } else if (advice_dism >= 0 && (shmflag & SHM_PAGEABLE)) {
692 advice = advice_dism;
693 } else if (advice_shm >= 0) {
694 advice = advice_shm;
698 * Apply advice if specified and shmat succeeded.
700 if (advice >= 0 && result != (void *)-1) {
701 #ifdef MADVDEBUG
702 /* First determine segment size */
703 rc = shmctl(shmid, IPC_STAT, &mds);
704 MADVPRINT(4, (stderr, "shmctl rc %d errno %d\n", rc, errno));
705 rc = memcntl(result, mds.shm_segsz, MC_ADVISE,
706 (caddr_t)(intptr_t)advice, 0, 0);
707 MADVPRINT(1, (stderr,
708 "shmat advice: 0x%x 0x%x %d, rc %d errno %d\n",
709 result, mds.shm_segsz, advice, rc, errno));
710 #else
711 /* First determine segment size */
712 (void) shmctl(shmid, IPC_STAT, &mds);
713 (void) memcntl(result, mds.shm_segsz, MC_ADVISE,
714 (caddr_t)(intptr_t)advice, 0, 0);
715 #endif
718 return (result);
721 #ifdef _LP64
723 * mmap interpose
725 void *
726 mmap(void *addr, size_t len, int prot, int flags, int fd, off_t pos)
728 static void *(*mmapfunc)();
729 void *result;
730 int advice = -1;
732 if (!mmapfunc) {
733 mmapfunc = dlsym(RTLD_NEXT, "mmap");
734 assert(mmapfunc);
737 result = mmapfunc(addr, len, prot, flags, fd, pos);
740 * Option mapanon has highest precedence while option map
741 * has lowest precedence.
743 if (advice_mapanon >= 0 && (flags & MAP_ANON)) {
744 advice = advice_mapanon;
745 } else if (advice_mapshared >= 0 && (flags & MAP_SHARED)) {
746 advice = advice_mapshared;
747 } else if (advice_mapprivate >= 0 && (flags & MAP_PRIVATE)) {
748 advice = advice_mapprivate;
749 } else if (advice_map >= 0) {
750 advice = advice_map;
754 * Apply advice if specified and mmap succeeded.
756 if (advice >= 0 && result != MAP_FAILED) {
757 #ifdef MADVDEBUG
758 int rc;
760 rc = memcntl(result, len, MC_ADVISE,
761 (caddr_t)(intptr_t)advice, 0, 0);
762 MADVPRINT(1, (stderr,
763 "mmap advice: %p 0x%x %d, rc %d errno %d\n",
764 result, len, advice, rc, errno));
765 #else
766 (void) memcntl(result, len, MC_ADVISE,
767 (caddr_t)(intptr_t)advice, 0, 0);
768 #endif
771 return (result);
774 #else
776 * mmap64 interpose
778 void *
779 mmap(void *addr, size_t len, int prot, int flags, int fd, off64_t pos)
781 static void *(*mmap64func)();
782 void *result;
783 int advice = -1;
785 if (!mmap64func) {
786 mmap64func = dlsym(RTLD_NEXT, "mmap64");
787 assert(mmap64func);
790 result = mmap64func(addr, len, prot, flags, fd, pos);
793 * Option mapanon has highest precedence while option map
794 * has lowest precedence.
796 if (advice_mapanon >= 0 && (flags & MAP_ANON)) {
797 advice = advice_mapanon;
798 } else if (advice_mapshared >= 0 && (flags & MAP_SHARED)) {
799 advice = advice_mapshared;
800 } else if (advice_mapprivate >= 0 && (flags & MAP_PRIVATE)) {
801 advice = advice_mapprivate;
802 } else if (advice_map >= 0) {
803 advice = advice_map;
807 * Apply advice if specified and mmap succeeded.
809 if (advice >= 0 && result != MAP_FAILED) {
810 #ifdef MADVDEBUG
811 int rc;
813 rc = memcntl(result, len, MC_ADVISE, (caddr_t)advice, 0, 0);
814 MADVPRINT(1, (stderr,
815 "mmap64 advice: %p 0x%x %d, rc %d errno %d\n",
816 result, len, advice, rc, errno));
817 #else
818 (void) memcntl(result, len, MC_ADVISE, (caddr_t)advice, 0, 0);
819 #endif
822 return (result);
824 #endif /* _LP64 */