8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / cmd / sgs / ld / common / ld.c
blob794f5e5e0984311588dd582f21b3707930502a76
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
23 * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <stdarg.h>
30 #include <string.h>
31 #include <strings.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <libintl.h>
35 #include <locale.h>
36 #include <fcntl.h>
37 #include <ar.h>
38 #include <gelf.h>
39 #include "conv.h"
40 #include "libld.h"
41 #include "machdep.h"
42 #include "msg.h"
45 * The following prevent us from having to include ctype.h which defines these
46 * functions as macros which reference the __ctype[] array. Go through .plt's
47 * to get to these functions in libc rather than have every invocation of ld
48 * have to suffer the R_SPARC_COPY overhead of the __ctype[] array.
50 extern int isspace(int);
/*
 * We examine ELF objects, and archives containing ELF objects, in order
 * to determine the ELFCLASS of the resulting object and/or the linker to be
 * used. We want to avoid the overhead of libelf for this, at least until
 * we are certain that we need it, so we start by reading bytes from
 * the beginning of the file. This type defines the buffer used to read
 * these initial bytes.
 *
 * A plain ELF object will start with an ELF header, whereas an archive
 * starts with a magic string (ARMAG) that is SARMAG bytes long. Any valid
 * ELF file or archive will contain more bytes than this buffer, so any
 * file shorter than this can be safely assumed not to be of interest.
 *
 * The ELF header for ELFCLASS32 and ELFCLASS64 are identical up through
 * the e_version field, and all the information we require is found in this
 * common prefix. Furthermore, this cannot change, as the layout of an ELF
 * header is fixed by the ELF ABI. Hence, the ehdr part of this union is
 * not a full ELF header, but only the class-independent prefix that we need.
 *
 * As this is a raw (non-libelf) read, we are responsible for handling any
 * byte order difference between the object and the system running this
 * program when we read any datum larger than a byte (i.e. e_machine) from
 * this header.
 */
typedef union {
	struct {	/* Must match start of ELFxx_Ehdr in <sys/elf.h> */
		uchar_t	e_ident[EI_NIDENT];	/* ident bytes */
		Half	e_type;			/* file type */
		Half	e_machine;		/* target machine */
	} ehdr;
	char		armag[SARMAG];
} FILE_HDR;
87 * Print a message to stdout
89 void
90 veprintf(Lm_list *lml, Error error, const char *format, va_list args)
92 static const char *strings[ERR_NUM];
94 #if defined(lint)
96 * The lml argument is only meaningful for diagnostics sent to ld.so.1.
97 * Supress the lint error by making a dummy assignment.
99 lml = 0;
100 #endif
102 * For error types we issue a prefix for, make sure the necessary
103 * string has been internationalized and is ready.
105 switch (error) {
106 case ERR_WARNING_NF:
107 if (strings[ERR_WARNING_NF] == NULL)
108 strings[ERR_WARNING_NF] = MSG_INTL(MSG_ERR_WARNING);
109 break;
110 case ERR_WARNING:
111 if (strings[ERR_WARNING] == NULL)
112 strings[ERR_WARNING] = MSG_INTL(MSG_ERR_WARNING);
113 break;
114 case ERR_GUIDANCE:
115 if (strings[ERR_GUIDANCE] == NULL)
116 strings[ERR_GUIDANCE] = MSG_INTL(MSG_ERR_GUIDANCE);
117 break;
118 case ERR_FATAL:
119 if (strings[ERR_FATAL] == NULL)
120 strings[ERR_FATAL] = MSG_INTL(MSG_ERR_FATAL);
121 break;
122 case ERR_ELF:
123 if (strings[ERR_ELF] == NULL)
124 strings[ERR_ELF] = MSG_INTL(MSG_ERR_ELF);
127 /* If strings[] element for our error type is non-NULL, issue prefix */
128 if (strings[error] != NULL) {
129 (void) fputs(MSG_ORIG(MSG_STR_LDDIAG), stderr);
130 (void) fputs(strings[error], stderr);
133 (void) vfprintf(stderr, format, args);
134 if (error == ERR_ELF) {
135 int elferr;
137 if ((elferr = elf_errno()) != 0)
138 (void) fprintf(stderr, MSG_ORIG(MSG_STR_ELFDIAG),
139 elf_errmsg(elferr));
141 (void) fprintf(stderr, MSG_ORIG(MSG_STR_NL));
142 (void) fflush(stderr);
147 * Print a message to stdout
149 /* VARARGS3 */
150 void
151 eprintf(Lm_list *lml, Error error, const char *format, ...)
153 va_list args;
155 va_start(args, format);
156 veprintf(lml, error, format, args);
157 va_end(args);
162 * Examine the first object in an archive to determine its ELFCLASS
163 * and machine type.
165 * entry:
166 * fd - Open file descriptor for file
167 * elf - libelf ELF descriptor
168 * class_ret, mach_ret - Address of variables to receive ELFCLASS
169 * and machine type.
171 * exit:
172 * On success, *class_ret and *mach_ret are filled in, and True (1)
173 * is returned. On failure, False (0) is returned.
175 static int
176 archive(int fd, Elf *elf, uchar_t *class_ret, Half *mach_ret)
178 Elf_Cmd cmd = ELF_C_READ;
179 Elf_Arhdr *arhdr;
180 Elf *_elf = NULL;
181 int found = 0;
184 * Process each item within the archive until we find the first
185 * ELF object, or alternatively another archive to recurse into.
186 * Stop after analyzing the first plain object found.
188 while (!found && ((_elf = elf_begin(fd, cmd, elf)) != NULL)) {
189 if ((arhdr = elf_getarhdr(_elf)) == NULL)
190 return (0);
191 if (*arhdr->ar_name != '/') {
192 switch (elf_kind(_elf)) {
193 case ELF_K_AR:
194 found = archive(fd, _elf, class_ret, mach_ret);
195 break;
196 case ELF_K_ELF:
197 if (gelf_getclass(_elf) == ELFCLASS64) {
198 Elf64_Ehdr *ehdr;
200 if ((ehdr = elf64_getehdr(_elf)) ==
201 NULL)
202 break;
203 *class_ret = ehdr->e_ident[EI_CLASS];
204 *mach_ret = ehdr->e_machine;
205 } else {
206 Elf32_Ehdr *ehdr;
208 if ((ehdr = elf32_getehdr(_elf)) ==
209 NULL)
210 break;
211 *class_ret = ehdr->e_ident[EI_CLASS];
212 *mach_ret = ehdr->e_machine;
214 found = 1;
215 break;
219 cmd = elf_next(_elf);
220 (void) elf_end(_elf);
223 return (found);
227 * Determine:
228 * - ELFCLASS of resulting object (class)
229 * - Whether user specified class of the linker (ldclass)
230 * - ELF machine type of resulting object (m_mach)
232 * In order of priority, we determine this information as follows:
234 * - Command line options (-32, -64, -z altexec64, -z target).
235 * - From the first plain object seen on the command line. (This is
236 * by far the most common case.)
237 * - From the first object contained within the first archive
238 * on the command line.
239 * - If all else fails, we assume a 32-bit object for the native machine.
241 * entry:
242 * argc, argv - Command line argument vector
243 * class_ret - Address of variable to receive ELFCLASS of output object
244 * ldclass_ret - Address of variable to receive ELFCLASS of
245 * linker to use. This will be ELFCLASS32/ELFCLASS64 if one
246 * is explicitly specified, and ELFCLASSNONE otherwise.
247 * ELFCLASSNONE therefore means that we should use the best
248 * link-editor that the system/kernel will allow.
250 static int
251 process_args(int argc, char **argv, uchar_t *class_ret, uchar_t *ldclass_ret,
252 Half *mach)
254 uchar_t ldclass = ELFCLASSNONE, class = ELFCLASSNONE, ar_class;
255 Half mach32 = EM_NONE, mach64 = EM_NONE, ar_mach;
256 int c, ar_found = 0;
259 * In general, libld.so is responsible for processing the
260 * command line options. The exception to this are those options
261 * that contain information about which linker to run and the
262 * class/machine of the output object. We examine the options
263 * here looking for the following:
265 * -32 Produce an ELFCLASS32 object. This is the default, so
266 * -32 is only needed when linking entirely from archives,
267 * and the first archive contains a mix of 32 and 64-bit
268 * objects, and the first object in that archive is 64-bit.
269 * We do not expect this option to get much use, but it
270 * ensures that the user can handle any situation.
272 * -64 Produce an ELFCLASS64 object. (Note that this will
273 * indirectly cause the use of the 64-bit linker if
274 * the system is 64-bit capable). The most common need
275 * for this option is when linking a filter object entirely
276 * from a mapfile. The less common case is when linking
277 * entirely from archives, and the first archive contains
278 * a mix of 32 and 64-bit objects, and the first object
279 * in that archive is 32-bit.
281 * -z altexec64
282 * Use the 64-bit linker regardless of the class
283 * of the output object.
285 * -z target=platform
286 * Produce output object for the specified platform.
287 * This option is needed when producing an object
288 * for a non-native target entirely from a mapfile,
289 * or when linking entirely from an archive containing
290 * objects for multiple targets, and the first object
291 * in the archive is not for the desired target.
293 * If we've already processed an object and we find -32/-64, and
294 * the object is of the wrong class, we have an error condition.
295 * We ignore it here, and let it fall through to libld, where the
296 * proper diagnosis and error message will occur.
298 opterr = 0;
299 optind = 1;
300 getmore:
301 while ((c = ld_getopt(0, optind, argc, argv)) != -1) {
302 switch (c) {
303 case '3':
304 if (strncmp(optarg, MSG_ORIG(MSG_ARG_TWO),
305 MSG_ARG_TWO_SIZE) == 0)
306 class = ELFCLASS32;
307 break;
309 case '6':
310 if (strncmp(optarg, MSG_ORIG(MSG_ARG_FOUR),
311 MSG_ARG_FOUR_SIZE) == 0)
312 class = ELFCLASS64;
313 break;
315 case 'z':
316 #if !defined(_LP64)
317 /* -z altexec64 */
318 if (strncmp(optarg, MSG_ORIG(MSG_ARG_ALTEXEC64),
319 MSG_ARG_ALTEXEC64_SIZE) == 0) {
320 ldclass = ELFCLASS64;
321 break;
323 #endif
324 /* -z target=platform */
325 if (strncmp(optarg, MSG_ORIG(MSG_ARG_TARGET),
326 MSG_ARG_TARGET_SIZE) == 0) {
327 char *pstr = optarg + MSG_ARG_TARGET_SIZE;
329 if (strcasecmp(pstr,
330 MSG_ORIG(MSG_TARG_SPARC)) == 0) {
331 mach32 = EM_SPARC;
332 mach64 = EM_SPARCV9;
333 } else if (strcasecmp(pstr,
334 MSG_ORIG(MSG_TARG_X86)) == 0) {
335 mach32 = EM_386;
336 mach64 = EM_AMD64;
337 } else {
338 eprintf(0, ERR_FATAL,
339 MSG_INTL(MSG_ERR_BADTARG), pstr);
340 return (1);
343 break;
348 * Continue to look for the first ELF object to determine the class of
349 * objects to operate on. At the same time, look for the first archive
350 * of ELF objects --- if no plain ELF object is specified, the type
351 * of the first ELF object in the first archive will be used. If
352 * there is no object, and no archive, then we fall back to a 32-bit
353 * object for the native machine.
355 for (; optind < argc; optind++) {
356 int fd;
357 FILE_HDR hdr;
360 * If we detect some more options return to getopt().
361 * Checking argv[optind][1] against null prevents a forever
362 * loop if an unadorned `-' argument is passed to us.
364 if (argv[optind][0] == '-') {
365 if (argv[optind][1] == '\0')
366 continue;
367 else
368 goto getmore;
372 * If we've already determined the object class and
373 * machine type, continue to the next argument. Only
374 * the first object contributes to this decision, and
375 * there's no value to opening or examing the subsequent
376 * ones. We do need to keep going though, because there
377 * may be additional options that might affect our
378 * class/machine decision.
380 if ((class != ELFCLASSNONE) && (mach32 != EM_NONE))
381 continue;
384 * Open the file and determine if it is an object. We are
385 * looking for ELF objects, or archives of ELF objects.
387 * Plain objects are simple, and are the common case, so
388 * we examine them directly and avoid the map-unmap-map
389 * that would occur if we used libelf. Archives are too
390 * complex to be worth accessing directly, so if we identify
391 * an archive, we use libelf on it and accept the cost.
393 if ((fd = open(argv[optind], O_RDONLY)) == -1) {
394 int err = errno;
396 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_OPEN),
397 argv[optind], strerror(err));
398 return (1);
401 if (pread(fd, &hdr, sizeof (hdr), 0) != sizeof (hdr)) {
402 (void) close(fd);
403 continue;
406 if ((hdr.ehdr.e_ident[EI_MAG0] == ELFMAG0) &&
407 (hdr.ehdr.e_ident[EI_MAG1] == ELFMAG1) &&
408 (hdr.ehdr.e_ident[EI_MAG2] == ELFMAG2) &&
409 (hdr.ehdr.e_ident[EI_MAG3] == ELFMAG3)) {
410 if (class == ELFCLASSNONE) {
411 class = hdr.ehdr.e_ident[EI_CLASS];
412 if ((class != ELFCLASS32) &&
413 (class != ELFCLASS64))
414 class = ELFCLASSNONE;
417 if (mach32 == EM_NONE) {
418 int one = 1;
419 uchar_t *one_p = (uchar_t *)&one;
420 int ld_elfdata;
422 ld_elfdata = (one_p[0] == 1) ?
423 ELFDATA2LSB : ELFDATA2MSB;
425 * Both the 32 and 64-bit versions get the
426 * type from the object. If the user has
427 * asked for an inconsistant class/machine
428 * combination, libld will catch it.
430 mach32 = mach64 =
431 (ld_elfdata == hdr.ehdr.e_ident[EI_DATA]) ?
432 hdr.ehdr.e_machine :
433 BSWAP_HALF(hdr.ehdr.e_machine);
435 } else if (!ar_found &&
436 (memcmp(&hdr.armag, ARMAG, SARMAG) == 0)) {
437 Elf *elf;
439 (void) elf_version(EV_CURRENT);
440 if ((elf = elf_begin(fd, ELF_C_READ, NULL)) == NULL) {
441 (void) close(fd);
442 continue;
444 if (elf_kind(elf) == ELF_K_AR)
445 ar_found =
446 archive(fd, elf, &ar_class, &ar_mach);
447 (void) elf_end(elf);
450 (void) close(fd);
454 * ELFCLASS of output object: If we did not establish a class from a
455 * command option, or from the first plain object, then use the class
456 * from the first archive, and failing that, default to 32-bit.
458 if (class == ELFCLASSNONE)
459 class = ar_found ? ar_class : ELFCLASS32;
460 *class_ret = class;
462 /* ELFCLASS of link-editor to use */
463 *ldclass_ret = ldclass;
466 * Machine type of output object: If we did not establish a machine
467 * type from the command line, or from the first plain object, then
468 * use the machine established by the first archive, and failing that,
469 * use the native machine.
471 *mach = (class == ELFCLASS64) ? mach64 : mach32;
472 if (*mach == EM_NONE)
473 if (ar_found)
474 *mach = ar_mach;
475 else
476 *mach = (class == ELFCLASS64) ? M_MACH_64 : M_MACH_32;
478 return (0);
/*
 * Process an LD_OPTIONS environment string. This routine is first called to
 * count the number of options, and second to initialize a new argument array
 * with each option.
 *
 * entry:
 *	str - White space separated option string. It is only modified
 *		when nargv is supplied, in which case each option is
 *		null terminated in place.
 *	nargv - NULL to just count options, or an array with room for at
 *		least as many entries as the counting call returned; each
 *		entry is set to point at an option within str.
 *
 * exit:
 *	Returns the number of options found. Leading, trailing and repeated
 *	white space never produces an empty option, and a NULL str yields 0.
 */
static int
process_ldoptions(char *str, char **nargv)
{
	int	argc = 0;

	if (str == NULL)
		return (0);

	for (;;) {
		char	*arg;

		/*
		 * Skip the separators in front of the next option. The cast
		 * keeps isspace() defined for bytes with the high bit set
		 * when char is signed.
		 */
		while (isspace((unsigned char)*str))
			str++;
		if (*str == '\0')
			break;

		/* Find the end of this option */
		arg = str;
		while ((*str != '\0') && !isspace((unsigned char)*str))
			str++;

		/*
		 * If a new argument array has been provided, record the
		 * option and terminate it within the original string.
		 */
		if (nargv != NULL) {
			nargv[argc] = arg;
			if (*str != '\0')
				*str++ = '\0';
		}
		argc++;
	}

	return (argc);
}
529 * Determine whether an LD_OPTIONS environment variable is set, and if so,
530 * prepend environment string as a series of options to the argv array.
532 static int
533 prepend_ldoptions(int *argcp, char ***argvp)
535 int nargc;
536 char **nargv, *ld_options;
537 int err, count;
539 if ((ld_options = getenv(MSG_ORIG(MSG_LD_OPTIONS))) == NULL)
540 return (0);
543 * Get rid of any leading white space, and make sure the environment
544 * string has size.
546 while (isspace(*ld_options))
547 ld_options++;
548 if (ld_options[0] == '\0')
549 return (0);
552 * Prevent modification of actual environment strings.
554 if ((ld_options = strdup(ld_options)) == NULL) {
555 err = errno;
556 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(err));
557 return (1);
561 * Determine the number of options provided.
563 nargc = process_ldoptions(ld_options, NULL);
566 * Allocate a new argv array big enough to hold the new options from
567 * the environment string and the old argv options.
569 if ((nargv = malloc((nargc + *argcp + 1) * sizeof (char *))) == NULL) {
570 err = errno;
571 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_ALLOC), strerror(err));
572 return (1);
576 * Initialize first element of new argv array to be the first element
577 * of the old argv array (ie. calling programs name). Then add the new
578 * args obtained from the environment.
580 nargc = 0;
581 nargv[nargc++] = (*argvp)[0];
582 nargc += process_ldoptions(ld_options, &nargv[nargc]);
585 * Now add the original argv array (skipping argv[0]) to the end of the
586 * new argv array, and re-vector argc and argv to reference this new
587 * array
589 for (count = 1; count < *argcp; count++, nargc++)
590 nargv[nargc] = (*argvp)[count];
592 nargv[nargc] = NULL;
594 *argcp = nargc;
595 *argvp = nargv;
597 return (0);
601 * Check to see if there is a LD_ALTEXEC=<path to alternate ld> in the
602 * environment. If so, first null the environment variable out, and then
603 * exec() the binary pointed to by the environment variable, passing the same
604 * arguments as the originating process. This mechanism permits using
605 * alternate link-editors (debugging/developer copies) even in complex build
606 * environments.
608 static int
609 ld_altexec(char **argv, char **envp)
611 char *execstr;
612 char **str;
613 int err;
615 for (str = envp; *str; str++) {
616 if (strncmp(*str, MSG_ORIG(MSG_LD_ALTEXEC),
617 MSG_LD_ALTEXEC_SIZE) == 0) {
618 break;
623 * If LD_ALTEXEC isn't set, return to continue executing the present
624 * link-editor.
626 if (*str == 0)
627 return (0);
630 * Get a pointer to the actual string. If it's a null entry, return.
632 execstr = strdup(*str + MSG_LD_ALTEXEC_SIZE);
633 if (*execstr == '\0')
634 return (0);
637 * Null out the LD_ALTEXEC= environment entry.
639 (*str)[MSG_LD_ALTEXEC_SIZE] = '\0';
642 * Set argv[0] to point to our new linker
644 argv[0] = execstr;
647 * And attempt to execute it.
649 (void) execve(execstr, argv, envp);
652 * If the exec() fails, return a failure indication.
654 err = errno;
655 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_EXEC), execstr,
656 strerror(err));
657 return (1);
661 main(int argc, char **argv, char **envp)
663 char **oargv = argv;
664 uchar_t class, ldclass, checkclass;
665 Half mach;
668 * Establish locale.
670 (void) setlocale(LC_MESSAGES, MSG_ORIG(MSG_STR_EMPTY));
671 (void) textdomain(MSG_ORIG(MSG_SUNW_OST_SGS));
674 * Execute an alternate linker if the LD_ALTEXEC environment variable is
675 * set. If a specified alternative could not be found, bail.
677 if (ld_altexec(argv, envp))
678 return (1);
681 * Check the LD_OPTIONS environment variable, and if present prepend
682 * the arguments specified to the command line argument list.
684 if (prepend_ldoptions(&argc, &argv))
685 return (1);
688 * Examine the command arguments to determine:
689 * - object class
690 * - link-editor class
691 * - target machine
693 if (process_args(argc, argv, &class, &ldclass, &mach))
694 return (1);
697 * Unless a 32-bit link-editor was explicitly requested, try
698 * to exec the 64-bit version.
700 if (ldclass != ELFCLASS32)
701 checkclass = conv_check_native(oargv, envp);
704 * If an attempt to exec the 64-bit link-editor fails:
705 * - Bail if the 64-bit linker was explicitly requested
706 * - Continue quietly if the 64-bit linker was not requested.
707 * This is undoubtedly due to hardware/kernel limitations,
708 * and therefore represents the best we can do. Note that
709 * the 32-bit linker is capable of linking anything the
710 * 64-bit version is, subject to a 4GB limit on memory, and
711 * 2GB object size.
713 if ((ldclass == ELFCLASS64) && (checkclass != ELFCLASS64)) {
714 eprintf(0, ERR_FATAL, MSG_INTL(MSG_SYS_64));
715 return (1);
718 /* Call the libld entry point for the specified ELFCLASS */
719 if (class == ELFCLASS64)
720 return (ld64_main(argc, argv, mach));
721 else
722 return (ld32_main(argc, argv, mach));
726 * We supply this function for the msg module
728 const char *
729 _ld_msg(Msg mid)
731 return (gettext(MSG_ORIG(mid)));