/* BFD back-end for WebAssembly modules.
   Copyright (C) 2017-2024 Free Software Foundation, Inc.

   Based on srec.c, mmo.c, and binary.c

   This file is part of BFD, the Binary File Descriptor library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA.  */
/* The WebAssembly module format is a simple object file format
   including up to 11 numbered sections, plus any number of named
   "custom" sections.  It is described at:
   https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md.  */
30 #include "libiberty.h"
32 #include "wasm-module.h"
42 bfd_size_type symcount
;
45 static const char * const wasm_numbered_sections
[] =
47 NULL
, /* Custom section, different layout. */
48 WASM_SECTION ( 1, "type"),
49 WASM_SECTION ( 2, "import"),
50 WASM_SECTION ( 3, "function"),
51 WASM_SECTION ( 4, "table"),
52 WASM_SECTION ( 5, "memory"),
53 WASM_SECTION ( 6, "global"),
54 WASM_SECTION ( 7, "export"),
55 WASM_SECTION ( 8, "start"),
56 WASM_SECTION ( 9, "element"),
57 WASM_SECTION (10, "code"),
58 WASM_SECTION (11, "data"),
61 #define WASM_NUMBERED_SECTIONS ARRAY_SIZE (wasm_numbered_sections)
63 /* Resolve SECTION_CODE to a section name if there is one, NULL
67 wasm_section_code_to_name (bfd_byte section_code
)
69 if (section_code
< WASM_NUMBERED_SECTIONS
)
70 return wasm_numbered_sections
[section_code
];
75 /* Translate section name NAME to a section code, or 0 if it's a
79 wasm_section_name_to_code (const char *name
)
83 for (i
= 1; i
< WASM_NUMBERED_SECTIONS
; i
++)
84 if (strcmp (name
, wasm_numbered_sections
[i
]) == 0)
/* WebAssembly LEB128 integers are sufficiently like DWARF LEB128
   integers that we use _bfd_safe_read_leb128, but there are two
   points of difference:

   - WebAssembly requires a 32-bit value to be encoded in at most 5
     bytes, etc.
   - _bfd_safe_read_leb128 accepts incomplete LEB128 encodings at the
     end of the buffer, while these are invalid in WebAssembly.

   Those differences mean that we will accept some files that are
   invalid WebAssembly.  */
102 /* Read an LEB128-encoded integer from ABFD's I/O stream, reading one
103 byte at a time. Set ERROR_RETURN if no complete integer could be
104 read, LENGTH_RETURN to the number of bytes read (including bytes in
105 incomplete numbers). SIGN means interpret the number as SLEB128. */
108 wasm_read_leb128 (bfd
*abfd
,
110 unsigned int *length_return
,
114 unsigned int num_read
= 0;
115 unsigned int shift
= 0;
116 unsigned char byte
= 0;
117 unsigned char lost
, mask
;
120 while (bfd_read (&byte
, 1, abfd
) == 1)
124 if (shift
< CHAR_BIT
* sizeof (result
))
126 result
|= ((bfd_vma
) (byte
& 0x7f)) << shift
;
127 /* These bits overflowed. */
128 lost
= byte
^ (result
>> shift
);
129 /* And this is the mask of possible overflow bits. */
130 mask
= 0x7f ^ ((bfd_vma
) 0x7f << shift
>> shift
);
138 if ((lost
& mask
) != (sign
&& (bfd_signed_vma
) result
< 0 ? mask
: 0))
141 if ((byte
& 0x80) == 0)
144 if (sign
&& shift
< CHAR_BIT
* sizeof (result
) && (byte
& 0x40))
145 result
|= -((bfd_vma
) 1 << shift
);
150 if (length_return
!= NULL
)
151 *length_return
= num_read
;
152 if (error_return
!= NULL
)
153 *error_return
= status
!= 0;
158 /* Encode an integer V as LEB128 and write it to ABFD, return TRUE on
162 wasm_write_uleb128 (bfd
*abfd
, bfd_vma v
)
166 bfd_byte c
= v
& 0x7f;
172 if (bfd_write (&c
, 1, abfd
) != 1)
/* Read the LEB128 integer at P, saving it to X; at end of buffer,
   jump to error_return.  The expansion relies on the caller having a
   variable named `abfd' and a label named `error_return' in scope.
   P is advanced past the bytes consumed.  */
#define READ_LEB128(x, p, end)                                          \
  do                                                                    \
    {                                                                   \
      if ((p) >= (end))                                                 \
        goto error_return;                                              \
      (x) = _bfd_safe_read_leb128 (abfd, &(p), false, (end));           \
    }                                                                   \
  while (0)
191 /* Verify the magic number at the beginning of a WebAssembly module
192 ABFD, setting ERRORPTR if there's a mismatch. */
195 wasm_read_magic (bfd
*abfd
, bool *errorptr
)
197 bfd_byte magic_const
[SIZEOF_WASM_MAGIC
] = WASM_MAGIC
;
198 bfd_byte magic
[SIZEOF_WASM_MAGIC
];
200 if (bfd_read (magic
, sizeof (magic
), abfd
) == sizeof (magic
)
201 && memcmp (magic
, magic_const
, sizeof (magic
)) == 0)
208 /* Read the version number from ABFD, returning TRUE if it's a supported
209 version. Set ERRORPTR otherwise. */
212 wasm_read_version (bfd
*abfd
, bool *errorptr
)
214 bfd_byte vers_const
[SIZEOF_WASM_VERSION
] = WASM_VERSION
;
215 bfd_byte vers
[SIZEOF_WASM_VERSION
];
217 if (bfd_read (vers
, sizeof (vers
), abfd
) == sizeof (vers
)
218 /* Don't attempt to parse newer versions, which are likely to
219 require code changes. */
220 && memcmp (vers
, vers_const
, sizeof (vers
)) == 0)
227 /* Read the WebAssembly header (magic number plus version number) from
228 ABFD, setting ERRORPTR to TRUE if there is a mismatch. */
231 wasm_read_header (bfd
*abfd
, bool *errorptr
)
233 if (! wasm_read_magic (abfd
, errorptr
))
236 if (! wasm_read_version (abfd
, errorptr
))
242 /* Scan the "function" subsection of the "name" section ASECT in the
243 wasm module ABFD. Create symbols. Return TRUE on success. */
246 wasm_scan_name_function_section (bfd
*abfd
, sec_ptr asect
)
250 bfd_vma payload_size
;
251 bfd_vma symcount
= 0;
252 tdata_type
*tdata
= abfd
->tdata
.any
;
253 asymbol
*symbols
= NULL
;
254 sec_ptr space_function_index
;
258 end
= asect
->contents
+ asect
->size
;
265 bfd_byte subsection_code
= *p
++;
266 if (subsection_code
== WASM_FUNCTION_SUBSECTION
)
269 /* subsection_code is documented to be a varuint7, meaning that
270 it has to be a single byte in the 0 - 127 range. If it isn't,
271 the spec must have changed underneath us, so give up. */
272 if (subsection_code
& 0x80)
275 READ_LEB128 (payload_size
, p
, end
);
277 if (payload_size
> (size_t) (end
- p
))
286 READ_LEB128 (payload_size
, p
, end
);
288 if (payload_size
> (size_t) (end
- p
))
291 end
= p
+ payload_size
;
293 READ_LEB128 (symcount
, p
, end
);
295 /* Sanity check: each symbol has at least two bytes. */
296 if (symcount
> payload_size
/ 2)
299 tdata
->symcount
= symcount
;
302 = bfd_make_section_with_flags (abfd
, WASM_SECTION_FUNCTION_INDEX
,
303 SEC_READONLY
| SEC_CODE
);
305 if (!space_function_index
)
307 = bfd_get_section_by_name (abfd
, WASM_SECTION_FUNCTION_INDEX
);
309 if (!space_function_index
)
312 if (_bfd_mul_overflow (tdata
->symcount
, sizeof (asymbol
), &amt
))
314 bfd_set_error (bfd_error_file_too_big
);
317 symbols
= bfd_alloc (abfd
, amt
);
321 for (symcount
= 0; p
< end
&& symcount
< tdata
->symcount
; symcount
++)
328 READ_LEB128 (idx
, p
, end
);
329 READ_LEB128 (len
, p
, end
);
331 if (len
> (size_t) (end
- p
))
334 name
= bfd_alloc (abfd
, len
+ 1);
338 memcpy (name
, p
, len
);
342 sym
= &symbols
[symcount
];
346 sym
->flags
= BSF_GLOBAL
| BSF_FUNCTION
;
347 sym
->section
= space_function_index
;
351 if (symcount
< tdata
->symcount
)
354 tdata
->symbols
= symbols
;
355 abfd
->symcount
= symcount
;
361 bfd_release (abfd
, symbols
);
366 /* Read a byte from ABFD and return it, or EOF for EOF or error.
367 Set ERRORPTR on non-EOF error. */
370 wasm_read_byte (bfd
*abfd
, bool *errorptr
)
374 if (bfd_read (&byte
, 1, abfd
) != 1)
376 if (bfd_get_error () != bfd_error_file_truncated
)
384 /* Scan the wasm module ABFD, creating sections and symbols.
385 Return TRUE on success. */
388 wasm_scan (bfd
*abfd
)
391 /* Fake VMAs for now. Choose 0x80000000 as base to avoid clashes
392 with actual data addresses. */
393 bfd_vma vma
= 0x80000000;
395 unsigned int bytes_read
;
398 if (bfd_seek (abfd
, 0, SEEK_SET
) != 0)
401 if (!wasm_read_header (abfd
, &error
))
404 while ((section_code
= wasm_read_byte (abfd
, &error
)) != EOF
)
406 if (section_code
!= 0)
408 const char *sname
= wasm_section_code_to_name (section_code
);
413 bfdsec
= bfd_make_section_anyway_with_flags (abfd
, sname
,
418 bfdsec
->size
= wasm_read_leb128 (abfd
, &error
, &bytes_read
, false);
427 char *prefix
= WASM_SECTION_PREFIX
;
428 size_t prefixlen
= strlen (prefix
);
431 payload_len
= wasm_read_leb128 (abfd
, &error
, &bytes_read
, false);
434 namelen
= wasm_read_leb128 (abfd
, &error
, &bytes_read
, false);
435 if (error
|| bytes_read
> payload_len
436 || namelen
> payload_len
- bytes_read
)
438 payload_len
-= namelen
+ bytes_read
;
439 filesize
= bfd_get_file_size (abfd
);
440 if (filesize
!= 0 && namelen
> filesize
)
442 bfd_set_error (bfd_error_file_truncated
);
445 name
= bfd_alloc (abfd
, namelen
+ prefixlen
+ 1);
448 memcpy (name
, prefix
, prefixlen
);
449 if (bfd_read (name
+ prefixlen
, namelen
, abfd
) != namelen
)
451 name
[prefixlen
+ namelen
] = 0;
453 bfdsec
= bfd_make_section_anyway_with_flags (abfd
, name
,
458 bfdsec
->size
= payload_len
;
463 bfdsec
->alignment_power
= 0;
464 bfdsec
->filepos
= bfd_tell (abfd
);
465 if (bfdsec
->size
!= 0)
467 bfdsec
->contents
= _bfd_alloc_and_read (abfd
, bfdsec
->size
,
469 if (!bfdsec
->contents
)
476 /* Make sure we're at actual EOF. There's no indication in the
477 WebAssembly format of how long the file is supposed to be. */
487 /* Put a numbered section ASECT of ABFD into the table of numbered
488 sections pointed to by FSARG. */
491 wasm_register_section (bfd
*abfd ATTRIBUTE_UNUSED
,
495 sec_ptr
*numbered_sections
= fsarg
;
496 int idx
= wasm_section_name_to_code (asect
->name
);
501 numbered_sections
[idx
] = asect
;
504 struct compute_section_arg
510 /* Compute the file position of ABFD's section ASECT. FSARG is a
511 pointer to the current file position.
513 We allow section names of the form .wasm.id to encode the numbered
514 section with ID id, if it exists; otherwise, a custom section with
515 ID "id" is produced. Arbitrary section names are for sections that
516 are assumed already to contain a section header; those are appended
517 to the WebAssembly module verbatim. */
520 wasm_compute_custom_section_file_position (bfd
*abfd
,
524 struct compute_section_arg
*fs
= fsarg
;
530 idx
= wasm_section_name_to_code (asect
->name
);
535 if (startswith (asect
->name
, WASM_SECTION_PREFIX
))
537 const char *name
= asect
->name
+ strlen (WASM_SECTION_PREFIX
);
538 bfd_size_type payload_len
= asect
->size
;
539 bfd_size_type name_len
= strlen (name
);
540 bfd_size_type nl
= name_len
;
542 payload_len
+= name_len
;
551 if (bfd_seek (abfd
, fs
->pos
, SEEK_SET
) != 0
552 || ! wasm_write_uleb128 (abfd
, 0)
553 || ! wasm_write_uleb128 (abfd
, payload_len
)
554 || ! wasm_write_uleb128 (abfd
, name_len
)
555 || bfd_write (name
, name_len
, abfd
) != name_len
)
557 fs
->pos
= asect
->filepos
= bfd_tell (abfd
);
561 asect
->filepos
= fs
->pos
;
565 fs
->pos
+= asect
->size
;
572 /* Compute the file positions for the sections of ABFD. Currently,
573 this writes all numbered sections first, in order, then all custom
574 sections, in section order.
576 The spec says that the numbered sections must appear in order of
577 their ids, but custom sections can appear in any position and any
578 order, and more than once. FIXME: support that. */
581 wasm_compute_section_file_positions (bfd
*abfd
)
583 bfd_byte magic
[SIZEOF_WASM_MAGIC
] = WASM_MAGIC
;
584 bfd_byte vers
[SIZEOF_WASM_VERSION
] = WASM_VERSION
;
585 sec_ptr numbered_sections
[WASM_NUMBERED_SECTIONS
];
586 struct compute_section_arg fs
;
589 if (bfd_seek (abfd
, (bfd_vma
) 0, SEEK_SET
) != 0
590 || bfd_write (magic
, sizeof (magic
), abfd
) != (sizeof magic
)
591 || bfd_write (vers
, sizeof (vers
), abfd
) != sizeof (vers
))
594 for (i
= 0; i
< WASM_NUMBERED_SECTIONS
; i
++)
595 numbered_sections
[i
] = NULL
;
597 bfd_map_over_sections (abfd
, wasm_register_section
, numbered_sections
);
599 fs
.pos
= bfd_tell (abfd
);
600 for (i
= 0; i
< WASM_NUMBERED_SECTIONS
; i
++)
602 sec_ptr sec
= numbered_sections
[i
];
608 if (bfd_seek (abfd
, fs
.pos
, SEEK_SET
) != 0)
610 if (! wasm_write_uleb128 (abfd
, i
)
611 || ! wasm_write_uleb128 (abfd
, size
))
613 fs
.pos
= sec
->filepos
= bfd_tell (abfd
);
619 bfd_map_over_sections (abfd
, wasm_compute_custom_section_file_position
, &fs
);
624 abfd
->output_has_begun
= true;
630 wasm_set_section_contents (bfd
*abfd
,
632 const void *location
,
639 if (! abfd
->output_has_begun
640 && ! wasm_compute_section_file_positions (abfd
))
643 if (bfd_seek (abfd
, section
->filepos
+ offset
, SEEK_SET
) != 0
644 || bfd_write (location
, count
, abfd
) != count
)
651 wasm_write_object_contents (bfd
* abfd
)
653 bfd_byte magic
[] = WASM_MAGIC
;
654 bfd_byte vers
[] = WASM_VERSION
;
656 if (bfd_seek (abfd
, 0, SEEK_SET
) != 0)
659 if (bfd_write (magic
, sizeof (magic
), abfd
) != sizeof (magic
)
660 || bfd_write (vers
, sizeof (vers
), abfd
) != sizeof (vers
))
667 wasm_mkobject (bfd
*abfd
)
669 tdata_type
*tdata
= (tdata_type
*) bfd_alloc (abfd
, sizeof (tdata_type
));
674 tdata
->symbols
= NULL
;
677 abfd
->tdata
.any
= tdata
;
683 wasm_get_symtab_upper_bound (bfd
*abfd
)
685 tdata_type
*tdata
= abfd
->tdata
.any
;
687 return (tdata
->symcount
+ 1) * (sizeof (asymbol
*));
691 wasm_canonicalize_symtab (bfd
*abfd
, asymbol
**alocation
)
693 tdata_type
*tdata
= abfd
->tdata
.any
;
696 for (i
= 0; i
< tdata
->symcount
; i
++)
697 alocation
[i
] = &tdata
->symbols
[i
];
700 return tdata
->symcount
;
704 wasm_make_empty_symbol (bfd
*abfd
)
706 size_t amt
= sizeof (asymbol
);
707 asymbol
*new_symbol
= (asymbol
*) bfd_zalloc (abfd
, amt
);
711 new_symbol
->the_bfd
= abfd
;
716 wasm_print_symbol (bfd
*abfd
,
719 bfd_print_symbol_type how
)
721 FILE *file
= (FILE *) filep
;
725 case bfd_print_symbol_name
:
726 fprintf (file
, "%s", symbol
->name
);
730 bfd_print_symbol_vandf (abfd
, filep
, symbol
);
731 fprintf (file
, " %-5s %s", symbol
->section
->name
, symbol
->name
);
736 wasm_get_symbol_info (bfd
*abfd ATTRIBUTE_UNUSED
,
740 bfd_symbol_info (symbol
, ret
);
743 /* Check whether ABFD is a WebAssembly module; if so, scan it. */
746 wasm_object_p (bfd
*abfd
)
751 if (bfd_seek (abfd
, 0, SEEK_SET
) != 0)
754 if (!wasm_read_header (abfd
, &error
))
756 bfd_set_error (bfd_error_wrong_format
);
760 if (!wasm_mkobject (abfd
))
763 if (!wasm_scan (abfd
)
764 || !bfd_default_set_arch_mach (abfd
, bfd_arch_wasm32
, 0))
766 bfd_release (abfd
, abfd
->tdata
.any
);
767 abfd
->tdata
.any
= NULL
;
771 s
= bfd_get_section_by_name (abfd
, WASM_NAME_SECTION
);
772 if (s
!= NULL
&& wasm_scan_name_function_section (abfd
, s
))
773 abfd
->flags
|= HAS_SYMS
;
775 return _bfd_no_cleanup
;
/* Entries of the target vector's jump tables that this back-end does
   not implement itself are mapped onto the generic or "no symbols"
   helpers named on the right-hand side.  */

/* BFD_JUMP_TABLE_WRITE */
#define wasm_set_arch_mach                _bfd_generic_set_arch_mach

/* BFD_JUMP_TABLE_SYMBOLS */
#define wasm_get_symbol_version_string    _bfd_nosymbols_get_symbol_version_string
#define wasm_bfd_is_local_label_name      bfd_generic_is_local_label_name
#define wasm_bfd_is_target_special_symbol _bfd_bool_bfd_asymbol_false
#define wasm_get_lineno                   _bfd_nosymbols_get_lineno
#define wasm_find_nearest_line            _bfd_nosymbols_find_nearest_line
#define wasm_find_nearest_line_with_alt   _bfd_nosymbols_find_nearest_line_with_alt
#define wasm_find_line                    _bfd_nosymbols_find_line
#define wasm_find_inliner_info            _bfd_nosymbols_find_inliner_info
#define wasm_bfd_make_debug_symbol        _bfd_nosymbols_bfd_make_debug_symbol
#define wasm_read_minisymbols             _bfd_generic_read_minisymbols
#define wasm_minisymbol_to_symbol         _bfd_generic_minisymbol_to_symbol
794 const bfd_target wasm_vec
=
797 bfd_target_unknown_flavour
,
800 (HAS_SYMS
| WP_TEXT
), /* Object flags. */
801 (SEC_CODE
| SEC_DATA
| SEC_HAS_CONTENTS
), /* Section flags. */
802 0, /* Leading underscore. */
803 ' ', /* AR_pad_char. */
804 255, /* AR_max_namelen. */
805 0, /* Match priority. */
806 TARGET_KEEP_UNUSED_SECTION_SYMBOLS
, /* keep unused section symbols. */
807 /* Routines to byte-swap various sized integers from the data sections. */
808 bfd_getl64
, bfd_getl_signed_64
, bfd_putl64
,
809 bfd_getl32
, bfd_getl_signed_32
, bfd_putl32
,
810 bfd_getl16
, bfd_getl_signed_16
, bfd_putl16
,
812 /* Routines to byte-swap various sized integers from the file headers. */
813 bfd_getl64
, bfd_getl_signed_64
, bfd_putl64
,
814 bfd_getl32
, bfd_getl_signed_32
, bfd_putl32
,
815 bfd_getl16
, bfd_getl_signed_16
, bfd_putl16
,
819 wasm_object_p
, /* bfd_check_format. */
824 _bfd_bool_bfd_false_error
,
826 _bfd_generic_mkarchive
,
827 _bfd_bool_bfd_false_error
,
829 { /* bfd_write_contents. */
830 _bfd_bool_bfd_false_error
,
831 wasm_write_object_contents
,
832 _bfd_write_archive_contents
,
833 _bfd_bool_bfd_false_error
,
836 BFD_JUMP_TABLE_GENERIC (_bfd_generic
),
837 BFD_JUMP_TABLE_COPY (_bfd_generic
),
838 BFD_JUMP_TABLE_CORE (_bfd_nocore
),
839 BFD_JUMP_TABLE_ARCHIVE (_bfd_noarchive
),
840 BFD_JUMP_TABLE_SYMBOLS (wasm
),
841 BFD_JUMP_TABLE_RELOCS (_bfd_norelocs
),
842 BFD_JUMP_TABLE_WRITE (wasm
),
843 BFD_JUMP_TABLE_LINK (_bfd_nolink
),
844 BFD_JUMP_TABLE_DYNAMIC (_bfd_nodynamic
),