scripts/generate_builtin_ranges.awk

   1 #!/usr/bin/gawk -f
   2 # SPDX-License-Identifier: GPL-2.0
   3 # generate_builtin_ranges.awk: Generate address range data for builtin modules
   4 # Written by Kris Van Hees <kris.van.hees@oracle.com>
   5 #
   6 # Usage: generate_builtin_ranges.awk modules.builtin vmlinux.map \
   7 #               vmlinux.o.map > modules.builtin.ranges
   8 #
   9
  10 # Return the module name(s) (if any) associated with the given object.
  11 #
  12 # If we have seen this object before, return information from the cache.
  13 # Otherwise, retrieve it from the corresponding .cmd file.
  14 #
  15 function get_module_info(fn, name, key, line) {
  16         # Objects that were looked up before are served from the cache.
  17         if (fn in omod)
  18                 return omod[fn];
  19
  20         # The path must contain a '/' for the .cmd file name to be formed.
  21         if (!match(fn, /\/[^/]+$/))
  22                 return "";
  23
  24         # Derive the .cmd file name: dir/foo.o -> dir/.foo.o.cmd
  25         key = fn;
  26         name = "";
  27         fn = substr(fn, 1, RSTART) "." substr(fn, RSTART + 1) ".cmd";
  28         if ((getline line < fn) == 1) {
  29                 if (match(line, /DKBUILD_MODFILE=['"]+[^'"]+/) > 0) {
  30                         name = substr(line, RSTART + 16, RLENGTH - 16);
  31                         gsub(/['"]/, "", name);
  32                 } else if (match(line, /RUST_MODFILE=[^ ]+/) > 0)
  33                         name = substr(line, RSTART + 13, RLENGTH - 13);
  34         }
  35         close(fn);
  36
  37         # A value without spaces is a single name; it denotes a module only
  38         # if it is a known built-in module (names like "core" are ambiguous).
  39         if (name !~ / / && !(name in mods))
  40                 name = "";
  41
  42         # Strip directory components and turn every '-' into '_'.
  43         gsub(/([^/ ]*\/)+/, "", name);
  44         gsub(/-/, "_", name);
  45
  46         # name is now a single valid module name, a list of module names
  47         # (not validated), or empty.
  48         omod[key] = name;
  49
  50         return name;
  51 }
  52
  53 # Update the ranges entry for the given module 'mod' in section 'osect'.
  54 #
  55 # We use a modified absolute start address (soff + base) as index because we
  56 # may need to insert an anchor record later that must be at the start of the
  57 # section data, and the first module may very well start at the same address.
  58 # So, we use (addr << 1) + 1 to allow a possible anchor record to be placed at
  59 # (addr << 1).  This is safe because the index is only used to sort the entries
  60 # before writing them out.
  61 #
  62 function update_entry(osect, mod, soff, eoff, top, key) {
  63         top = sect_in[osect];           # section that osect was recorded in
  64         key = sprintf("%016x", 2 * (sect_base[osect] + soff) + 1);     # odd sort key
  65         count[top]++;                   # END only emits anchors for counted sections
  66         entries[key] = sprintf("%s %08x-%08x %s", top, soff, eoff, mod);
  67 }
  68
  69 # (1) Build a lookup map of built-in module names.
  70 #
  71 # The first file argument is used as input (modules.builtin).
  72 #
  73 # Lines will be like:
  74 #       kernel/crypto/lzo-rle.ko
  75 # and we record the object name "crypto/lzo-rle".
  76 #
  77 ARGIND == 1 {
  78         # Reduce "kernel/<path>.ko" to "<path>".
  79         sub(/kernel\//, "");
  80         sub(/\.ko$/, "");
  81         mods[$1] = 1;           # membership is tested by get_module_info()
  82         next;
  83 }
  84
  85 # (2) Collect address information for each section.
  86 #
  87 # The second file argument is used as input (vmlinux.map).
  88 #
  89 # We collect the base address of the section in order to convert all addresses
  90 # in the section into offset values.
  91 #
  92 # We collect the address of the anchor (or first symbol in the section if there
  93 # is no explicit anchor) to allow users of the range data to calculate address
  94 # ranges based on the actual load address of the section in the running kernel.
  95 #
  96 # We collect the start address of any sub-section (section included in the top
  97 # level section being processed).  This is needed when the final linking was
  98 # done using vmlinux.a because then the list of objects contained in each
  99 # section is to be obtained from vmlinux.o.map.  The offset of the sub-section
 100 # is recorded here, to be used as an addend when processing vmlinux.o.map
 101 # later.
 102 #
 103
 104 # Both GNU ld and LLVM lld linker map format are supported by converting LLVM
 105 # lld linker map records into equivalent GNU ld linker map records.
 106 #
 107 # The first record of the vmlinux.map file provides enough information to know
 108 # which format we are dealing with.
 109 #
 110 ARGIND == 2 && FNR == 1 && NF == 7 && $1 == "VMA" && $7 == "Symbol" {
 111         if (dbg)
 112                 printf "NOTE: %s uses LLVM lld linker map format\n", FILENAME >"/dev/stderr";
 113         map_is_lld = 1;         # selects the (LLD) rules, disables the (LD) ones
 114         next;
 115 }
 116
 117 # (LLD) Convert a section record from lld format to ld format.
 118 #
 119 # lld: ffffffff82c00000          2c00000   2493c0  8192 .data
 120 #  ->
 121 # ld:  .data           0xffffffff82c00000   0x2493c0 load address 0x0000000002c00000
 122 #
 123 ARGIND == 2 && map_is_lld && NF == 5 && /[0-9] [^ ]+$/ {
 124         $0 = $5 " 0x"$1 " 0x"$3 " load address 0x"$2;   # name, address, size, load address; fields are re-split
 125 }
 126
 127 # (LLD) Convert an anchor record from lld format to ld format.
 128 #
 129 # lld: ffffffff81000000          1000000        0     1         _text = .
 130 #  ->
 131 # ld:                  0xffffffff81000000                _text = .
 132 #
 133 ARGIND == 2 && map_is_lld && !anchor && NF == 7 && raw_addr == "0x"$1 && $6 == "=" && $7 == "." {
 134         $0 = "  0x"$1 " " $5 " = .";    # now 4 fields: address, symbol, "=", "."
 135 }
 136
 137 # (LLD) Convert an object record from lld format to ld format.
 138 #
 139 # lld:            11480            11480     1f07    16         vmlinux.a(arch/x86/events/amd/uncore.o):(.text)
 140 #  ->
 141 # ld:   .text          0x0000000000011480     0x1f07 arch/x86/events/amd/uncore.o
 142 #
 143 ARGIND == 2 && map_is_lld && NF == 5 && $5 ~ /:\(/ {
 144         gsub(/\)/, "");                 # remove every ')' from the record
 145         sub(/ vmlinux\.a\(/, " ");      # remove the "vmlinux.a(" wrapper, if present
 146         sub(/:\(/, " ");                # "object:(section" becomes fields $5 and $6
 147         $0 = " "$6 " 0x"$1 " 0x"$3 " " $5;      # section, address, size, object
 148 }
 149
 150 # (LLD) Convert a symbol record from lld format to ld format.
 151 #
 152 # We only care about these while processing a section for which no anchor has
 153 # been determined yet.
 154 #
 155 # lld: ffffffff82a859a4          2a859a4        0     1                 btf_ksym_iter_id
 156 #  ->
 157 # ld:                  0xffffffff82a859a4                btf_ksym_iter_id
 158 #
 159 ARGIND == 2 && map_is_lld && sect && !anchor && NF == 5 && $5 ~ /^[_A-Za-z][_A-Za-z0-9]*$/ {
 160         $0 = "  0x"$1 " " $5;           # now 2 fields: address, symbol name
 161 }
 162
 163 # (LLD) We do not need any other lld linker map records.
 164 #
 165 ARGIND == 2 && map_is_lld && /^[0-9a-f]{16} / {
 166         next;   # records rewritten above begin with a section name or a blank, so they normally pass
 167 }
 168
 169 # (LD) Section records with just the section name at the start of the line
 170 #      need to have the next line pulled in to determine whether it is a
 171 #      loadable section.  If it is, the next line will contain a hex value
 172 #      as first and second items.
 173 #
 174 ARGIND == 2 && !map_is_lld && NF == 1 && /^[^ ]/ {
 175         s = $0;                 # remember the section name
 176         getline;                # load the next record of the map into $0
 177         if ($1 !~ /^0x/ || $2 !~ /^0x/)
 178                 next;           # not followed by two hex values: skip it
 179
 180         $0 = s " " $0;          # join both lines into a single section record
 181 }
 182
 183 # (LD) Object records with just the section name denote records with a long
 184 #      section name for which the remainder of the record can be found on the
 185 #      next line.
 186 #
 187 # (This is also needed for vmlinux.o.map, when used.)
 188 #
 189 ARGIND >= 2 && !map_is_lld && NF == 1 && /^ [^ \*]/ {
 190         s = $0;                 # the (long) section name, with its leading blank
 191         getline;                # load the continuation line into $0
 192         $0 = s " " $0;          # join both lines into a single object record
 193 }
 194
 195 # Beginning a new section - done with the previous one (if any).
 196 #
 197 ARGIND == 2 && /^[^ ]/ {
 198         sect = 0;       # stays 0 (records ignored) unless a later rule sets it for this record
 199 }
 200
 201 # Process a loadable section (we only care about .-sections).
 202 #
 203 # Record the section name and its base address.
 204 # We also record the raw (non-stripped) address of the section because it can
 205 # be used to identify an anchor record.
 206 #
 207 # Note:
 208 # Since some AWK implementations cannot handle large integers, we strip off the
 209 # first 4 hex digits from the address.  This is safe because the kernel space
 210 # is not large enough for addresses to extend into those digits.  The portion
 211 # to strip off is stored in addr_prefix as a regexp, so further clauses can
 212 # perform a simple substitution to do the address stripping.
 213 #
 214 ARGIND == 2 && /^\./ {
 215         # Skip sections that are not relevant here, and sections that are
 216         # placed at address 0.
 217         if ($1 ~ /^\.orc_/ || $1 ~ /_sites$/ || $1 ~ /\.percpu/ || $2 ~ /^0x0+$/)
 218                 next;
 219
 220         sect = $1;
 221         anchor = 0;
 222         raw_addr = $2;
 223
 224         # Replace "0x" plus the first 4 hex digits by "0x" (the part to strip
 225         # is kept as a regexp in addr_prefix for use by later rules).
 226         addr_prefix = "^" substr(raw_addr, 1, 6);
 227         base = raw_addr;
 228         sub(addr_prefix, "0x", base);
 229         base = strtonum(base);
 230         sect_base[sect] = base;
 231         sect_size[sect] = strtonum($3);
 232
 233         if (dbg)
 234                 printf "[%s] BASE   %016x\n", sect, base >"/dev/stderr";
 235
 236         next;
 237 }
 238
 239 # If we are not in a section we care about, we ignore the record.
 240 #
 241 ARGIND == 2 && !sect {
 242         next;   # sect is 0 here: no section record was accepted for the current section
 243 }
 244
 245 # Record the first anchor symbol for the current section.
 246 #
 247 # An anchor record for the section bears the same raw address as the section
 248 # record.
 249 #
 250 ARGIND == 2 && !anchor && NF == 4 && raw_addr == $1 && $3 == "=" && $4 == "." {
 251         if (dbg)
 252                 printf "[%s] ANCHOR %016x = %s (.)\n", sect, 0, $2 >"/dev/stderr";
 253
 254         # The anchor symbol is at the section start, hence offset 0.
 255         sect_anchor[sect] = anchor = sprintf("%s %08x-%08x = %s", sect, 0, 0, $2);
 256
 257         next;
 258 }
 259
 260 # If no anchor record was found for the current section, use the first symbol
 261 # in the section as anchor.
 262 #
 263 ARGIND == 2 && !anchor && NF == 2 && $1 ~ /^0x/ && $2 !~ /^0x/ {
 264         # Offset of the symbol from the base of the current section.
 265         addr = $1;
 266         sub(addr_prefix, "0x", addr);
 267         addr = strtonum(addr) - base;
 268
 269         if (dbg)
 270                 printf "[%s] ANCHOR %016x = %s\n", sect, addr, $2 >"/dev/stderr";
 271
 272         sect_anchor[sect] = anchor = sprintf("%s %08x-%08x = %s", sect, addr, addr, $2);
 273         next;
 274 }
 275
 276 # The first occurrence of a section name in an object record establishes the
 277 # addend (often 0) for that section.  This information is needed to handle
 278 # sections that get combined in the final linking of vmlinux (e.g. .head.text
 279 # getting included at the start of .text).
 280 #
 281 # If the section does not have a base yet, use the base of the encapsulating
 282 # section.
 283 #
 284 ARGIND == 2 && sect && NF == 4 && /^ [^ \*]/ && !($1 in sect_addend) {
 285         # First object record for input section $1: fix its base and addend.
 286         if (!($1 in sect_base)) {
 287                 sect_base[$1] = base;   # inherit the enclosing section's base
 288                 if (dbg)
 289                         printf "[%s] BASE   %016x\n", $1, base >"/dev/stderr";
 290         }
 291
 292         addr = $2;
 293         sub(addr_prefix, "0x", addr);
 294         addr = strtonum(addr);
 295         sect_addend[$1] = addr - sect_base[$1];
 296         sect_in[$1] = sect;
 297
 298         # Print the base that was actually subtracted; it may differ from base.
 299         if (dbg)
 300                 printf "[%s] ADDEND %016x - %016x = %016x\n", $1, addr, sect_base[$1], sect_addend[$1] >"/dev/stderr";
 301
 302         # With vmlinux.o as object, actual offsets must come from vmlinux.o.map.
 303         if ($4 == "vmlinux.o")
 304                 need_o_map = 1;
 305 }
 306
 307 # (3) Collect offset ranges (relative to the section base address) for built-in
 308 # modules.
 309 #
 310 # If the final link was done using the actual objects, vmlinux.map contains all
 311 # the information we need (see section (3a)).
 312 # If linking was done using vmlinux.a as intermediary, we will need to process
 313 # vmlinux.o.map (see section (3b)).
 314
 315 # (3a) Determine offset range info using vmlinux.map.
 316 #
 317 # Since we are already processing vmlinux.map, the top level section that is
 318 # being processed is already known.  If we do not have a base address for it,
 319 # we do not need to process records for it.
 320 #
 321 # Given the object name, we determine the module(s) (if any) that the current
 322 # object is associated with.
 323 #
 324 # If we were already processing objects for a (list of) module(s):
 325 #  - If the current object belongs to the same module(s), update the range data
 326 #    to include the current object.
 327 #  - Otherwise, ensure that the end offset of the range is valid.
 328 #
 329 # If the current object does not belong to a built-in module, ignore it.
 330 #
 331 # If it does, we add a new built-in module offset range record.
 332 #
 333 ARGIND == 2 && !need_o_map && /^ [^ ]/ && NF == 4 && $3 != "0x0" {
 334         if (!(sect in sect_base))
 335                 next;
 336         # Record fields: $1 section, $2 address, $3 size, $4 object.
 337         # Turn the address into an offset from the section base.
 338         soff = $2;
 339         sub(addr_prefix, "0x", soff);
 340         soff = strtonum(soff) - sect_base[sect];
 341         eoff = soff + strtonum($3);     # start offset + size
 342
 343         # Determine which (if any) built-in modules the object belongs to.
 344         mod = get_module_info($4);
 345
 346         # mod_name, mod_sect, mod_soff and mod_eoff describe the range that
 347         # is currently open (if any).  When a range is open:
 348         #   - If the current object is within the same module, we update its
 349         #     entry by extending the range and move on
 350         #   - Otherwise, the range ends here:
 351         #       + If we are still within the same main section, the end
 352         #         offset is clamped to the start offset of the current
 353         #         object (e.g. .rodata.str1.[18] objects are often listed
 354         #         with an incorrect size in the linker map)
 355         #       + Otherwise, the end offset is clamped to the section size
 356         if (mod_name) {
 357                 if (mod == mod_name) {
 358                         mod_eoff = eoff;
 359                         update_entry(mod_sect, mod_name, mod_soff, eoff);       # same start, same key: overwrites
 360
 361                         next;
 362                 } else if (sect == sect_in[mod_sect]) {
 363                         if (mod_eoff > soff)
 364                                 update_entry(mod_sect, mod_name, mod_soff, soff);
 365                 } else {
 366                         v = sect_size[sect_in[mod_sect]];
 367                         if (mod_eoff > v)
 368                                 update_entry(mod_sect, mod_name, mod_soff, v);
 369                 }
 370         }
 371
 372         mod_name = mod;         # an empty mod closes the open range
 373
 374         # If we encountered an object that is not part of a built-in module, we
 375         # do not need to record any data.
 376         if (!mod)
 377                 next;
 378
 379         # At this point, we encountered the start of a new built-in module.
 380         mod_name = mod;         # (repeats the assignment made above)
 381         mod_soff = soff;
 382         mod_eoff = eoff;
 383         mod_sect = $1;
 384         update_entry($1, mod, soff, mod_eoff);
 385
 386         next;
 387 }
 388
 389 # If we do not need to parse the vmlinux.o.map file, we are done.
 390 #
 391 ARGIND == 3 && !need_o_map {
 392         if (dbg)
 393                 printf "Note: %s is not needed.\n", FILENAME >"/dev/stderr";
 394         exit;   # stop reading input; awk still runs the END rule, which writes the output
 395 }
 396
 397 # (3) Collect offset ranges (relative to the section base address) for built-in
 398 # modules.
 399 #
 400
 401 # (LLD) Convert an object record from lld format to ld format.
 402 #
 403 ARGIND == 3 && map_is_lld && NF == 5 && $5 ~ /:\(/ {
 404         gsub(/\)/, "");                 # remove every ')' from the record
 405         sub(/:\(/, " ");                # "object:(section" becomes fields $5 and $6
 406
 407         sect = $6;
 408         if (!(sect in sect_addend))
 409                 next;                   # no addend was recorded for this section: skip
 410
 411         sub(/ vmlinux\.a\(/, " ");      # remove the "vmlinux.a(" wrapper, if present
 412         $0 = " "sect " 0x"$1 " 0x"$3 " " $5;    # section, address, size, object
 413 }
 414
 415 # (3b) Determine offset range info using vmlinux.o.map.
 416 #
 417 # If we do not know an addend for the object's section, we are not interested
 418 # in anything within that section.
 419 #
 420 # Determine the top-level section that the object's section was included in
 421 # during the final link.  This is the section name offset range data will be
 422 # associated with for this object.
 423 #
 424 # The remainder of the processing of the current object record follows the
 425 # procedure outlined in (3a).
 426 #
 427 ARGIND == 3 && /^ [^ ]/ && NF == 4 && $3 != "0x0" {
 428         osect = $1;
 429         if (!(osect in sect_addend))
 430                 next;
 431
 432         # We need to work with the main section.
 433         sect = sect_in[osect];
 434         # Record fields: $1 section, $2 address, $3 size, $4 object.
 435         # Turn the address into an offset from the section base.
 436         soff = $2;
 437         sub(addr_prefix, "0x", soff);   # NOTE(review): addr_prefix was last set while reading vmlinux.map - confirm
 438         soff = strtonum(soff) + sect_addend[osect];
 439         eoff = soff + strtonum($3);     # start offset + size
 440
 441         # Determine which (if any) built-in modules the object belongs to.
 442         mod = get_module_info($4);
 443
 444         # mod_name, mod_sect, mod_soff and mod_eoff describe the range that
 445         # is currently open (if any).  When a range is open:
 446         #   - If the current object is within the same module, we update its
 447         #     entry by extending the range and move on
 448         #   - Otherwise, the range ends here:
 449         #       + If we are still within the same main section, the end
 450         #         offset is clamped to the start offset of the current
 451         #         object (e.g. .rodata.str1.[18] objects are often listed
 452         #         with an incorrect size in the linker map)
 453         #       + Otherwise, the end offset is clamped to the section size
 454         if (mod_name) {
 455                 if (mod == mod_name) {
 456                         mod_eoff = eoff;
 457                         update_entry(mod_sect, mod_name, mod_soff, eoff);       # same start, same key: overwrites
 458
 459                         next;
 460                 } else if (sect == sect_in[mod_sect]) {
 461                         if (mod_eoff > soff)
 462                                 update_entry(mod_sect, mod_name, mod_soff, soff);
 463                 } else {
 464                         v = sect_size[sect_in[mod_sect]];
 465                         if (mod_eoff > v)
 466                                 update_entry(mod_sect, mod_name, mod_soff, v);
 467                 }
 468         }
 469
 470         mod_name = mod;         # an empty mod closes the open range
 471
 472         # If we encountered an object that is not part of a built-in module, we
 473         # do not need to record any data.
 474         if (!mod)
 475                 next;
 476
 477         # At this point, we encountered the start of a new built-in module.
 478         mod_name = mod;         # (repeats the assignment made above)
 479         mod_soff = soff;
 480         mod_eoff = eoff;
 481         mod_sect = osect;
 482         update_entry(osect, mod, soff, mod_eoff);
 483
 484         next;
 485 }
 486
 487 # (4) Generate the output.
 488 #
 489 # Anchor records are added for each section that contains offset range data
 490 # records.  They are added at an adjusted section base address (base << 1) to
 491 # ensure they come first in the sorted records (see update_entry() above for
 492 # more information).
 493 #
 494 # All entries are sorted by (adjusted) address to ensure that the output can be
 495 # parsed in strict ascending address order.
 496 #
 497 END {
 498         # Add the anchor record of every section that has range data.
 499         for (name in count) {
 500                 if (!(name in sect_anchor))
 501                         continue;
 502                 entries[sprintf("%016x", 2 * sect_base[name])] = sect_anchor[name];
 503         }
 504
 505         total = asorti(entries, order);
 506         for (k = 1; k <= total; k++)
 507                 print entries[order[k]];
 508 }