src/backend/catalog/Catalog.pm

   1 #----------------------------------------------------------------------
   2 #
   3 # Catalog.pm
   4 #    Perl module that extracts info from catalog files into Perl
   5 #    data structures
   6 #
   7 # Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
   8 # Portions Copyright (c) 1994, Regents of the University of California
   9 #
  10 # src/backend/catalog/Catalog.pm
  11 #
  12 #----------------------------------------------------------------------
  13
  14 package Catalog;
  15
  16 use strict;
  17 use warnings FATAL => 'all';
  18
  19 use File::Compare;
  20
  21
  22 # Parses a catalog header file into a data structure describing the schema
  23 # of the catalog.
  24 sub ParseHeader
  25 {
  26         my $input_file = shift;
  27
  28         # There are a few types which are given one name in the C source, but a
  29         # different name at the SQL level.  These are enumerated here.
  30         my %RENAME_ATTTYPE = (
  31                 'int16' => 'int2',
  32                 'int32' => 'int4',
  33                 'int64' => 'int8',
  34                 'Oid' => 'oid',
  35                 'NameData' => 'name',
  36                 'TransactionId' => 'xid',
  37                 'XLogRecPtr' => 'pg_lsn');
  38
  39         my %catalog;
  40         my $declaring_attributes = 0;
  41         my $is_varlen = 0;
  42         my $is_client_code = 0;
  43
  44         $catalog{columns} = [];
  45         $catalog{toasting} = [];
  46         $catalog{indexing} = [];
  47         $catalog{other_oids} = [];
  48         $catalog{foreign_keys} = [];
  49         $catalog{client_code} = [];
  50
  51         open(my $ifh, '<', $input_file) || die "$input_file: $!";
  52
  53         # Scan the input file.
  54         while (<$ifh>)
  55         {
  56
  57                 # Set appropriate flag when we're in certain code sections.
  58                 if (/^#/)
  59                 {
  60                         $is_varlen = 1 if /^#ifdef\s+CATALOG_VARLEN/;
  61                         if (/^#ifdef\s+EXPOSE_TO_CLIENT_CODE/)
  62                         {
  63                                 $is_client_code = 1;
  64                                 next;
  65                         }
  66                         next if !$is_client_code;
  67                 }
  68
  69                 if (!$is_client_code)
  70                 {
  71                         # Strip C-style comments.
  72                         s;/\*(.|\n)*\*/;;g;
  73                         if (m;/\*;)
  74                         {
  75
  76                                 # handle multi-line comments properly.
  77                                 my $next_line = <$ifh>;
  78                                 die "$input_file: ends within C-style comment\n"
  79                                   if !defined $next_line;
  80                                 $_ .= $next_line;
  81                                 redo;
  82                         }
  83
  84                         # Strip useless whitespace and trailing semicolons.
  85                         chomp;
  86                         s/^\s+//;
  87                         s/;\s*$//;
  88                         s/\s+/ /g;
  89                 }
  90
  91                 # Push the data into the appropriate data structure.
  92                 # Caution: when adding new recognized OID-defining macros,
  93                 # also update src/include/catalog/renumber_oids.pl.
  94                 if (/^DECLARE_TOAST\(\s*
  95                          (?<parent_table>\w+),\s*
  96                          (?<toast_oid>\d+),\s*
  97                          (?<toast_index_oid>\d+)\s*
  98                          \)/x
  99                   )
 100                 {
 101                         push @{ $catalog{toasting} }, {%+};
 102                 }
 103                 elsif (
 104                         /^DECLARE_TOAST_WITH_MACRO\(\s*
 105                          (?<parent_table>\w+),\s*
 106                          (?<toast_oid>\d+),\s*
 107                          (?<toast_index_oid>\d+),\s*
 108                          (?<toast_oid_macro>\w+),\s*
 109                          (?<toast_index_oid_macro>\w+)\s*
 110                          \)/x
 111                   )
 112                 {
 113                         push @{ $catalog{toasting} }, {%+};
 114                 }
 115                 elsif (
 116                         /^DECLARE_(UNIQUE_)?INDEX(_PKEY)?\(\s*
 117                          (?<index_name>\w+),\s*
 118                          (?<index_oid>\d+),\s*
 119                          (?<index_oid_macro>\w+),\s*
 120                          (?<table_name>\w+),\s*
 121                          (?<index_decl>.+)\s*
 122                          \)/x
 123                   )
 124                 {
 125                         push @{ $catalog{indexing} },
 126                           {
 127                                 is_unique => $1 ? 1 : 0,
 128                                 is_pkey => $2 ? 1 : 0,
 129                                 %+,
 130                           };
 131                 }
 132                 elsif (
 133                         /^MAKE_SYSCACHE\(\s*
 134                         (?<syscache_name>\w+),\s*
 135                         (?<index_name>\w+),\s*
 136                         (?<syscache_nbuckets>\w+)\s*
 137                         \)/x
 138                   )
 139                 {
 140                         push @{ $catalog{syscaches} }, {%+};
 141                 }
 142                 elsif (
 143                         /^DECLARE_OID_DEFINING_MACRO\(\s*
 144                          (?<other_name>\w+),\s*
 145                          (?<other_oid>\d+)\s*
 146                          \)/x
 147                   )
 148                 {
 149                         push @{ $catalog{other_oids} }, {%+};
 150                 }
 151                 elsif (
 152                         /^DECLARE_(ARRAY_)?FOREIGN_KEY(_OPT)?\(\s*
 153                          \((?<fk_cols>[^)]+)\),\s*
 154                          (?<pk_table>\w+),\s*
 155                          \((?<pk_cols>[^)]+)\)\s*
 156                          \)/x
 157                   )
 158                 {
 159                         push @{ $catalog{foreign_keys} },
 160                           {
 161                                 is_array => $1 ? 1 : 0,
 162                                 is_opt => $2 ? 1 : 0,
 163                                 %+,
 164                           };
 165                 }
 166                 elsif (
 167                         /^CATALOG\(\s*
 168                          (?<catname>\w+),\s*
 169                          (?<relation_oid>\d+),\s*
 170                          (?<relation_oid_macro>\w+)\s*
 171                          \)/x
 172                   )
 173                 {
 174                         @catalog{ keys %+ } = values %+;
 175
 176                         $catalog{bootstrap} = /BKI_BOOTSTRAP/ ? ' bootstrap' : '';
 177                         $catalog{shared_relation} =
 178                           /BKI_SHARED_RELATION/ ? ' shared_relation' : '';
 179                         if (/BKI_ROWTYPE_OID\(\s*
 180                                  (?<rowtype_oid>\d+),\s*
 181                                  (?<rowtype_oid_macro>\w+)\s*
 182                                  \)/x
 183                           )
 184                         {
 185                                 @catalog{ keys %+ } = values %+;
 186                                 $catalog{rowtype_oid_clause} = " rowtype_oid $+{rowtype_oid}";
 187                         }
 188                         else
 189                         {
 190                                 $catalog{rowtype_oid} = '';
 191                                 $catalog{rowtype_oid_clause} = '';
 192                                 $catalog{rowtype_oid_macro} = '';
 193                         }
 194                         $catalog{schema_macro} = /BKI_SCHEMA_MACRO/ ? 1 : 0;
 195                         $declaring_attributes = 1;
 196                 }
 197                 elsif ($is_client_code)
 198                 {
 199                         if (/^#endif/)
 200                         {
 201                                 $is_client_code = 0;
 202                         }
 203                         else
 204                         {
 205                                 push @{ $catalog{client_code} }, $_;
 206                         }
 207                 }
 208                 elsif ($declaring_attributes)
 209                 {
 210                         next if (/^{|^$/);
 211                         if (/^}/)
 212                         {
 213                                 $declaring_attributes = 0;
 214                         }
 215                         else
 216                         {
 217                                 my %column;
 218                                 my @attopts = split /\s+/, $_;
 219                                 my $atttype = shift @attopts;
 220                                 my $attname = shift @attopts;
 221                                 die "parse error ($input_file)"
 222                                   unless ($attname and $atttype);
 223
 224                                 if (exists $RENAME_ATTTYPE{$atttype})
 225                                 {
 226                                         $atttype = $RENAME_ATTTYPE{$atttype};
 227                                 }
 228
 229                                 # If the C name ends with '[]' or '[digits]', we have
 230                                 # an array type, so we discard that from the name and
 231                                 # prepend '_' to the type.
 232                                 if ($attname =~ /(\w+)\[\d*\]/)
 233                                 {
 234                                         $attname = $1;
 235                                         $atttype = '_' . $atttype;
 236                                 }
 237
 238                                 $column{type} = $atttype;
 239                                 $column{name} = $attname;
 240                                 $column{is_varlen} = 1 if $is_varlen;
 241
 242                                 foreach my $attopt (@attopts)
 243                                 {
 244                                         if ($attopt eq 'BKI_FORCE_NULL')
 245                                         {
 246                                                 $column{forcenull} = 1;
 247                                         }
 248                                         elsif ($attopt eq 'BKI_FORCE_NOT_NULL')
 249                                         {
 250                                                 $column{forcenotnull} = 1;
 251                                         }
 252
 253                                         # We use quotes for values like \0 and \054, to
 254                                         # make sure all compilers and syntax highlighters
 255                                         # can recognize them properly.
 256                                         elsif ($attopt =~ /BKI_DEFAULT\(['"]?([^'"]+)['"]?\)/)
 257                                         {
 258                                                 $column{default} = $1;
 259                                         }
 260                                         elsif (
 261                                                 $attopt =~ /BKI_ARRAY_DEFAULT\(['"]?([^'"]+)['"]?\)/)
 262                                         {
 263                                                 $column{array_default} = $1;
 264                                         }
 265                                         elsif ($attopt =~ /BKI_LOOKUP(_OPT)?\((\w+)\)/)
 266                                         {
 267                                                 $column{lookup} = $2;
 268                                                 $column{lookup_opt} = $1 ? 1 : 0;
 269                                                 # BKI_LOOKUP implicitly makes an FK reference
 270                                                 push @{ $catalog{foreign_keys} },
 271                                                   {
 272                                                         is_array => (
 273                                                                 $atttype eq 'oidvector' || $atttype eq '_oid')
 274                                                         ? 1
 275                                                         : 0,
 276                                                         is_opt => $column{lookup_opt},
 277                                                         fk_cols => $attname,
 278                                                         pk_table => $column{lookup},
 279                                                         pk_cols => 'oid'
 280                                                   };
 281                                         }
 282                                         else
 283                                         {
 284                                                 die
 285                                                   "unknown or misformatted column option $attopt on column $attname";
 286                                         }
 287
 288                                         if ($column{forcenull} and $column{forcenotnull})
 289                                         {
 290                                                 die "$attname is forced both null and not null";
 291                                         }
 292                                 }
 293                                 push @{ $catalog{columns} }, \%column;
 294                         }
 295                 }
 296         }
 297         close $ifh;
 298         return \%catalog;
 299 }
 300
 301 # Parses a file containing Perl data structure literals, returning live data.
 302 #
 303 # The parameter $preserve_comments needs to be set for callers that want
 304 # to work with non-data lines in the data files, such as comments and blank
 305 # lines. If a caller just wants to consume the data, leave it unset.
 306 # (When requested, non-data lines will be returned as array entries that
 307 # are strings not hashes, so extra code is needed to deal with that.)
 308 sub ParseData
 309 {
 310         my ($input_file, $schema, $preserve_comments) = @_;
 311
 312         open(my $ifd, '<', $input_file) || die "$input_file: $!";
 313         $input_file =~ /(\w+)\.dat$/
 314           or die "Input file $input_file needs to be a .dat file.\n";
 315         my $catname = $1;
 316         my $data = [];
 317
 318         # Scan the input file.
 319         while (<$ifd>)
 320         {
 321                 my $hash_ref;
 322
 323                 if (/{/)
 324                 {
 325                         # Capture the hash ref
 326                         # NB: Assumes that the next hash ref can't start on the
 327                         # same line where the present one ended.
 328                         # Not foolproof, but we shouldn't need a full parser,
 329                         # since we expect relatively well-behaved input.
 330
 331                         # Quick hack to detect when we have a full hash ref to
 332                         # parse. We can't just use a regex because of values in
 333                         # pg_aggregate and pg_proc like '{0,0}'.  This will need
 334                         # work if we ever need to allow unbalanced braces within
 335                         # a field value.
 336                         my $lcnt = tr/{//;
 337                         my $rcnt = tr/}//;
 338
 339                         if ($lcnt == $rcnt)
 340                         {
 341                                 # We're treating the input line as a piece of Perl, so we
 342                                 # need to use string eval here. Tell perlcritic we know what
 343                                 # we're doing.
 344                                 eval "\$hash_ref = $_";    ## no critic (ProhibitStringyEval)
 345                                 if (!ref $hash_ref)
 346                                 {
 347                                         die "$input_file: error parsing line $.:\n$_\n";
 348                                 }
 349
 350                                 # Annotate each hash with the source line number.
 351                                 $hash_ref->{line_number} = $.;
 352
 353                                 # Expand tuples to their full representation.
 354                                 AddDefaultValues($hash_ref, $schema, $catname);
 355                         }
 356                         else
 357                         {
 358                                 my $next_line = <$ifd>;
 359                                 die "$input_file: file ends within Perl hash\n"
 360                                   if !defined $next_line;
 361                                 $_ .= $next_line;
 362                                 redo;
 363                         }
 364                 }
 365
 366                 # If we found a hash reference, keep it, unless it is marked as
 367                 # autogenerated; in that case it'd duplicate an entry we'll
 368                 # autogenerate below.  (This makes it safe for reformat_dat_file.pl
 369                 # with --full-tuples to print autogenerated entries, which seems like
 370                 # useful behavior for debugging.)
 371                 #
 372                 # Otherwise, we have a non-data string, which we keep only if
 373                 # the caller requested it.
 374                 if (defined $hash_ref)
 375                 {
 376                         push @$data, $hash_ref if !$hash_ref->{autogenerated};
 377                 }
 378                 else
 379                 {
 380                         push @$data, $_ if $preserve_comments;
 381                 }
 382         }
 383
 384         close $ifd;
 385
 386         # If this is pg_type, auto-generate array types too.
 387         GenerateArrayTypes($schema, $data) if $catname eq 'pg_type';
 388
 389         return $data;
 390 }
 391
 392 # Fill in default values of a record using the given schema.
 393 # It's the caller's responsibility to specify other values beforehand.
 394 sub AddDefaultValues
 395 {
 396         my ($row, $schema, $catname) = @_;
 397         my @missing_fields;
 398
 399         # Compute special-case column values.
 400         # Note: If you add new cases here, you must also teach
 401         # strip_default_values() in include/catalog/reformat_dat_file.pl
 402         # to delete them.
 403         if ($catname eq 'pg_proc')
 404         {
 405                 # pg_proc.pronargs can be derived from proargtypes.
 406                 if (defined $row->{proargtypes})
 407                 {
 408                         my @proargtypes = split /\s+/, $row->{proargtypes};
 409                         $row->{pronargs} = scalar(@proargtypes);
 410                 }
 411         }
 412
 413         # Now fill in defaults, and note any columns that remain undefined.
 414         foreach my $column (@$schema)
 415         {
 416                 my $attname = $column->{name};
 417
 418                 # No work if field already has a value.
 419                 next if defined $row->{$attname};
 420
 421                 # Ignore 'oid' columns, they're handled elsewhere.
 422                 next if $attname eq 'oid';
 423
 424                 # If column has a default value, fill that in.
 425                 if (defined $column->{default})
 426                 {
 427                         $row->{$attname} = $column->{default};
 428                         next;
 429                 }
 430
 431                 # Failed to find a value for this field.
 432                 push @missing_fields, $attname;
 433         }
 434
 435         # Failure to provide all columns is a hard error.
 436         if (@missing_fields)
 437         {
 438                 die sprintf "missing values for field(s) %s in %s.dat line %s\n",
 439                   join(', ', @missing_fields), $catname, $row->{line_number};
 440         }
 441 }
 442
 443 # If a pg_type entry has an array_type_oid metadata field,
 444 # auto-generate an entry for its array type.
 445 sub GenerateArrayTypes
 446 {
 447         my $pgtype_schema = shift;
 448         my $types = shift;
 449         my @array_types;
 450
 451         foreach my $elem_type (@$types)
 452         {
 453                 next if !(ref $elem_type eq 'HASH');
 454                 next if !defined($elem_type->{array_type_oid});
 455
 456                 my %array_type;
 457
 458                 # Set up metadata fields for array type.
 459                 $array_type{oid} = $elem_type->{array_type_oid};
 460                 $array_type{autogenerated} = 1;
 461                 $array_type{line_number} = $elem_type->{line_number};
 462
 463                 # Set up column values derived from the element type.
 464                 $array_type{typname} = '_' . $elem_type->{typname};
 465                 $array_type{typelem} = $elem_type->{typname};
 466
 467                 # Arrays require INT alignment, unless the element type requires
 468                 # DOUBLE alignment.
 469                 $array_type{typalign} = $elem_type->{typalign} eq 'd' ? 'd' : 'i';
 470
 471                 # Fill in the rest of the array entry's fields.
 472                 foreach my $column (@$pgtype_schema)
 473                 {
 474                         my $attname = $column->{name};
 475
 476                         # Skip if we already set it above.
 477                         next if defined $array_type{$attname};
 478
 479                         # Apply the BKI_ARRAY_DEFAULT setting if there is one,
 480                         # otherwise copy the field from the element type.
 481                         if (defined $column->{array_default})
 482                         {
 483                                 $array_type{$attname} = $column->{array_default};
 484                         }
 485                         else
 486                         {
 487                                 $array_type{$attname} = $elem_type->{$attname};
 488                         }
 489                 }
 490
 491                 # Lastly, cross-link the array to the element type.
 492                 $elem_type->{typarray} = $array_type{typname};
 493
 494                 push @array_types, \%array_type;
 495         }
 496
 497         push @$types, @array_types;
 498
 499         return;
 500 }
 501
 502 # Rename temporary files to final names.
 503 # Call this function with the final file name and the .tmp extension.
 504 #
 505 # If the final file already exists and has identical contents, don't
 506 # overwrite it; this behavior avoids unnecessary recompiles due to
 507 # updating the mod date on unchanged header files.
 508 #
 509 # Note: recommended extension is ".tmp$$", so that parallel make steps
 510 # can't use the same temp files.
 511 sub RenameTempFile
 512 {
 513         my $final_name = shift;
 514         my $extension = shift;
 515         my $temp_name = $final_name . $extension;
 516
 517         if (-f $final_name
 518                 && compare($temp_name, $final_name) == 0)
 519         {
 520                 unlink($temp_name) || die "unlink: $temp_name: $!";
 521         }
 522         else
 523         {
 524                 rename($temp_name, $final_name) || die "rename: $temp_name: $!";
 525         }
 526         return;
 527 }
 528
 529 # Find a symbol defined in a particular header file and extract the value.
 530 # include_path should be the path to src/include/.
 531 sub FindDefinedSymbol
 532 {
 533         my ($catalog_header, $include_path, $symbol) = @_;
 534         my $value;
 535
 536         # Make sure include path ends in a slash.
 537         if (substr($include_path, -1) ne '/')
 538         {
 539                 $include_path .= '/';
 540         }
 541         my $file = $include_path . $catalog_header;
 542         open(my $find_defined_symbol, '<', $file) || die "$file: $!";
 543         while (<$find_defined_symbol>)
 544         {
 545                 if (/^#define\s+\Q$symbol\E\s+(\S+)/)
 546                 {
 547                         $value = $1;
 548                         last;
 549                 }
 550         }
 551         close $find_defined_symbol;
 552         return $value if defined $value;
 553         die "$file: no definition found for $symbol\n";
 554 }
 555
 556 # Similar to FindDefinedSymbol, but looks in the bootstrap metadata.
 557 sub FindDefinedSymbolFromData
 558 {
 559         my ($data, $symbol) = @_;
 560         foreach my $row (@{$data})
 561         {
 562                 if ($row->{oid_symbol} eq $symbol)
 563                 {
 564                         return $row->{oid};
 565                 }
 566         }
 567         die "no definition found for $symbol\n";
 568 }
 569
 570 # Extract an array of all the OIDs assigned in the specified catalog headers
 571 # and their associated data files (if any).
 572 # Caution: genbki.pl contains equivalent logic; change it too if you need to
 573 # touch this.
 574 sub FindAllOidsFromHeaders
 575 {
 576         my @input_files = @_;
 577
 578         my @oids = ();
 579
 580         foreach my $header (@input_files)
 581         {
 582                 $header =~ /(.+)\.h$/
 583                   or die "Input files need to be header files.\n";
 584                 my $datfile = "$1.dat";
 585
 586                 my $catalog = Catalog::ParseHeader($header);
 587
 588                 # We ignore the pg_class OID and rowtype OID of bootstrap catalogs,
 589                 # as those are expected to appear in the initial data for pg_class
 590                 # and pg_type.  For regular catalogs, include these OIDs.
 591                 if (!$catalog->{bootstrap})
 592                 {
 593                         push @oids, $catalog->{relation_oid}
 594                           if ($catalog->{relation_oid});
 595                         push @oids, $catalog->{rowtype_oid} if ($catalog->{rowtype_oid});
 596                 }
 597
 598                 # Not all catalogs have a data file.
 599                 if (-e $datfile)
 600                 {
 601                         my $catdata =
 602                           Catalog::ParseData($datfile, $catalog->{columns}, 0);
 603
 604                         foreach my $row (@$catdata)
 605                         {
 606                                 push @oids, $row->{oid} if defined $row->{oid};
 607                         }
 608                 }
 609
 610                 foreach my $toast (@{ $catalog->{toasting} })
 611                 {
 612                         push @oids, $toast->{toast_oid}, $toast->{toast_index_oid};
 613                 }
 614                 foreach my $index (@{ $catalog->{indexing} })
 615                 {
 616                         push @oids, $index->{index_oid};
 617                 }
 618                 foreach my $other (@{ $catalog->{other_oids} })
 619                 {
 620                         push @oids, $other->{other_oid};
 621                 }
 622         }
 623
 624         return \@oids;
 625 }
 626
 627 1;