src/include/catalog/reformat_dat_file.pl

   1 #!/usr/bin/perl
   2 #----------------------------------------------------------------------
   3 #
   4 # reformat_dat_file.pl
   5 #    Perl script that reads in catalog data file(s) and writes out
   6 #    functionally equivalent file(s) in a standard format.
   7 #
   8 #    In each entry of a reformatted file, metadata fields (if present)
   9 #    come first, with normal attributes starting on the following line,
  10 #    in the same order as the columns of the corresponding catalog.
  11 #    Comments and blank lines are preserved.
  12 #
  13 # Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
  14 # Portions Copyright (c) 1994, Regents of the University of California
  15 #
  16 # src/include/catalog/reformat_dat_file.pl
  17 #
  18 #----------------------------------------------------------------------
  19
  20 use strict;
  21 use warnings FATAL => 'all';
  22
  23 use FindBin;
  24 use Getopt::Long;
  25
  26 # If you copy this script to somewhere other than src/include/catalog,
  27 # you'll need to modify this "use lib" or provide a suitable -I switch.
  28 use lib "$FindBin::RealBin/../../backend/catalog/";
  29 use Catalog;
  30
  31 # Names of the metadata fields of a catalog entry.
  32 # Note: oid is a normal column from a storage perspective, but it's more
  33 # important than the rest, so it's listed first among the metadata fields.
  34 # Note: line_number is also a metadata field, but we never write it out,
  35 # so it's not listed here.
  36 my @METADATA =
  37   ('oid', 'oid_symbol', 'array_type_oid', 'descr', 'autogenerated');
  38
  39 # Process command line switches.
  40 my $output_path = '';
  41 my $full_tuples = 0;
  42
  43 GetOptions(
  44         'output=s' => \$output_path,
  45         'full-tuples' => \$full_tuples) || usage();
  46
  47 # Sanity check arguments.
  48 die "No input files.\n" unless @ARGV;
  49
  50 # Make sure output_path ends in a slash.
  51 if ($output_path ne '' && substr($output_path, -1) ne '/')
  52 {
  53         $output_path .= '/';
  54 }
  55
  56 # Read all the input files into internal data structures.
  57 # We pass data file names as arguments and then look for matching
  58 # headers to parse the schema from.
  59 my %catalogs;
  60 my %catalog_data;
  61 my @catnames;
  62 foreach my $datfile (@ARGV)
  63 {
  64         $datfile =~ /(.+)\.dat$/
  65           or die "Input files need to be data (.dat) files.\n";
  66
  67         my $header = "$1.h";
  68         die "There in no header file corresponding to $datfile"
  69           if !-e $header;
  70
  71         my $catalog = Catalog::ParseHeader($header);
  72         my $catname = $catalog->{catname};
  73         my $schema = $catalog->{columns};
  74
  75         push @catnames, $catname;
  76         $catalogs{$catname} = $catalog;
  77
  78         $catalog_data{$catname} = Catalog::ParseData($datfile, $schema, 1);
  79 }
  80
  81 ########################################################################
  82 # At this point, we have read all the data. If you are modifying this
  83 # script for bulk editing, this is a good place to build lookup tables,
  84 # if you need to. In the following example, the "next if !ref $row"
  85 # check below is a hack to filter out non-hash objects. This is because
  86 # we build the lookup tables from data that we read using the
  87 # "preserve_comments" parameter.
  88 #
  89 ##Index access method lookup.
  90 #my %amnames;
  91 #foreach my $row (@{ $catalog_data{pg_am} })
  92 #{
  93 #       next if !ref $row;
  94 #       $amnames{$row->{oid}} = $row->{amname};
  95 #}
  96 ########################################################################
  97
  98 # Write the data.
  99 foreach my $catname (@catnames)
 100 {
 101         my $catalog = $catalogs{$catname};
 102         my @attnames;
 103         my $schema = $catalog->{columns};
 104
 105         foreach my $column (@$schema)
 106         {
 107                 my $attname = $column->{name};
 108
 109                 # We may have ordinary columns at the storage level that we still
 110                 # want to format as a special value. Exclude these from the column
 111                 # list so they are not written twice.
 112                 push @attnames, $attname
 113                   if !(grep { $_ eq $attname } @METADATA);
 114         }
 115
 116         # Write output files to specified directory.
 117         my $datfile = "$output_path$catname.dat";
 118         open my $dat, '>', $datfile
 119           or die "can't open $datfile: $!";
 120
 121         foreach my $data (@{ $catalog_data{$catname} })
 122         {
 123
 124                 # Hash ref representing a data entry.
 125                 if (ref $data eq 'HASH')
 126                 {
 127                         my %values = %$data;
 128
 129                         ############################################################
 130                         # At this point we have the full tuple in memory as a hash
 131                         # and can do any operations we want. As written, it only
 132                         # removes default values, but this script can be adapted to
 133                         # do one-off bulk-editing.
 134                         ############################################################
 135
 136                         if (!$full_tuples)
 137                         {
 138                                 # If it's an autogenerated entry, drop it completely.
 139                                 next if $values{autogenerated};
 140                                 # Else, just drop any default/computed fields.
 141                                 strip_default_values(\%values, $schema, $catname);
 142                         }
 143
 144                         print $dat "{";
 145
 146                         # Separate out metadata fields for readability.
 147                         my $metadata_str = format_hash(\%values, @METADATA);
 148                         if ($metadata_str)
 149                         {
 150                                 print $dat $metadata_str;
 151
 152                                 # User attributes start on next line.
 153                                 print $dat ",\n ";
 154                         }
 155
 156                         my $data_str = format_hash(\%values, @attnames);
 157                         print $dat $data_str;
 158                         print $dat " },\n";
 159                 }
 160
 161                 # Preserve blank lines.
 162                 elsif ($data =~ /^\s*$/)
 163                 {
 164                         print $dat "\n";
 165                 }
 166
 167                 # Preserve comments or brackets that are on their own line.
 168                 elsif ($data =~ /^\s*(\[|\]|#.*?)\s*$/)
 169                 {
 170                         print $dat "$1\n";
 171                 }
 172         }
 173         close $dat;
 174 }
 175
 176 # Remove column values for which there is a matching default,
 177 # or if the value can be computed from other columns.
 178 sub strip_default_values
 179 {
 180         my ($row, $schema, $catname) = @_;
 181
 182         # Delete values that match defaults.
 183         foreach my $column (@$schema)
 184         {
 185                 my $attname = $column->{name};
 186
 187                 # It's okay if we have no oid value, since it will be assigned
 188                 # automatically before bootstrap.
 189                 die "strip_default_values: $catname.$attname undefined\n"
 190                   if !defined $row->{$attname} and $attname ne 'oid';
 191
 192                 if (defined $column->{default}
 193                         and ($row->{$attname} eq $column->{default}))
 194                 {
 195                         delete $row->{$attname};
 196                 }
 197         }
 198
 199         # Delete computed values.  See AddDefaultValues() in Catalog.pm.
 200         # Note: This must be done after deleting values matching defaults.
 201         if ($catname eq 'pg_proc')
 202         {
 203                 delete $row->{pronargs} if defined $row->{proargtypes};
 204         }
 205
 206         # If a pg_type entry has an auto-generated array type, then its
 207         # typarray field is a computed value too (see GenerateArrayTypes).
 208         if ($catname eq 'pg_type')
 209         {
 210                 delete $row->{typarray} if defined $row->{array_type_oid};
 211         }
 212
 213         return;
 214 }
 215
 216 # Format the individual elements of a Perl hash into a valid string
 217 # representation. We do this ourselves, rather than use native Perl
 218 # facilities, so we can keep control over the exact formatting of the
 219 # data files.
 220 sub format_hash
 221 {
 222         my $data = shift;
 223         my @orig_attnames = @_;
 224
 225         # Copy attname to new array if it has a value, so we can determine
 226         # the last populated element. We do this because we may have default
 227         # values or empty metadata fields.
 228         my @attnames;
 229         foreach my $orig_attname (@orig_attnames)
 230         {
 231                 push @attnames, $orig_attname
 232                   if defined $data->{$orig_attname};
 233         }
 234
 235         # When calling this function, we ether have an open-bracket or a
 236         # leading space already.
 237         my $char_count = 1;
 238
 239         my $threshold;
 240         my $hash_str = '';
 241         my $element_count = 0;
 242
 243         foreach my $attname (@attnames)
 244         {
 245                 $element_count++;
 246
 247                 # To limit the line to 80 chars, we need to account for the
 248                 # trailing characters.
 249                 if ($element_count == $#attnames + 1)
 250                 {
 251                         # Last element, so allow space for ' },'
 252                         $threshold = 77;
 253                 }
 254                 else
 255                 {
 256                         # Just need space for trailing comma
 257                         $threshold = 79;
 258                 }
 259
 260                 if ($element_count > 1)
 261                 {
 262                         $hash_str .= ',';
 263                         $char_count++;
 264                 }
 265
 266                 my $value = $data->{$attname};
 267
 268                 # Escape single quotes.
 269                 $value =~ s/'/\\'/g;
 270
 271                 # Include a leading space in the key-value pair, since this will
 272                 # always go after either a comma or an additional padding space on
 273                 # the next line.
 274                 my $element = " $attname => '$value'";
 275                 my $element_length = length($element);
 276
 277                 # If adding the element to the current line would expand the line
 278                 # beyond 80 chars, put it on the next line. We don't do this for
 279                 # the first element, since that would create a blank line.
 280                 if ($element_count > 1 and $char_count + $element_length > $threshold)
 281                 {
 282
 283                         # Put on next line with an additional space preceding. There
 284                         # are now two spaces in front of the key-value pair, lining
 285                         # it up with the line above it.
 286                         $hash_str .= "\n $element";
 287                         $char_count = $element_length + 1;
 288                 }
 289                 else
 290                 {
 291                         $hash_str .= $element;
 292                         $char_count += $element_length;
 293                 }
 294         }
 295         return $hash_str;
 296 }
 297
 298 sub usage
 299 {
 300         die <<EOM;
 301 Usage: reformat_dat_file.pl [options] datafile...
 302
 303 Options:
 304     --output PATH    output directory (default '.')
 305     --full-tuples    write out full tuples, including default values
 306
 307 Non-option arguments are the names of input .dat files.
 308 Updated files are written to the output directory,
 309 possibly overwriting the input files.
 310
 311 EOM
 312 }