1 #----------------------------------------------------------------------
4 # Perl script that transforms a list of keywords into a ScanKeywordList
5 # data structure that can be passed to ScanKeywordLookup().
7 # The input is a C header file containing a series of macro calls
8 # PG_KEYWORD("keyword", ...)
9 # Lines not starting with PG_KEYWORD are ignored. The keywords are
10 # implicitly numbered 0..N-1 in order of appearance in the header file.
11 # Currently, the keywords are required to appear in ASCII order.
13 # The output is a C header file that defines a "const ScanKeywordList"
14 # variable named according to the -v switch ("ScanKeywords" by default).
15 # The variable is marked "static" unless the -e switch is given.
17 # ScanKeywordList uses hash-based lookup, so this script also selects
18 # a minimal perfect hash function for the keyword set, and emits a
19 # static hash function that is referenced in the ScanKeywordList struct.
20 # The hash function is case-insensitive unless --no-case-fold is specified.
21 # Note that case folding works correctly only for all-ASCII keywords!
24 # Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
25 # Portions Copyright (c) 1994, Regents of the University of California
27 # src/tools/gen_keywordlist.pl
29 #----------------------------------------------------------------------
32 use warnings FATAL
=> 'all';
36 use lib
$FindBin::RealBin
;
# Default name for the generated variable; the varname:s option spec below
# stores any value given on the command line into this same scalar.
43 my $varname = 'ScanKeywords';
# NOTE(review): the start of this option-parsing call lies outside the visible
# lines (presumably Getopt::Long GetOptions -- confirm).  Each spec pairs a
# switch with a reference to the variable that receives it; if the call
# returns false, usage() is invoked instead of continuing.
46 'output:s' => \
$output_path,
48 'case-fold!' => \
$case_fold,
49 'varname:s' => \
$varname) || usage
();
# The first argument left in @ARGV is the keyword header to read.  Because
# the test is ||, a missing argument and a false one (such as 0) both die.
51 my $kw_input_file = shift @ARGV || die "No input file.\n";
53 # Make sure output_path ends in a slash if needed.
# Only a non-empty path whose last character is not a slash qualifies; an
# empty path is left alone so the output lands in the current directory.
54 if ($output_path ne '' && substr($output_path, -1) ne '/')
# The input name must end in .h; the run of word characters just before that
# suffix is captured as $1 and becomes the stem of the output file name.
59 $kw_input_file =~ /(\w+)\.h$/
60 || die "Input file must be named something.h.\n";
# Output header is <output_path><stem>_d.h, e.g. kwlist.h yields kwlist_d.h.
61 my $base_filename = $1 . '_d';
62 my $kw_def_file = $output_path . $base_filename . '.h';
# Open the keyword list for reading and the generated header for writing.
# Either failure is fatal and reports the file name with the OS error text.
64 open(my $kif, '<', $kw_input_file) || die "$kw_input_file: $!\n";
65 open(my $kwdef, '>', $kw_def_file) || die "$kw_def_file: $!\n";
67 # Opening boilerplate for keyword definition header.
# The heredoc text serves as the printf format; the three arguments supplied
# to it are the base file name followed by two copies of its upper-cased form.
68 printf $kwdef <<EOM, $base_filename, uc $base_filename, uc $base_filename;
69 /*-------------------------------------------------------------------------
72 * List of keywords represented as a ScanKeywordList.
74 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
75 * Portions Copyright (c) 1994, Regents of the University of California
78 * ******************************
79 * *** DO NOT EDIT THIS FILE! ***
80 * ******************************
82 * It has been GENERATED by src/tools/gen_keywordlist.pl
84 *-------------------------------------------------------------------------
90 #include "common/kwlookup.h"
94 # Parse input file for keyword names.
# Only a line that starts with PG_KEYWORD( followed by a double-quoted run of
# word characters matches; that run is captured as the keyword name.  The
# remaining macro arguments are not examined by this pattern.
98 if (/^PG_KEYWORD\("(\w+)"/)
104 # When being case-insensitive, insist that the input be all-lower-case.
# NOTE(review): the condition guarding this die is outside the visible lines.
# The message names the offending keyword and the input file.
107 foreach my $kw (@keywords)
109 die qq|The keyword
"$kw" is
not lower
-case
in $kw_input_file\n|
114 # Error out if the keyword names are not in ASCII order.
116 # While this isn't really necessary with hash-based lookup, it's still
117 # helpful because it provides a cheap way to reject duplicate keywords.
118 # Also, insisting on sorted order ensures that code that scans the keyword
119 # table linearly will see the keywords in a canonical order.
# Walk adjacent pairs: $i runs from the first keyword to the next-to-last, so
# the loop body never runs for a list of fewer than two keywords.
120 for my $i (0 .. $#keywords - 1)
123 qq|The keyword
"$keywords[$i + 1]" is out of order
in $kw_input_file\n|
# cmp is a string comparison; >= 0 flags a successor that is equal as well
# as one that sorts earlier, which is how duplicates get rejected.
124 if ($keywords[$i] cmp $keywords[ $i + 1 ]) >= 0;
127 # Emit the string containing all the keywords.
# The output is a C array named <varname>_kw_string whose initializer is a
# series of adjacent string literals, one keyword per line.  The join
# separator writes an explicit \0 escape after every keyword except the
# last, closes that literal, and opens the next one on a new tab-indented
# line; the final print closes the last literal and ends the declaration.
129 printf $kwdef qq|static const char
%s_kw_string[] =\n\t"|, $varname;
130 print $kwdef join qq|\\0"\n\t"|, @keywords;
131 print $kwdef qq|";\n\n|;
133 # Emit an array of numerical offsets which will be used to index into the
134 # keyword string. Also determine max keyword length.
# NOTE(review): $offset and $max_len are initialized outside the visible
# lines (presumably to 0 -- confirm).  The array element type is uint16 and
# no range check on $offset is visible here.
136 printf $kwdef "static const uint16 %s_kw_offsets[] = {\n", $varname;
140 foreach my $name (@keywords)
142 my $this_length = length($name);
# The offset is printed before it is advanced, so each entry is the position
# at which that keyword starts within the keyword string.
144 print $kwdef "\t$offset,\n";
146 # Calculate the cumulative offset of the next keyword,
147 # taking into account the null terminator.
148 $offset += $this_length + 1;
150 # Update max keyword length.
151 $max_len = $this_length if $max_len < $this_length;
# Close the offsets array initializer.
154 print $kwdef "};\n\n";
156 # Emit a macro defining the number of keywords.
157 # (In some places it's useful to have access to that as a constant.)
# The macro name is built from the upper-cased variable name, so the default
# variable name gives SCANKEYWORDS_NUM_KEYWORDS; its value is the number of
# elements in @keywords.
159 printf $kwdef "#define %s_NUM_KEYWORDS %d\n\n", uc $varname, scalar @keywords;
161 # Emit the definition of the hash function.
# The function name is the variable name with _hash_func appended; the same
# name is referenced again when the struct initializer is written.
163 my $funcname = $varname . "_hash_func";
# NOTE(review): PerfectHash::generate_hash_function is defined outside this
# file.  Judging from how $f is printed below, it presumably returns the C
# source text of a function named $funcname -- confirm.  It receives a
# reference to the keyword list, the function name, and the case_fold flag.
165 my $f = PerfectHash
::generate_hash_function
(\
@keywords, $funcname,
166 case_fold
=> $case_fold);
# The returned text is written out preceded by the static keyword.
168 printf $kwdef qq|static
%s\n|, $f;
170 # Emit the struct that wraps all this lookup info into one variable.
# The static qualifier is written unless $extern is set, matching the -e
# switch described in the header comment of this script.
172 printf $kwdef "static " if !$extern;
173 printf $kwdef "const ScanKeywordList %s = {\n", $varname;
# The initializers are positional: keyword string, offsets array, hash
# function, keyword count macro, then maximum keyword length.
# NOTE(review): this order presumably mirrors the field order of
# ScanKeywordList in common/kwlookup.h, which is not visible here -- confirm.
174 printf $kwdef qq|\t%s_kw_string,\n|, $varname;
175 printf $kwdef qq|\t%s_kw_offsets,\n|, $varname;
176 printf $kwdef qq|\t%s,\n|, $funcname;
177 printf $kwdef qq|\t%s_NUM_KEYWORDS,\n|, uc $varname;
178 printf $kwdef qq|\t%d\n|, $max_len;
179 printf $kwdef "};\n\n";
# Final line of the generated header: an #endif annotated with the
# upper-cased base file name plus _H (presumably closing an include guard
# opened in the boilerplate above -- the opening lines are not visible here).
181 printf $kwdef "#endif\t\t\t\t\t\t\t/* %s_H */\n", uc $base_filename;
187 Usage: gen_keywordlist.pl [--output/-o <path>] [--varname/-v <varname>] [--extern/-e] [--[no-]case-fold] input_file
188 --output Output directory (default '.')
189 --varname Name for ScanKeywordList variable (default 'ScanKeywords')
190 --extern Allow the ScanKeywordList variable to be globally visible
191 --no-case-fold Keyword matching is to be case-sensitive
193 gen_keywordlist.pl transforms a list of keywords into a ScanKeywordList.
194 The output filename is derived from the input file by inserting _d,
195 for example kwlist_d.h is produced from kwlist.h.