4 * Copyright (C) 1989 Free Software Foundation, Inc.
5 * written by Douglas C. Schmidt (d.schmidt@vanderbilt.edu)
7 * This file is part of GNU GPERF.
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 2
12 * of the License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 #include "ace/OS_NS_stdlib.h"
27 #include "ace/OS_NS_time.h"
28 #include "ace/OS_NS_stdio.h"
29 #include "ace/OS_Memory.h"
30 #include "ace/Truncate.h"
32 /// Current release version.
33 extern const char *version_string
;
35 /// Reads input keys, possibly applies the reordering heuristic, sets
36 /// the maximum associated value size (rounded up to the nearest power
37 /// of 2), may initialize the associated values array, and determines
38 /// the maximum hash table size. Note: using the random numbers is
39 /// often helpful, though not as deterministic, of course!
42 fewest_collisions (0),
48 /// Merge two disjoint hash key multisets to form the ordered disjoint
49 /// union of the sets. (In a multiset, an element can occur multiple
50 /// times). Precondition: both set1 and set2 must be
51 /// ordered. Returns the length of the combined set.
53 Gen_Perf::compute_disjoint_union (char *set1
, char *set2
, char *set3
)
57 while (*set1
&& *set2
)
62 *set3
= *set1
< *set2
? *set1
++ : *set2
++;
63 if (set3
== base
|| *set3
!= *(set3
- 1))
70 if (set3
== base
|| *set3
!= *(set3
- 1))
77 if (set3
== base
|| *set3
!= *(set3
- 1))
81 return ACE_Utils::truncate_cast
<int> (set3
- base
);
84 /// Sort the UNION_SET in increasing frequency of occurrence. This
85 /// speeds up later processing since we may assume the resulting set
86 /// (Set_3, in this case), is ordered. Uses insertion sort, since the
87 /// UNION_SET is typically short.
89 Gen_Perf::sort_set (char *union_set
, int len
)
91 for (int i
= 0, j
= len
- 1; i
< j
; i
++)
95 for (curr
= i
+ 1, tmp
= (int) union_set
[curr
];
97 && Vectors::occurrences
[tmp
] < Vectors::occurrences
[(int) union_set
[curr
- 1]];
99 union_set
[curr
] = union_set
[curr
- 1];
101 union_set
[curr
] = static_cast<char> (tmp
);
105 /// Generate a keysig's hash value.
107 Gen_Perf::hash (List_Node
*key_node
)
109 int sum
= option
[NOLENGTH
] ? 0 : key_node
->length
;
111 for (char *ptr
= key_node
->keysig
; *ptr
; ptr
++)
112 sum
+= Vectors::asso_values
[(int) *ptr
];
114 key_node
->hash_value
= sum
;
118 /// Find out how character value change affects successfully hash
119 /// items. Returns FALSE if no other hash values are affected, else
120 /// returns TRUE. Note that because Option.Get_Asso_Max is a power of
121 /// two we can guarantee that all legal Vectors::Asso_Values are
122 /// visited without repetition since Option.Get_Jump was forced to be
125 Gen_Perf::affects_prev (char c
, List_Node
*curr
)
127 int const original_char
= Vectors::asso_values
[(int) c
];
128 int total_iterations
= 0;
132 total_iterations
= option
.asso_max ();
136 total_iterations
= option
.iterations ();
138 if (total_iterations
== 0)
139 total_iterations
= this->key_list
.keyword_list_length ();
142 // Try all legal associated values.
144 for (int i
= total_iterations
- 1; i
>= 0; --i
)
146 Vectors::asso_values
[(int) c
] =
147 (Vectors::asso_values
[(int) c
]
148 + (option
.jump () ? option
.jump () : ACE_OS::rand ()))
149 & (option
.asso_max () - 1);
151 // Iteration Number array is a win, O(1) intialization time!
152 this->char_search
.reset ();
156 // See how this asso_value change affects previous keywords. If
157 // it does better than before we'll take it!
158 for (List_Node
*ptr
= this->key_list
.head
;
159 this->char_search
.find (this->hash (ptr
)) == 0
160 || ++collisions
< fewest_collisions
;
165 fewest_collisions
= collisions
;
167 if (option
[DEBUGGING
])
169 ACE_DEBUG ((LM_DEBUG
,
170 "- resolved after %d iterations",
171 total_iterations
- i
));
179 // Restore original values, no more tries.
180 Vectors::asso_values
[(int) c
] = original_char
;
182 // If we're this far it's time to try the next character....
186 /// Change a character value, try least-used characters first.
188 Gen_Perf::change (List_Node
*prior
, List_Node
*curr
)
190 if (option
[DEBUGGING
])
191 ACE_DEBUG ((LM_DEBUG
,
192 "collision on keyword #%d, prior = \"%C\", curr = \"%C\" hash = %d\n",
197 Gen_Perf::sort_set (this->union_set
,
198 compute_disjoint_union (prior
->keysig
,
202 // Try changing some values, if change doesn't alter other values
203 // continue normal action.
206 for (char *temp
= union_set
; *temp
!= '\0'; temp
++)
207 if (affects_prev (*temp
, curr
) == 0)
209 if (option
[DEBUGGING
])
210 ACE_DEBUG ((LM_DEBUG
,
211 " by changing asso_value['%c'] (char #%d) to %d\n",
213 temp
- union_set
+ 1,
214 Vectors::asso_values
[(int) *temp
]));
215 // Good, doesn't affect previous hash values, we'll take it.
219 for (List_Node
*ptr
= this->key_list
.head
;
226 if (option
[DEBUGGING
])
227 ACE_DEBUG ((LM_DEBUG
,
228 "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
229 !option
[FAST
] ? option
.asso_max () : option
.iterations () ? option
.iterations () : this->key_list
.keyword_list_length (),
230 fewest_collisions
+ this->key_list
.total_duplicates
));
237 if (this->key_list
.read_keys () == -1)
241 this->key_list
.reorder ();
243 int asso_value_max
= option
.asso_max ();
244 int const non_linked_length
= this->key_list
.keyword_list_length ();
246 if (asso_value_max
== 0)
247 asso_value_max
= non_linked_length
;
248 else if (asso_value_max
> 0)
249 asso_value_max
*= non_linked_length
;
250 else // if (asso_value_max < 0)
251 asso_value_max
= non_linked_length
/ -asso_value_max
;
253 option
.asso_max (ACE_POW (asso_value_max
));
257 ACE_OS::srand ((u_int
) ACE_OS::time (0));
259 for (int i
= 0; i
< ACE_STANDARD_CHARACTER_SET_SIZE
; ++i
)
261 Vectors::asso_values
[i
] = (ACE_OS::rand () & (asso_value_max
- 1));
266 int const asso_value
= option
.initial_value ();
268 // Initialize array if user requests non-zero default.
271 for (int i
= ACE_STANDARD_CHARACTER_SET_SIZE
- 1; i
>= 0; --i
)
273 Vectors::asso_values
[i
] = asso_value
& (option
.asso_max () - 1);
278 this->max_hash_value
=
279 this->key_list
.max_key_length ()
281 * option
.max_keysig_size ();
283 ACE_NEW_RETURN (this->union_set
,
284 char[2 * option
.max_keysig_size () + 1],
286 ACE_OS::printf ("/* ");
289 ACE_OS::printf ("C");
291 else if (option
[CPLUSPLUS
])
292 ACE_OS::printf ("C++");
294 ACE_OS::printf (" code produced by gperf version %s */\n",
296 Options::print_options ();
298 if (option
[DEBUGGING
])
299 ACE_DEBUG ((LM_DEBUG
,
300 "total non-linked keys = %d\n"
301 "total duplicates = %d\n"
302 "maximum associated value is %d\n"
303 "maximum size of generated hash table is %d\n",
305 this->key_list
.total_duplicates
,
308 if (this->char_search
.open (max_hash_value
+ 1) == -1)
313 /// For binary search, do normal string sort on the keys, and then
314 /// assign hash values from 0 to N-1. Then go ahead with the normal
315 /// logic that is there for perfect hashing.
317 Gen_Perf::compute_binary_search ()
320 this->key_list
.string_sort ();
322 // Assign hash values.
325 for (hash_value
= 0, curr
= this->key_list
.head
;
327 curr
= curr
->next
, hash_value
++)
329 curr
->hash_value
= hash_value
;
336 Gen_Perf::compute_linear_search ()
338 // Convert the list of keys to a linear list without
339 // equivalence classes.
340 this->key_list
.string_sort ();
342 // Assign hash values.
345 for (hash_value
= 0, curr
= this->key_list
.head
;
347 curr
= curr
->next
, hash_value
++)
349 curr
->hash_value
= hash_value
;
355 Gen_Perf::compute_perfect_hash ()
359 for (curr
= this->key_list
.head
;
365 for (List_Node
*ptr
= this->key_list
.head
;
368 if (ptr
->hash_value
== curr
->hash_value
)
370 if (this->change (ptr
, curr
) == -1)
377 // Make one final check, just to make sure nothing weird happened...
379 this->char_search
.reset ();
381 for (curr
= this->key_list
.head
;
385 if (this->char_search
.find (this->hash (curr
)) != 0)
389 // Keep track of the number of "dynamic" links (i.e., keys
390 // that hash to the same value) so that we can use it later
391 // when generating the output.
392 this->key_list
.total_duplicates
++;
396 // Yow, big problems. we're outta here!
397 ACE_ERROR ((LM_ERROR
,
398 "\nInternal error, duplicate value %d:\n"
399 "try options -D or -r, or use new key positions.\n\n",
409 /// Does the hard stuff.... Initializes the Bool Array, and attempts
410 /// to find a perfect function that will hash all the key words without
411 /// getting any duplications. This is made much easier since we aren't
412 /// attempting to generate *minimum* functions, only perfect ones. If
413 /// we can't generate a perfect function in one pass *and* the user
414 /// hasn't enabled the DUP option, we'll inform the user to try the
415 /// randomization option, use -D, or choose alternative key positions.
416 /// The alternatives (e.g., back-tracking) are too time-consuming, i.e,
417 /// exponential in the number of keys.
421 if (this->open () == -1)
424 if (option
[BINARYSEARCH
])
426 if (this->compute_binary_search () == -1)
429 else if (option
[LINEARSEARCH
])
431 if (this->compute_linear_search () == -1)
436 if (this->compute_perfect_hash () == -1)
439 // Sorts the key word list by hash value, and then outputs the
440 // list. The generated hash table code is only output if the
441 // early stage of processing turned out O.K.
442 this->key_list
.sort ();
445 this->key_list
.output ();
449 /// Prints out some diagnostics upon completion.
450 Gen_Perf::~Gen_Perf ()
452 if (option
[DEBUGGING
])
454 ACE_DEBUG ((LM_DEBUG
,
455 "\ndumping occurrence and associated values tables\n"));
456 for (int i
= 0; i
< ACE_STANDARD_CHARACTER_SET_SIZE
; i
++)
457 if (Vectors::occurrences
[i
])
458 ACE_DEBUG ((LM_DEBUG
,
459 "Vectors::asso_values[%c] = %6d, Vectors::occurrences[%c] = %6d\n",
461 Vectors::asso_values
[i
],
463 Vectors::occurrences
[i
]));
464 ACE_DEBUG ((LM_DEBUG
,
465 "end table dumping\n"));
468 delete [] this->union_set
;