Cleanup ACE_HAS_PTHREAD_SIGMASK_PROTOTYPE, all platforms support it so far as I can...
[ACE_TAO.git] / ACE / apps / gperf / src / Gen_Perf.cpp
blob382e95cf50ca589fa893c09e4364733f10f01290
1 // -*- C++ -*-
3 /**
4 * Copyright (C) 1989 Free Software Foundation, Inc.
5 * written by Douglas C. Schmidt (d.schmidt@vanderbilt.edu)
7 * This file is part of GNU GPERF.
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 2
12 * of the License, or (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 #include "Gen_Perf.h"
25 #include "Vectors.h"
26 #include "ace/OS_NS_stdlib.h"
27 #include "ace/OS_NS_time.h"
28 #include "ace/OS_NS_stdio.h"
29 #include "ace/OS_Memory.h"
30 #include "ace/Truncate.h"
32 /// Current release version.
33 extern const char *version_string;
35 /// Reads input keys, possibly applies the reordering heuristic, sets
36 /// the maximum associated value size (rounded up to the nearest power
37 /// of 2), may initialize the associated values array, and determines
38 /// the maximum hash table size. Note: using the random numbers is
39 /// often helpful, though not as deterministic, of course!
40 Gen_Perf::Gen_Perf ()
41 : max_hash_value (0),
42 fewest_collisions (0),
43 num_done (1),
44 union_set (0)
48 /// Merge two disjoint hash key multisets to form the ordered disjoint
49 /// union of the sets. (In a multiset, an element can occur multiple
50 /// times). Precondition: both set1 and set2 must be
51 /// ordered. Returns the length of the combined set.
52 int
53 Gen_Perf::compute_disjoint_union (char *set1, char *set2, char *set3)
55 char *base = set3;
57 while (*set1 && *set2)
58 if (*set1 == *set2)
59 set1++, set2++;
60 else
62 *set3 = *set1 < *set2 ? *set1++ : *set2++;
63 if (set3 == base || *set3 != *(set3 - 1))
64 set3++;
67 while (*set1)
69 *set3 = *set1++;
70 if (set3 == base || *set3 != *(set3 - 1))
71 set3++;
74 while (*set2)
76 *set3 = *set2++;
77 if (set3 == base || *set3 != *(set3 - 1))
78 set3++;
80 *set3 = '\0';
81 return ACE_Utils::truncate_cast<int> (set3 - base);
84 /// Sort the UNION_SET in increasing frequency of occurrence. This
85 /// speeds up later processing since we may assume the resulting set
86 /// (Set_3, in this case), is ordered. Uses insertion sort, since the
87 /// UNION_SET is typically short.
88 void
89 Gen_Perf::sort_set (char *union_set, int len)
91 for (int i = 0, j = len - 1; i < j; i++)
93 int curr, tmp;
95 for (curr = i + 1, tmp = (int) union_set[curr];
96 curr > 0
97 && Vectors::occurrences[tmp] < Vectors::occurrences[(int) union_set[curr - 1]];
98 curr--)
99 union_set[curr] = union_set[curr - 1];
101 union_set[curr] = static_cast<char> (tmp);
105 /// Generate a keysig's hash value.
107 Gen_Perf::hash (List_Node *key_node)
109 int sum = option[NOLENGTH] ? 0 : key_node->length;
111 for (char *ptr = key_node->keysig; *ptr; ptr++)
112 sum += Vectors::asso_values[(int) *ptr];
114 key_node->hash_value = sum;
115 return sum;
118 /// Find out how character value change affects successfully hash
119 /// items. Returns FALSE if no other hash values are affected, else
120 /// returns TRUE. Note that because Option.Get_Asso_Max is a power of
121 /// two we can guarantee that all legal Vectors::Asso_Values are
122 /// visited without repetition since Option.Get_Jump was forced to be
123 /// an odd value!
124 inline int
125 Gen_Perf::affects_prev (char c, List_Node *curr)
127 int const original_char = Vectors::asso_values[(int) c];
128 int total_iterations = 0;
130 if (!option[FAST])
132 total_iterations = option.asso_max ();
134 else
136 total_iterations = option.iterations ();
138 if (total_iterations == 0)
139 total_iterations = this->key_list.keyword_list_length ();
142 // Try all legal associated values.
144 for (int i = total_iterations - 1; i >= 0; --i)
146 Vectors::asso_values[(int) c] =
147 (Vectors::asso_values[(int) c]
148 + (option.jump () ? option.jump () : ACE_OS::rand ()))
149 & (option.asso_max () - 1);
151 // Iteration Number array is a win, O(1) intialization time!
152 this->char_search.reset ();
154 int collisions = 0;
156 // See how this asso_value change affects previous keywords. If
157 // it does better than before we'll take it!
158 for (List_Node *ptr = this->key_list.head;
159 this->char_search.find (this->hash (ptr)) == 0
160 || ++collisions < fewest_collisions;
161 ptr = ptr->next)
163 if (ptr == curr)
165 fewest_collisions = collisions;
167 if (option[DEBUGGING])
169 ACE_DEBUG ((LM_DEBUG,
170 "- resolved after %d iterations",
171 total_iterations - i));
174 return 0;
179 // Restore original values, no more tries.
180 Vectors::asso_values[(int) c] = original_char;
182 // If we're this far it's time to try the next character....
183 return 1;
186 /// Change a character value, try least-used characters first.
188 Gen_Perf::change (List_Node *prior, List_Node *curr)
190 if (option[DEBUGGING])
191 ACE_DEBUG ((LM_DEBUG,
192 "collision on keyword #%d, prior = \"%C\", curr = \"%C\" hash = %d\n",
193 num_done,
194 prior->key,
195 curr->key,
196 curr->hash_value));
197 Gen_Perf::sort_set (this->union_set,
198 compute_disjoint_union (prior->keysig,
199 curr->keysig,
200 this->union_set));
202 // Try changing some values, if change doesn't alter other values
203 // continue normal action.
204 ++fewest_collisions;
206 for (char *temp = union_set; *temp != '\0'; temp++)
207 if (affects_prev (*temp, curr) == 0)
209 if (option[DEBUGGING])
210 ACE_DEBUG ((LM_DEBUG,
211 " by changing asso_value['%c'] (char #%d) to %d\n",
212 *temp,
213 temp - union_set + 1,
214 Vectors::asso_values[(int) *temp]));
215 // Good, doesn't affect previous hash values, we'll take it.
216 return 0;
219 for (List_Node *ptr = this->key_list.head;
220 ptr != curr;
221 ptr = ptr->next)
222 this->hash (ptr);
224 this->hash (curr);
226 if (option[DEBUGGING])
227 ACE_DEBUG ((LM_DEBUG,
228 "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
229 !option[FAST] ? option.asso_max () : option.iterations () ? option.iterations () : this->key_list.keyword_list_length (),
230 fewest_collisions + this->key_list.total_duplicates));
231 return 0;
235 Gen_Perf::open ()
237 if (this->key_list.read_keys () == -1)
238 return -1;
240 if (option[ORDER])
241 this->key_list.reorder ();
243 int asso_value_max = option.asso_max ();
244 int const non_linked_length = this->key_list.keyword_list_length ();
246 if (asso_value_max == 0)
247 asso_value_max = non_linked_length;
248 else if (asso_value_max > 0)
249 asso_value_max *= non_linked_length;
250 else // if (asso_value_max < 0)
251 asso_value_max = non_linked_length / -asso_value_max;
253 option.asso_max (ACE_POW (asso_value_max));
255 if (option[RANDOM])
257 ACE_OS::srand ((u_int) ACE_OS::time (0));
259 for (int i = 0; i < ACE_STANDARD_CHARACTER_SET_SIZE; ++i)
261 Vectors::asso_values[i] = (ACE_OS::rand () & (asso_value_max - 1));
264 else
266 int const asso_value = option.initial_value ();
268 // Initialize array if user requests non-zero default.
269 if (asso_value)
271 for (int i = ACE_STANDARD_CHARACTER_SET_SIZE - 1; i >= 0; --i)
273 Vectors::asso_values[i] = asso_value & (option.asso_max () - 1);
278 this->max_hash_value =
279 this->key_list.max_key_length ()
280 + option.asso_max ()
281 * option.max_keysig_size ();
283 ACE_NEW_RETURN (this->union_set,
284 char[2 * option.max_keysig_size () + 1],
285 -1);
286 ACE_OS::printf ("/* ");
288 if (option[C])
289 ACE_OS::printf ("C");
291 else if (option[CPLUSPLUS])
292 ACE_OS::printf ("C++");
294 ACE_OS::printf (" code produced by gperf version %s */\n",
295 version_string);
296 Options::print_options ();
298 if (option[DEBUGGING])
299 ACE_DEBUG ((LM_DEBUG,
300 "total non-linked keys = %d\n"
301 "total duplicates = %d\n"
302 "maximum associated value is %d\n"
303 "maximum size of generated hash table is %d\n",
304 non_linked_length,
305 this->key_list.total_duplicates,
306 asso_value_max,
307 max_hash_value));
308 if (this->char_search.open (max_hash_value + 1) == -1)
309 return -1;
310 return 0;
313 /// For binary search, do normal string sort on the keys, and then
314 /// assign hash values from 0 to N-1. Then go ahead with the normal
315 /// logic that is there for perfect hashing.
317 Gen_Perf::compute_binary_search ()
319 // Do a string sort.
320 this->key_list.string_sort ();
322 // Assign hash values.
323 List_Node *curr = 0;
324 int hash_value;
325 for (hash_value = 0, curr = this->key_list.head;
326 curr != 0;
327 curr = curr->next, hash_value++)
329 curr->hash_value = hash_value;
332 return 0;
336 Gen_Perf::compute_linear_search ()
338 // Convert the list of keys to a linear list without
339 // equivalence classes.
340 this->key_list.string_sort ();
342 // Assign hash values.
343 List_Node *curr = 0;
344 int hash_value = 0;
345 for (hash_value = 0, curr = this->key_list.head;
346 curr != 0;
347 curr = curr->next, hash_value++)
349 curr->hash_value = hash_value;
351 return 0;
355 Gen_Perf::compute_perfect_hash ()
357 List_Node *curr = 0;
359 for (curr = this->key_list.head;
360 curr != 0;
361 curr = curr->next)
363 this->hash (curr);
365 for (List_Node *ptr = this->key_list.head;
366 ptr != curr;
367 ptr = ptr->next)
368 if (ptr->hash_value == curr->hash_value)
370 if (this->change (ptr, curr) == -1)
371 return -1;
372 break;
374 num_done++;
377 // Make one final check, just to make sure nothing weird happened...
379 this->char_search.reset ();
381 for (curr = this->key_list.head;
382 curr;
383 curr = curr->next)
385 if (this->char_search.find (this->hash (curr)) != 0)
387 if (option[DUP])
389 // Keep track of the number of "dynamic" links (i.e., keys
390 // that hash to the same value) so that we can use it later
391 // when generating the output.
392 this->key_list.total_duplicates++;
394 else
396 // Yow, big problems. we're outta here!
397 ACE_ERROR ((LM_ERROR,
398 "\nInternal error, duplicate value %d:\n"
399 "try options -D or -r, or use new key positions.\n\n",
400 this->hash (curr)));
401 return -1;
406 return 0;
409 /// Does the hard stuff.... Initializes the Bool Array, and attempts
410 /// to find a perfect function that will hash all the key words without
411 /// getting any duplications. This is made much easier since we aren't
412 /// attempting to generate *minimum* functions, only perfect ones. If
413 /// we can't generate a perfect function in one pass *and* the user
414 /// hasn't enabled the DUP option, we'll inform the user to try the
415 /// randomization option, use -D, or choose alternative key positions.
416 /// The alternatives (e.g., back-tracking) are too time-consuming, i.e,
417 /// exponential in the number of keys.
419 Gen_Perf::run ()
421 if (this->open () == -1)
422 return 1;
424 if (option[BINARYSEARCH])
426 if (this->compute_binary_search () == -1)
427 return 1;
429 else if (option[LINEARSEARCH])
431 if (this->compute_linear_search () == -1)
432 return 1;
434 else
436 if (this->compute_perfect_hash () == -1)
437 return 1;
439 // Sorts the key word list by hash value, and then outputs the
440 // list. The generated hash table code is only output if the
441 // early stage of processing turned out O.K.
442 this->key_list.sort ();
445 this->key_list.output ();
446 return 0;
449 /// Prints out some diagnostics upon completion.
450 Gen_Perf::~Gen_Perf ()
452 if (option[DEBUGGING])
454 ACE_DEBUG ((LM_DEBUG,
455 "\ndumping occurrence and associated values tables\n"));
456 for (int i = 0; i < ACE_STANDARD_CHARACTER_SET_SIZE; i++)
457 if (Vectors::occurrences[i])
458 ACE_DEBUG ((LM_DEBUG,
459 "Vectors::asso_values[%c] = %6d, Vectors::occurrences[%c] = %6d\n",
461 Vectors::asso_values[i],
463 Vectors::occurrences[i]));
464 ACE_DEBUG ((LM_DEBUG,
465 "end table dumping\n"));
468 delete [] this->union_set;