1 /* Provides high-level routines to manipulate the keywork list
2 structures the code generation output.
3 Copyright (C) 1989-1998, 2000 Free Software Foundation, Inc.
4 written by Douglas C. Schmidt (schmidt@ics.uci.edu)
6 This file is part of GNU GPERF.
8 GNU GPERF is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 1, or (at your option)
13 GNU GPERF is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU GPERF; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
23 #include <stdlib.h> /* declares rand(), srand() */
24 #include <time.h> /* declares time() */
29 /* Efficiently returns the least power of two greater than or equal to X! */
30 #define POW(X) ((!X)?1:(X-=1,X|=X>>1,X|=X>>2,X|=X>>4,X|=X>>8,X|=X>>16,(++X)))
32 /* Reads input keys, possibly applies the reordering heuristic, sets the
33 maximum associated value size (rounded up to the nearest power of 2),
34 may initialize the associated values array, and determines the maximum
35 hash table size. Note: using the random numbers is often helpful,
36 though not as deterministic, of course! */
38 Gen_Perf::Gen_Perf (void)
40 T (Trace
t ("Gen_Perf::Gen_Perf");)
42 int non_linked_length
;
44 Key_List::read_keys ();
47 asso_value_max
= option
.get_asso_max ();
48 non_linked_length
= Key_List::keyword_list_length ();
50 fewest_collisions
= 0;
51 if (asso_value_max
== 0)
52 asso_value_max
= non_linked_length
;
53 else if (asso_value_max
> 0)
54 asso_value_max
*= non_linked_length
;
55 else /* if (asso_value_max < 0) */
56 asso_value_max
= non_linked_length
/ -asso_value_max
;
57 option
.set_asso_max (POW (asso_value_max
));
61 srand ((long) time (0));
63 for (int i
= 0; i
< ALPHA_SIZE
; i
++)
64 asso_values
[i
] = (rand () & asso_value_max
- 1);
68 int asso_value
= option
.initial_value ();
70 if (asso_value
) /* Initialize array if user requests non-zero default. */
71 for (int i
= ALPHA_SIZE
- 1; i
>= 0; i
--)
72 asso_values
[i
] = asso_value
& option
.get_asso_max () - 1;
74 max_hash_value
= Key_List::max_key_length () + option
.get_asso_max () *
75 option
.get_max_keysig_size ();
78 fprintf (stderr
, "total non-linked keys = %d\nmaximum associated value is %d"
79 "\nmaximum size of generated hash table is %d\n",
80 non_linked_length
, asso_value_max
, max_hash_value
);
83 /* Merge two disjoint hash key multisets to form the ordered disjoint union of the sets.
84 (In a multiset, an element can occur multiple times.)
85 Precondition: both set_1 and set_2 must be ordered. Returns the length
86 of the combined set. */
89 Gen_Perf::compute_disjoint_union (const char *set_1
, int size_1
, const char *set_2
, int size_2
, char *set_3
)
91 T (Trace
t ("Gen_Perf::compute_disjoint_union");)
94 while (size_1
> 0 && size_2
> 0)
96 set_1
++, size_1
--, set_2
++, size_2
--;
101 next
= *set_1
++, size_1
--;
103 next
= *set_2
++, size_2
--;
104 if (set_3
== base
|| next
!= set_3
[-1])
111 next
= *set_1
++, size_1
--;
112 if (set_3
== base
|| next
!= set_3
[-1])
119 next
= *set_2
++, size_2
--;
120 if (set_3
== base
|| next
!= set_3
[-1])
126 /* Sort the UNION_SET in increasing frequency of occurrence.
127 This speeds up later processing since we may assume the resulting
128 set (Set_3, in this case), is ordered. Uses insertion sort, since
129 the UNION_SET is typically short. */
132 Gen_Perf::sort_set (char *union_set
, int len
)
134 T (Trace
t ("Gen_Perf::sort_set");)
137 for (i
= 0, j
= len
- 1; i
< j
; i
++)
142 for (curr
= i
+ 1, tmp
= union_set
[curr
];
143 curr
> 0 && occurrences
[(unsigned char)tmp
] < occurrences
[(unsigned char)(union_set
[curr
-1])];
145 union_set
[curr
] = union_set
[curr
- 1];
147 union_set
[curr
] = tmp
;
151 /* Generate a key set's hash value. */
154 Gen_Perf::hash (List_Node
*key_node
)
156 T (Trace
t ("Gen_Perf::hash");)
157 int sum
= option
[NOLENGTH
] ? 0 : key_node
->key_length
;
159 const char *p
= key_node
->char_set
;
160 int i
= key_node
->char_set_length
;
161 for (; i
> 0; p
++, i
--)
162 sum
+= asso_values
[(unsigned char)(*p
)];
164 return key_node
->hash_value
= sum
;
167 /* Find out how character value change affects successfully hashed items.
168 Returns FALSE if no other hash values are affected, else returns TRUE.
169 Note that because Option.Get_Asso_Max is a power of two we can guarantee
170 that all legal Asso_Values are visited without repetition since
171 Option.Get_Jump was forced to be an odd value! */
174 Gen_Perf::affects_prev (char c
, List_Node
*curr
)
176 T (Trace
t ("Gen_Perf::affects_prev");)
177 int original_char
= asso_values
[(unsigned char)c
];
178 int total_iterations
= !option
[FAST
]
179 ? option
.get_asso_max () : option
.get_iterations () ? option
.get_iterations () : keyword_list_length ();
181 /* Try all legal associated values. */
183 for (int i
= total_iterations
- 1; i
>= 0; i
--)
187 asso_values
[(unsigned char)c
] =
188 (asso_values
[(unsigned char)c
] + (option
.get_jump () ? option
.get_jump () : rand ()))
189 & (option
.get_asso_max () - 1);
191 /* Iteration Number array is a win, O(1) intialization time! */
194 /* See how this asso_value change affects previous keywords. If
195 it does better than before we'll take it! */
197 for (List_Node
*ptr
= head
;
198 !Bool_Array::find (hash (ptr
)) || ++collisions
< fewest_collisions
;
202 fewest_collisions
= collisions
;
204 fprintf (stderr
, "- resolved after %d iterations", total_iterations
- i
);
209 /* Restore original values, no more tries. */
210 asso_values
[(unsigned char)c
] = original_char
;
211 /* If we're this far it's time to try the next character.... */
215 /* Change a character value, try least-used characters first. */
218 Gen_Perf::change (List_Node
*prior
, List_Node
*curr
)
220 T (Trace
t ("Gen_Perf::change");)
221 static char *union_set
;
222 int union_set_length
;
225 union_set
= new char [2 * option
.get_max_keysig_size ()];
229 fprintf (stderr
, "collision on keyword #%d, prior = \"%.*s\", curr = \"%.*s\" hash = %d\n",
231 prior
->key_length
, prior
->key
,
232 curr
->key_length
, curr
->key
,
236 union_set_length
= compute_disjoint_union (prior
->char_set
, prior
->char_set_length
, curr
->char_set
, curr
->char_set_length
, union_set
);
237 sort_set (union_set
, union_set_length
);
239 /* Try changing some values, if change doesn't alter other values continue normal action. */
242 const char *p
= union_set
;
243 int i
= union_set_length
;
244 for (; i
> 0; p
++, i
--)
245 if (!affects_prev (*p
, curr
))
249 fprintf (stderr
, " by changing asso_value['%c'] (char #%d) to %d\n",
250 *p
, p
- union_set
+ 1, asso_values
[(unsigned char)(*p
)]);
253 return; /* Good, doesn't affect previous hash values, we'll take it. */
256 for (List_Node
*ptr
= head
; ptr
!= curr
; ptr
= ptr
->next
)
263 fprintf (stderr
, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
264 !option
[FAST
] ? option
.get_asso_max () : option
.get_iterations () ? option
.get_iterations () : keyword_list_length (),
265 fewest_collisions
+ total_duplicates
);
270 /* Does the hard stuff....
271 Initializes the Iteration Number array, and attempts to find a perfect
272 function that will hash all the key words without getting any
273 duplications. This is made much easier since we aren't attempting
274 to generate *minimum* functions, only perfect ones.
275 If we can't generate a perfect function in one pass *and* the user
276 hasn't enabled the DUP option, we'll inform the user to try the
277 randomization option, use -D, or choose alternative key positions.
278 The alternatives (e.g., back-tracking) are too time-consuming, i.e,
279 exponential in the number of keys. */
282 Gen_Perf::operator() (void)
284 T (Trace
t ("Gen_Perf::operator()");)
285 #if LARGE_STACK_ARRAYS
286 STORAGE_TYPE buffer
[max_hash_value
+ 1];
288 // Note: we don't use new, because that invokes a custom operator new.
290 = (STORAGE_TYPE
*) malloc (sizeof(STORAGE_TYPE
) * (max_hash_value
+ 1));
295 Bool_Array::init (buffer
, max_hash_value
+ 1);
298 for (curr
= head
; curr
; curr
= curr
->next
)
302 for (List_Node
*ptr
= head
; ptr
!= curr
; ptr
= ptr
->next
)
303 if (ptr
->hash_value
== curr
->hash_value
)
311 /* Make one final check, just to make sure nothing weird happened.... */
313 Bool_Array::reset ();
315 for (curr
= head
; curr
; curr
= curr
->next
)
316 if (Bool_Array::find (hash (curr
)))
317 if (option
[DUP
]) /* Keep track of this number... */
319 else /* Yow, big problems. we're outta here! */
321 fprintf (stderr
, "\nInternal error, duplicate value %d:\n"
322 "try options -D or -r, or use new key positions.\n\n", hash (curr
));
323 #if !LARGE_STACK_ARRAYS
324 free ((char *) buffer
);
329 /* Sorts the key word list by hash value, and then outputs the list.
330 The generated hash table code is only output if the early stage of
331 processing turned out O.K. */
335 #if !LARGE_STACK_ARRAYS
336 free ((char *) buffer
);
341 /* Prints out some diagnostics upon completion. */
343 Gen_Perf::~Gen_Perf (void)
345 T (Trace
t ("Gen_Perf::~Gen_Perf");)
348 fprintf (stderr
, "\ndumping occurrence and associated values tables\n");
350 for (int i
= 0; i
< ALPHA_SIZE
; i
++)
352 fprintf (stderr
, "asso_values[%c] = %6d, occurrences[%c] = %6d\n",
353 i
, asso_values
[i
], i
, occurrences
[i
]);
355 fprintf (stderr
, "end table dumping\n");