1 #ifndef CGPERF_SEARCH_C
2 #define CGPERF_SEARCH_C
11 #include "keyword_list.h"
13 #include "positions.h"
14 #include "hash-table.h"
15 #include "bool-array.h"
16 /*------------------------------------------------------------------------------------------------*/
17 #include "namespace/globals.h"
18 #include "namespace/search.h"
19 #include "namespace/keyword.h"
20 #include "namespace/keyword_list.h"
21 #include "namespace/options.h"
22 #include "namespace/positions.h"
23 #include "namespace/hash-table.h"
24 #include "namespace/bool-array.h"
25 #include "namespace/search.c"
26 /*------------------------------------------------------------------------------------------------*/
28 /* The general form of the hash function is
30 hash (keyword) = sum (asso_values[keyword[i] + alpha_inc[i]] : i in Pos)
33 where Pos is a set of byte positions,
34 each alpha_inc[i] is a nonnegative integer,
35 each asso_values[c] is a nonnegative integer,
36 len (keyword) is the keyword's length if _hash_includes_len, or 0 otherwise.
38 Theorem 1: If all keywords are different, there is a set Pos such that
39 all tuples (keyword[i] : i in Pos) are different.
41 Theorem 2: If all tuples (keyword[i] : i in Pos) are different, there
42 are nonnegative integers alpha_inc[i] such that all multisets
43 {keyword[i] + alpha_inc[i] : i in Pos} are different.
45 Define selchars[keyword] := {keyword[i] + alpha_inc[i] : i in Pos}.
47 Theorem 3: If all multisets selchars[keyword] are different, there are
48 nonnegative integers asso_values[c] such that all hash values
49 sum (asso_values[c] : c in selchars[keyword]) are different.
51 Based on these three facts, we find the hash function in three steps:
53 Step 1 (Finding good byte positions):
54 Find a set Pos, as small as possible, such that all tuples
55 (keyword[i] : i in Pos) are different.
57 Step 2 (Finding good alpha increments):
58 Find nonnegative integers alpha_inc[i], as many of them as possible being
59 zero, and the others being as small as possible, such that all multisets
60 {keyword[i] + alpha_inc[i] : i in Pos} are different.
62 Step 3 (Finding good asso_values):
63 Find asso_values[c] such that all hash (keyword) are different.
65 In other words, each step finds a projection that is injective on the
67 proj1 : String --> Map (Pos --> N)
68 proj2 : Map (Pos --> N) --> Map (Pos --> N) / S(Pos)
69 proj3 : Map (Pos --> N) / S(Pos) --> N
71 N denotes the set of nonnegative integers,
72 Map (A --> B) := Hom_Set (A, B) is the set of maps from A to B, and
73 S(Pos) is the symmetric group over Pos.
75 This was the theory for !_hash_includes_len; if _hash_includes_len, slight
77 proj1 : String --> Map (Pos --> N) x N
78 proj2 : Map (Pos --> N) x N --> Map (Pos --> N) / S(Pos) x N
79 proj3 : Map (Pos --> N) / S(Pos) x N --> N
81 For a case-insensitive hash function, the general form is
84 sum (asso_values[alpha_unify[keyword[i] + alpha_inc[i]]] : i in Pos)
87 where alpha_unify[c] is chosen so that an upper/lower case change in
88 keyword[i] doesn't change alpha_unify[keyword[i] + alpha_inc[i]].
89 *//*}}} THEORY -- END */
90 /*{{{ finding asso_values[] that fit
91 The idea is to choose the _asso_values[] one by one, in a way that
92 a choice that has been made never needs to be undone later. This
93 means that we split the work into several steps. Each step chooses
94 one or more _asso_values[c]. The result of choosing one or more
95 _asso_values[c] is that the partitioning of the keyword set gets
97 Look at this partitioning: After every step, the _asso_values[] of a
98 certain set C of characters are undetermined. (At the beginning, C
99 is the set of characters c with _occurrences[c] > 0. At the end, C
100 is empty.) To each keyword K, we associate the multiset of _selchars
101 for which the _asso_values[] are undetermined:
102 K --> K->_selchars intersect C.
103 Consider two keywords equivalent if their value under this mapping is
104 the same. This introduces an equivalence relation on the set of
105 keywords. The equivalence classes partition the keyword set. (At the
106 beginning, the partition is the finest possible: each K is an equivalence
107 class by itself, because all K have a different _selchars. At the end,
108 all K have been merged into a single equivalence class.)
109 The partition before a step is always a refinement of the partition
111 We choose the steps in such a way that the partition really becomes
112 broader at each step. (A step that only chooses an _asso_values[c]
113 without changing the partition is better merged with the previous step,
114 to avoid useless backtracking.) }}}*/
115 /*------------------------------------------------------------------------------------------------*/
118 struct EquivalenceClass
120 /* the keywords in this equivalence class */
121 struct Keyword_List
*keywords
;
122 struct Keyword_List
*keywords_last
;
123 /* the number of keywords in this equivalence class */
126 * the undetermined selected characters for the keywords in this equivalence class, as a
127 * canonically reordered multiset
129 u32
*undetermined_chars
;
130 u32 undetermined_chars_length
;
132 struct EquivalenceClass
*next
;
137 /* the characters whose values are being determined in this step */
141 * Exclusive upper bound for the _asso_values[c] of this step. A power
145 /* the characters whose values will be determined after this step */
147 /* the keyword set partition after this step */
148 struct EquivalenceClass
*partition
;
149 /* the expected number of iterations in this step */
155 /*}}} types -- END */
158 static bool equals(u32
*ptr1
, u32
*ptr2
, u32 len
)
171 static void delete_partition(struct EquivalenceClass
*partition
)
174 struct EquivalenceClass
*equclass
;
178 equclass
= partition
;
179 partition
= equclass
->next
;
180 delete_list(equclass
->keywords
);
184 static bool less_by_hash_value(struct Keyword
*kw1
, struct Keyword
*kw2
)
186 return kw1
->hash_value
< kw2
->hash_value
;
189 /*}}} local -- END */
190 /*------------------------------------------------------------------------------------------------*/
192 static struct Search
*schr_new(struct Keyword_List
*list
)
196 t
= calloc(1, sizeof(*t
));
198 t
->key_positions
= pos_new();
202 static void schr_del(struct Search
*t
)
204 ba_del(t
->collision_detector
);
208 struct Keyword_List
*ptr
;
210 fprintf(stderr
, "\ndumping occurrence and associated values tables\n");
213 if (i
>= t
->alpha_size
)
215 if (t
->occurrences
[i
])
216 fprintf (stderr
, "asso_values[%c] = %6d, occurrences[%c] = %6d\n", i
, t
->asso_values
[i
], i
, t
->occurrences
[i
]);
219 fprintf(stderr
, "end table dumping\n");
220 fprintf(stderr
, "\nDumping key list information:\ntotal non-static linked keywords = %d\ntotal keywords = %d\ntotal duplicates = %d\nmaximum key length = %d\n", t
->list_len
, t
->total_keys
, t
->total_duplicates
, t
->max_key_len
);
221 field_width
= t
->max_selchars_length
;
222 fprintf(stderr
, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n", field_width
, "selchars");
229 fprintf(stderr
, "%11d,%11d,%6d, ", ptr
->kw
->hash_value
, ptr
->kw
->allchars_length
, ptr
->kw
->final_index
);
230 if (field_width
> ptr
->kw
->selchars_length
)
231 fprintf(stderr
, "%*s", field_width
- ptr
->kw
->selchars_length
, "");
234 if (j
>= ptr
->kw
->selchars_length
)
236 putc(ptr
->kw
->selchars
[j
], stderr
);
239 fprintf(stderr
, ", %.*s\n", ptr
->kw
->allchars_length
, ptr
->kw
->allchars
);
242 fprintf(stderr
, "End dumping list.\n\n");
244 pos_del(t
->key_positions
);
245 free(t
->asso_values
);
246 free(t
->occurrences
);
247 free(t
->alpha_unify
);
251 /*{{{ schr_optimize */
252 static void schr_optimize(struct Search
*t
)
254 struct Keyword_List
*curr_ptr
;
261 /* Step 1: Finding good byte positions. */
262 schr_find_positions(t
);
264 /* Step 2: Finding good alpha increments. */
265 schr_find_alpha_inc(t
);
267 /* Step 3: Finding good asso_values. */
268 schr_find_good_asso_values(t
);
269 /* Make one final check, just to make sure nothing weird happened.... */
270 ba_clear(t
->collision_detector
);
273 struct Keyword
*curr
;
279 hashcode
= schr_compute_hash(t
, curr
);
280 if (ba_set_bit(t
->collision_detector
, hashcode
)) {
282 * This shouldn't happen. proj1, proj2, proj3 must have been computed to be
283 * injective on the given keyword set.
285 fprintf(stderr
, "\nInternal error, unexpected duplicate hash code\n");
287 fprintf(stderr
, "try options -m or -r, or use new key positions.\n\n");
289 fprintf(stderr
, "try options -m or -r.\n\n");
292 curr_ptr
= curr_ptr
->next
;
294 /* sorts the keyword list by hash value */
297 * Set unused asso_values[c] to max_hash_value + 1. This is not absolutely necessary, but
298 * speeds up the lookup function in many cases of lookup failure: no string comparison is
299 * needed once the hash value of a string is larger than the hash value of any keyword.
302 struct Keyword_List
*tmp
;
310 max_hash_value
= tmp
->kw
->hash_value
;
314 if (c
>= t
->alpha_size
)
316 if (t
->occurrences
[c
] == 0)
317 t
->asso_values
[c
] = max_hash_value
+ 1;
320 /* propagate unified asso_values */
321 if (t
->alpha_unify
) {
326 if (c
>= t
->alpha_size
)
328 if (t
->alpha_unify
[c
] != c
)
329 t
->asso_values
[c
] = t
->asso_values
[t
->alpha_unify
[c
]];
334 /*{{{ schr_prepare */
335 static void schr_prepare(struct Search
*t
)
337 struct Keyword_List
*tmp
;
347 /* compute the minimum and maximum keyword length */
348 t
->max_key_len
= S32_MIN
;
349 t
->min_key_len
= S32_MAX
;
357 if (t
->max_key_len
< kw
->allchars_length
)
358 t
->max_key_len
= kw
->allchars_length
;
359 if (t
->min_key_len
> kw
->allchars_length
)
360 t
->min_key_len
= kw
->allchars_length
;
364 * exit program if an empty string is used as keyword, since the comparison expressions
365 * don't work correctly for looking up an empty string
367 if (t
->min_key_len
== 0) {
368 fprintf (stderr
, "Empty input keyword is not allowed.\nTo recognize an empty input keyword, your code should check for\nlen == 0 before calling the gperf generated lookup function.\n");
371 /* exit program if the characters in the keywords are not in the required range */
372 if (OPTS(SEVENBIT
)) {
383 i
= kw
->allchars_length
;
388 fprintf(stderr
, "Option --seven-bit has been specified,\nbut keyword \"%.*s\" contains non-ASCII characters.\nTry removing option --seven-bit.\n", kw
->allchars_length
, kw
->allchars
);
397 /* determine whether the hash function shall include the length */
398 t
->hash_includes_len
= !(OPTS(NOLENGTH
) || (t
->min_key_len
== t
->max_key_len
));
400 /*{{{ schr_find_positions */
401 /* find good key positions */
402 static void schr_find_positions(struct Search
*t
)
406 struct Positions
*mandatory
;
407 struct Positions
*current
;
408 u32 current_duplicates_count
;
409 /* if the user gave the key positions, we use them */
410 if (OPTS(POSITIONS
)) {
411 pos_cpy(t
->key_positions
, options
->key_positions
);
414 /* compute preliminary alpha_unify table */
415 alpha_unify
= schr_compute_alpha_unify(t
);
417 /* 1. find positions that must occur in order to distinguish duplicates */
418 mandatory
= pos_new();
420 struct Keyword_List
*l1
;
425 struct Keyword_List
*l2
;
427 if (l1
== 0 || l1
->next
== 0)
438 * if keyword1 and keyword2 have the same length and differ
439 * in just one position, and it is not the last character,
440 * this position is mandatory
442 if (kw1
->allchars_length
== kw2
->allchars_length
) {
446 n
= kw1
->allchars_length
;
454 c1
= kw1
->allchars
[i
];
455 c2
= kw2
->allchars
[i
];
456 if (OPTS(UPPERLOWER
)) {
457 if (c1
>= 'A' && c1
<= 'Z')
459 if (c2
>= 'A' && c2
<= 'Z')
476 c1
= kw1
->allchars
[j
];
477 c2
= kw2
->allchars
[j
];
478 if (OPTS(UPPERLOWER
)) {
479 if (c1
>= 'A' && c1
<= 'Z')
481 if (c2
>= 'A' && c2
<= 'Z')
489 /* position i is mandatory */
490 if (!pos_contains(mandatory
, i
))
491 pos_add(mandatory
, i
);
500 /* 2. add positions, as long as this decreases the duplicates count */
501 imax
= (t
->max_key_len
- 1 < (s32
)POS_MAX_KEY_POS
- 1 ? t
->max_key_len
- 1
502 : (s32
)POS_MAX_KEY_POS
- 1);
504 pos_cpy(current
, mandatory
);
505 current_duplicates_count
= schr_count_duplicates_tuple_do(t
, current
, alpha_unify
);
507 struct Positions
*best
;
508 u32 best_duplicates_count
;
512 best_duplicates_count
= U32_MAX
;
517 if (!pos_contains(current
, i
)) {
518 struct Positions
*tryal
;
519 u32 try_duplicates_count
;
522 pos_cpy(tryal
, current
);
524 try_duplicates_count
= schr_count_duplicates_tuple_do(t
, tryal
,
527 * We prefer 'try' to 'best' if it produces less duplicates, or if
528 * it produces the same number of duplicates but with a more
529 * efficient hash function.
531 if (try_duplicates_count
< best_duplicates_count
532 || (try_duplicates_count
== best_duplicates_count
534 pos_cpy(best
, tryal
);
535 best_duplicates_count
= try_duplicates_count
;
541 /* stop adding positions when it gives no improvement */
542 if (best_duplicates_count
>= current_duplicates_count
)
544 pos_cpy(current
, best
);
546 current_duplicates_count
= best_duplicates_count
;
548 /* 3. remove positions, as long as this doesn't increase the duplicates count */
550 struct Positions
*best
;
551 u32 best_duplicates_count
;
555 best_duplicates_count
= U32_MAX
;
560 if (pos_contains(current
, i
) && !pos_contains(mandatory
, i
)) {
561 struct Positions
*tryal
;
562 u32 try_duplicates_count
;
565 pos_cpy(tryal
, current
);
566 pos_remove(tryal
, i
);
567 try_duplicates_count
= schr_count_duplicates_tuple_do(t
, tryal
,
570 * We prefer 'try' to 'best' if it produces less duplicates, or if
571 * it produces the same number of duplicates but with a more
572 * efficient hash function.
574 if (try_duplicates_count
< best_duplicates_count
575 || (try_duplicates_count
== best_duplicates_count
577 pos_cpy(best
, tryal
);
578 best_duplicates_count
= try_duplicates_count
;
583 /* stop removing positions when it gives no improvement */
584 if (best_duplicates_count
> current_duplicates_count
)
586 pos_cpy(current
, best
);
588 current_duplicates_count
= best_duplicates_count
;
590 /* 4. replace two positions by one, as long as this doesn't increase the duplicates count */
592 struct Positions
*best
;
593 u32 best_duplicates_count
;
597 best_duplicates_count
= U32_MAX
;
599 * Loop over all pairs { i1, i2 } of currently selected positions. W.l.o.g. we can
606 if (pos_contains(current
, i1
) && !pos_contains(mandatory
, i1
)) {
613 if (pos_contains(current
, i2
) && !pos_contains(mandatory
, i2
)) {
620 if (!pos_contains(current
, i3
)) {
621 struct Positions
*tryal
;
622 u32 try_duplicates_count
;
625 pos_cpy(tryal
, current
);
626 pos_remove(tryal
, i1
);
627 pos_remove(tryal
, i2
);
629 try_duplicates_count
= schr_count_duplicates_tuple_do(t
, tryal
, alpha_unify
);
631 * We prefer 'try' to 'best' if it produces less
632 * duplicates, or if it produces the same number
633 * of duplicates but with a more efficient hash
636 if (try_duplicates_count
< best_duplicates_count
637 || (try_duplicates_count
== best_duplicates_count
638 && (i1
== -1 || i2
== -1) && i3
>= 0)) {
639 pos_cpy(best
, tryal
);
640 best_duplicates_count
= try_duplicates_count
;
652 /* stop removing positions when it gives no improvement */
653 if (best_duplicates_count
> current_duplicates_count
)
655 pos_cpy(current
, best
);
656 current_duplicates_count
= best_duplicates_count
;
659 /* That's it. Hope it's good enough. */
660 pos_cpy(t
->key_positions
, current
);
662 struct PositionReverseIterator
*iter
;
666 /* Print the result. */
667 fprintf(stderr
, "\nComputed positions: ");
668 iter
= pos_reviterator(t
->key_positions
);
669 seen_lastchar
= false;
672 i
= posrevit_next(iter
);
673 if (i
== POSREVIT_EOS
)
676 fprintf(stderr
, ", ");
677 if (i
== POS_LASTCHAR
)
678 seen_lastchar
= true;
680 fprintf(stderr
, "%d", i
+ 1);
686 fprintf(stderr
, ", ");
687 fprintf(stderr
, "$");
689 fprintf(stderr
, "\n");
696 /*{{{ schr_compute_alpha_size */
697 /* computes the upper bound on the indices passed to asso_values[], assuming no alpha_increments */
698 static u32
schr_compute_alpha_size(void)
700 return (OPTS(SEVENBIT
) ? 128 : 256);
702 /*{{{ schr_compute_alpha_unify */
703 static u32
*schr_compute_alpha_unify(struct Search
*t
)
705 if (OPTS(UPPERLOWER
)) {
706 /* uppercase to lowercase mapping */
711 alpha_size
= schr_compute_alpha_size();
712 alpha_unify
= calloc(alpha_size
, sizeof(*alpha_unify
));
724 alpha_unify
[c
] = c
+ ('a' - 'A');
729 /* identity mapping */
732 /*{{{ schr_find_alpha_inc */
733 /* find good alpha_inc[] */
734 static void schr_find_alpha_inc(struct Search
*t
)
737 * The goal is to choose _alpha_inc[] such that it doesn't introduce artificial duplicates.
738 * In other words, the goal is: # proj2 (proj1 (K)) = # proj1 (K).
743 u32 current_duplicates_count
;
745 duplicates_goal
= schr_count_duplicates_tuple(t
);
746 current
= calloc(t
->max_key_len
, sizeof(*current
));
749 if (i
>= t
->max_key_len
)
754 current_duplicates_count
= schr_count_duplicates_multiset(t
, current
);
755 if (current_duplicates_count
> duplicates_goal
) {
756 /* look which _alpha_inc[i] we are free to increment */
762 struct PositionIterator
*iter
;
765 iter
= pos_iterator(t
->key_positions
, t
->max_key_len
);
769 key_pos
= positer_next(iter
);
770 if (key_pos
== POSITER_EOS
)
772 if (key_pos
!= POS_LASTCHAR
)
777 indices
= calloc(nindices
, sizeof(*indices
));
780 struct PositionIterator
*iter
;
783 iter
= pos_iterator(t
->key_positions
, t
->max_key_len
);
787 key_pos
= positer_next(iter
);
788 if (key_pos
== POSITER_EOS
)
790 if (key_pos
!= POS_LASTCHAR
) {
791 indices
[j
] = key_pos
;
795 if (!(j
== nindices
))
800 * Perform several rounds of searching for a good alpha increment. Each round
801 * reduces the number of artificial collisions by adding an increment in a single
804 best
= calloc(t
->max_key_len
, sizeof(*best
));
805 tryal
= calloc(t
->max_key_len
, sizeof(*best
));
808 /* An increment of 1 is not always enough. Try higher increments also. */
811 u32 best_duplicates_count
;
814 best_duplicates_count
= U32_MAX
;
817 u32 try_duplicates_count
;
821 memcpy(tryal
, current
, t
->max_key_len
* sizeof(u32
));
822 tryal
[indices
[j
]] += inc
;
823 try_duplicates_count
= schr_count_duplicates_multiset(t
,
825 /* we prefer 'try' to 'best' if it produces less duplicates */
826 if (try_duplicates_count
< best_duplicates_count
) {
827 memcpy(best
, tryal
, t
->max_key_len
* sizeof(u32
));
828 best_duplicates_count
= try_duplicates_count
;
832 /* stop this round when we got an improvement */
833 if (best_duplicates_count
< current_duplicates_count
) {
834 memcpy(current
, best
, t
->max_key_len
* sizeof(u32
));
835 current_duplicates_count
= best_duplicates_count
;
840 if (current_duplicates_count
<= duplicates_goal
)
849 /* print the result */
850 fprintf(stderr
, "\nComputed alpha increments: ");
857 if (current
[indices
[j
]] != 0) {
859 fprintf(stderr
, ", ");
860 fprintf(stderr
, "%u:+%u", indices
[j
] + 1, current
[indices
[j
]]);
864 fprintf(stderr
, "\n");
868 t
->alpha_inc
= current
;
869 t
->alpha_size
= schr_compute_alpha_size_with_inc(t
, t
->alpha_inc
);
870 t
->alpha_unify
= schr_compute_alpha_unify_with_inc(t
, t
->key_positions
, t
->alpha_inc
);
872 /*{{{ schr_count_duplicates_tuple_do */
874 * Count the duplicate keywords that occur with a given set of positions. In other words, it
875 * returns the difference # K - # proj1 (K) where K is the multiset of given keywords.
877 static u32
schr_count_duplicates_tuple_do(struct Search
*t
, struct Positions
*positions
,
882 * Run through the keyword list and count the duplicates incrementally. The result does not
883 * depend on the order of the keyword list, thanks to the formula above.
885 schr_init_selchars_tuple(t
, positions
, alpha_unify
);
888 struct Hash_Table
*representatives
;
889 struct Keyword_List
*tmp
;
891 representatives
= ht_new(t
->total_keys
, !t
->hash_includes_len
);
899 if (ht_insert(representatives
, kw
))
903 ht_del(representatives
);
905 schr_delete_selchars(t
);
908 /*{{{ schr_init_selchars_tuple */
909 static void schr_init_selchars_tuple(struct Search
*t
, struct Positions
*positions
,
912 struct Keyword_List
*tmp
;
918 kw_init_selchars_tuple(tmp
->kw
, positions
, alpha_unify
);
922 /*{{{ schr_delete_selchars */
923 static void schr_delete_selchars(struct Search
*t
)
925 struct Keyword_List
*tmp
;
931 kw_delete_selchars(tmp
->kw
);
935 /*{{{ schr_count_duplicates_tuple */
937 * Count the duplicate keywords that occur with the found set of positions. In other words, it
938 * returns the difference # K - # proj1 (K) where K is the multiset of given keywords.
940 static u32
schr_count_duplicates_tuple(struct Search
*t
)
945 alpha_unify
= schr_compute_alpha_unify(t
);
946 count
= schr_count_duplicates_tuple_do(t
, t
->key_positions
, alpha_unify
);
950 /*{{{ schr_count_duplicates_multiset */
952 * Count the duplicate keywords that occur with the given set of positions and a given alpha_inc[]
954 * In other words, it returns the difference
955 * # K - # proj2 (proj1 (K))
956 * where K is the multiset of given keywords.
958 static u32
schr_count_duplicates_multiset(struct Search
*t
, u32
*alpha_inc
)
961 * Run through the keyword list and count the duplicates incrementally. The result does not
962 * depend on the order of the keyword list, thanks to the formula above.
967 alpha_unify
= schr_compute_alpha_unify_with_inc(t
, t
->key_positions
, alpha_inc
);
968 schr_init_selchars_multiset(t
, t
->key_positions
, alpha_unify
, alpha_inc
);
971 struct Hash_Table
*representatives
;
972 struct Keyword_List
*tmp
;
974 representatives
= ht_new(t
->total_keys
, !t
->hash_includes_len
);
982 if (ht_insert(representatives
, kw
))
986 ht_del(representatives
);
988 schr_delete_selchars(t
);
992 /*{{{ schr_compute_alpha_unify_with_inc */
993 /* computes the unification rules between different asso_values[c] */
994 static u32
*schr_compute_alpha_unify_with_inc(struct Search
*t
, struct Positions
*positions
,
997 if (OPTS(UPPERLOWER
)) {
999 * Without alpha increments, we would simply unify
1000 * 'A' -> 'a', ..., 'Z' -> 'z'.
1001 * But when a keyword contains at position i a character c,
1002 * we have the constraint
1003 * asso_values[tolower(c) + alpha_inc[i]] ==
1004 * asso_values[toupper(c) + alpha_inc[i]].
1005 * This introduces a unification
1006 * toupper(c) + alpha_inc[i] -> tolower(c) + alpha_inc[i].
1007 * Note that this unification can extend outside the range of
1008 * ASCII letters! But still every unified character pair is at
1009 * a distance of 'a'-'A' = 32, or (after chained unification)
1010 * at a multiple of 32. So in the end the alpha_unify vector
1011 * has the form c -> c + 32 * f(c) where f(c) is a
1012 * nonnegative integer.
1017 struct Keyword_List
*tmp
;
1019 alpha_size
= schr_compute_alpha_size_with_inc(t
, alpha_inc
);
1020 alpha_unify
= calloc(alpha_size
, sizeof(*alpha_unify
));
1023 if (c
>= alpha_size
)
1031 struct PositionIterator
*iter
;
1037 iter
= pos_iterator(positions
, kw
->allchars_length
);
1041 i
= positer_next(iter
);
1042 if (i
== POSITER_EOS
)
1044 if (i
== POS_LASTCHAR
)
1045 c
= (u8
)(kw
->allchars
[kw
->allchars_length
- 1]);
1046 else if (i
< kw
->allchars_length
)
1047 c
= (u8
)(kw
->allchars
[i
]);
1050 if (c
>= 'A' && c
<= 'Z')
1052 if (c
>= 'a' && c
<= 'z') {
1057 if (i
!= POS_LASTCHAR
)
1059 /* unify c with c - ('a'-'A') */
1061 b
= c
- ('a' - 'A');
1064 if (a
< 0 || alpha_unify
[a
] != b
)
1076 /* identity mapping */
1079 /*{{{ schr_compute_alpha_size_with_inc */
1080 /* computes the upper bound on the indices passed to asso_values[] */
1081 static u32
schr_compute_alpha_size_with_inc(struct Search
*t
, u32
*alpha_inc
)
1089 if (i
>= t
->max_key_len
)
1091 if (max_alpha_inc
< alpha_inc
[i
])
1092 max_alpha_inc
= alpha_inc
[i
];
1095 return (OPTS(SEVENBIT
) ? 128 : 256) + max_alpha_inc
;
1097 /*{{{ schr_init_selchars_multiset */
1098 static void schr_init_selchars_multiset(struct Search
*t
, struct Positions
*positions
,
1099 u32
*alpha_unify
, u32
*alpha_inc
)
1101 struct Keyword_List
*tmp
;
1107 kw_init_selchars_multiset(tmp
->kw
, positions
, alpha_unify
, alpha_inc
);
1111 /*{{{ schr_find_good_asso_values */
1112 /* finds good _asso_values[] */
1113 static void schr_find_good_asso_values(struct Search
*t
)
1115 s32 asso_iterations
;
1116 struct Keyword_List
*saved_head
;
1117 s32 best_initial_asso_value
;
1119 s32
*best_asso_values
;
1120 s32 best_collisions
;
1121 s32 best_max_hash_value
;
1123 schr_prepare_asso_values(t
);
1125 /* search for good _asso_values[] */
1126 asso_iterations
= options
->asso_iterations
;
1127 if (asso_iterations
== 0) {
1128 schr_find_asso_values(t
);
1132 * Try different pairs of _initial_asso_value and _jump, in the
1138 * (4, 1) (2, 3) (0, 5)
1139 * (5, 1) (3, 3) (1, 5)
1142 saved_head
= t
->head
;
1143 best_initial_asso_value
= 0;
1145 best_asso_values
= calloc(t
->alpha_size
, sizeof(*best_asso_values
));
1146 best_collisions
= S32_MAX
;
1147 best_max_hash_value
= S32_MAX
;
1149 t
->initial_asso_value
= 0;
1154 struct Keyword_List
*ptr
;
1155 /* restore the keyword list in its original order */
1156 t
->head
= copy_list(saved_head
);
1157 /* find good _asso_values[] */
1158 schr_find_asso_values(t
);
1159 /* test whether it is the best solution so far */
1161 max_hash_value
= S32_MIN
;
1162 ba_clear(t
->collision_detector
);
1171 hashcode
= schr_compute_hash(t
, kw
);
1172 if (max_hash_value
< hashcode
)
1173 max_hash_value
= hashcode
;
1174 if (ba_set_bit(t
->collision_detector
, hashcode
))
1178 if (collisions
< best_collisions
|| (collisions
== best_collisions
1179 && max_hash_value
< best_max_hash_value
)) {
1180 memcpy(best_asso_values
, t
->asso_values
, t
->alpha_size
1181 * sizeof(*(t
->asso_values
)));
1182 best_collisions
= collisions
;
1183 best_max_hash_value
= max_hash_value
;
1185 /* delete the copied keyword list */
1186 delete_list(t
->head
);
1189 if (asso_iterations
== 0)
1191 /* prepare for next iteration */
1192 if (t
->initial_asso_value
>= 2) {
1193 t
->initial_asso_value
-= 2;
1196 t
->initial_asso_value
+= t
->jump
;
1200 t
->head
= saved_head
;
1201 /* install the best found asso_values */
1202 t
->initial_asso_value
= best_initial_asso_value
;
1203 t
->jump
= best_jump
;
1204 memcpy(t
->asso_values
, best_asso_values
, t
->alpha_size
* sizeof(*(t
->asso_values
)));
1205 free(best_asso_values
);
1207 /*{{{ schr_find_asso_values */
1208 static void schr_find_asso_values(struct Search
*t
)
1214 /* determine the steps, starting with the last one */
1222 undetermined
= calloc(t
->alpha_size
, sizeof(*undetermined
));
1225 if (c
>= t
->alpha_size
)
1227 undetermined
[c
] = false;
1230 determined
= calloc(t
->alpha_size
, sizeof(*determined
));
1233 if (c
>= t
->alpha_size
)
1235 determined
[c
] = true;
1239 /* compute the partition that needs to be refined */
1240 struct EquivalenceClass
*partition
;
1242 u32 chosen_possible_collisions
;
1248 partition
= schr_compute_partition(t
, undetermined
);
1250 * Determine the main character to be chosen in this step. Choosing such a
1251 * character c has the effect of splitting every equivalence class
1252 * (according the the frequency of occurrence of c). We choose the c with
1253 * the minimum number of possible collisions, so that characters which lead
1254 * to a large number of collisions get handled early during the search.
1258 u32 best_possible_collisions
;
1262 best_possible_collisions
= U32_MAX
;
1265 if (c
>= t
->alpha_size
)
1267 if (t
->occurrences
[c
] > 0 && determined
[c
]) {
1268 u32 possible_collisions
;
1270 possible_collisions
=
1271 schr_count_possible_collisions(t
,
1273 if (possible_collisions
1274 < best_possible_collisions
) {
1276 best_possible_collisions
=
1277 possible_collisions
;
1282 if (best_possible_collisions
== U32_MAX
) {
1284 * All c with occurrences[c] > 0 are undetermined. We are
1285 * are the starting situation and don't need any more step.
1287 delete_partition(partition
);
1291 chosen_possible_collisions
= best_possible_collisions
;
1293 /* we need one more step */
1294 step
= calloc(1, sizeof(*step
));
1296 step
->undetermined
= calloc(t
->alpha_size
,
1297 sizeof(*(step
->undetermined
)));
1298 memcpy(step
->undetermined
, undetermined
, t
->alpha_size
1299 * sizeof(*(step
->undetermined
)));
1300 step
->partition
= partition
;
1301 /* now determine how the equivalence classes will be before this step */
1302 undetermined
[chosen_c
] = true;
1303 partition
= schr_compute_partition(t
, undetermined
);
1305 * Now determine which other characters should be determined in this step,
1306 * because they will not change the equivalence classes at this point. It
1307 * is the set of all c which, for all equivalence classes, have the same
1308 * frequency of occurrence in every keyword of the equivalence class.
1312 if (c
>= t
->alpha_size
)
1314 if (t
->occurrences
[c
] > 0 && determined
[c
]
1315 && schr_unchanged_partition(t
, partition
, c
)) {
1316 undetermined
[c
] = true;
1317 determined
[c
] = false;
1321 /* main_c must be one of these */
1322 if (determined
[chosen_c
])
1324 /* now the set of changing characters of this step */
1328 if (c
>= t
->alpha_size
)
1330 if (undetermined
[c
] && !step
->undetermined
[c
])
1334 changing
= calloc(changing_count
, sizeof(*changing
));
1338 if (c
>= t
->alpha_size
)
1340 if (undetermined
[c
] && !step
->undetermined
[c
]) {
1341 changing
[changing_count
] = c
;
1346 step
->changing
= changing
;
1347 step
->changing_count
= changing_count
;
1348 step
->asso_value_max
= t
->asso_value_max
;
1349 step
->expected_lower
= exp((f64
)(chosen_possible_collisions
)
1350 / (f64
)(t
->max_hash_value
));
1351 step
->expected_upper
= exp((f64
)(chosen_possible_collisions
)
1352 / (f64
)(t
->asso_value_max
));
1353 delete_partition(partition
);
1368 struct EquivalenceClass
*cls
;
1373 fprintf(stderr
, "Step %u chooses _asso_values[", stepno
);
1376 if (i
>= step
->changing_count
)
1379 fprintf(stderr
, ",");
1380 fprintf(stderr
, "'%c'", step
->changing
[i
]);
1383 fprintf(stderr
, "], expected number of iterations between %g and %g.\n", step
->expected_lower
, step
->expected_upper
);
1384 fprintf(stderr
, "Keyword equivalence classes:\n");
1385 cls
= step
->partition
;
1387 struct Keyword_List
*tmp
;
1391 fprintf (stderr
, "\n");
1392 tmp
= cls
->keywords
;
1399 fprintf(stderr
, " %.*s\n", kw
->allchars_length
, kw
->allchars
);
1404 fprintf(stderr
, "\n");
1409 * Initialize _asso_values[]. (The value given here matters only for those c which occur in
1410 * all keywords with equal multiplicity.)
1412 memset(t
->asso_values
, 0, t
->alpha_size
* sizeof(*t
->asso_values
));
1425 /* initialize the asso_values[] */
1426 k
= step
->changing_count
;
1433 c
= step
->changing
[i
];
1434 t
->asso_values
[c
] = (t
->initial_asso_value
< 0 ? rand()
1435 : t
->initial_asso_value
) & (step
->asso_value_max
- 1);
1439 iter
= calloc(k
, sizeof(*iter
));
1440 ii
= (t
->jump
!= 0 ? k
- 1 : 0);
1443 struct EquivalenceClass
*cls
;
1445 * test whether these asso_values[] lead to collisions among the equivalence
1446 * classes that should be collision-free
1448 has_collision
= false;
1449 cls
= step
->partition
;
1451 struct Keyword_List
*ptr
;
1455 /* Iteration Number array is a win, O(1) initialization time! */
1456 ba_clear(t
->collision_detector
);
1457 ptr
= cls
->keywords
;
1466 * compute the new hash code for the keyword, leaving apart
1467 * the yet undetermined asso_values[].
1474 sum
= t
->hash_includes_len
? kw
->allchars_length
1477 i
= kw
->selchars_length
;
1481 if (!step
->undetermined
[*p
])
1482 sum
+= t
->asso_values
[*p
];
1489 * See whether it collides with another keyword's hash code,
1490 * from the same equivalence class
1492 if (ba_set_bit(t
->collision_detector
, hashcode
)) {
1493 has_collision
= true;
1499 * don't need to continue looking at the other equivalence classes
1500 * if we already have found a collision
1509 /* try other asso_values[] */
1512 * The way we try various values for
1513 * asso_values[step->_changing[0],...step->_changing[k-1]]
1515 * for (bound = 0,1,...)
1516 * for (ii = 0,...,k-1)
1518 * iter[0..ii-1] := values <= bound
1519 * iter[ii+1..k-1] := values < bound
1521 * asso_values[step->_changing[i]] =
1522 * _initial_asso_value + iter[i] * _jump.
1523 * This makes it more likely to find small asso_values[].
1535 c
= step
->changing
[i
];
1537 t
->asso_values
[c
] = (t
->asso_values
[c
] + t
->jump
)
1538 & (step
->asso_value_max
- 1);
1539 if (iter
[i
] <= bound
)
1541 t
->asso_values
[c
] = (t
->asso_values
[c
] - iter
[i
] * t
->jump
)
1542 & (step
->asso_value_max
- 1);
1552 c
= step
->changing
[i
];
1554 t
->asso_values
[c
] = (t
->asso_values
[c
] + t
->jump
)
1555 & (step
->asso_value_max
- 1);
1556 if (iter
[i
] < bound
)
1558 t
->asso_values
[c
] = (t
->asso_values
[c
] - iter
[i
] * t
->jump
)
1559 & (step
->asso_value_max
- 1);
1563 /* switch from one ii to the next */
1567 c
= step
->changing
[ii
];
1568 t
->asso_values
[c
] = (t
->asso_values
[c
] - bound
* t
->jump
)
1569 & (step
->asso_value_max
- 1);
1572 /* here all iter[i] == 0 */
1577 if (bound
== step
->asso_value_max
) {
1579 * Out of search space! We can either backtrack, or
1580 * increase the available search space of this step.
1581 * It seems simpler to choose the latter solution.
1583 step
->asso_value_max
= 2 * step
->asso_value_max
;
1584 if (step
->asso_value_max
> t
->asso_value_max
) {
1585 t
->asso_value_max
= step
->asso_value_max
;
1586 /* reinitialize _max_hash_value */
1587 t
->max_hash_value
= (t
->hash_includes_len
? t
->max_key_len
: 0)
1588 + (t
->asso_value_max
- 1) * t
->max_selchars_length
;
1589 /* reinitialize _collision_detector */
1590 free(t
->collision_detector
);
1591 t
->collision_detector
= ba_new(
1592 t
->max_hash_value
+ 1);
1599 c
= step
->changing
[ii
];
1601 t
->asso_values
[c
] = (t
->asso_values
[c
] + bound
* t
->jump
)
1602 & (step
->asso_value_max
- 1);
1609 c
= step
->changing
[ii
];
1610 t
->asso_values
[c
] = (t
->asso_values
[c
] + rand())
1611 & (step
->asso_value_max
- 1);
1612 /* next time, change the next c */
1622 fprintf(stderr
, "Step %u chose _asso_values[", stepno
);
1625 if (i
>= step
->changing_count
)
1628 fprintf(stderr
, ",");
1629 fprintf(stderr
, "'%c'", step
->changing
[i
]);
1632 fprintf (stderr
, "] in %u iterations.\n", iterations
);
1636 /* free allocated memory */
1644 free(step
->changing
);
1645 free(step
->undetermined
);
1646 delete_partition(step
->partition
);
1650 /*{{{ chr_count_possible_collisions */
1652 * compute the possible number of collisions when asso_values[c] is chosen, leading to the given
1655 static u32
schr_count_possible_collisions(struct Search
*t
,
1656 struct EquivalenceClass
*partition
, u32 c
)
1659 * Every equivalence class p is split according to the frequency of occurrence of c, leading
1660 * to equivalence classes p1, p2, ...
1661 * This leads to (|p|^2 - |p1|^2 - |p2|^2 - ...)/2 possible collisions.
1662 * Return the sum of this expression over all equivalence classes.
1666 u32
*split_cardinalities
;
1667 struct EquivalenceClass
*cls
;
1670 m
= t
->max_selchars_length
;
1671 split_cardinalities
= calloc(m
+ 1, sizeof(*split_cardinalities
));
1675 struct Keyword_List
*tmp
;
1679 memset(split_cardinalities
, 0, (m
+ 1) * sizeof(*split_cardinalities
));
1680 tmp
= cls
->keywords
;
1692 if (i
>= kw
->selchars_length
)
1694 if (kw
->selchars
[i
] == c
)
1698 ++(split_cardinalities
[count
]);
1701 sum
+= cls
->cardinality
* cls
->cardinality
;
1706 sum
-= split_cardinalities
[i
] * split_cardinalities
[i
];
1711 free(split_cardinalities
);
1715 /*{{{ schr_compute_partition */
1716 static struct EquivalenceClass
*schr_compute_partition(struct Search
*t
, bool *undetermined
)
1718 struct EquivalenceClass
*partition
;
1719 struct EquivalenceClass
*partition_last
;
1720 struct Keyword_List
*tmp
;
1721 struct EquivalenceClass
*cls
;
1728 u32
*undetermined_chars
;
1729 u32 undetermined_chars_length
;
1731 struct EquivalenceClass
*equclass
;
1732 struct Keyword_List
*cons
;
1737 undetermined_chars
= calloc(kw
->selchars_length
, sizeof(*undetermined_chars
));
1738 undetermined_chars_length
= 0;
1741 if (i
>= kw
->selchars_length
)
1743 if (undetermined
[kw
->selchars
[i
]] != 0) {
1744 undetermined_chars
[undetermined_chars_length
] = kw
->selchars
[i
];
1745 ++undetermined_chars_length
;
1749 /* look up the equivalence class to which this keyword belongs */
1750 equclass
= partition
;
1754 if (equclass
->undetermined_chars_length
== undetermined_chars_length
1755 && equals(equclass
->undetermined_chars
, undetermined_chars
,
1756 undetermined_chars_length
))
1758 equclass
= equclass
->next
;
1760 if (equclass
== 0) {
1761 equclass
= calloc(1, sizeof(*equclass
));
1762 equclass
->undetermined_chars
= undetermined_chars
;
1763 equclass
->undetermined_chars_length
= undetermined_chars_length
;
1765 partition_last
->next
= equclass
;
1767 partition
= equclass
;
1768 partition_last
= equclass
;
1770 free(undetermined_chars
);
1771 /* add the keyword to the equivalence class */
1773 if (equclass
->keywords
!= 0)
1774 equclass
->keywords_last
->next
= cons
;
1776 equclass
->keywords
= cons
;
1777 equclass
->keywords_last
= cons
;
1778 ++(equclass
->cardinality
);
1781 /* Free some of the allocated memory. The caller doesn't need it. */
1786 free(cls
->undetermined_chars
);
1791 /*{{{ schr_prepare_asso_values */
1792 /* initializes the asso_values[] related parameters */
1793 static void schr_prepare_asso_values(struct Search
*t
)
1795 struct Keyword_List
*tmp
;
1796 struct PositionIterator
*iter
;
1797 s32 non_linked_length
;
1800 /* initialize each keyword's _selchars array */
1801 schr_init_selchars_multiset(t
, t
->key_positions
, t
->alpha_unify
, t
->alpha_inc
);
1802 /* compute the maximum _selchars_length over all keywords */
1803 iter
= pos_iterator(t
->key_positions
, t
->max_key_len
);
1804 t
->max_selchars_length
= positer_remaining(iter
);
1807 * Check for duplicates, i.e. keywords with the same _selchars array (and - if
1808 * _hash_includes_len - also the same length). We deal with these by building an
1809 * equivalence class, so that only 1 keyword is representative of the entire collection.
1810 * Only this representative remains in the keyword list; the others are accessible through
1811 * the _duplicate_link chain, starting at the representative. This *greatly* simplifies
1812 * processing during later stages of the program. Set _total_duplicates and _list_len =
1813 * _total_keys - _total_duplicates.
1816 struct Hash_Table
*representatives
;
1817 struct Keyword_List
*prev
;
1819 t
->list_len
= t
->total_keys
;
1820 t
->total_duplicates
= 0;
1822 representatives
= ht_new(t
->list_len
, !t
->hash_includes_len
);
1823 prev
= 0; /* list node before temp */
1827 struct Keyword
*other_kw
;
1828 struct Keyword_List
*garbage
;
1833 other_kw
= ht_insert(representatives
, kw
);
1835 if (other_kw
!= 0) {
1836 ++(t
->total_duplicates
);
1838 /* remove keyword from the main list */
1839 prev
->next
= tmp
->next
;
1841 /* and insert it on other_keyword's duplicate list */
1842 kw
->duplicate_link
= other_kw
->duplicate_link
;
1843 other_kw
->duplicate_link
= kw
;
1844 /* complain if user hasn't enabled the duplicate option */
1845 if (!OPTS(DUP
) || OPTS(DEBUG
)) {
1848 fprintf(stderr
, "Key link: \"%.*s\" = \"%.*s\", with key set \"", kw
->allchars_length
, kw
->allchars
, other_kw
->allchars_length
, other_kw
->allchars
);
1851 if (j
>= kw
->selchars_length
)
1853 putc(kw
->selchars
[j
], stderr
);
1856 fprintf(stderr
, "\".\n");
1859 kw
->duplicate_link
= 0;
1867 ht_dump(representatives
);
1868 ht_del(representatives
);
1871 * Exit program if duplicates exists and option[DUP] not set, since we don't want to
1872 * continue in this case. (We don't want to turn on option[DUP] implicitly, because the
1873 * generated code is usually much slower.
1875 if (t
->total_duplicates
!= 0) {
1877 fprintf(stderr
, "%d input keys have identical hash values, examine output carefully...\n", t
->total_duplicates
);
1879 fprintf(stderr
, "%d input keys have identical hash values,\n", t
->total_duplicates
);
1880 if (OPTS(POSITIONS
))
1881 fprintf(stderr
, "try different key positions or use option -D.\n");
1883 fprintf(stderr
, "use option -D.\n");
1887 /* compute the occurrences of each character in the alphabet */
1888 t
->occurrences
= calloc(t
->alpha_size
, sizeof(*(t
->occurrences
)));
1899 count
= kw
->selchars_length
;
1903 ++(t
->occurrences
[*ptr
]);
1909 /* memory allocation */
1910 t
->asso_values
= calloc(t
->alpha_size
, sizeof(*(t
->asso_values
)));
1911 non_linked_length
= t
->list_len
;
1912 asso_value_max
= (u32
)(non_linked_length
* options
->size_multiple
);
1914 * Round up to the next power of two. This makes it easy to ensure an _asso_value[c] is >= 0
1915 * and < asso_value_max. Also, the jump value being odd, it guarantees that
1916 * Search::try_asso_value() will iterate through different values for _asso_value[c].
1918 if (asso_value_max
== 0)
1920 asso_value_max
|= asso_value_max
>> 1;
1921 asso_value_max
|= asso_value_max
>> 2;
1922 asso_value_max
|= asso_value_max
>> 4;
1923 asso_value_max
|= asso_value_max
>> 8;
1924 asso_value_max
|= asso_value_max
>> 16;
1926 t
->asso_value_max
= asso_value_max
;
1928 * given the bound for _asso_values[c], we have a bound for the possible hash values, as
1929 * computed in compute_hash()
1931 t
->max_hash_value
= (t
->hash_includes_len
? t
->max_key_len
: 0) + (t
->asso_value_max
- 1)
1932 * t
->max_selchars_length
;
1933 /* allocate a sparse bit vector for detection of collisions of hash values */
1934 t
->collision_detector
= ba_new(t
->max_hash_value
+ 1);
1939 fprintf (stderr
, "total non-linked keys = %d\nmaximum associated value is %d\nmaximum size of generated hash table is %d\n", non_linked_length
, asso_value_max
, t
->max_hash_value
);
1942 struct Keyword_List
*tmp
;
1951 if (field_width
< kw
->selchars_length
)
1952 field_width
= kw
->selchars_length
;
1956 fprintf (stderr
, "\ndumping the keyword list without duplicates\n");
1957 fprintf (stderr
, "keyword #, %*s, keyword\n", field_width
, "keysig");
1968 fprintf(stderr
, "%9d, ", i
);
1969 if (field_width
> kw
->selchars_length
)
1970 fprintf(stderr
, "%*s", field_width
- kw
->selchars_length
, "");
1973 if (j
>= kw
->selchars_length
)
1975 putc(kw
->selchars
[j
], stderr
);
1978 fprintf(stderr
, ", %.*s\n", kw
->allchars_length
, kw
->allchars
);
1981 fprintf(stderr
, "\nend of keyword list\n\n");
1983 if (OPTS(RANDOM
) || options
->jump
== 0)
1984 srand((long)time(0));
1985 t
->initial_asso_value
= (OPTS(RANDOM
) ? -1 : options
->initial_asso_value
);
1986 t
->jump
= options
->jump
;
1988 /*{{{ schr_unchanged_partition */
1989 /* Test whether adding c to the undetermined characters changes the given partition. */
1990 static bool schr_unchanged_partition(struct Search
*t
, struct EquivalenceClass
*partition
, u32 c
)
1992 struct EquivalenceClass
*cls
;
1997 struct Keyword_List
*tmp
;
2001 first_count
= U32_MAX
;
2002 tmp
= cls
->keywords
;
2014 if (i
>= kw
->selchars_length
)
2016 if (kw
->selchars
[i
] == c
)
2020 if (tmp
== cls
->keywords
)
2021 first_count
= count
;
2022 else if (count
!= first_count
)
2023 /* c would split this equivalence class */
2032 /*{{{ schr_compute_hash */
2034 * computes a keyword's hash value, relative to the current _asso_values[], and stores it in
2035 * keyword->_hash_value
2037 static s32
schr_compute_hash(struct Search
*t
, struct Keyword
*kw
)
2043 sum
= t
->hash_includes_len
? kw
->allchars_length
: 0;
2045 i
= kw
->selchars_length
;
2049 sum
+= t
->asso_values
[*p
];
2053 kw
->hash_value
= sum
;
2057 static void schr_sort(struct Search
*t
)
2059 t
->head
= mergesort_list(t
->head
, less_by_hash_value
);
2062 /*------------------------------------------------------------------------------------------------*/
2064 #include "namespace/globals.h"
2065 #include "namespace/search.h"
2066 #include "namespace/keyword.h"
2067 #include "namespace/keyword_list.h"
2068 #include "namespace/options.h"
2069 #include "namespace/positions.h"
2070 #include "namespace/hash-table.h"
2071 #include "namespace/bool-array.h"
2072 #include "namespace/search.c"
2074 /*------------------------------------------------------------------------------------------------*/