1 ///////////////////////////////////////////////////////////////////////////////
2 // Indexing optimizations for pattern matching.
4 // The following indexing schemes is largely inspired by the work of
5 // Sekar, Ramesh and Ramakrishnan in ``Adaptive Pattern Matching.''
6 // Techreport, SUNY at Stony Brook (year 91+ but unknown).
8 // Intuitively, an index is the next position of a pattern that must be
9 // inspected to determine a match(Levy and Huet). Given a set of
10 // prioritized patterns, at least an index must exist at each position.
11 // The algorithm of SRR attempts to locate the index at each position
12 // when constructing the the DFA-like matching automata.
14 // In the previous algorithm I've implemented, merging of matching trees at
15 // identical positions are performed and the resulting automaton guarantees
16 // that the same position is only examined once. This is an improvement
17 // over Wadler's algorithm, which may examine the same position more than
18 // once in a match. However, we may examine a position even if it is not
27 // We exam position 1, 2, 3 in sequence in term f even though it is
28 // best to exam position 2 first.
30 // I think my algorithm is similar to the one by Graf. In any case,
31 // My, Graf's and Wadler's algorithms all use a fixed traversal order
32 // (usually left-to-right). However, the intuition is that in many instances
33 // it is best if the traversal order *adapts* itself according to the
34 // pattern. This is the essential idea of SRR's algorithm.
36 // Sekar et al. have shown that constructing a DFA-like matching
37 // automata can be exponential in time and space. They have
38 // developed a polynomial time algorithm for computing indices for untyped
39 // terms. Unfortunately, in our typed case the time complexity of index
40 // computation is co-NP.
42 // So we'll just use a few heuristics to compute something close to
43 // the notion of an index, and cross our fingers and hope for the best.
44 ///////////////////////////////////////////////////////////////////////////////
47 #include "matchcom.ph"
50 ///////////////////////////////////////////////////////////////////////////////
52 // Hash function on position list
54 ///////////////////////////////////////////////////////////////////////////////
55 unsigned int pos_hash (HashTable::Key p)
59 { POSzero: { return h; }
60 | POSinfinity: { return h + 83; }
61 | POSint(i,p): { h += i; pos = p; }
62 | POSlabel(l,p): { h += (unsigned int)l; pos = p; }
63 | POSadaptive(i,_,p): { h += i + 437; pos = p; }
67 ///////////////////////////////////////////////////////////////////////////////
69 // Checks if a pattern is refutable.
71 ///////////////////////////////////////////////////////////////////////////////
72 Bool is_refutable(Pat pat)
74 { NOpat || WILDpat _ || IDpat(_,_,NOexp): { return false; }
75 | IDpat(_,_,! NOexp) || ASpat(_,_,_,! NOexp)
76 || LITERALpat _ || LEXEMEpat _: { return true; }
77 | ASpat(_,p,_,_): { pat = p; }
78 | TYPEDpat(p,_): { pat = p; }
79 | MARKEDpat(_,p): { pat = p; }
80 | LOGICALpat (_,a,b): { return is_refutable(a) || is_refutable(b); }
81 | TUPLEpat ps: { return is_refutable(ps); }
82 | ARRAYpat (ps,_): { return is_refutable(ps); }
83 | VECTORpat { elements = ps, len = p ... }:
84 { return is_refutable(p) || is_refutable(ps); }
85 | RECORDpat (ps,_): { return is_refutable(ps); }
86 | CONTEXTpat(_,p): { return is_refutable(p); }
89 TYCONty(DATATYPEtycon{ unit, arg, qualifiers ... },_)
92 { return ((unit + arg > 1) || (qualifiers & QUALextensible)); }
93 | APPpat(a,b): { return is_refutable(a) || is_refutable(b); }
94 | LISTpat{cons,nil,head=ps,tail=p}:
95 { if (is_refutable(ps)) return true;
98 | _: { bug("is_refutable()"); }
102 ///////////////////////////////////////////////////////////////////////////////
104 // Is a pattern list refutable?
106 ///////////////////////////////////////////////////////////////////////////////
107 Bool is_refutable(Pats pats)
108 { for_each (Pat, p, pats) if (is_refutable(p)) return true;
112 ///////////////////////////////////////////////////////////////////////////////
114 // Is a labeled pattern list refutable?
116 ///////////////////////////////////////////////////////////////////////////////
117 Bool is_refutable(LabPats pats)
118 { for_each (LabPat, p, pats) if (is_refutable(p.pat)) return true;
122 ///////////////////////////////////////////////////////////////////////////////
124 // Method to compute the indices.
126 ///////////////////////////////////////////////////////////////////////////////
127 void indexing(int priority, Pat pat, Pos pos, HashTable& index_map)
129 { ASpat(_,p,_,_): { pat = p; }
130 | TYPEDpat(p,_): { pat = p; }
131 | MARKEDpat(_,p): { pat = p; }
132 | CONTEXTpat(_,p): { pat = p; }
133 | APPpat(CONSpat(ONEcons {
134 tag, alg_ty = TYCONty(DATATYPEtycon { unit ... }, _)
135 ... }),p as ! NOpat):
136 { indexing(priority, p, POSint(tag + unit, pos), index_map); return; }
137 | TUPLEpat ps: { indexing(priority, ps, pos, index_map); return; }
138 | ARRAYpat (ps,_): { indexing(priority, ps, pos, index_map); return; }
139 | VECTORpat { elements = ps, len = p ...}:
140 { indexing(priority, #[ p ... ps ], pos, index_map); return; }
142 { for_each(LabPat, p, ps)
143 indexing(priority, p.pat, POSlabel(p.label,pos), index_map);
146 | LOGICALpat(_,a,b): { indexing(priority,a,pos,index_map); pat = b; }
150 ///////////////////////////////////////////////////////////////////////////////
152 // Method to compute the indices.
154 ///////////////////////////////////////////////////////////////////////////////
155 void indexing(int priority, Pats pats, Pos pos, HashTable& index_map)
160 // initialize the pattern array
161 { for_each (Pat, p, pats) Ps[i++] = p; }
163 if (n <= 0 || n > 32) return;
165 // compute the set of index positions for this pattern list
166 unsigned long indices = 0;
167 for (i = 0; i < n; i++)
168 if (is_refutable(Ps[i])) indices |= 1 << i;
170 // Locate the position ranking
171 HashTable::Entry * e = index_map.lookup(pos);
174 // Heuristic to update the ranks given new index and priority
176 ranks = (int*)index_map.value(e);
179 // Initialize the new ranking array.
180 // Put all used positions in front and
181 // the rest the the back.
182 ranks = (int*)mem_pool[n * sizeof(int)];
184 for (i = 0; i < n; i++) if (indices & (1 << i)) ranks[j++] = i;
185 for (i = 0; i < n; i++) if (! (indices & (1 << i))) ranks[j++] = i;
186 index_map.insert(pos, ranks);
189 // Recursive on subcomponents, and insert an adaptive rank.
190 for (i = 0; i < n; i++)
191 indexing(priority, Ps[i], POSadaptive(i,ranks,pos), index_map);
194 ///////////////////////////////////////////////////////////////////////////////
196 // Method to compute the indices information for a set of pattern matching
199 ///////////////////////////////////////////////////////////////////////////////
200 void indexing (MatchRules rules, HashTable& index_map)
202 for_each (MatchRule, r, rules)
204 { MATCHrule(_, pat, _, _, _):
205 { indexing(priority, pat, POSzero, index_map);