1 //===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements the StringMatcher class.
12 //===----------------------------------------------------------------------===//
14 #include "StringMatcher.h"
15 #include "llvm/Support/raw_ostream.h"
19 /// FindFirstNonCommonLetter - Find the first character in the keys of the
20 /// string pairs that is not shared across the whole set of strings. All
21 /// strings are assumed to have the same length.
23 FindFirstNonCommonLetter(const std::vector
<const
24 StringMatcher::StringPair
*> &Matches
) {
25 assert(!Matches
.empty());
26 for (unsigned i
= 0, e
= Matches
[0]->first
.size(); i
!= e
; ++i
) {
27 // Check to see if letter i is the same across the set.
28 char Letter
= Matches
[0]->first
[i
];
30 for (unsigned str
= 0, e
= Matches
.size(); str
!= e
; ++str
)
31 if (Matches
[str
]->first
[i
] != Letter
)
35 return Matches
[0]->first
.size();
38 /// EmitStringMatcherForChar - Given a set of strings that are known to be the
39 /// same length and whose characters leading up to CharNo are the same, emit
40 /// code to verify that CharNo and later are the same.
42 /// \return - True if control can leave the emitted code fragment.
44 EmitStringMatcherForChar(const std::vector
<const StringPair
*> &Matches
,
45 unsigned CharNo
, unsigned IndentCount
) const {
46 assert(!Matches
.empty() && "Must have at least one string to match!");
47 std::string
Indent(IndentCount
*2+4, ' ');
49 // If we have verified that the entire string matches, we're done: output the
51 if (CharNo
== Matches
[0]->first
.size()) {
52 assert(Matches
.size() == 1 && "Had duplicate keys to match on");
54 // If the to-execute code has \n's in it, indent each subsequent line.
55 StringRef Code
= Matches
[0]->second
;
57 std::pair
<StringRef
, StringRef
> Split
= Code
.split('\n');
58 OS
<< Indent
<< Split
.first
<< "\t // \"" << Matches
[0]->first
<< "\"\n";
61 while (!Code
.empty()) {
62 Split
= Code
.split('\n');
63 OS
<< Indent
<< Split
.first
<< "\n";
69 // Bucket the matches by the character we are comparing.
70 std::map
<char, std::vector
<const StringPair
*> > MatchesByLetter
;
72 for (unsigned i
= 0, e
= Matches
.size(); i
!= e
; ++i
)
73 MatchesByLetter
[Matches
[i
]->first
[CharNo
]].push_back(Matches
[i
]);
76 // If we have exactly one bucket to match, see how many characters are common
77 // across the whole set and match all of them at once.
78 if (MatchesByLetter
.size() == 1) {
79 unsigned FirstNonCommonLetter
= FindFirstNonCommonLetter(Matches
);
80 unsigned NumChars
= FirstNonCommonLetter
-CharNo
;
82 // Emit code to break out if the prefix doesn't match.
84 // Do the comparison with if (Str[1] != 'f')
85 // FIXME: Need to escape general characters.
86 OS
<< Indent
<< "if (" << StrVariableName
<< "[" << CharNo
<< "] != '"
87 << Matches
[0]->first
[CharNo
] << "')\n";
88 OS
<< Indent
<< " break;\n";
90 // Do the comparison with if (Str.substr(1, 3) != "foo").
91 // FIXME: Need to escape general strings.
92 OS
<< Indent
<< "if (" << StrVariableName
<< ".substr(" << CharNo
<< ", "
93 << NumChars
<< ") != \"";
94 OS
<< Matches
[0]->first
.substr(CharNo
, NumChars
) << "\")\n";
95 OS
<< Indent
<< " break;\n";
98 return EmitStringMatcherForChar(Matches
, FirstNonCommonLetter
, IndentCount
);
101 // Otherwise, we have multiple possible things, emit a switch on the
103 OS
<< Indent
<< "switch (" << StrVariableName
<< "[" << CharNo
<< "]) {\n";
104 OS
<< Indent
<< "default: break;\n";
106 for (std::map
<char, std::vector
<const StringPair
*> >::iterator LI
=
107 MatchesByLetter
.begin(), E
= MatchesByLetter
.end(); LI
!= E
; ++LI
) {
108 // TODO: escape hard stuff (like \n) if we ever care about it.
109 OS
<< Indent
<< "case '" << LI
->first
<< "':\t // "
110 << LI
->second
.size() << " string";
111 if (LI
->second
.size() != 1) OS
<< 's';
112 OS
<< " to match.\n";
113 if (EmitStringMatcherForChar(LI
->second
, CharNo
+1, IndentCount
+1))
114 OS
<< Indent
<< " break;\n";
117 OS
<< Indent
<< "}\n";
122 /// Emit - Top level entry point.
124 void StringMatcher::Emit(unsigned Indent
) const {
125 // If nothing to match, just fall through.
126 if (Matches
.empty()) return;
128 // First level categorization: group strings by length.
129 std::map
<unsigned, std::vector
<const StringPair
*> > MatchesByLength
;
131 for (unsigned i
= 0, e
= Matches
.size(); i
!= e
; ++i
)
132 MatchesByLength
[Matches
[i
].first
.size()].push_back(&Matches
[i
]);
134 // Output a switch statement on length and categorize the elements within each
136 OS
.indent(Indent
*2+2) << "switch (" << StrVariableName
<< ".size()) {\n";
137 OS
.indent(Indent
*2+2) << "default: break;\n";
139 for (std::map
<unsigned, std::vector
<const StringPair
*> >::iterator LI
=
140 MatchesByLength
.begin(), E
= MatchesByLength
.end(); LI
!= E
; ++LI
) {
141 OS
.indent(Indent
*2+2) << "case " << LI
->first
<< ":\t // "
143 << " string" << (LI
->second
.size() == 1 ? "" : "s") << " to match.\n";
144 if (EmitStringMatcherForChar(LI
->second
, 0, Indent
))
145 OS
.indent(Indent
*2+4) << "break;\n";
148 OS
.indent(Indent
*2+2) << "}\n";