1 //===- lib/DebugInfo/Symbolize/Markup.cpp ------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file defines the log symbolizer markup data model and parser.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/DebugInfo/Symbolize/Markup.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/ADT/StringExtras.h"
// Regular expression for the SGR control codes recognized by the parser. It
// matches exactly the following escape sequences:
//   "\033[0m"
//   "\033[1m"
//   "\033[30m" -- "\033[37m"
static constexpr char SGRSyntaxStr[] = "\033\\[([0-1]|3[0-7])m";
28 MarkupParser::MarkupParser(StringSet
<> MultilineTags
)
29 : MultilineTags(std::move(MultilineTags
)), SGRSyntax(SGRSyntaxStr
) {}
31 static StringRef
takeTo(StringRef Str
, StringRef::iterator Pos
) {
32 return Str
.take_front(Pos
- Str
.begin());
34 static void advanceTo(StringRef
&Str
, StringRef::iterator Pos
) {
35 Str
= Str
.drop_front(Pos
- Str
.begin());
// Starts parsing a new line of input. As far as is visible here, the text of
// any previously finished multi-line element is discarded.
// NOTE(review): the embedded listing numbers skip inside this body (38 -> 41),
// so further statements appear to be missing from this excerpt -- confirm
// against the complete file.
38 void MarkupParser::parseLine(StringRef Line
) {
41 FinishedMultiline
.clear();
// Produces the next node for the current line.
//
// Nodes already queued in Buffer are handed out first, in order. Otherwise the
// remaining Line is examined in this priority:
//   1. the continuation or end of an in-progress multi-line element,
//   2. the first complete markup element (text before it is queued first),
//   3. the start of a multi-line element at the end of the line,
//   4. plain text.
// NOTE(review): the return statements that follow the Buffer-filling branches
// are not visible in this excerpt (the embedded listing numbers skip) --
// confirm them against the complete file.
45 std::optional
<MarkupNode
> MarkupParser::nextNode() {
46 // Pull something out of the buffer if possible.
47 if (!Buffer
.empty()) {
48 if (NextIdx
< Buffer
.size())
49 return std::move(Buffer
[NextIdx
++]);
54 // The buffer is empty, so parse the next bit of the line.
59 if (!InProgressMultiline
.empty()) {
60 if (std::optional
<StringRef
> MultilineEnd
= parseMultiLineEnd(Line
)) {
61 llvm::append_range(InProgressMultiline
, *MultilineEnd
);
62 assert(FinishedMultiline
.empty() &&
63 "At most one multi-line element can be finished at a time.");
// Hand the completed text over to FinishedMultiline; InProgressMultiline
// receives the (asserted empty) previous contents.
64 FinishedMultiline
.swap(InProgressMultiline
);
65 // Parse the multi-line element as if it were contiguous.
66 advanceTo(Line
, MultilineEnd
->end());
// NOTE(review): the optional returned by parseElement is dereferenced without
// a check -- presumably a completed multi-line element always parses; confirm.
67 return *parseElement(FinishedMultiline
);
70 // The whole line is part of the multi-line element.
71 llvm::append_range(InProgressMultiline
, Line
);
// Consume the rest of the line.
72 Line
= Line
.drop_front(Line
.size());
76 // Find the first valid markup element, if any.
77 if (std::optional
<MarkupNode
> Element
= parseElement(Line
)) {
// Queue any text that precedes the element, then the element itself.
78 parseTextOutsideMarkup(takeTo(Line
, Element
->Text
.begin()));
79 Buffer
.push_back(std::move(*Element
));
// Resume parsing right after the element's text.
80 advanceTo(Line
, Element
->Text
.end());
84 // Since there were no valid elements remaining, see if the line opens a
85 // multi-line element.
86 if (std::optional
<StringRef
> MultilineBegin
= parseMultiLineBegin(Line
)) {
87 // Emit any text before the element.
88 parseTextOutsideMarkup(takeTo(Line
, MultilineBegin
->begin()));
90 // Begin recording the multi-line element.
91 llvm::append_range(InProgressMultiline
, *MultilineBegin
);
// Consume the rest of the line.
92 Line
= Line
.drop_front(Line
.size());
96 // The line doesn't contain any more markup elements, so emit it as text.
97 parseTextOutsideMarkup(Line
);
// Consume the rest of the line.
98 Line
= Line
.drop_front(Line
.size());
// Handles a multi-line element that was begun but never finished: the text
// accumulated so far is moved into FinishedMultiline and parsed as text
// outside markup, which queues the resulting nodes in Buffer.
// NOTE(review): the statement guarded by the empty() check below and the
// statements before it are not visible in this excerpt; presumably the check
// returns early when nothing is in progress -- confirm.
102 void MarkupParser::flush() {
106 if (InProgressMultiline
.empty())
108 FinishedMultiline
.swap(InProgressMultiline
);
109 parseTextOutsideMarkup(FinishedMultiline
);
112 // Finds and returns the next valid markup element in the given line. Returns
113 // std::nullopt if the line contains no valid elements.
//
// An element is delimited by three opening and three closing braces. Its
// content is split on ':' into a tag followed by zero or more fields.
// NOTE(review): the statements guarded by the checks below, the declaration of
// Element and any adjustment of EndPos are not visible in this excerpt (the
// embedded listing numbers skip) -- confirm against the complete file.
114 std::optional
<MarkupNode
> MarkupParser::parseElement(StringRef Line
) {
116 // Find next element using begin and end markers.
117 size_t BeginPos
= Line
.find("{{{");
118 if (BeginPos
== StringRef::npos
)
// Only look for the end marker after the three characters of the begin marker.
120 size_t EndPos
= Line
.find("}}}", BeginPos
+ 3);
121 if (EndPos
== StringRef::npos
)
125 Element
.Text
= Line
.slice(BeginPos
, EndPos
);
// Continue from EndPos should this candidate turn out to be invalid.
126 Line
= Line
.substr(EndPos
);
// Strip three characters from each end of the element text; presumably these
// are the begin and end markers -- confirm that Text spans both.
129 StringRef Content
= Element
.Text
.drop_front(3).drop_back(3);
130 StringRef FieldsContent
;
// Everything before the first ':' is the tag; the remainder holds the fields.
131 std::tie(Element
.Tag
, FieldsContent
) = Content
.split(':');
132 if (Element
.Tag
.empty())
// Split the remainder into ':'-separated fields. A content that ends in ':'
// with nothing after it yields a single empty field.
136 if (!FieldsContent
.empty())
137 FieldsContent
.split(Element
.Fields
, ":");
138 else if (Content
.back() == ':')
139 Element
.Fields
.push_back(FieldsContent
);
// Builds the MarkupNode used for content that is not a markup element; the
// callers in this file pass plain text regions and SGR control-code matches.
// NOTE(review): the body is not visible in this excerpt -- confirm which node
// fields are populated.
145 static MarkupNode
textNode(StringRef Text
) {
151 // Parses a region of text known to be outside any markup elements. Such text
152 // may still contain SGR control codes, so the region is further subdivided into
153 // control codes and true text regions.
//
// Every resulting region is appended to Buffer as a node built by textNode,
// in the order it appears in Text.
// NOTE(review): the embedded listing numbers skip inside this body, so guards
// around some statements may be missing from this excerpt -- confirm.
154 void MarkupParser::parseTextOutsideMarkup(StringRef Text
) {
157 SmallVector
<StringRef
> Matches
;
// Each iteration consumes Text up to and including the next control code.
158 while (SGRSyntax
.match(Text
, &Matches
)) {
159 // Emit any text before the SGR element.
160 if (Matches
.begin()->begin() != Text
.begin())
161 Buffer
.push_back(textNode(takeTo(Text
, Matches
.begin()->begin())));
// Emit the control code itself (the first match entry) as its own node.
163 Buffer
.push_back(textNode(*Matches
.begin()));
// Continue scanning right after the control code.
164 advanceTo(Text
, Matches
.begin()->end());
// Emit whatever follows the last control code.
167 Buffer
.push_back(textNode(Text
));
170 // Given that a line doesn't contain any valid markup, see if it ends with the
171 // start of a multi-line element. If so, returns the beginning.
//
// The returned slice runs from the last begin marker to the end of the line.
// NOTE(review): the statements guarded by the failure checks below are not
// visible in this excerpt; presumably each returns std::nullopt -- confirm.
172 std::optional
<StringRef
> MarkupParser::parseMultiLineBegin(StringRef Line
) {
173 // A multi-line begin marker must be the last one on the line.
174 size_t BeginPos
= Line
.rfind("{{{");
175 if (BeginPos
== StringRef::npos
)
// Index of the first tag character, just past the three-character marker.
177 size_t BeginTagPos
= BeginPos
+ 3;
179 // If there are any end markers afterwards, the begin marker cannot belong to
180 // a multi-line element.
181 size_t EndPos
= Line
.find("}}}", BeginTagPos
);
182 if (EndPos
!= StringRef::npos
)
185 // Check whether the tag is registered multi-line.
186 size_t EndTagPos
= Line
.find(':', BeginTagPos
);
187 if (EndTagPos
== StringRef::npos
)
// The tag is the text between the begin marker and the first ':' after it.
189 StringRef Tag
= Line
.slice(BeginTagPos
, EndTagPos
);
190 if (!MultilineTags
.contains(Tag
))
// Return everything from the begin marker through the end of the line.
192 return Line
.substr(BeginPos
);
195 // See if the line begins with the ending of an in-progress multi-line element.
196 // If so, return the ending.
//
// The ending is the prefix of Line up to and including the first end marker.
// NOTE(review): the statement guarded by the npos check is not visible in this
// excerpt; presumably it returns std::nullopt -- confirm.
197 std::optional
<StringRef
> MarkupParser::parseMultiLineEnd(StringRef Line
) {
198 size_t EndPos
= Line
.find("}}}");
199 if (EndPos
== StringRef::npos
)
// Include the three-character end marker in the returned prefix.
201 return Line
.take_front(EndPos
+ 3);
204 } // end namespace symbolize
205 } // end namespace llvm