//===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the SarifDocumentWriter class,
/// which works with the associated builders such as:
/// - \ref SarifArtifact
/// - \ref SarifArtifactLocation
/// - \ref SarifRule
/// - \ref SarifResult
///
//===----------------------------------------------------------------------===//
#include "clang/Basic/Sarif.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/Path.h"

#include <cassert>
#include <cstdint>
#include <optional>
#include <string>
#include <utility>

using namespace clang;
using namespace llvm;

using clang::detail::SarifArtifact;
using clang::detail::SarifArtifactLocation;

38 static StringRef
getFileName(FileEntryRef FE
) {
39 StringRef Filename
= FE
.getFileEntry().tryGetRealPathName();
41 Filename
= FE
.getName();
49 /// Return the RFC3986 encoding of the input character.
51 /// \param C Character to encode to RFC3986.
53 /// \return The RFC3986 representation of \c C.
54 static std::string
percentEncodeURICharacter(char C
) {
55 // RFC 3986 claims alpha, numeric, and this handful of
56 // characters are not reserved for the path component and
57 // should be written out directly. Otherwise, percent
58 // encode the character and write that out instead of the
59 // reserved character.
60 if (llvm::isAlnum(C
) ||
61 StringRef::npos
!= StringRef("-._~:@!$&'()*+,;=").find(C
))
62 return std::string(&C
, 1);
63 return "%" + llvm::toHex(StringRef(&C
, 1));
67 /// \brief Return a URI representing the given file name.
69 /// \param Filename The filename to be represented as URI.
71 /// \return RFC3986 URI representing the input file name.
72 static std::string
fileNameToURI(StringRef Filename
) {
73 SmallString
<32> Ret
= StringRef("file://");
75 // Get the root name to see if it has a URI authority.
76 StringRef Root
= sys::path::root_name(Filename
);
77 if (Root
.starts_with("//")) {
78 // There is an authority, so add it to the URI.
79 Ret
+= Root
.drop_front(2).str();
80 } else if (!Root
.empty()) {
81 // There is no authority, so end the component and add the root to the URI.
82 Ret
+= Twine("/" + Root
).str();
85 auto Iter
= sys::path::begin(Filename
), End
= sys::path::end(Filename
);
86 assert(Iter
!= End
&& "Expected there to be a non-root path component.");
87 // Add the rest of the path components, encoding any reserved characters;
88 // we skip past the first path component, as it was handled it above.
89 for (StringRef Component
: llvm::make_range(++Iter
, End
)) {
90 // For reasons unknown to me, we may get a backslash with Windows native
91 // paths for the initial backslash following the drive component, which
92 // we need to ignore as a URI path part.
93 if (Component
== "\\")
96 // Add the separator between the previous path part and the one being
97 // currently processed.
100 // URI encode the part.
101 for (char C
: Component
) {
102 Ret
+= percentEncodeURICharacter(C
);
106 return std::string(Ret
);
110 /// \brief Calculate the column position expressed in the number of UTF-8 code
111 /// points from column start to the source location
113 /// \param Loc The source location whose column needs to be calculated.
114 /// \param TokenLen Optional hint for when the token is multiple bytes long.
116 /// \return The column number as a UTF-8 aware byte offset from column start to
117 /// the effective source location.
118 static unsigned int adjustColumnPos(FullSourceLoc Loc
,
119 unsigned int TokenLen
= 0) {
120 assert(!Loc
.isInvalid() && "invalid Loc when adjusting column position");
122 std::pair
<FileID
, unsigned> LocInfo
= Loc
.getDecomposedExpansionLoc();
123 std::optional
<MemoryBufferRef
> Buf
=
124 Loc
.getManager().getBufferOrNone(LocInfo
.first
);
125 assert(Buf
&& "got an invalid buffer for the location's file");
126 assert(Buf
->getBufferSize() >= (LocInfo
.second
+ TokenLen
) &&
127 "token extends past end of buffer?");
129 // Adjust the offset to be the start of the line, since we'll be counting
130 // Unicode characters from there until our column offset.
131 unsigned int Off
= LocInfo
.second
- (Loc
.getExpansionColumnNumber() - 1);
132 unsigned int Ret
= 1;
133 while (Off
< (LocInfo
.second
+ TokenLen
)) {
134 Off
+= getNumBytesForUTF8(Buf
->getBuffer()[Off
]);
141 /// \name SARIF Utilities
145 json::Object
createMessage(StringRef Text
) {
146 return json::Object
{{"text", Text
.str()}};
150 /// \pre CharSourceRange must be a token range
151 static json::Object
createTextRegion(const SourceManager
&SM
,
152 const CharSourceRange
&R
) {
153 FullSourceLoc BeginCharLoc
{R
.getBegin(), SM
};
154 FullSourceLoc EndCharLoc
{R
.getEnd(), SM
};
155 json::Object Region
{{"startLine", BeginCharLoc
.getExpansionLineNumber()},
156 {"startColumn", adjustColumnPos(BeginCharLoc
)}};
158 if (BeginCharLoc
== EndCharLoc
) {
159 Region
["endColumn"] = adjustColumnPos(BeginCharLoc
);
161 Region
["endLine"] = EndCharLoc
.getExpansionLineNumber();
162 Region
["endColumn"] = adjustColumnPos(EndCharLoc
);
167 static json::Object
createLocation(json::Object
&&PhysicalLocation
,
168 StringRef Message
= "") {
169 json::Object Ret
{{"physicalLocation", std::move(PhysicalLocation
)}};
170 if (!Message
.empty())
171 Ret
.insert({"message", createMessage(Message
)});
175 static StringRef
importanceToStr(ThreadFlowImportance I
) {
177 case ThreadFlowImportance::Important
:
179 case ThreadFlowImportance::Essential
:
181 case ThreadFlowImportance::Unimportant
:
182 return "unimportant";
184 llvm_unreachable("Fully covered switch is not so fully covered");
187 static StringRef
resultLevelToStr(SarifResultLevel R
) {
189 case SarifResultLevel::None
:
191 case SarifResultLevel::Note
:
193 case SarifResultLevel::Warning
:
195 case SarifResultLevel::Error
:
198 llvm_unreachable("Potentially un-handled SarifResultLevel. "
199 "Is the switch not fully covered?");
203 createThreadFlowLocation(json::Object
&&Location
,
204 const ThreadFlowImportance
&Importance
) {
205 return json::Object
{{"location", std::move(Location
)},
206 {"importance", importanceToStr(Importance
)}};
211 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange
&R
) {
212 assert(R
.isValid() &&
213 "Cannot create a physicalLocation from invalid SourceRange!");
214 assert(R
.isCharRange() &&
215 "Cannot create a physicalLocation from a token range!");
216 FullSourceLoc Start
{R
.getBegin(), SourceMgr
};
217 OptionalFileEntryRef FE
= Start
.getExpansionLoc().getFileEntryRef();
218 assert(FE
&& "Diagnostic does not exist within a valid file!");
220 const std::string
&FileURI
= fileNameToURI(getFileName(*FE
));
221 auto I
= CurrentArtifacts
.find(FileURI
);
223 if (I
== CurrentArtifacts
.end()) {
224 uint32_t Idx
= static_cast<uint32_t>(CurrentArtifacts
.size());
225 const SarifArtifactLocation
&Location
=
226 SarifArtifactLocation::create(FileURI
).setIndex(Idx
);
227 const SarifArtifact
&Artifact
= SarifArtifact::create(Location
)
228 .setRoles({"resultFile"})
229 .setLength(FE
->getSize())
230 .setMimeType("text/plain");
231 auto StatusIter
= CurrentArtifacts
.insert({FileURI
, Artifact
});
232 // If inserted, ensure the original iterator points to the newly inserted
233 // element, so it can be used downstream.
234 if (StatusIter
.second
)
235 I
= StatusIter
.first
;
237 assert(I
!= CurrentArtifacts
.end() && "Failed to insert new artifact");
238 const SarifArtifactLocation
&Location
= I
->second
.Location
;
239 json::Object ArtifactLocationObject
{{"uri", Location
.URI
}};
240 if (Location
.Index
.has_value())
241 ArtifactLocationObject
["index"] = *Location
.Index
;
242 return json::Object
{{{"artifactLocation", std::move(ArtifactLocationObject
)},
243 {"region", createTextRegion(SourceMgr
, R
)}}};
246 json::Object
&SarifDocumentWriter::getCurrentTool() {
247 assert(!Closed
&& "SARIF Document is closed. "
248 "Need to call createRun() before using getcurrentTool!");
250 // Since Closed = false here, expect there to be at least 1 Run, anything
251 // else is an invalid state.
252 assert(!Runs
.empty() && "There are no runs associated with the document!");
254 return *Runs
.back().getAsObject()->get("tool")->getAsObject();
257 void SarifDocumentWriter::reset() {
258 CurrentRules
.clear();
259 CurrentArtifacts
.clear();
262 void SarifDocumentWriter::endRun() {
263 // Exit early if trying to close a closed Document.
269 // Since Closed = false here, expect there to be at least 1 Run, anything
270 // else is an invalid state.
271 assert(!Runs
.empty() && "There are no runs associated with the document!");
273 // Flush all the rules.
274 json::Object
&Tool
= getCurrentTool();
276 for (const SarifRule
&R
: CurrentRules
) {
278 {"enabled", R
.DefaultConfiguration
.Enabled
},
279 {"level", resultLevelToStr(R
.DefaultConfiguration
.Level
)},
280 {"rank", R
.DefaultConfiguration
.Rank
}};
284 {"fullDescription", json::Object
{{"text", R
.Description
}}},
285 {"defaultConfiguration", std::move(Config
)}};
286 if (!R
.HelpURI
.empty())
287 Rule
["helpUri"] = R
.HelpURI
;
288 Rules
.emplace_back(std::move(Rule
));
290 json::Object
&Driver
= *Tool
.getObject("driver");
291 Driver
["rules"] = std::move(Rules
);
293 // Flush all the artifacts.
294 json::Object
&Run
= getCurrentRun();
295 json::Array
*Artifacts
= Run
.getArray("artifacts");
296 SmallVector
<std::pair
<StringRef
, SarifArtifact
>, 0> Vec
;
297 for (const auto &[K
, V
] : CurrentArtifacts
)
298 Vec
.emplace_back(K
, V
);
299 llvm::sort(Vec
, llvm::less_first());
300 for (const auto &[_
, A
] : Vec
) {
301 json::Object Loc
{{"uri", A
.Location
.URI
}};
302 if (A
.Location
.Index
.has_value()) {
303 Loc
["index"] = static_cast<int64_t>(*A
.Location
.Index
);
305 json::Object Artifact
;
306 Artifact
["location"] = std::move(Loc
);
307 if (A
.Length
.has_value())
308 Artifact
["length"] = static_cast<int64_t>(*A
.Length
);
309 if (!A
.Roles
.empty())
310 Artifact
["roles"] = json::Array(A
.Roles
);
311 if (!A
.MimeType
.empty())
312 Artifact
["mimeType"] = A
.MimeType
;
313 if (A
.Offset
.has_value())
314 Artifact
["offset"] = *A
.Offset
;
315 Artifacts
->push_back(json::Value(std::move(Artifact
)));
318 // Clear, reset temporaries before next run.
321 // Mark the document as closed.
326 SarifDocumentWriter::createThreadFlows(ArrayRef
<ThreadFlow
> ThreadFlows
) {
327 json::Object Ret
{{"locations", json::Array
{}}};
329 for (const auto &ThreadFlow
: ThreadFlows
) {
330 json::Object PLoc
= createPhysicalLocation(ThreadFlow
.Range
);
331 json::Object Loc
= createLocation(std::move(PLoc
), ThreadFlow
.Message
);
333 createThreadFlowLocation(std::move(Loc
), ThreadFlow
.Importance
));
335 Ret
["locations"] = std::move(Locs
);
336 return json::Array
{std::move(Ret
)};
340 SarifDocumentWriter::createCodeFlow(ArrayRef
<ThreadFlow
> ThreadFlows
) {
341 return json::Object
{{"threadFlows", createThreadFlows(ThreadFlows
)}};
344 void SarifDocumentWriter::createRun(StringRef ShortToolName
,
345 StringRef LongToolName
,
346 StringRef ToolVersion
) {
347 // Clear resources associated with a previous run.
350 // Signify a new run has begun.
355 json::Object
{{"name", ShortToolName
},
356 {"fullName", LongToolName
},
357 {"language", "en-US"},
358 {"version", ToolVersion
},
360 "https://clang.llvm.org/docs/UsersManual.html"}}}};
361 json::Object TheRun
{{"tool", std::move(Tool
)},
364 {"columnKind", "unicodeCodePoints"}};
365 Runs
.emplace_back(std::move(TheRun
));
368 json::Object
&SarifDocumentWriter::getCurrentRun() {
370 "SARIF Document is closed. "
371 "Can only getCurrentRun() if document is opened via createRun(), "
372 "create a run first");
374 // Since Closed = false here, expect there to be at least 1 Run, anything
375 // else is an invalid state.
376 assert(!Runs
.empty() && "There are no runs associated with the document!");
377 return *Runs
.back().getAsObject();
380 size_t SarifDocumentWriter::createRule(const SarifRule
&Rule
) {
381 size_t Ret
= CurrentRules
.size();
382 CurrentRules
.emplace_back(Rule
);
386 void SarifDocumentWriter::appendResult(const SarifResult
&Result
) {
387 size_t RuleIdx
= Result
.RuleIdx
;
388 assert(RuleIdx
< CurrentRules
.size() &&
389 "Trying to reference a rule that doesn't exist");
390 const SarifRule
&Rule
= CurrentRules
[RuleIdx
];
391 assert(Rule
.DefaultConfiguration
.Enabled
&&
392 "Cannot add a result referencing a disabled Rule");
393 json::Object Ret
{{"message", createMessage(Result
.DiagnosticMessage
)},
394 {"ruleIndex", static_cast<int64_t>(RuleIdx
)},
395 {"ruleId", Rule
.Id
}};
396 if (!Result
.Locations
.empty()) {
398 for (auto &Range
: Result
.Locations
) {
399 Locs
.emplace_back(createLocation(createPhysicalLocation(Range
)));
401 Ret
["locations"] = std::move(Locs
);
403 if (!Result
.ThreadFlows
.empty())
404 Ret
["codeFlows"] = json::Array
{createCodeFlow(Result
.ThreadFlows
)};
406 Ret
["level"] = resultLevelToStr(
407 Result
.LevelOverride
.value_or(Rule
.DefaultConfiguration
.Level
));
409 json::Object
&Run
= getCurrentRun();
410 json::Array
*Results
= Run
.getArray("results");
411 Results
->emplace_back(std::move(Ret
));
414 json::Object
SarifDocumentWriter::createDocument() {
415 // Flush all temporaries to their destinations if needed.
419 {"$schema", SchemaURI
},
420 {"version", SchemaVersion
},
423 Doc
["runs"] = json::Array(Runs
);