[docs] Add LICENSE.txt to the root of the mono-repo
[llvm-project.git] / clang / lib / Basic / Sarif.cpp
blob73be86b57df77db30eb675e752c5da793030621a
1 //===-- clang/Basic/Sarif.cpp - SarifDocumentWriter class definition ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the definition of the SarifDocumentWriter class, and
11 /// associated builders such as:
12 /// - \ref SarifArtifact
13 /// - \ref SarifArtifactLocation
14 /// - \ref SarifRule
15 /// - \ref SarifResult
16 //===----------------------------------------------------------------------===//
17 #include "clang/Basic/Sarif.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/StringMap.h"
23 #include "llvm/ADT/StringRef.h"
24 #include "llvm/Support/ConvertUTF.h"
25 #include "llvm/Support/JSON.h"
26 #include "llvm/Support/Path.h"
28 #include <string>
29 #include <utility>
31 using namespace clang;
32 using namespace llvm;
34 using clang::detail::SarifArtifact;
35 using clang::detail::SarifArtifactLocation;
37 static StringRef getFileName(const FileEntry &FE) {
38 StringRef Filename = FE.tryGetRealPathName();
39 if (Filename.empty())
40 Filename = FE.getName();
41 return Filename;
43 /// \name URI
44 /// @{
46 /// \internal
47 /// \brief
48 /// Return the RFC3986 encoding of the input character.
49 ///
50 /// \param C Character to encode to RFC3986.
51 ///
52 /// \return The RFC3986 representation of \c C.
53 static std::string percentEncodeURICharacter(char C) {
54 // RFC 3986 claims alpha, numeric, and this handful of
55 // characters are not reserved for the path component and
56 // should be written out directly. Otherwise, percent
57 // encode the character and write that out instead of the
58 // reserved character.
59 if (llvm::isAlnum(C) ||
60 StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
61 return std::string(&C, 1);
62 return "%" + llvm::toHex(StringRef(&C, 1));
65 /// \internal
66 /// \brief Return a URI representing the given file name.
67 ///
68 /// \param Filename The filename to be represented as URI.
69 ///
70 /// \return RFC3986 URI representing the input file name.
71 static std::string fileNameToURI(StringRef Filename) {
72 SmallString<32> Ret = StringRef("file://");
74 // Get the root name to see if it has a URI authority.
75 StringRef Root = sys::path::root_name(Filename);
76 if (Root.startswith("//")) {
77 // There is an authority, so add it to the URI.
78 Ret += Root.drop_front(2).str();
79 } else if (!Root.empty()) {
80 // There is no authority, so end the component and add the root to the URI.
81 Ret += Twine("/" + Root).str();
84 auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
85 assert(Iter != End && "Expected there to be a non-root path component.");
86 // Add the rest of the path components, encoding any reserved characters;
87 // we skip past the first path component, as it was handled it above.
88 std::for_each(++Iter, End, [&Ret](StringRef Component) {
89 // For reasons unknown to me, we may get a backslash with Windows native
90 // paths for the initial backslash following the drive component, which
91 // we need to ignore as a URI path part.
92 if (Component == "\\")
93 return;
95 // Add the separator between the previous path part and the one being
96 // currently processed.
97 Ret += "/";
99 // URI encode the part.
100 for (char C : Component) {
101 Ret += percentEncodeURICharacter(C);
105 return std::string(Ret);
107 /// @}
109 /// \brief Calculate the column position expressed in the number of UTF-8 code
110 /// points from column start to the source location
112 /// \param Loc The source location whose column needs to be calculated.
113 /// \param TokenLen Optional hint for when the token is multiple bytes long.
115 /// \return The column number as a UTF-8 aware byte offset from column start to
116 /// the effective source location.
117 static unsigned int adjustColumnPos(FullSourceLoc Loc,
118 unsigned int TokenLen = 0) {
119 assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
121 std::pair<FileID, unsigned> LocInfo = Loc.getDecomposedLoc();
122 Optional<MemoryBufferRef> Buf =
123 Loc.getManager().getBufferOrNone(LocInfo.first);
124 assert(Buf && "got an invalid buffer for the location's file");
125 assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
126 "token extends past end of buffer?");
128 // Adjust the offset to be the start of the line, since we'll be counting
129 // Unicode characters from there until our column offset.
130 unsigned int Off = LocInfo.second - (Loc.getExpansionColumnNumber() - 1);
131 unsigned int Ret = 1;
132 while (Off < (LocInfo.second + TokenLen)) {
133 Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
134 Ret++;
137 return Ret;
140 /// \name SARIF Utilities
141 /// @{
143 /// \internal
144 json::Object createMessage(StringRef Text) {
145 return json::Object{{"text", Text.str()}};
148 /// \internal
149 /// \pre CharSourceRange must be a token range
150 static json::Object createTextRegion(const SourceManager &SM,
151 const CharSourceRange &R) {
152 FullSourceLoc FirstTokenLoc{R.getBegin(), SM};
153 FullSourceLoc LastTokenLoc{R.getEnd(), SM};
154 json::Object Region{{"startLine", FirstTokenLoc.getExpansionLineNumber()},
155 {"startColumn", adjustColumnPos(FirstTokenLoc)},
156 {"endColumn", adjustColumnPos(LastTokenLoc)}};
157 if (FirstTokenLoc != LastTokenLoc) {
158 Region["endLine"] = LastTokenLoc.getExpansionLineNumber();
160 return Region;
163 static json::Object createLocation(json::Object &&PhysicalLocation,
164 StringRef Message = "") {
165 json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
166 if (!Message.empty())
167 Ret.insert({"message", createMessage(Message)});
168 return Ret;
171 static StringRef importanceToStr(ThreadFlowImportance I) {
172 switch (I) {
173 case ThreadFlowImportance::Important:
174 return "important";
175 case ThreadFlowImportance::Essential:
176 return "essential";
177 case ThreadFlowImportance::Unimportant:
178 return "unimportant";
180 llvm_unreachable("Fully covered switch is not so fully covered");
183 static StringRef resultLevelToStr(SarifResultLevel R) {
184 switch (R) {
185 case SarifResultLevel::None:
186 return "none";
187 case SarifResultLevel::Note:
188 return "note";
189 case SarifResultLevel::Warning:
190 return "warning";
191 case SarifResultLevel::Error:
192 return "error";
194 llvm_unreachable("Potentially un-handled SarifResultLevel. "
195 "Is the switch not fully covered?");
198 static json::Object
199 createThreadFlowLocation(json::Object &&Location,
200 const ThreadFlowImportance &Importance) {
201 return json::Object{{"location", std::move(Location)},
202 {"importance", importanceToStr(Importance)}};
204 /// @}
206 json::Object
207 SarifDocumentWriter::createPhysicalLocation(const CharSourceRange &R) {
208 assert(R.isValid() &&
209 "Cannot create a physicalLocation from invalid SourceRange!");
210 assert(R.isCharRange() &&
211 "Cannot create a physicalLocation from a token range!");
212 FullSourceLoc Start{R.getBegin(), SourceMgr};
213 const FileEntry *FE = Start.getExpansionLoc().getFileEntry();
214 assert(FE != nullptr && "Diagnostic does not exist within a valid file!");
216 const std::string &FileURI = fileNameToURI(getFileName(*FE));
217 auto I = CurrentArtifacts.find(FileURI);
219 if (I == CurrentArtifacts.end()) {
220 uint32_t Idx = static_cast<uint32_t>(CurrentArtifacts.size());
221 const SarifArtifactLocation &Location =
222 SarifArtifactLocation::create(FileURI).setIndex(Idx);
223 const SarifArtifact &Artifact = SarifArtifact::create(Location)
224 .setRoles({"resultFile"})
225 .setLength(FE->getSize())
226 .setMimeType("text/plain");
227 auto StatusIter = CurrentArtifacts.insert({FileURI, Artifact});
228 // If inserted, ensure the original iterator points to the newly inserted
229 // element, so it can be used downstream.
230 if (StatusIter.second)
231 I = StatusIter.first;
233 assert(I != CurrentArtifacts.end() && "Failed to insert new artifact");
234 const SarifArtifactLocation &Location = I->second.Location;
235 uint32_t Idx = Location.Index.value();
236 return json::Object{{{"artifactLocation", json::Object{{{"index", Idx}}}},
237 {"region", createTextRegion(SourceMgr, R)}}};
240 json::Object &SarifDocumentWriter::getCurrentTool() {
241 assert(!Closed && "SARIF Document is closed. "
242 "Need to call createRun() before using getcurrentTool!");
244 // Since Closed = false here, expect there to be at least 1 Run, anything
245 // else is an invalid state.
246 assert(!Runs.empty() && "There are no runs associated with the document!");
248 return *Runs.back().getAsObject()->get("tool")->getAsObject();
251 void SarifDocumentWriter::reset() {
252 CurrentRules.clear();
253 CurrentArtifacts.clear();
256 void SarifDocumentWriter::endRun() {
257 // Exit early if trying to close a closed Document.
258 if (Closed) {
259 reset();
260 return;
263 // Since Closed = false here, expect there to be at least 1 Run, anything
264 // else is an invalid state.
265 assert(!Runs.empty() && "There are no runs associated with the document!");
267 // Flush all the rules.
268 json::Object &Tool = getCurrentTool();
269 json::Array Rules;
270 for (const SarifRule &R : CurrentRules) {
271 json::Object Config{
272 {"enabled", R.DefaultConfiguration.Enabled},
273 {"level", resultLevelToStr(R.DefaultConfiguration.Level)},
274 {"rank", R.DefaultConfiguration.Rank}};
275 json::Object Rule{
276 {"name", R.Name},
277 {"id", R.Id},
278 {"fullDescription", json::Object{{"text", R.Description}}},
279 {"defaultConfiguration", std::move(Config)}};
280 if (!R.HelpURI.empty())
281 Rule["helpUri"] = R.HelpURI;
282 Rules.emplace_back(std::move(Rule));
284 json::Object &Driver = *Tool.getObject("driver");
285 Driver["rules"] = std::move(Rules);
287 // Flush all the artifacts.
288 json::Object &Run = getCurrentRun();
289 json::Array *Artifacts = Run.getArray("artifacts");
290 for (const auto &Pair : CurrentArtifacts) {
291 const SarifArtifact &A = Pair.getValue();
292 json::Object Loc{{"uri", A.Location.URI}};
293 if (A.Location.Index.has_value()) {
294 Loc["index"] = static_cast<int64_t>(A.Location.Index.value());
296 json::Object Artifact;
297 Artifact["location"] = std::move(Loc);
298 if (A.Length.has_value())
299 Artifact["length"] = static_cast<int64_t>(A.Length.value());
300 if (!A.Roles.empty())
301 Artifact["roles"] = json::Array(A.Roles);
302 if (!A.MimeType.empty())
303 Artifact["mimeType"] = A.MimeType;
304 if (A.Offset.has_value())
305 Artifact["offset"] = A.Offset;
306 Artifacts->push_back(json::Value(std::move(Artifact)));
309 // Clear, reset temporaries before next run.
310 reset();
312 // Mark the document as closed.
313 Closed = true;
316 json::Array
317 SarifDocumentWriter::createThreadFlows(ArrayRef<ThreadFlow> ThreadFlows) {
318 json::Object Ret{{"locations", json::Array{}}};
319 json::Array Locs;
320 for (const auto &ThreadFlow : ThreadFlows) {
321 json::Object PLoc = createPhysicalLocation(ThreadFlow.Range);
322 json::Object Loc = createLocation(std::move(PLoc), ThreadFlow.Message);
323 Locs.emplace_back(
324 createThreadFlowLocation(std::move(Loc), ThreadFlow.Importance));
326 Ret["locations"] = std::move(Locs);
327 return json::Array{std::move(Ret)};
330 json::Object
331 SarifDocumentWriter::createCodeFlow(ArrayRef<ThreadFlow> ThreadFlows) {
332 return json::Object{{"threadFlows", createThreadFlows(ThreadFlows)}};
335 void SarifDocumentWriter::createRun(StringRef ShortToolName,
336 StringRef LongToolName,
337 StringRef ToolVersion) {
338 // Clear resources associated with a previous run.
339 endRun();
341 // Signify a new run has begun.
342 Closed = false;
344 json::Object Tool{
345 {"driver",
346 json::Object{{"name", ShortToolName},
347 {"fullName", LongToolName},
348 {"language", "en-US"},
349 {"version", ToolVersion},
350 {"informationUri",
351 "https://clang.llvm.org/docs/UsersManual.html"}}}};
352 json::Object TheRun{{"tool", std::move(Tool)},
353 {"results", {}},
354 {"artifacts", {}},
355 {"columnKind", "unicodeCodePoints"}};
356 Runs.emplace_back(std::move(TheRun));
359 json::Object &SarifDocumentWriter::getCurrentRun() {
360 assert(!Closed &&
361 "SARIF Document is closed. "
362 "Can only getCurrentRun() if document is opened via createRun(), "
363 "create a run first");
365 // Since Closed = false here, expect there to be at least 1 Run, anything
366 // else is an invalid state.
367 assert(!Runs.empty() && "There are no runs associated with the document!");
368 return *Runs.back().getAsObject();
371 size_t SarifDocumentWriter::createRule(const SarifRule &Rule) {
372 size_t Ret = CurrentRules.size();
373 CurrentRules.emplace_back(Rule);
374 return Ret;
377 void SarifDocumentWriter::appendResult(const SarifResult &Result) {
378 size_t RuleIdx = Result.RuleIdx;
379 assert(RuleIdx < CurrentRules.size() &&
380 "Trying to reference a rule that doesn't exist");
381 const SarifRule &Rule = CurrentRules[RuleIdx];
382 assert(Rule.DefaultConfiguration.Enabled &&
383 "Cannot add a result referencing a disabled Rule");
384 json::Object Ret{{"message", createMessage(Result.DiagnosticMessage)},
385 {"ruleIndex", static_cast<int64_t>(RuleIdx)},
386 {"ruleId", Rule.Id}};
387 if (!Result.Locations.empty()) {
388 json::Array Locs;
389 for (auto &Range : Result.Locations) {
390 Locs.emplace_back(createLocation(createPhysicalLocation(Range)));
392 Ret["locations"] = std::move(Locs);
394 if (!Result.ThreadFlows.empty())
395 Ret["codeFlows"] = json::Array{createCodeFlow(Result.ThreadFlows)};
397 Ret["level"] = resultLevelToStr(
398 Result.LevelOverride.value_or(Rule.DefaultConfiguration.Level));
400 json::Object &Run = getCurrentRun();
401 json::Array *Results = Run.getArray("results");
402 Results->emplace_back(std::move(Ret));
405 json::Object SarifDocumentWriter::createDocument() {
406 // Flush all temporaries to their destinations if needed.
407 endRun();
409 json::Object Doc{
410 {"$schema", SchemaURI},
411 {"version", SchemaVersion},
413 if (!Runs.empty())
414 Doc["runs"] = json::Array(Runs);
415 return Doc;