[clang-tools-extra] Fix a link in ReleaseNotes.rst
[llvm-project.git] / clang-tools-extra / clangd / SourceCode.h
blobfaed27d7c8c4ece0820673c8ed1bc4e6dbbd0483
1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Various code that examines C++ source code without using heavy AST machinery
10 // (and often not even the lexer). To be used sparingly!
12 //===----------------------------------------------------------------------===//
13 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
14 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H
16 #include "Protocol.h"
17 #include "support/Context.h"
18 #include "support/ThreadsafeFS.h"
19 #include "clang/Basic/CharInfo.h"
20 #include "clang/Basic/Diagnostic.h"
21 #include "clang/Basic/LangOptions.h"
22 #include "clang/Basic/SourceLocation.h"
23 #include "clang/Basic/SourceManager.h"
24 #include "clang/Format/Format.h"
25 #include "clang/Lex/HeaderSearch.h"
26 #include "clang/Tooling/Core/Replacement.h"
27 #include "clang/Tooling/Syntax/Tokens.h"
28 #include "llvm/ADT/StringRef.h"
29 #include "llvm/ADT/StringSet.h"
30 #include "llvm/Support/Error.h"
31 #include <string>
33 namespace clang {
34 class SourceManager;
36 namespace clangd {
38 // We tend to generate digests of source code in a lot of different places.
39 // This represents the type for those digests to prevent us hard-coding details
40 // of the hashing function at every place that needs to store this information.
41 using FileDigest = std::array<uint8_t, 8>;
42 FileDigest digest(StringRef Content);
43 Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
45 // This context variable controls the behavior of functions in this file
46 // that convert between LSP offsets and native clang byte offsets.
47 // If not set, defaults to UTF-16 for backwards-compatibility.
48 extern Key<OffsetEncoding> kCurrentOffsetEncoding;
50 // Counts the number of UTF-16 code units needed to represent a string (LSP
51 // specifies string lengths in UTF-16 code units).
52 // Use of UTF-16 may be overridden by kCurrentOffsetEncoding.
53 size_t lspLength(StringRef Code);
55 /// Turn a [line, column] pair into an offset in Code.
56 ///
57 /// If P.character exceeds the line length, returns the offset at end-of-line.
58 /// (If !AllowColumnsBeyondLineLength, then returns an error instead).
59 /// If the line number is out of range, returns an error.
60 ///
61 /// The returned value is in the range [0, Code.size()].
62 llvm::Expected<size_t>
63 positionToOffset(llvm::StringRef Code, Position P,
64 bool AllowColumnsBeyondLineLength = true);
66 /// Turn an offset in Code into a [line, column] pair.
67 /// The offset must be in range [0, Code.size()].
68 Position offsetToPosition(llvm::StringRef Code, size_t Offset);
70 /// Turn a SourceLocation into a [line, column] pair.
71 /// FIXME: This should return an error if the location is invalid.
72 Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc);
74 /// Return the file location corresponding to \p P. Note that one should take
75 /// care to avoid comparing the result with expansion locations.
76 llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
77 Position P);
79 /// Returns true iff \p Loc is inside the main file. This function handles
80 /// file & macro locations. For macro locations, returns true iff the macro is
81 /// being expanded inside the main file.
82 ///
83 /// The function is usually used to check whether a declaration is inside
84 /// the main file.
85 bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM);
87 /// Returns the #include location through which IncludedFile was loaded.
88 /// Where SM.getIncludeLoc() returns the location of the *filename*, which may
89 /// be in a macro, includeHashLoc() returns the location of the #.
90 SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM);
92 /// Returns true if the token at Loc is spelled in the source code.
93 /// This is not the case for:
94 /// * symbols formed via macro concatenation, the spelling location will
95 /// be "<scratch space>"
96 /// * symbols controlled and defined by a compile command-line option
97 /// `-DName=foo`, the spelling location will be "<command line>".
98 bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM);
100 /// Turns a token range into a half-open range and checks its correctness.
101 /// The resulting range will have only valid source locations on both sides,
102 /// both of which are file locations.
104 /// File locations always point to a particular offset in a file, i.e. they
105 /// never refer to a location inside a macro expansion. Turning locations from
106 /// macro expansions into file locations is ambiguous - one can use
107 /// SourceManager::{getExpansion|getFile|getSpelling}Loc. This function
108 /// calls SourceManager::getFileLoc on both ends of \p R to do the conversion.
110 /// User input (e.g. cursor position) is expressed as a file location, so this
111 /// function can be viewed as a way to normalize the ranges used in the clang
112 /// AST so that they are comparable with ranges coming from the user input.
113 llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &Mgr,
114 const LangOptions &LangOpts,
115 SourceRange R);
117 /// Returns true iff all of the following conditions hold:
118 /// - start and end locations are valid,
119 /// - start and end locations are file locations from the same file
120 /// (i.e. expansion locations are not taken into account).
121 /// - start offset <= end offset.
122 /// FIXME: introduce a type for source range with this invariant.
123 bool isValidFileRange(const SourceManager &Mgr, SourceRange R);
125 /// Returns the source code covered by the source range.
126 /// EXPECTS: isValidFileRange(R) == true.
127 llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R);
129 // Converts a half-open clang source range to an LSP range.
130 // Note that clang also uses closed source ranges, which this can't handle!
131 Range halfOpenToRange(const SourceManager &SM, CharSourceRange R);
133 // Expand range `A` to also contain `B`.
134 void unionRanges(Range &A, Range B);
136 // Converts an offset to a clang line/column (1-based, columns are bytes).
137 // The offset must be in range [0, Code.size()].
138 // Prefer to use SourceManager if one is available.
139 std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
140 size_t Offset);
142 /// From "a::b::c", return {"a::b::", "c"}. Scope is empty if there's no
143 /// qualifier.
144 std::pair<llvm::StringRef, llvm::StringRef>
145 splitQualifiedName(llvm::StringRef QName);
147 TextEdit replacementToEdit(StringRef Code, const tooling::Replacement &R);
149 std::vector<TextEdit> replacementsToEdits(StringRef Code,
150 const tooling::Replacements &Repls);
152 TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
153 const LangOptions &L);
155 /// Get the canonical path of \p F. This means:
157 /// - Absolute path
158 /// - Symlinks resolved
159 /// - No "." or ".." component
160 /// - No duplicate or trailing directory separator
162 /// This function should be used when paths need to be used outside the
163 /// component that generates them, so that paths are normalized as much as
164 /// possible.
165 llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
166 const SourceManager &SourceMgr);
168 /// Choose the clang-format style we should apply to a certain file.
169 /// This will usually use FS to look for .clang-format files.
170 /// FIXME: should we be caching the .clang-format file search?
171 /// This uses format::DefaultFormatStyle and format::DefaultFallbackStyle,
172 /// though the latter may have been overridden in main()!
173 format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
174 llvm::StringRef Content,
175 const ThreadsafeFS &TFS);
177 /// Cleanup and format the given replacements.
178 llvm::Expected<tooling::Replacements>
179 cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
180 const format::FormatStyle &Style);
182 /// A set of edits generated for a single file. Can verify whether it is safe to
183 /// apply these edits to a code block.
184 struct Edit {
185 tooling::Replacements Replacements;
186 std::string InitialCode;
188 Edit() = default;
190 Edit(llvm::StringRef Code, tooling::Replacements Reps)
191 : Replacements(std::move(Reps)), InitialCode(Code) {}
193 /// Returns the file contents after changes are applied.
194 llvm::Expected<std::string> apply() const;
196 /// Represents Replacements as TextEdits that are available for use in LSP.
197 std::vector<TextEdit> asTextEdits() const;
199 /// Checks whether the Replacements are applicable to given Code.
200 bool canApplyTo(llvm::StringRef Code) const;
202 /// A mapping from absolute file path (the one used for accessing the underlying
203 /// VFS) to edits.
204 using FileEdits = llvm::StringMap<Edit>;
206 /// Formats the edits and the code around them according to Style. On success,
207 /// changes Replacements to the formatted ones.
208 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style);
210 /// Apply an incremental update to a text document.
211 llvm::Error applyChange(std::string &Contents,
212 const TextDocumentContentChangeEvent &Change);
214 /// Collects identifiers with counts in the source code.
215 llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
216 const format::FormatStyle &Style);
218 /// Collects all ranges of the given identifier in the source code.
219 std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
220 llvm::StringRef Content,
221 const LangOptions &LangOpts);
223 /// Collects words from the source code.
224 /// Unlike collectIdentifiers:
225 /// - also finds text in comments:
226 /// - splits text into words
227 /// - drops stopwords like "get" and "for"
228 llvm::StringSet<> collectWords(llvm::StringRef Content);
230 // Something that looks like a word in the source code.
231 // Could be a "real" token that's "live" in the AST, a spelled token consumed by
232 // the preprocessor, or part of a spelled token (e.g. word in a comment).
233 struct SpelledWord {
234 // (Spelling) location of the start of the word.
235 SourceLocation Location;
236 // The range of the word itself, excluding any quotes.
237 // This is a subrange of the file buffer.
238 llvm::StringRef Text;
239 // Whether this word is likely to refer to an identifier. True if:
240 // - the word is a spelled identifier token
241 // - Text is identifier-like (e.g. "foo_bar")
242 // - Text is surrounded by backticks (e.g. Foo in "// returns `Foo`")
243 bool LikelyIdentifier = false;
244 // Set if the word is contained in a token spelled in the file.
245 // (This should always be true, but comments aren't retained by TokenBuffer).
246 const syntax::Token *PartOfSpelledToken = nullptr;
247 // Set if the word is exactly a token spelled in the file.
248 const syntax::Token *SpelledToken = nullptr;
249 // Set if the word is a token spelled in the file, and that token survives
250 // preprocessing to emit an expanded token spelled the same way.
251 const syntax::Token *ExpandedToken = nullptr;
253 // Find the unique word that contains SpelledLoc or starts/ends there.
254 static llvm::Optional<SpelledWord> touching(SourceLocation SpelledLoc,
255 const syntax::TokenBuffer &TB,
256 const LangOptions &LangOpts);
259 /// Return true if the \p TokenName is in the list of reserved keywords of the
260 /// language.
261 bool isKeyword(llvm::StringRef TokenName, const LangOptions &LangOpts);
263 /// Heuristically determine namespaces visible at a point, without parsing Code.
264 /// This considers using-directives and enclosing namespace-declarations that
265 /// are visible (and not obfuscated) in the file itself (not headers).
266 /// Code should be truncated at the point of interest.
268 /// The returned vector is always non-empty.
269 /// - The first element is the namespace that encloses the point: a declaration
270 /// near the point would be within this namespace.
271 /// - The elements are the namespaces in scope at the point: an unqualified
272 /// lookup would search within these namespaces.
274 /// Using directives are resolved against all enclosing scopes, but no other
275 /// namespace directives.
277 /// example:
278 /// using namespace a;
279 /// namespace foo {
280 /// using namespace b;
282 /// visibleNamespaces are {"foo::", "", "a::", "b::", "foo::b::"}, not "a::b::".
283 std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
284 const LangOptions &LangOpts);
286 /// Represents locations that can accept a definition.
287 struct EligibleRegion {
288 /// Namespace that owns all of the EligiblePoints, e.g.
289 /// namespace a{ namespace b {^ void foo();^} }
290 /// It will be "a::b" for both caret locations.
291 std::string EnclosingNamespace;
292 /// Positions in the code marking eligible points to insert a function
293 /// definition.
294 std::vector<Position> EligiblePoints;
297 /// Returns the most eligible region to insert a definition for \p
298 /// FullyQualifiedName in the \p Code.
299 /// Pseudo-parses \p Code under the hood to determine namespace decls and
300 /// possible insertion points. Chooses the region that matches the longest
301 /// prefix of \p FullyQualifiedName. Returns EOF if there are no shared
302 /// namespaces. \p FullyQualifiedName should not contain anonymous namespaces.
303 EligibleRegion getEligiblePoints(llvm::StringRef Code,
304 llvm::StringRef FullyQualifiedName,
305 const LangOptions &LangOpts);
307 struct DefinedMacro {
308 llvm::StringRef Name;
309 const MacroInfo *Info;
310 /// Location of the identifier that names the macro.
311 /// Unlike Info->Location, this translates preamble-patch locations to
312 /// main-file locations.
313 SourceLocation NameLoc;
315 /// Gets the macro referenced by \p SpelledTok. It must be a spelled token
316 /// aligned to the beginning of an identifier.
317 llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
318 Preprocessor &PP);
320 /// Infers whether this is a header from the FileName and LangOpts (if
321 /// present).
322 bool isHeaderFile(llvm::StringRef FileName,
323 llvm::Optional<LangOptions> LangOpts = llvm::None);
325 /// Returns true if the given location is in a generated protobuf file.
326 bool isProtoFile(SourceLocation Loc, const SourceManager &SourceMgr);
328 /// This scans source code, and should not be called when using a preamble.
329 /// Prefer to access the cache in IncludeStructure::isSelfContained if you can.
330 bool isSelfContainedHeader(const FileEntry *FE, FileID ID,
331 const SourceManager &SM, HeaderSearch &HeaderInfo);
333 /// Returns true if Name is reserved, like _Foo or __Vector_base.
334 inline bool isReservedName(llvm::StringRef Name) {
335 // Heuristic, not exhaustive: '_' followed by an isUppercase() char or '_'.
336 return Name.size() >= 2 && Name[0] == '_' &&
337 (isUppercase(Name[1]) || Name[1] == '_');
340 } // namespace clangd
341 } // namespace clang
342 #endif