1 //===-- SpecialCaseList.h - special case list for sanitizers ----*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //===----------------------------------------------------------------------===//
8 // This is a utility class used to parse user-provided text files with
9 // "special case lists" for code sanitizers. Such files are used to
10 // define an "ABI list" for DataFlowSanitizer and blacklists for sanitizers
11 // like AddressSanitizer or UndefinedBehaviorSanitizer.
13 // Empty lines and lines starting with "#" are ignored. Sections are defined
14 // using a '[section_name]' header and can be used to specify sanitizers the
15 // entries below it apply to. Section names are regular expressions, and
16 // entries without a section header match all sections (e.g. an '[*]' header is assumed.)
18 // The remaining lines should have the form:
19 // prefix:wildcard_expression[=category]
20 // If category is not specified, it is assumed to be empty string.
21 // Definitions of "prefix" and "category" are sanitizer-specific. For example,
22 // sanitizer blacklists support prefixes "src", "fun" and "global".
23 // Wildcard expressions define, respectively, source files, functions or
24 // globals which shouldn't be instrumented.
25 // Examples of categories:
26 // "functional": used in DFSan to list functions with pure functional semantics.
28 // "init": used in ASan blacklist to disable initialization-order bugs
29 // detection for certain globals or source files.
30 // Full special case list file example:
33 // # Blacklisted items:
34 // fun:*_ZN4base6subtle*
35 // global:*global_with_bad_access_or_initialization*
36 // global:*global_with_initialization_issues*=init
37 // type:*Namespace::ClassName*=init
38 // src:file_with_tricky_code.cc
39 // src:ignore-global-initializers-issues.cc=init
42 // # Functions with pure functional semantics:
46 // Note that the wild card is in fact an llvm::Regex, but * is automatically replaced with .*
49 //===----------------------------------------------------------------------===//
51 #ifndef LLVM_SUPPORT_SPECIALCASELIST_H
52 #define LLVM_SUPPORT_SPECIALCASELIST_H
54 #include "llvm/ADT/StringMap.h"
55 #include "llvm/ADT/StringSet.h"
56 #include "llvm/Support/Regex.h"
57 #include "llvm/Support/TrigramIndex.h"
/// Parser and query interface for sanitizer "special case list" files.
///
/// Instances are obtained through the static create*() factories below; copy
/// construction and copy assignment are deleted.
///
/// NOTE(review): this copy of the declaration has lost lines. Several
/// declarations stop at a trailing ',', the headers of the nested Matcher and
/// Section types are absent, and no closing '};' is visible. Restore the text
/// from the upstream header before compiling against it.
66 class SpecialCaseList
{
68 /// Parses the special case list entries from the files named in Paths.
69 /// On failure, returns a null pointer (0) and writes an error message to Error.
70 static std::unique_ptr
<SpecialCaseList
>
71 create(const std::vector
<std::string
> &Paths
, std::string
&Error
);
72 /// Parses the special case list from the memory buffer MB. On failure, returns
73 /// a null pointer (0) and writes an error message to a string.
74 static std::unique_ptr
<SpecialCaseList
> create(const MemoryBuffer
*MB
,
// NOTE(review): the declaration above is cut off after 'MB,'; the remaining
// parameter(s) and the closing ');' are missing here. Presumably it ends with
// 'std::string &Error' like the overload above — confirm.
76 /// Parses the special case list entries from files. On failure, reports a
/// NOTE(review): the sentence above is cut off. Judging by the function name,
/// failure is presumably reported as a fatal error instead of being returned
/// to the caller — confirm.
78 static std::unique_ptr
<SpecialCaseList
>
79 createOrDie(const std::vector
<std::string
> &Paths
);
83 /// Returns true, if special case list contains a line
85 /// @Prefix:<E>=@Category
87 /// where @Query satisfies wildcard expression <E> in a given @Section.
/// Category defaults to an empty StringRef when the caller omits it.
88 bool inSection(StringRef Section
, StringRef Prefix
, StringRef Query
,
89 StringRef Category
= StringRef()) const;
91 /// Returns the line number corresponding to the special case list entry if
92 /// the special case list contains a line
94 /// @Prefix:<E>=@Category
96 /// where @Query satisfies wildcard expression <E> in a given @Section.
97 /// Returns zero if there is no such entry.
/// Category defaults to an empty StringRef when the caller omits it.
99 unsigned inSectionBlame(StringRef Section
, StringRef Prefix
, StringRef Query
,
100 StringRef Category
= StringRef()) const;
103 // Implementations of the create*() functions that can also be used by derived
// classes. Both overloads return bool.
// NOTE(review): presumably true means success — confirm in the implementation.
105 bool createInternal(const std::vector
<std::string
> &Paths
,
// NOTE(review): the declaration above is cut off after 'Paths,'; the remaining
// parameter(s) and the closing ');' are missing here.
107 bool createInternal(const MemoryBuffer
*MB
, std::string
&Error
);
// Default construction is allowed; copying is disabled.
109 SpecialCaseList() = default;
110 SpecialCaseList(SpecialCaseList
const &) = delete;
111 SpecialCaseList
&operator=(SpecialCaseList
const &) = delete;
113 /// Represents a set of regular expressions. Regular expressions which are
114 /// "literal" (i.e. no regex metacharacters) are stored in Strings. The
115 /// reason for doing so is efficiency; StringMap is much faster at matching
116 /// literal strings than Regex.
// NOTE(review): the header line of the type described above (referred to as
// 'Matcher' further down) is missing here; insert() through RegExes below
// appear to be its members.
// Adds the pattern Regexp together with its LineNumber.
// NOTE(review): presumably returns false and fills REError when the pattern
// is rejected — confirm in the implementation.
119 bool insert(std::string Regexp
, unsigned LineNumber
, std::string
&REError
);
120 // Returns the line number in the source file that this query matches to.
121 // Returns zero if no match is found.
122 unsigned match(StringRef Query
) const;
// Literal patterns, each mapped to an unsigned value.
// NOTE(review): presumably that value is the line number reported by match().
125 StringMap
<unsigned> Strings
;
// NOTE(review): the role of this index is not stated in this header; see
// llvm/Support/TrigramIndex.h.
126 TrigramIndex Trigrams
;
// Compiled regular expressions, each paired with an unsigned value.
// NOTE(review): presumably the line number, as for Strings.
127 std::vector
<std::pair
<std::unique_ptr
<Regex
>, unsigned>> RegExes
;
// Two-level string-keyed map whose leaves are Matcher objects.
// NOTE(review): the meaning of the two key levels (presumably prefix, then
// category) is not stated here — confirm in the implementation.
130 using SectionEntries
= StringMap
<StringMap
<Matcher
>>;
// NOTE(review): the header line of the 'Section' type is missing here; the
// constructor and the two members below appear to belong to it.
// Takes ownership of M by moving it into SectionMatcher.
133 Section(std::unique_ptr
<Matcher
> M
) : SectionMatcher(std::move(M
)){};
// Matcher owned by this section.
// NOTE(review): presumably it matches the section name — confirm.
135 std::unique_ptr
<Matcher
> SectionMatcher
;
// Entries that belong to this section.
136 SectionEntries Entries
;
// All sections held by this list.
139 std::vector
<Section
> Sections
;
141 /// Parses just-constructed SpecialCaseList entries from a memory buffer.
142 bool parse(const MemoryBuffer
*MB
, StringMap
<size_t> &SectionsMap
,
// NOTE(review): the declaration above is cut off after 'SectionsMap,'; the
// remaining parameter(s) and the closing ');' are missing here.
145 // Helper method for derived classes to search by Prefix, Query, and Category
146 // once they have already resolved a section entry.
// Unlike the public overload, Category has no default value here.
147 unsigned inSectionBlame(const SectionEntries
&Entries
, StringRef Prefix
,
148 StringRef Query
, StringRef Category
) const;
153 #endif // LLVM_SUPPORT_SPECIALCASELIST_H