1 //===-- StringRef.cpp - Lightweight String References ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 #include "llvm/ADT/StringRef.h"
11 #include "llvm/ADT/APFloat.h"
12 #include "llvm/ADT/APInt.h"
13 #include "llvm/ADT/Hashing.h"
14 #include "llvm/ADT/StringExtras.h"
15 #include "llvm/ADT/edit_distance.h"
20 // MSVC emits references to this into the translation units which reference it.
22 const size_t StringRef::npos
;
25 // strncasecmp() is not available on non-POSIX systems, so define an
26 // alternative function here.
27 static int ascii_strncasecmp(const char *LHS
, const char *RHS
, size_t Length
) {
28 for (size_t I
= 0; I
< Length
; ++I
) {
29 unsigned char LHC
= toLower(LHS
[I
]);
30 unsigned char RHC
= toLower(RHS
[I
]);
32 return LHC
< RHC
? -1 : 1;
37 /// compare_lower - Compare strings, ignoring case.
38 int StringRef::compare_lower(StringRef RHS
) const {
39 if (int Res
= ascii_strncasecmp(Data
, RHS
.Data
, std::min(Length
, RHS
.Length
)))
41 if (Length
== RHS
.Length
)
43 return Length
< RHS
.Length
? -1 : 1;
46 /// Check if this string starts with the given \p Prefix, ignoring case.
47 bool StringRef::startswith_lower(StringRef Prefix
) const {
48 return Length
>= Prefix
.Length
&&
49 ascii_strncasecmp(Data
, Prefix
.Data
, Prefix
.Length
) == 0;
52 /// Check if this string ends with the given \p Suffix, ignoring case.
53 bool StringRef::endswith_lower(StringRef Suffix
) const {
54 return Length
>= Suffix
.Length
&&
55 ascii_strncasecmp(end() - Suffix
.Length
, Suffix
.Data
, Suffix
.Length
) == 0;
58 size_t StringRef::find_lower(char C
, size_t From
) const {
60 return find_if([L
](char D
) { return toLower(D
) == L
; }, From
);
63 /// compare_numeric - Compare strings, handle embedded numbers.
64 int StringRef::compare_numeric(StringRef RHS
) const {
65 for (size_t I
= 0, E
= std::min(Length
, RHS
.Length
); I
!= E
; ++I
) {
66 // Check for sequences of digits.
67 if (isDigit(Data
[I
]) && isDigit(RHS
.Data
[I
])) {
68 // The longer sequence of numbers is considered larger.
69 // This doesn't really handle prefixed zeros well.
71 for (J
= I
+ 1; J
!= E
+ 1; ++J
) {
72 bool ld
= J
< Length
&& isDigit(Data
[J
]);
73 bool rd
= J
< RHS
.Length
&& isDigit(RHS
.Data
[J
]);
79 // The two number sequences have the same length (J-I), just memcmp them.
80 if (int Res
= compareMemory(Data
+ I
, RHS
.Data
+ I
, J
- I
))
81 return Res
< 0 ? -1 : 1;
82 // Identical number sequences, continue search after the numbers.
86 if (Data
[I
] != RHS
.Data
[I
])
87 return (unsigned char)Data
[I
] < (unsigned char)RHS
.Data
[I
] ? -1 : 1;
89 if (Length
== RHS
.Length
)
91 return Length
< RHS
.Length
? -1 : 1;
94 // Compute the edit distance between the two given strings.
95 unsigned StringRef::edit_distance(llvm::StringRef Other
,
96 bool AllowReplacements
,
97 unsigned MaxEditDistance
) const {
98 return llvm::ComputeEditDistance(
99 makeArrayRef(data(), size()),
100 makeArrayRef(Other
.data(), Other
.size()),
101 AllowReplacements
, MaxEditDistance
);
104 //===----------------------------------------------------------------------===//
106 //===----------------------------------------------------------------------===//
108 std::string
StringRef::lower() const {
109 std::string
Result(size(), char());
110 for (size_type i
= 0, e
= size(); i
!= e
; ++i
) {
111 Result
[i
] = toLower(Data
[i
]);
116 std::string
StringRef::upper() const {
117 std::string
Result(size(), char());
118 for (size_type i
= 0, e
= size(); i
!= e
; ++i
) {
119 Result
[i
] = toUpper(Data
[i
]);
124 //===----------------------------------------------------------------------===//
126 //===----------------------------------------------------------------------===//
129 /// find - Search for the first string \arg Str in the string.
131 /// \return - The index of the first occurrence of \arg Str, or npos if not
133 size_t StringRef::find(StringRef Str
, size_t From
) const {
137 const char *Start
= Data
+ From
;
138 size_t Size
= Length
- From
;
140 const char *Needle
= Str
.data();
141 size_t N
= Str
.size();
147 const char *Ptr
= (const char *)::memchr(Start
, Needle
[0], Size
);
148 return Ptr
== nullptr ? npos
: Ptr
- Data
;
151 const char *Stop
= Start
+ (Size
- N
+ 1);
153 // For short haystacks or unsupported needles fall back to the naive algorithm
154 if (Size
< 16 || N
> 255) {
156 if (std::memcmp(Start
, Needle
, N
) == 0)
159 } while (Start
< Stop
);
163 // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
164 uint8_t BadCharSkip
[256];
165 std::memset(BadCharSkip
, N
, 256);
166 for (unsigned i
= 0; i
!= N
-1; ++i
)
167 BadCharSkip
[(uint8_t)Str
[i
]] = N
-1-i
;
170 uint8_t Last
= Start
[N
- 1];
171 if (LLVM_UNLIKELY(Last
== (uint8_t)Needle
[N
- 1]))
172 if (std::memcmp(Start
, Needle
, N
- 1) == 0)
175 // Otherwise skip the appropriate number of bytes.
176 Start
+= BadCharSkip
[Last
];
177 } while (Start
< Stop
);
182 size_t StringRef::find_lower(StringRef Str
, size_t From
) const {
183 StringRef This
= substr(From
);
184 while (This
.size() >= Str
.size()) {
185 if (This
.startswith_lower(Str
))
187 This
= This
.drop_front();
193 size_t StringRef::rfind_lower(char C
, size_t From
) const {
194 From
= std::min(From
, Length
);
198 if (toLower(Data
[i
]) == toLower(C
))
204 /// rfind - Search for the last string \arg Str in the string.
206 /// \return - The index of the last occurrence of \arg Str, or npos if not
208 size_t StringRef::rfind(StringRef Str
) const {
209 size_t N
= Str
.size();
212 for (size_t i
= Length
- N
+ 1, e
= 0; i
!= e
;) {
214 if (substr(i
, N
).equals(Str
))
220 size_t StringRef::rfind_lower(StringRef Str
) const {
221 size_t N
= Str
.size();
224 for (size_t i
= Length
- N
+ 1, e
= 0; i
!= e
;) {
226 if (substr(i
, N
).equals_lower(Str
))
232 /// find_first_of - Find the first character in the string that is in \arg
233 /// Chars, or npos if not found.
235 /// Note: O(size() + Chars.size())
236 StringRef::size_type
StringRef::find_first_of(StringRef Chars
,
238 std::bitset
<1 << CHAR_BIT
> CharBits
;
239 for (size_type i
= 0; i
!= Chars
.size(); ++i
)
240 CharBits
.set((unsigned char)Chars
[i
]);
242 for (size_type i
= std::min(From
, Length
), e
= Length
; i
!= e
; ++i
)
243 if (CharBits
.test((unsigned char)Data
[i
]))
248 /// find_first_not_of - Find the first character in the string that is not
249 /// \arg C or npos if not found.
250 StringRef::size_type
StringRef::find_first_not_of(char C
, size_t From
) const {
251 for (size_type i
= std::min(From
, Length
), e
= Length
; i
!= e
; ++i
)
257 /// find_first_not_of - Find the first character in the string that is not
258 /// in the string \arg Chars, or npos if not found.
260 /// Note: O(size() + Chars.size())
261 StringRef::size_type
StringRef::find_first_not_of(StringRef Chars
,
263 std::bitset
<1 << CHAR_BIT
> CharBits
;
264 for (size_type i
= 0; i
!= Chars
.size(); ++i
)
265 CharBits
.set((unsigned char)Chars
[i
]);
267 for (size_type i
= std::min(From
, Length
), e
= Length
; i
!= e
; ++i
)
268 if (!CharBits
.test((unsigned char)Data
[i
]))
273 /// find_last_of - Find the last character in the string that is in \arg C,
274 /// or npos if not found.
276 /// Note: O(size() + Chars.size())
277 StringRef::size_type
StringRef::find_last_of(StringRef Chars
,
279 std::bitset
<1 << CHAR_BIT
> CharBits
;
280 for (size_type i
= 0; i
!= Chars
.size(); ++i
)
281 CharBits
.set((unsigned char)Chars
[i
]);
283 for (size_type i
= std::min(From
, Length
) - 1, e
= -1; i
!= e
; --i
)
284 if (CharBits
.test((unsigned char)Data
[i
]))
289 /// find_last_not_of - Find the last character in the string that is not
290 /// \arg C, or npos if not found.
291 StringRef::size_type
StringRef::find_last_not_of(char C
, size_t From
) const {
292 for (size_type i
= std::min(From
, Length
) - 1, e
= -1; i
!= e
; --i
)
298 /// find_last_not_of - Find the last character in the string that is not in
299 /// \arg Chars, or npos if not found.
301 /// Note: O(size() + Chars.size())
302 StringRef::size_type
StringRef::find_last_not_of(StringRef Chars
,
304 std::bitset
<1 << CHAR_BIT
> CharBits
;
305 for (size_type i
= 0, e
= Chars
.size(); i
!= e
; ++i
)
306 CharBits
.set((unsigned char)Chars
[i
]);
308 for (size_type i
= std::min(From
, Length
) - 1, e
= -1; i
!= e
; --i
)
309 if (!CharBits
.test((unsigned char)Data
[i
]))
314 void StringRef::split(SmallVectorImpl
<StringRef
> &A
,
315 StringRef Separator
, int MaxSplit
,
316 bool KeepEmpty
) const {
319 // Count down from MaxSplit. When MaxSplit is -1, this will just split
320 // "forever". This doesn't support splitting more than 2^31 times
321 // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
322 // but that seems unlikely to be useful.
323 while (MaxSplit
-- != 0) {
324 size_t Idx
= S
.find(Separator
);
329 if (KeepEmpty
|| Idx
> 0)
330 A
.push_back(S
.slice(0, Idx
));
333 S
= S
.slice(Idx
+ Separator
.size(), npos
);
337 if (KeepEmpty
|| !S
.empty())
341 void StringRef::split(SmallVectorImpl
<StringRef
> &A
, char Separator
,
342 int MaxSplit
, bool KeepEmpty
) const {
345 // Count down from MaxSplit. When MaxSplit is -1, this will just split
346 // "forever". This doesn't support splitting more than 2^31 times
347 // intentionally; if we ever want that we can make MaxSplit a 64-bit integer
348 // but that seems unlikely to be useful.
349 while (MaxSplit
-- != 0) {
350 size_t Idx
= S
.find(Separator
);
355 if (KeepEmpty
|| Idx
> 0)
356 A
.push_back(S
.slice(0, Idx
));
359 S
= S
.slice(Idx
+ 1, npos
);
363 if (KeepEmpty
|| !S
.empty())
367 //===----------------------------------------------------------------------===//
368 // Helpful Algorithms
369 //===----------------------------------------------------------------------===//
371 /// count - Return the number of non-overlapped occurrences of \arg Str in
373 size_t StringRef::count(StringRef Str
) const {
375 size_t N
= Str
.size();
378 for (size_t i
= 0, e
= Length
- N
+ 1; i
!= e
; ++i
)
379 if (substr(i
, N
).equals(Str
))
384 static unsigned GetAutoSenseRadix(StringRef
&Str
) {
388 if (Str
.startswith("0x") || Str
.startswith("0X")) {
393 if (Str
.startswith("0b") || Str
.startswith("0B")) {
398 if (Str
.startswith("0o")) {
403 if (Str
[0] == '0' && Str
.size() > 1 && isDigit(Str
[1])) {
411 bool llvm::consumeUnsignedInteger(StringRef
&Str
, unsigned Radix
,
412 unsigned long long &Result
) {
413 // Autosense radix if not specified.
415 Radix
= GetAutoSenseRadix(Str
);
417 // Empty strings (after the radix autosense) are invalid.
418 if (Str
.empty()) return true;
420 // Parse all the bytes of the string given this radix. Watch for overflow.
421 StringRef Str2
= Str
;
423 while (!Str2
.empty()) {
425 if (Str2
[0] >= '0' && Str2
[0] <= '9')
426 CharVal
= Str2
[0] - '0';
427 else if (Str2
[0] >= 'a' && Str2
[0] <= 'z')
428 CharVal
= Str2
[0] - 'a' + 10;
429 else if (Str2
[0] >= 'A' && Str2
[0] <= 'Z')
430 CharVal
= Str2
[0] - 'A' + 10;
434 // If the parsed value is larger than the integer radix, we cannot
435 // consume any more characters.
436 if (CharVal
>= Radix
)
439 // Add in this character.
440 unsigned long long PrevResult
= Result
;
441 Result
= Result
* Radix
+ CharVal
;
443 // Check for overflow by shifting back and seeing if bits were lost.
444 if (Result
/ Radix
< PrevResult
)
447 Str2
= Str2
.substr(1);
450 // We consider the operation a failure if no characters were consumed
452 if (Str
.size() == Str2
.size())
459 bool llvm::consumeSignedInteger(StringRef
&Str
, unsigned Radix
,
461 unsigned long long ULLVal
;
463 // Handle positive strings first.
464 if (Str
.empty() || Str
.front() != '-') {
465 if (consumeUnsignedInteger(Str
, Radix
, ULLVal
) ||
466 // Check for value so large it overflows a signed value.
467 (long long)ULLVal
< 0)
473 // Get the positive part of the value.
474 StringRef Str2
= Str
.drop_front(1);
475 if (consumeUnsignedInteger(Str2
, Radix
, ULLVal
) ||
476 // Reject values so large they'd overflow as negative signed, but allow
477 // "-0". This negates the unsigned so that the negative isn't undefined
478 // on signed overflow.
479 (long long)-ULLVal
> 0)
487 /// GetAsUnsignedInteger - Workhorse method that converts a integer character
488 /// sequence of radix up to 36 to an unsigned long long value.
489 bool llvm::getAsUnsignedInteger(StringRef Str
, unsigned Radix
,
490 unsigned long long &Result
) {
491 if (consumeUnsignedInteger(Str
, Radix
, Result
))
494 // For getAsUnsignedInteger, we require the whole string to be consumed or
495 // else we consider it a failure.
499 bool llvm::getAsSignedInteger(StringRef Str
, unsigned Radix
,
501 if (consumeSignedInteger(Str
, Radix
, Result
))
504 // For getAsSignedInteger, we require the whole string to be consumed or else
505 // we consider it a failure.
509 bool StringRef::getAsInteger(unsigned Radix
, APInt
&Result
) const {
510 StringRef Str
= *this;
512 // Autosense radix if not specified.
514 Radix
= GetAutoSenseRadix(Str
);
516 assert(Radix
> 1 && Radix
<= 36);
518 // Empty strings (after the radix autosense) are invalid.
519 if (Str
.empty()) return true;
521 // Skip leading zeroes. This can be a significant improvement if
522 // it means we don't need > 64 bits.
523 while (!Str
.empty() && Str
.front() == '0')
526 // If it was nothing but zeroes....
528 Result
= APInt(64, 0);
532 // (Over-)estimate the required number of bits.
533 unsigned Log2Radix
= 0;
534 while ((1U << Log2Radix
) < Radix
) Log2Radix
++;
535 bool IsPowerOf2Radix
= ((1U << Log2Radix
) == Radix
);
537 unsigned BitWidth
= Log2Radix
* Str
.size();
538 if (BitWidth
< Result
.getBitWidth())
539 BitWidth
= Result
.getBitWidth(); // don't shrink the result
540 else if (BitWidth
> Result
.getBitWidth())
541 Result
= Result
.zext(BitWidth
);
543 APInt RadixAP
, CharAP
; // unused unless !IsPowerOf2Radix
544 if (!IsPowerOf2Radix
) {
545 // These must have the same bit-width as Result.
546 RadixAP
= APInt(BitWidth
, Radix
);
547 CharAP
= APInt(BitWidth
, 0);
550 // Parse all the bytes of the string given this radix.
552 while (!Str
.empty()) {
554 if (Str
[0] >= '0' && Str
[0] <= '9')
555 CharVal
= Str
[0]-'0';
556 else if (Str
[0] >= 'a' && Str
[0] <= 'z')
557 CharVal
= Str
[0]-'a'+10;
558 else if (Str
[0] >= 'A' && Str
[0] <= 'Z')
559 CharVal
= Str
[0]-'A'+10;
563 // If the parsed value is larger than the integer radix, the string is
565 if (CharVal
>= Radix
)
568 // Add in this character.
569 if (IsPowerOf2Radix
) {
570 Result
<<= Log2Radix
;
584 bool StringRef::getAsDouble(double &Result
, bool AllowInexact
) const {
586 APFloat::opStatus Status
=
587 F
.convertFromString(*this, APFloat::rmNearestTiesToEven
);
588 if (Status
!= APFloat::opOK
) {
589 if (!AllowInexact
|| !(Status
& APFloat::opInexact
))
593 Result
= F
.convertToDouble();
597 // Implementation of StringRef hashing.
598 hash_code
llvm::hash_value(StringRef S
) {
599 return hash_combine_range(S
.begin(), S
.end());