1 /* SPDX-FileCopyrightText: 2023 Blender Authors
3 * SPDX-License-Identifier: GPL-2.0-or-later */
10 * A `blender::StringRef` references a const char array owned by someone else. It is just a pointer
11 * and a size. Since the memory is not owned, StringRef should not be used to transfer ownership of
12 * the string. The data referenced by a StringRef cannot be mutated through it.
14 * A StringRef is NOT necessarily null-terminated. This makes it much more powerful within C++, because we can
15 * also cut off parts of the end without creating a copy. When interfacing with C code that expects
16 * null-terminated strings, `blender::StringRefNull` can be used. It is essentially the same as
17 * StringRef, but with the restriction that the string has to be null-terminated.
19 * Whenever possible, string parameters should be of type StringRef and the string return type
20 * should be StringRefNull. Don't forget that the StringRefNull does not own the string, so don't
21 * return it when the string exists only in the scope of the function. This convention makes
22 * functions usable in the most contexts.
24 * blender::StringRef vs. std::string_view:
25 * Both types are certainly very similar. The main benefit of using StringRef in Blender is that
26 * this allows us to add convenience methods at any time. Especially, when doing a lot of string
27 * manipulation, this helps to keep the code clean. Furthermore, we need StringRefNull anyway,
28 * because there is a lot of C code that expects null-terminated strings. Conversion between
29 * StringRef and string_view is very cheap and can be done at api boundaries at essentially no
30 * cost. Another benefit of using StringRef is that it uses signed integers, thus developers
31 * have to deal less with issues resulting from unsigned integers.
36 #include <string_view>
38 #include "BLI_span.hh"
45 * A common base class for StringRef and StringRefNull. This should never be used in other files.
46 * It only exists to avoid some code duplication.
53 constexpr StringRefBase(const char *data
, int64_t size
);
56 /* Similar to string_view::npos, but signed. */
57 static constexpr int64_t not_found
= -1;
59 constexpr int64_t size() const;
60 constexpr bool is_empty() const;
61 constexpr const char *data() const;
62 constexpr operator Span
<char>() const;
64 operator std::string() const;
65 constexpr operator std::string_view() const;
67 constexpr const char *begin() const;
68 constexpr const char *end() const;
70 constexpr IndexRange
index_range() const;
73 * Copy the string into a char array. The copied string will be null-terminated. If it does not
74 * fit, it will be truncated while keeping it valid utf-8 (assuming the #StringRef itself is
77 void copy_utf8_truncated(char *dst
, int64_t dst_size
) const;
78 template<size_t N
> void copy_utf8_truncated(char (&dst
)[N
]) const;
81 * Copy the string into a buffer. The buffer has to be one byte larger than the size of the
82 * string, because the copied string will be null-terminated. Only use this when you are
83 * absolutely sure that the buffer is large enough.
85 void copy_unsafe(char *dst
) const;
87 constexpr bool startswith(StringRef prefix
) const;
88 constexpr bool endswith(StringRef suffix
) const;
89 constexpr StringRef
substr(int64_t start
, int64_t size
) const;
91 constexpr const char &front() const;
92 constexpr const char &back() const;
95 * The behavior of those functions matches the standard library implementation of
98 constexpr int64_t find(char c
, int64_t pos
= 0) const;
99 constexpr int64_t find(StringRef str
, int64_t pos
= 0) const;
100 constexpr int64_t rfind(char c
, int64_t pos
= INT64_MAX
) const;
101 constexpr int64_t rfind(StringRef str
, int64_t pos
= INT64_MAX
) const;
102 constexpr int64_t find_first_of(StringRef chars
, int64_t pos
= 0) const;
103 constexpr int64_t find_first_of(char c
, int64_t pos
= 0) const;
104 constexpr int64_t find_last_of(StringRef chars
, int64_t pos
= INT64_MAX
) const;
105 constexpr int64_t find_last_of(char c
, int64_t pos
= INT64_MAX
) const;
106 constexpr int64_t find_first_not_of(StringRef chars
, int64_t pos
= 0) const;
107 constexpr int64_t find_first_not_of(char c
, int64_t pos
= 0) const;
108 constexpr int64_t find_last_not_of(StringRef chars
, int64_t pos
= INT64_MAX
) const;
109 constexpr int64_t find_last_not_of(char c
, int64_t pos
= INT64_MAX
) const;
111 constexpr StringRef
trim() const;
112 constexpr StringRef
trim(StringRef characters_to_remove
) const;
113 constexpr StringRef
trim(char character_to_remove
) const;
117 * References a null-terminated const char array.
119 * StringRefNull can be compared with StringRef and StringRefNull.
121 class StringRefNull
: public StringRefBase
{
124 constexpr StringRefNull();
125 constexpr StringRefNull(const char *str
, int64_t size
);
126 StringRefNull(std::nullptr_t
) = delete;
127 StringRefNull(const char *str
);
128 StringRefNull(const std::string
&str
);
130 constexpr char operator[](int64_t index
) const;
131 constexpr const char *c_str() const;
135 * References a const char array. It might not be null terminated.
137 * StringRef can be compared with StringRef and StringRefNull.
139 class StringRef
: public StringRefBase
{
141 constexpr StringRef();
142 constexpr StringRef(StringRefNull other
);
143 constexpr StringRef(const char *str
);
144 constexpr StringRef(const char *str
, int64_t length
);
145 constexpr StringRef(const char *begin
, const char *one_after_end
);
146 constexpr StringRef(std::string_view view
);
147 StringRef(const std::string
&str
);
149 constexpr StringRef
drop_prefix(int64_t n
) const;
150 constexpr StringRef
drop_known_prefix(StringRef prefix
) const;
151 constexpr StringRef
drop_suffix(int64_t n
) const;
153 constexpr char operator[](int64_t index
) const;
156 /* -------------------------------------------------------------------- */
157 /** \name #StringRefBase Inline Methods
160 constexpr StringRefBase::StringRefBase(const char *data
, const int64_t size
)
161 : data_(data
), size_(size
)
166 * Return the (byte-)length of the referenced string, without any null-terminator.
168 constexpr int64_t StringRefBase::size() const
173 constexpr bool StringRefBase::is_empty() const
179 * Return a pointer to the start of the string.
181 constexpr const char *StringRefBase::data() const
186 constexpr StringRefBase::operator Span
<char>() const
188 return Span
<char>(data_
, size_
);
192 * Implicitly convert to std::string. This is convenient in most cases, but you have to be a bit
193 * careful not to convert to std::string accidentally.
195 inline StringRefBase::operator std::string() const
197 return std::string(data_
, size_t(size_
));
200 constexpr StringRefBase::operator std::string_view() const
202 return std::string_view(data_
, size_t(size_
));
205 constexpr const char *StringRefBase::begin() const
210 constexpr const char *StringRefBase::end() const
212 return data_
+ size_
;
215 constexpr IndexRange
StringRefBase::index_range() const
217 return IndexRange(size_
);
220 inline void StringRefBase::copy_unsafe(char *dst
) const
223 memcpy(dst
, data_
, size_t(size_
));
228 template<size_t N
> inline void StringRefBase::copy_utf8_truncated(char (&dst
)[N
]) const
230 this->copy_utf8_truncated(dst
, N
);
234 * Return true when the string starts with the given prefix.
236 constexpr bool StringRefBase::startswith(StringRef prefix
) const
238 if (size_
< prefix
.size_
) {
241 for (int64_t i
= 0; i
< prefix
.size_
; i
++) {
242 if (data_
[i
] != prefix
.data_
[i
]) {
250 * Return true when the string ends with the given suffix.
252 constexpr bool StringRefBase::endswith(StringRef suffix
) const
254 if (size_
< suffix
.size_
) {
257 const int64_t offset
= size_
- suffix
.size_
;
258 for (int64_t i
= 0; i
< suffix
.size_
; i
++) {
259 if (data_
[offset
+ i
] != suffix
.data_
[i
]) {
267 * Return a new #StringRef containing only a sub-string of the original string. This invokes
268 * undefined if the start or max_size is negative.
270 constexpr StringRef
StringRefBase::substr(const int64_t start
,
271 const int64_t max_size
= INT64_MAX
) const
273 BLI_assert(max_size
>= 0);
274 BLI_assert(start
>= 0);
275 const int64_t substr_size
= std::min(max_size
, size_
- start
);
276 return StringRef(data_
+ start
, substr_size
);
280 * Get the first char in the string. This invokes undefined behavior when the string is empty.
282 constexpr const char &StringRefBase::front() const
284 BLI_assert(size_
>= 1);
289 * Get the last char in the string. This invokes undefined behavior when the string is empty.
291 constexpr const char &StringRefBase::back() const
293 BLI_assert(size_
>= 1);
294 return data_
[size_
- 1];
297 constexpr int64_t index_or_npos_to_int64(size_t index
)
299 /* The compiler will probably optimize this check away. */
300 if (index
== std::string_view::npos
) {
301 return StringRef::not_found
;
303 return int64_t(index
);
306 constexpr int64_t StringRefBase::find(char c
, int64_t pos
) const
308 BLI_assert(pos
>= 0);
309 return index_or_npos_to_int64(std::string_view(*this).find(c
, size_t(pos
)));
312 constexpr int64_t StringRefBase::find(StringRef str
, int64_t pos
) const
314 BLI_assert(pos
>= 0);
315 return index_or_npos_to_int64(std::string_view(*this).find(str
, size_t(pos
)));
318 constexpr int64_t StringRefBase::rfind(char c
, int64_t pos
) const
320 BLI_assert(pos
>= 0);
321 return index_or_npos_to_int64(std::string_view(*this).rfind(c
, size_t(pos
)));
324 constexpr int64_t StringRefBase::rfind(StringRef str
, int64_t pos
) const
326 BLI_assert(pos
>= 0);
327 return index_or_npos_to_int64(std::string_view(*this).rfind(str
, size_t(pos
)));
330 constexpr int64_t StringRefBase::find_first_of(StringRef chars
, int64_t pos
) const
332 BLI_assert(pos
>= 0);
333 return index_or_npos_to_int64(std::string_view(*this).find_first_of(chars
, size_t(pos
)));
336 constexpr int64_t StringRefBase::find_first_of(char c
, int64_t pos
) const
338 BLI_assert(pos
>= 0);
339 return index_or_npos_to_int64(std::string_view(*this).find_first_of(c
, size_t(pos
)));
342 constexpr int64_t StringRefBase::find_last_of(StringRef chars
, int64_t pos
) const
344 BLI_assert(pos
>= 0);
345 return index_or_npos_to_int64(std::string_view(*this).find_last_of(chars
, size_t(pos
)));
348 constexpr int64_t StringRefBase::find_last_of(char c
, int64_t pos
) const
350 BLI_assert(pos
>= 0);
351 return index_or_npos_to_int64(std::string_view(*this).find_last_of(c
, size_t(pos
)));
354 constexpr int64_t StringRefBase::find_first_not_of(StringRef chars
, int64_t pos
) const
356 BLI_assert(pos
>= 0);
357 return index_or_npos_to_int64(std::string_view(*this).find_first_not_of(chars
, size_t(pos
)));
360 constexpr int64_t StringRefBase::find_first_not_of(char c
, int64_t pos
) const
362 BLI_assert(pos
>= 0);
363 return index_or_npos_to_int64(std::string_view(*this).find_first_not_of(c
, size_t(pos
)));
366 constexpr int64_t StringRefBase::find_last_not_of(StringRef chars
, int64_t pos
) const
368 BLI_assert(pos
>= 0);
369 return index_or_npos_to_int64(std::string_view(*this).find_last_not_of(chars
, size_t(pos
)));
372 constexpr int64_t StringRefBase::find_last_not_of(char c
, int64_t pos
) const
374 BLI_assert(pos
>= 0);
375 return index_or_npos_to_int64(std::string_view(*this).find_last_not_of(c
, size_t(pos
)));
378 constexpr StringRef
StringRefBase::trim() const
380 return this->trim(" \t\r\n");
384 * Return a new StringRef that does not contain leading and trailing white-space.
386 constexpr StringRef
StringRefBase::trim(const char character_to_remove
) const
388 return this->trim(StringRef(&character_to_remove
, 1));
392 * Return a new StringRef that removes all the leading and trailing characters
393 * that occur in `characters_to_remove`.
395 constexpr StringRef
StringRefBase::trim(StringRef characters_to_remove
) const
397 const int64_t find_front
= this->find_first_not_of(characters_to_remove
);
398 if (find_front
== not_found
) {
401 const int64_t find_end
= this->find_last_not_of(characters_to_remove
);
402 /* `find_end` cannot be `not_found`, because that means the string is only
403 * `characters_to_remove`, in which case `find_front` would already have
404 * been `not_found`. */
405 BLI_assert_msg(find_end
!= not_found
,
406 "forward search found characters-to-not-remove, but backward search did not");
407 const int64_t substr_len
= find_end
- find_front
+ 1;
408 return this->substr(find_front
, substr_len
);
413 /* -------------------------------------------------------------------- */
414 /** \name #StringRefNull Inline Methods
417 constexpr StringRefNull::StringRefNull() : StringRefBase("", 0) {}
420 * Construct a StringRefNull from a null terminated c-string. This invokes undefined behavior
421 * when the given size is not the correct size of the string.
423 constexpr StringRefNull::StringRefNull(const char *str
, const int64_t size
)
424 : StringRefBase(str
, size
)
426 BLI_assert(int64_t(std::char_traits
<char>::length(str
)) == size
);
430 * Construct a StringRefNull from a null terminated c-string. The pointer must not point to
433 inline StringRefNull::StringRefNull(const char *str
) : StringRefBase(str
, int64_t(strlen(str
)))
435 BLI_assert(str
!= nullptr);
436 BLI_assert(data_
[size_
] == '\0');
440 * Reference a std::string. Remember that when the std::string is destructed, the StringRefNull
441 * will point to uninitialized memory.
443 inline StringRefNull::StringRefNull(const std::string
&str
)
444 : StringRefNull(str
.c_str(), int64_t(str
.size()))
449 * Get the char at the given index.
451 constexpr char StringRefNull::operator[](const int64_t index
) const
453 BLI_assert(index
>= 0);
454 /* Use '<=' instead of just '<', so that the null character can be accessed as well. */
455 BLI_assert(index
<= size_
);
460 * Returns the beginning of a null-terminated char array.
462 * This is like ->data(), but can only be called on a StringRefNull.
464 constexpr const char *StringRefNull::c_str() const
471 /* -------------------------------------------------------------------- */
472 /** \name #StringRef Inline Methods
475 constexpr StringRef::StringRef() : StringRefBase(nullptr, 0) {}
478 * StringRefNull can be converted into StringRef, but not the other way around.
480 constexpr StringRef::StringRef(StringRefNull other
) : StringRefBase(other
.data(), other
.size()) {}
483 * Create a StringRef from a null-terminated c-string.
485 constexpr StringRef::StringRef(const char *str
)
486 : StringRefBase(str
, str
? int64_t(std::char_traits
<char>::length(str
)) : 0)
490 constexpr StringRef::StringRef(const char *str
, const int64_t length
) : StringRefBase(str
, length
)
495 * Returns a new StringRef that does not contain the first n chars. This invokes undefined
496 * behavior when n is negative.
498 constexpr StringRef
StringRef::drop_prefix(const int64_t n
) const
501 const int64_t clamped_n
= std::min(n
, size_
);
502 const int64_t new_size
= size_
- clamped_n
;
503 return StringRef(data_
+ clamped_n
, new_size
);
507 * Return a new StringRef with the given prefix being skipped. This invokes undefined behavior if
508 * the string does not begin with the given prefix.
510 constexpr StringRef
StringRef::drop_known_prefix(StringRef prefix
) const
512 BLI_assert(this->startswith(prefix
));
513 return this->drop_prefix(prefix
.size());
517 * Return a new StringRef that does not contain the last n chars. This invokes undefined behavior
518 * when n is negative.
520 constexpr StringRef
StringRef::drop_suffix(const int64_t n
) const
523 const int64_t new_size
= std::max
<int64_t>(0, size_
- n
);
524 return StringRef(data_
, new_size
);
528 * Get the char at the given index.
530 constexpr char StringRef::operator[](int64_t index
) const
532 BLI_assert(index
>= 0);
533 BLI_assert(index
< size_
);
538 * Create a StringRef from a start and end pointer. This invokes undefined behavior when the
539 * second point points to a smaller address than the first one.
541 constexpr StringRef::StringRef(const char *begin
, const char *one_after_end
)
542 : StringRefBase(begin
, int64_t(one_after_end
- begin
))
544 BLI_assert(begin
<= one_after_end
);
548 * Reference a std::string. Remember that when the std::string is destructed, the StringRef
549 * will point to uninitialized memory.
551 inline StringRef::StringRef(const std::string
&str
)
552 : StringRefBase(str
.data(), int64_t(str
.size()))
556 constexpr StringRef::StringRef(std::string_view view
)
557 : StringRefBase(view
.data(), int64_t(view
.size()))
563 /* -------------------------------------------------------------------- */
564 /** \name Operator Overloads
567 std::ostream
&operator<<(std::ostream
&stream
, StringRef ref
);
568 std::ostream
&operator<<(std::ostream
&stream
, StringRefNull ref
);
571 * Adding two #StringRefs will allocate an std::string.
572 * This is not efficient, but convenient in most cases.
574 inline std::string
operator+(StringRef a
, StringRef b
)
576 return std::string(a
) + std::string(b
);
579 /* This does not compare StringRef and std::string_view, because of ambiguous overloads. This is
580 * not a problem when std::string_view is only used at api boundaries. To compare a StringRef and a
581 * std::string_view, one should convert the std::string_view to StringRef (which is very cheap).
582 * Ideally, we only use StringRef in our code to avoid this problem altogether.
584 * NOTE: these functions are also suitable for StringRefNull comparisons, as these are
585 * implicitly converted to StringRef by the compiler. */
586 constexpr bool operator==(StringRef a
, StringRef b
)
588 return std::string_view(a
) == std::string_view(b
);
591 constexpr bool operator!=(StringRef a
, StringRef b
)
593 return std::string_view(a
) != std::string_view(b
);
596 constexpr bool operator<(StringRef a
, StringRef b
)
598 return std::string_view(a
) < std::string_view(b
);
601 constexpr bool operator>(StringRef a
, StringRef b
)
603 return std::string_view(a
) > std::string_view(b
);
606 constexpr bool operator<=(StringRef a
, StringRef b
)
608 return std::string_view(a
) <= std::string_view(b
);
611 constexpr bool operator>=(StringRef a
, StringRef b
)
613 return std::string_view(a
) >= std::string_view(b
);
618 /* -------------------------------------------------------------------- */
623 * Support using the `fmt` library with #StringRef and implicitly also #StringRefNull.
625 inline std::string_view
format_as(StringRef str
)
632 } // namespace blender