Remove unused function generate_excls and make clean_excls static
[gromacs.git] / src / gromacs / utility / stringutil.h
blob4a5f4f474989e1e4ad081c9b362eff7d61320784
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 /*! \file
36 * \brief
37 * Declares common string utility and formatting routines.
39 * \author Teemu Murtola <teemu.murtola@gmail.com>
40 * \inpublicapi
41 * \ingroup module_utility
43 #ifndef GMX_UTILITY_STRINGUTIL_H
44 #define GMX_UTILITY_STRINGUTIL_H
46 #include <cstdarg>
47 #include <cstring>
49 #include <string>
50 #include <vector>
52 namespace gmx
55 //! \addtogroup module_utility
56 //! \{
/*! \brief
 * Tests whether a string is null or empty.
 *
 * \param[in] str  C string to test; may be a null pointer.
 * \returns   true if \p str is a null pointer or its first character is
 *            the terminating zero.
 *
 * Does not throw.
 */
static inline bool isNullOrEmpty(const char *str)
{
    return str == nullptr || str[0] == '\0';
}
/*! \brief
 * Tests whether a string starts with another string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is empty.
 * Does not throw.
 */
static inline bool startsWith(const std::string &str, const std::string &prefix)
{
    // compare() clamps the compared range to the length of str, so a prefix
    // longer than str compares unequal instead of reading out of bounds.
    return str.compare(0, prefix.length(), prefix) == 0;
}
/*! \brief
 * Tests whether a C string starts with another C string.
 *
 * \param[in] str    String to process.
 * \param[in] prefix Prefix to find.
 * \returns   true if \p str starts with \p prefix.
 *
 * Returns true if \p prefix is NULL or empty.
 * Returns false if \p str is NULL and \p prefix is not empty.
 * Does not throw.
 */
static inline bool startsWith(const char *str, const char *prefix)
{
    // A null prefix is treated like an empty one; this also keeps
    // strlen()/strncmp() from ever seeing a null pointer.
    if (prefix == nullptr || prefix[0] == '\0')
    {
        return true;
    }
    if (str == nullptr)
    {
        return false;
    }
    return std::strncmp(str, prefix, std::strlen(prefix)) == 0;
}
/*! \brief
 * Tests whether a string ends with another string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to find.
 * \returns   true if \p str ends with \p suffix.
 *
 * Returns true if \p suffix is NULL or empty.
 * Does not throw.
 */
bool endsWith(const char *str, const char *suffix);
//! \copydoc endsWith(const char *, const char *)
static inline bool endsWith(const std::string &str, const char *suffix)
{
    // Forwards to the C-string overload declared just above.
    return endsWith(str.c_str(), suffix);
}
/*! \brief
 * Tests whether a string contains another as a substring.
 *
 * \param[in] str    String to process.
 * \param[in] substr Substring to find.
 * \returns   true if \p str contains \p substr.
 *
 * Does not throw.
 */
static inline bool contains(const std::string &str, const char *substr)
{
    return str.find(substr) != std::string::npos;
}
//! \copydoc contains(const std::string &str, const char *substr)
static inline bool contains(const std::string &str, const std::string &substr)
{
    return str.find(substr) != std::string::npos;
}
/*!\brief Returns number of space-separated words in zero-terminated char ptr
 *
 * \param s Pointer to a zero-terminated string, which will not be changed.
 *
 * \returns number of words in string.
 *
 * \note This routine is mainly meant to support legacy code in GROMACS. For
 *       new source you should try hard to use C++ string objects instead.
 */
std::size_t
countWords(const char *s);

/*!\brief Returns the number of space-separated words in a string object
 *
 * \param str Reference to string object, which will not be changed.
 *
 * \returns number of words in string.
 */
std::size_t
countWords(const std::string &str);
//! \copydoc endsWith(const std::string &str, const char *suffix)
static inline bool endsWith(const std::string &str, const std::string &suffix)
{
    // Forwards to the (std::string, const char *) overload.
    return endsWith(str, suffix.c_str());
}
/*! \brief
 * Removes a suffix from a string.
 *
 * \param[in] str    String to process.
 * \param[in] suffix Suffix to remove.
 * \returns   \p str with \p suffix removed, or \p str unmodified if it does
 *      not end with \p suffix.
 * \throws    std::bad_alloc if out of memory.
 *
 * Returns \p str if \p suffix is NULL or empty.
 */
std::string stripSuffixIfPresent(const std::string &str, const char *suffix);
/*! \brief
 * Removes leading and trailing whitespace from a string.
 *
 * \param[in] str  String to process.
 * \returns   \p str with leading and trailing whitespaces removed.
 * \throws    std::bad_alloc if out of memory.
 */
std::string stripString(const std::string &str);
#ifdef __GNUC__
#define gmx_format(archetype, string_index, first_to_check) \
    __attribute__ ((format (archetype, string_index, first_to_check)))
#else
/*! \brief GCC like function format attribute
 *
 * The format attribute specifies that a function takes printf, scanf, ...
 * style arguments that should be type-checked against a format string.
 * The attribute has to be placed after the function.
 * This attribute is only valid for function declarations and not function
 * definitions (GCC limitation). For member functions the implicit `this`
 * pointer is included in the argument count.
 *
 * Expands to nothing on compilers that do not define `__GNUC__`.
 */
#define gmx_format(archetype, string_index, first_to_check)
#endif
#ifdef _MSC_VER
#define gmx_fmtstr _In_ _Printf_format_string_
#else
/*! \brief MSVC like function format attribute
 *
 * Does type checking for printf like format strings in MSVC style.
 * Attribute has to be placed before format string.
 *
 * Expands to nothing on compilers that do not define `_MSC_VER`.
 */
#define gmx_fmtstr
#endif
196 /*! \brief
197 * Formats a string (snprintf() wrapper).
199 * \throws std::bad_alloc if out of memory.
201 * This function works like sprintf(), except that it returns an std::string
202 * instead of requiring a preallocated buffer. Arbitrary length output is
203 * supported.
205 std::string formatString(gmx_fmtstr const char *fmt, ...) gmx_format(printf, 1, 2);
/*! \brief
 * Formats a string (vsnprintf() wrapper).
 *
 * \param[in] fmt  printf-style format string.
 * \param[in] ap   Argument list consumed by the conversions in \p fmt.
 * \returns   The formatted text.
 * \throws    std::bad_alloc if out of memory.
 *
 * This function works like vsprintf(), except that it returns an std::string
 * instead of requiring a preallocated buffer.  Arbitrary length output is
 * supported.
 */
std::string formatStringV(const char *fmt, va_list ap);
/*! \brief Function object that wraps a call to formatString() that
 * expects a single conversion argument, for use with algorithms. */
class StringFormatter
{
    public:
        /*! \brief Constructor
         *
         * \param[in] format The printf-style format string that will
         *     be applied to convert values of type T to
         *     string. Exactly one argument to the conversion
         *     specification(s) in `format` is supported.
         *
         * Only the pointer is stored, so the string it points to must
         * stay valid for as long as this object is used. */
        explicit StringFormatter(const char *format) : format_(format)
        {
        }

        //! Implements the formatting functionality
        template <typename T>
        std::string operator()(const T &value) const
        {
            return formatString(format_, value);
        }

    private:
        //! Format string to use
        const char *format_;
};
/*! \brief Function object to implement the same interface as
 * `StringFormatter` to use with strings that should not be formatted
 * further. */
class IdentityFormatter
{
    public:
        //! Implements the formatting non-functionality
        std::string operator()(const std::string &value) const
        {
            return value;
        }
};
/*! \brief Formats all the range as strings, and then joins them with
 * a separator in between.
 *
 * \param[in] begin      Iterator to the beginning of the range to join.
 * \param[in] end        Iterator to the end of the range to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in
 *                       the range as strings
 * \returns   All objects in the range from `begin` to `end` formatted
 *            as strings and concatenated with `separator` between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template <typename InputIterator, typename FormatterType>
std::string formatAndJoin(InputIterator begin, InputIterator end, const char *separator, const FormatterType &formatter)
{
    std::string result;
    // Empty before the first element, so the separator only ever ends up
    // between two elements.
    const char *currentSeparator = "";
    for (InputIterator i = begin; i != end; ++i)
    {
        result.append(currentSeparator);
        result.append(formatter(*i));
        currentSeparator = separator;
    }
    return result;
}

/*! \brief Formats all elements of the container as strings, and then
 * joins them with a separator in between.
 *
 * \param[in] container  Objects to join.
 * \param[in] separator  String to put in between the joined strings.
 * \param[in] formatter  Function object to format the objects in
 *                       `container` as strings
 * \returns   All objects from `container` formatted as strings and
 *            concatenated with `separator` between each pair.
 * \throws    std::bad_alloc if out of memory.
 */
template <typename ContainerType, typename FormatterType>
std::string formatAndJoin(const ContainerType &container, const char *separator, const FormatterType &formatter)
{
    return formatAndJoin(container.begin(), container.end(), separator, formatter);
}
301 /*! \brief
302 * Joins strings from a range with a separator in between.
304 * \param[in] begin Iterator the beginning of the range to join.
305 * \param[in] end Iterator the end of the range to join.
306 * \param[in] separator String to put in between the joined strings.
307 * \returns All strings from (`begin`, `end`) concatenated with `separator`
308 * between each pair.
309 * \throws std::bad_alloc if out of memory.
311 template <typename InputIterator>
312 std::string joinStrings(InputIterator begin, InputIterator end,
313 const char *separator)
315 return formatAndJoin(begin, end, separator, IdentityFormatter());
318 /*! \brief
319 * Joins strings from a container with a separator in between.
321 * \param[in] container Strings to join.
322 * \param[in] separator String to put in between the joined strings.
323 * \returns All strings from `container` concatenated with `separator`
324 * between each pair.
325 * \throws std::bad_alloc if out of memory.
327 template <typename ContainerType>
328 std::string joinStrings(const ContainerType &container, const char *separator)
330 return joinStrings(container.begin(), container.end(), separator);
333 /*! \brief
334 * Joins strings from an array with a separator in between.
336 * \param[in] array Array of strings to join.
337 * \param[in] separator String to put in between the joined strings.
338 * \tparam count Deduced number of elements in \p array.
339 * \returns All strings from `aray` concatenated with `separator`
340 * between each pair.
341 * \throws std::bad_alloc if out of memory.
343 template <size_t count>
344 std::string joinStrings(const char *const (&array)[count], const char *separator)
346 return joinStrings(array, array + count, separator);
/*! \brief
 * Splits a string to whitespace separated tokens.
 *
 * \param[in] str  String to process.
 * \returns   \p str split into tokens at each whitespace sequence.
 * \throws    std::bad_alloc if out of memory.
 *
 * This function works like `split` in Python, i.e., leading and trailing
 * whitespace is ignored, and consecutive whitespaces are treated as a single
 * separator.
 */
std::vector<std::string> splitString(const std::string &str);
/*! \brief
 * Splits a string to tokens separated by a given delimiter.
 *
 * \param[in] str   String to process.
 * \param[in] delim Delimiter to use for splitting.
 * \returns   \p str split into tokens at delimiter.
 * \throws    std::bad_alloc if out of memory.
 *
 * Unlike splitString(), consecutive delimiters will generate empty tokens, as
 * will leading or trailing delimiters.
 * Empty input will return an empty vector.
 */
std::vector<std::string> splitDelimitedString(const std::string &str, char delim);
/*! \brief
 * Splits \c str to tokens separated by delimiter \c delim. Removes
 * leading and trailing whitespace from those strings with std::isspace.
 *
 * \param[in] str   String to process.
 * \param[in] delim Delimiter to use for splitting.
 * \returns   \p str split into tokens at delimiter, with whitespace stripped.
 * \throws    std::bad_alloc if out of memory.
 *
 * Unlike splitString(), consecutive delimiters will generate empty tokens, as
 * will leading or trailing delimiters.
 * Empty input will return an empty vector.
 * Input with only whitespace will return a vector of size 1,
 * that contains an empty token.
 */
std::vector<std::string> splitAndTrimDelimitedString(const std::string &str, char delim);
/*! \brief
 * Replace all occurrences of a string with another string.
 *
 * \param[in] input  Input string.
 * \param[in] from   String to find.
 * \param[in] to     String to use to replace \p from.
 * \returns   Copy of \p input with all occurrences of \p from replaced with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * The replacement is greedy and not recursive: starting from the beginning of
 * \p input, each match of \p from is replaced with \p to, and the search for
 * the next match begins after the end of the previous match.
 *
 * Complexity is O(N), where N is length of output.
 *
 * \see replaceAllWords()
 */
std::string replaceAll(const std::string &input,
                       const char *from, const char *to);
//! \copydoc replaceAll(const std::string &, const char *, const char *)
std::string replaceAll(const std::string &input,
                       const std::string &from, const std::string &to);
/*! \brief
 * Replace whole words with others.
 *
 * \param[in] input  Input string.
 * \param[in] from   String to find.
 * \param[in] to     String to use to replace \p from.
 * \returns   Copy of \p input with all \p from words replaced with \p to.
 * \throws    std::bad_alloc if out of memory.
 *
 * Works as replaceAll(), but a match is only considered if it is delimited by
 * non-alphanumeric characters.
 *
 * \see replaceAll()
 */
std::string replaceAllWords(const std::string &input,
                            const char *from, const char *to);
//! \copydoc replaceAllWords(const std::string &, const char *, const char *)
std::string replaceAllWords(const std::string &input,
                            const std::string &from, const std::string &to);
/*! \brief Return whether two strings are equal, ignoring case.
 *
 * Checks if two strings have the same length and if all characters
 * in them match when compared case insensitive.
 * Characters are converted by using std::tolower.
 *
 * \param[in] source Search string to compare against \p target.
 * \param[in] target String to be matched to \p source.
 * \returns True if the strings match.
 */
bool equalCaseInsensitive(const std::string &source,
                          const std::string &target);

/*! \brief
 * Checks if at most \p maxLengthOfComparison characters of two strings match case insensitive.
 *
 * The function tests two strings \p source and \p target to see if at most
 * \p maxLengthOfComparison characters match between the two. If fewer characters are present
 * in \p source, only the maximum number of characters in \p source will be compared instead.
 * In this case both \p source and \p target also need to have the same length, or the strings will
 * compare as false, even if \p target matches \p source over the length of \p source.
 *
 * If \p maxLengthOfComparison is 0, the function always returns true.
 * Characters are converted by using std::tolower.
 *
 * \param[in] source Search string to compare against \p target.
 * \param[in] target String to be matched to \p source.
 * \param[in] maxLengthOfComparison The maximum string length to compare.
 * \returns True if the strings match.
 */
bool equalCaseInsensitive(const std::string &source,
                          const std::string &target,
                          size_t             maxLengthOfComparison);
467 class TextLineWrapper;
/*! \brief
 * Stores settings for line wrapping.
 *
 * Methods in this class do not throw.
 *
 * \see TextLineWrapper
 *
 * \inpublicapi
 */
class TextLineWrapperSettings
{
    public:
        /*! \brief
         * Initializes default wrapper settings.
         *
         * Default settings are:
         *  - No maximum line width (only explicit line breaks).
         *  - No indentation.
         *  - No continuation characters.
         *  - Do not keep final spaces in input strings.
         */
        TextLineWrapperSettings();

        /*! \brief
         * Sets the maximum length for output lines.
         *
         * \param[in] length  Maximum length for the lines after wrapping.
         *
         * If this method is not called, or is called with zero \p length, the
         * wrapper has no maximum length (only wraps at explicit line breaks).
         */
        void setLineLength(int length) { maxLength_ = length; }
        /*! \brief
         * Sets the indentation for output lines.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, the wrapper does not add indentation.
         */
        void setIndent(int indent) { indent_ = indent; }
        /*! \brief
         * Sets the indentation for first output line after a line break.
         *
         * \param[in] indent  Number of spaces to add for indentation.
         *
         * If this method is not called, or called with \p indent equal to -1,
         * the value set with setIndent() is used.
         */
        void setFirstLineIndent(int indent) { firstLineIndent_ = indent; }
        /*! \brief
         * Sets whether final spaces in input should be kept.
         *
         * \param[in] bKeep  Whether to keep spaces at the end of the input.
         *
         * This means that wrapping a string that ends in spaces also keeps
         * those spaces in the output.  This allows using the wrapper for
         * partial lines where the initial part of the line may end in a space.
         * By default, all trailing whitespace is removed.  Note that this
         * option does not affect spaces before an explicit newline: those are
         * always removed.
         */
        void setKeepFinalSpaces(bool bKeep) { bKeepFinalSpaces_ = bKeep; }
        /*! \brief
         * Sets a continuation marker for wrapped lines.
         *
         * \param[in] continuationChar  Character to use to mark continuation
         *      lines.
         *
         * If set to non-zero character code, this character is added at the
         * end of each line where a line break is added by TextLineWrapper
         * (but not after lines produced by explicit line breaks).
         * The default (\c '\0') is to not add continuation markers.
         *
         * Note that currently, the continuation char may cause the output line
         * length to exceed the value set with setLineLength() by at most two
         * characters.
         */
        void setContinuationChar(char continuationChar)
        {
            continuationChar_ = continuationChar;
        }

        //! Returns the maximum length set with setLineLength().
        int lineLength() const { return maxLength_; }
        //! Returns the indentation set with setIndent().
        int indent() const { return indent_; }
        /*! \brief
         * Returns the indentation set with setFirstLineIndent().
         *
         * If setFirstLineIndent() has not been called or has been called with
         * -1, indent() is returned.
         */
        int firstLineIndent() const
        {
            // Any negative stored value falls back to the general indent.
            return (firstLineIndent_ >= 0 ? firstLineIndent_ : indent_);
        }

    private:
        //! Maximum length of output lines, or <= 0 if no limit.
        int                     maxLength_;
        //! Number of spaces to indent each output line with.
        int                     indent_;
        /*! \brief
         * Number of spaces to indent the first line after a newline.
         *
         * If -1, \a indent_ is used.
         */
        int                     firstLineIndent_;
        //! Whether to keep spaces at end of input.
        bool                    bKeepFinalSpaces_;
        //! If not \c '\0', mark each wrapping point with this character.
        char                    continuationChar_;

        //! Needed to access the members.
        friend class TextLineWrapper;
};
586 /*! \brief
587 * Wraps lines to a predefined length.
589 * This utility class wraps lines at word breaks to produce lines that are not
590 * longer than a predefined length. Explicit newlines ('\\n') are preserved.
591 * Only space is considered a word separator. If a single word exceeds the
592 * maximum line length, it is still printed on a single line.
593 * Extra whitespace is stripped from the end of produced lines.
594 * Other options on the wrapping, such as the line length or indentation,
595 * can be changed using a TextLineWrapperSettings object.
597 * Two interfaces to do the wrapping are provided:
598 * -# High-level interface using either wrapToString() (produces a single
599 * string with embedded newlines) or wrapToVector() (produces a vector of
600 * strings with each line as one element).
601 * These methods operate on std::string and wrap the entire input string.
602 * -# Low-level interface using findNextLine() and formatLine().
603 * findNextLine() operates either on a C string or an std::string, and does
604 * not do any memory allocation (so it does not throw). It finds the next
605 * line to be wrapped, considering the wrapping settings.
606 * formatLine() does whitespace operations on the line found by
607 * findNextLine() and returns an std::string.
608 * These methods allow custom wrapping implementation to either avoid
609 * exceptions or to wrap only a part of the input string.
611 * Typical usage:
612 * \code
613 gmx::TextLineWrapper wrapper;
614 wrapper.settings().setLineLength(78);
615 printf("%s\n", wrapper.wrapToString(textToWrap).c_str());
616 \endcode
618 * \inpublicapi
620 class TextLineWrapper
622 public:
623 /*! \brief
624 * Constructs a new line wrapper with default settings.
626 * Does not throw.
628 TextLineWrapper()
631 /*! \brief
632 * Constructs a new line wrapper with given settings.
634 * \param[in] settings Wrapping settings.
636 * Does not throw.
638 explicit TextLineWrapper(const TextLineWrapperSettings &settings)
639 : settings_(settings)
643 /*! \brief
644 * Provides access to settings of this wrapper.
646 * \returns The settings object for this wrapper.
648 * The returned object can be used to modify settings for the wrapper.
649 * All subsequent calls to wrapToString() and wrapToVector() use the
650 * modified settings.
652 * Does not throw.
654 TextLineWrapperSettings &settings() { return settings_; }
656 //! Returns true if the wrapper would not modify the input string.
657 bool isTrivial() const;
659 /*! \brief
660 * Finds the next line to be wrapped.
662 * \param[in] input String to wrap.
663 * \param[in] lineStart Index of first character of the line to find.
664 * \returns Index of first character of the next line.
666 * If this is the last line, returns the length of \p input.
667 * In determining the length of the returned line, this function
668 * considers the maximum line length, leaving space for indentation,
669 * and also whitespace stripping behavior.
670 * Thus, the line returned may be longer than the maximum line length
671 * if it has leading and/or trailing space.
672 * When wrapping a line on a space (not on an explicit line break),
673 * the returned index is always on a non-whitespace character after the
674 * space.
676 * To iterate over lines in a string, use the following code:
677 * \code
678 gmx::TextLineWrapper wrapper;
679 // <set desired wrapping settings>
680 size_t lineStart = 0;
681 size_t length = input.length();
682 while (lineStart < length)
684 size_t nextLineStart = wrapper.findNextLine(input, lineStart);
685 std::string line = wrapper.formatLine(input, lineStart, nextLineStart));
686 // <do something with the line>
687 lineStart = nextLineStart;
689 return result;
690 \endcode
692 * Does not throw.
694 size_t findNextLine(const char *input, size_t lineStart) const;
695 //! \copydoc findNextLine(const char *, size_t)const
696 size_t findNextLine(const std::string &input, size_t lineStart) const;
697 /*! \brief
698 * Formats a single line for output according to wrapping settings.
700 * \param[in] input Input string.
701 * \param[in] lineStart Index of first character of the line to format.
702 * \param[in] lineEnd Index of first character of the next line.
703 * \returns The line with leading and/or trailing whitespace removed
704 * and indentation applied.
705 * \throws std::bad_alloc if out of memory.
707 * Intended to be used on the lines found by findNextLine().
708 * When used with the lines returned from findNextLine(), the returned
709 * line conforms to the wrapper settings.
710 * Trailing whitespace is always stripped (including any newlines,
711 * i.e., the return value does not contain a newline).
713 std::string formatLine(const std::string &input,
714 size_t lineStart, size_t lineEnd) const;
716 /*! \brief
717 * Formats a string, producing a single string with all the lines.
719 * \param[in] input String to wrap.
720 * \returns \p input with added newlines such that maximum line
721 * length is not exceeded.
722 * \throws std::bad_alloc if out of memory.
724 * Newlines in the input are preserved, including terminal newlines.
725 * Note that if the input does not contain a terminal newline, the
726 * output does not either.
728 std::string wrapToString(const std::string &input) const;
729 /*! \brief
730 * Formats a string, producing a vector with all the lines.
732 * \param[in] input String to wrap.
733 * \returns \p input split into lines such that maximum line length
734 * is not exceeded.
735 * \throws std::bad_alloc if out of memory.
737 * The strings in the returned vector do not contain newlines at the
738 * end.
739 * Note that a single terminal newline does not affect the output:
740 * "line\\n" and "line" both produce the same output (but "line\\n\\n"
741 * produces two lines, the second of which is empty).
743 std::vector<std::string> wrapToVector(const std::string &input) const;
745 private:
746 TextLineWrapperSettings settings_;
749 //! \}
751 } // namespace gmx
753 #endif