/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2011-2018, The GROMACS development team.
 * Copyright (c) 2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief
 * Implements functions and classes in stringutil.h.
 *
 * \author Teemu Murtola <teemu.murtola@gmail.com>
 * \ingroup module_utility
 */
#include "stringutil.h"

#include <cctype>
#include <cstdarg>
#include <cstdio>
#include <cstring>

#include <algorithm>
#include <string>
#include <vector>

#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
// Counts the words in the NUL-terminated string \p s.
//
// A word is a maximal run of characters for which std::isalnum() is true;
// every other character (whitespace, punctuation, ...) acts as a separator.
// \p s must not be null, since its length is taken with std::strlen().
std::size_t countWords(const char* s)
{
    std::size_t nWords = 0;
    // Use length variable to avoid N^2 complexity when executing strlen(s) every iteration
    const std::size_t length = std::strlen(s);

    for (std::size_t i = 0; i < length; i++)
    {
        // If we found a new word, increase counter and step through the word.
        // The <cctype> functions require a value representable as unsigned char;
        // passing a negative plain char is undefined behavior, hence the cast.
        if (std::isalnum(static_cast<unsigned char>(s[i])))
        {
            ++nWords;
            // If we hit string end, '\0' is not alphanumerical
            while (std::isalnum(static_cast<unsigned char>(s[i])))
            {
                // This might increment i to the string end, and then the outer
                // loop will increment i one unit beyond that, but since
                // we compare to the string length in the outer loop this is fine.
                i++;
            }
        }
    }
    return nWords;
}
// Convenience overload: counts the words in \p str by delegating to the
// C-string implementation, so counting stops at the first embedded '\0'.
std::size_t countWords(const std::string& str)
{
    // Under our beautiful C++ interface hides an ugly c-string implementation :-)
    return countWords(str.c_str());
}
95 bool endsWith(const char* str
, const char* suffix
)
97 if (isNullOrEmpty(suffix
))
101 const size_t strLength
= std::strlen(str
);
102 const size_t suffixLength
= std::strlen(suffix
);
103 return (strLength
>= suffixLength
&& std::strcmp(&str
[strLength
- suffixLength
], suffix
) == 0);
// Returns \p str with a trailing \p suffix removed, if present.
//
// A null or empty \p suffix, or a \p str that does not end with \p suffix,
// yields an unmodified copy of \p str.
std::string stripSuffixIfPresent(const std::string& str, const char* suffix)
{
    if (suffix != nullptr)
    {
        const size_t suffixLength = std::strlen(suffix);
        // Test the tail directly on the std::string so that the match and the
        // truncation below are both based on str.length().
        if (suffixLength > 0 && str.length() >= suffixLength
            && str.compare(str.length() - suffixLength, suffixLength, suffix, suffixLength) == 0)
        {
            return str.substr(0, str.length() - suffixLength);
        }
    }
    return str;
}
// Returns a copy of \p str with leading and trailing whitespace removed.
//
// Whitespace is whatever std::isspace() reports; interior whitespace is kept.
std::string stripString(const std::string& str)
{
    // <cctype> functions are undefined for negative plain char values,
    // so classify through unsigned char.
    const auto isSpace = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };

    std::string::const_iterator start = str.begin();
    std::string::const_iterator end   = str.end();
    while (start != end && isSpace(*start))
    {
        ++start;
    }
    while (start != end && isSpace(*(end - 1)))
    {
        --end;
    }
    return std::string(start, end);
}
134 std::string
formatString(gmx_fmtstr
const char* fmt
, ...)
138 std::string result
= formatStringV(fmt
, ap
);
// vprintf-style formatting into a std::string.
//
// Formats into a 1024-byte stack buffer first; if the output does not fit,
// retries with a heap buffer sized from the vsnprintf() return value.
// \p ap is never consumed directly: each attempt works on a va_copy of it.
std::string formatStringV(const char* fmt, va_list ap)
{
    va_list           ap_copy;
    char              staticBuf[1024];
    int               length = 1024;
    std::vector<char> dynamicBuf;
    char*             buf = staticBuf;

    // TODO: There may be a better way of doing this on Windows, Microsoft
    // provides their own way of doing things...
    while (true)
    {
        // vsnprintf() consumes its va_list, so every attempt needs a fresh copy.
        va_copy(ap_copy, ap);
        const int n = vsnprintf(buf, static_cast<size_t>(length), fmt, ap_copy);
        va_end(ap_copy);
        if (n > -1 && n < length)
        {
            // Output (plus terminating '\0') fit into the current buffer.
            std::string result(buf);
            return result;
        }
        if (n > -1)
        {
            // Return value is the required length without the terminator.
            length = n + 1;
        }
        else
        {
            // Negative return gives no size hint; grow geometrically.
            length *= 2;
        }
        dynamicBuf.resize(length);
        buf = dynamicBuf.data();
    }
}
// Splits \p str into whitespace-separated tokens.
//
// Runs of whitespace (per std::isspace()) separate tokens; leading and
// trailing whitespace produce no empty tokens. An empty or all-whitespace
// input yields an empty vector.
std::vector<std::string> splitString(const std::string& str)
{
    // <cctype> functions are undefined for negative plain char values,
    // so classify through unsigned char.
    const auto isSpace = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };

    std::vector<std::string>          result;
    std::string::const_iterator       currPos = str.begin();
    const std::string::const_iterator end     = str.end();
    while (currPos != end)
    {
        // Skip separators, then consume one token.
        const std::string::const_iterator startPos = std::find_if_not(currPos, end, isSpace);
        currPos                                    = std::find_if(startPos, end, isSpace);
        // startPos == end means only trailing whitespace was left.
        if (startPos != end)
        {
            result.emplace_back(startPos, currPos);
        }
    }
    return result;
}
// Splits \p str at every occurrence of \p delim.
//
// Empty fields are preserved (adjacent, leading or trailing delimiters each
// produce an empty string). An empty input yields an empty vector.
std::vector<std::string> splitDelimitedString(const std::string& str, char delim)
{
    std::vector<std::string> result;
    size_t                   currPos = 0;
    const size_t             len     = str.length();
    if (len > 0)
    {
        size_t nextDelim;
        do
        {
            // npos when no delimiter remains; substr() then takes the rest.
            nextDelim = str.find(delim, currPos);
            result.push_back(str.substr(currPos, nextDelim - currPos));
            currPos = nextDelim < len ? nextDelim + 1 : len;
            // The second condition keeps the loop alive for one more pass when
            // the string ends with a delimiter, so the trailing empty field is emitted.
        } while (currPos < len || nextDelim < len);
    }
    return result;
}
218 std::vector
<std::string
> splitAndTrimDelimitedString(const std::string
& str
, char delim
)
220 std::vector
<std::string
> result
;
222 result
= splitDelimitedString(str
, delim
);
223 std::transform(result
.begin(), result
.end(), result
.begin(), stripString
);
namespace
{

/*! \brief
 * Helper function to identify word boundaries for replaceAllWords().
 *
 * \param[in] c  Character to classify.
 * \returns  `true` if the character is considered part of a word, i.e.,
 *     it is alphanumeric, a dash, or an underscore.
 *
 * \ingroup module_utility
 */
bool isWordChar(char c)
{
    // Cast first: std::isalnum() is undefined for negative plain char values.
    return (std::isalnum(static_cast<unsigned char>(c)) != 0) || c == '-' || c == '_';
}

} // namespace
243 * Common implementation for string replacement functions.
245 * \param[in] input Input string.
246 * \param[in] from String to find.
247 * \param[in] to String to use to replace \p from.
248 * \param[in] bWholeWords Whether to only consider matches to whole words.
249 * \returns \p input with all occurrences of \p from replaced with \p to.
250 * \throws std::bad_alloc if out of memory.
252 * \ingroup module_utility
254 std::string
replaceInternal(const std::string
& input
, const char* from
, const char* to
, bool bWholeWords
)
256 GMX_RELEASE_ASSERT(from
!= nullptr && to
!= nullptr, "Replacement strings must not be NULL");
257 size_t matchLength
= std::strlen(from
);
260 size_t matchPos
= input
.find(from
);
261 while (matchPos
< input
.length())
263 size_t matchEnd
= matchPos
+ matchLength
;
266 if (!((matchPos
== 0 || !isWordChar(input
[matchPos
- 1]))
267 && (matchEnd
== input
.length() || !isWordChar(input
[matchEnd
]))))
269 matchPos
= input
.find(from
, matchPos
+ 1);
273 result
.append(input
, inputPos
, matchPos
- inputPos
);
276 matchPos
= input
.find(from
, inputPos
);
278 result
.append(input
, inputPos
, matchPos
- inputPos
);
284 std::string
replaceAll(const std::string
& input
, const char* from
, const char* to
)
286 return replaceInternal(input
, from
, to
, false);
289 std::string
replaceAll(const std::string
& input
, const std::string
& from
, const std::string
& to
)
291 return replaceInternal(input
, from
.c_str(), to
.c_str(), false);
294 std::string
replaceAllWords(const std::string
& input
, const char* from
, const char* to
)
296 return replaceInternal(input
, from
, to
, true);
299 std::string
replaceAllWords(const std::string
& input
, const std::string
& from
, const std::string
& to
)
301 return replaceInternal(input
, from
.c_str(), to
.c_str(), true);
// Returns whether \p source and \p target are equal when letter case is
// ignored (characters are compared after std::tolower()).
bool equalCaseInsensitive(const std::string& source, const std::string& target)
{
    // std::tolower() is undefined for negative plain char values, so convert
    // through unsigned char before lowering.
    const auto sameIgnoringCase = [](char s, char t) {
        return std::tolower(static_cast<unsigned char>(s)) == std::tolower(static_cast<unsigned char>(t));
    };
    // The length check also guarantees std::equal() stays within target.
    return source.length() == target.length()
           && std::equal(source.begin(), source.end(), target.begin(), sameIgnoringCase);
}
// Case-insensitive comparison of at most \p maxLengthOfComparison leading
// characters of \p source and \p target.
//
// If \p source is shorter than the limit, the strings must have equal length
// and match completely. Otherwise \p target must have at least
// \p maxLengthOfComparison characters and the two prefixes of that length
// must match.
bool equalCaseInsensitive(const std::string& source, const std::string& target, size_t maxLengthOfComparison)
{
    std::string::const_iterator comparisonEnd;
    if (source.length() < maxLengthOfComparison)
    {
        if (source.length() != target.length())
        {
            return false;
        }
        comparisonEnd = source.end();
    }
    else
    {
        // target must be long enough for std::equal() to read the full prefix.
        if (maxLengthOfComparison > target.length())
        {
            return false;
        }
        comparisonEnd = source.begin() + static_cast<std::string::difference_type>(maxLengthOfComparison);
    }
    // std::tolower() is undefined for negative plain char values, so convert
    // through unsigned char before lowering.
    return std::equal(source.begin(), comparisonEnd, target.begin(), [](char s, char t) {
        return std::tolower(static_cast<unsigned char>(s)) == std::tolower(static_cast<unsigned char>(t));
    });
}
335 /********************************************************************
336 * TextLineWrapperSettings
339 TextLineWrapperSettings::TextLineWrapperSettings() :
342 firstLineIndent_(-1),
343 bKeepFinalSpaces_(false),
344 continuationChar_('\0')
349 /********************************************************************
353 bool TextLineWrapper::isTrivial() const
355 return settings_
.lineLength() == 0 && settings_
.indent() == 0 && settings_
.firstLineIndent_
<= 0;
358 size_t TextLineWrapper::findNextLine(const char* input
, size_t lineStart
) const
360 size_t inputLength
= std::strlen(input
);
361 bool bFirstLine
= (lineStart
== 0 || input
[lineStart
- 1] == '\n');
362 // Ignore leading whitespace if necessary.
365 lineStart
+= std::strspn(input
+ lineStart
, " ");
366 if (lineStart
>= inputLength
)
372 int indent
= (bFirstLine
? settings_
.firstLineIndent() : settings_
.indent());
373 size_t lastAllowedBreakPoint
=
374 (settings_
.lineLength() > 0 ? std::min(lineStart
+ settings_
.lineLength() - indent
, inputLength
)
376 // Ignore trailing whitespace.
377 lastAllowedBreakPoint
+= std::strspn(input
+ lastAllowedBreakPoint
, " ");
378 size_t lineEnd
= lineStart
;
381 const char* nextBreakPtr
= std::strpbrk(input
+ lineEnd
, " \n");
382 size_t nextBreak
= (nextBreakPtr
!= nullptr ? nextBreakPtr
- input
: inputLength
);
383 if (nextBreak
> lastAllowedBreakPoint
&& lineEnd
> lineStart
)
387 lineEnd
= nextBreak
+ 1;
388 } while (lineEnd
< lastAllowedBreakPoint
&& input
[lineEnd
- 1] != '\n');
389 return (lineEnd
< inputLength
? lineEnd
: inputLength
);
392 size_t TextLineWrapper::findNextLine(const std::string
& input
, size_t lineStart
) const
394 return findNextLine(input
.c_str(), lineStart
);
397 std::string
TextLineWrapper::formatLine(const std::string
& input
, size_t lineStart
, size_t lineEnd
) const
399 size_t inputLength
= input
.length();
400 bool bFirstLine
= (lineStart
== 0 || input
[lineStart
- 1] == '\n');
401 // Strip leading whitespace if necessary.
404 lineStart
= input
.find_first_not_of(' ', lineStart
);
405 if (lineStart
>= inputLength
)
407 return std::string();
410 int indent
= (bFirstLine
? settings_
.firstLineIndent() : settings_
.indent());
411 bool bContinuation
= (lineEnd
< inputLength
&& input
[lineEnd
- 1] != '\n');
412 // Remove explicit line breaks in input
413 // (the returned line should not contain line breaks).
414 while (lineEnd
> lineStart
&& input
[lineEnd
- 1] == '\n')
418 // Strip trailing whitespace, unless they are explicit in the input and it
419 // has been requested to keep them.
420 if (bContinuation
|| !settings_
.bKeepFinalSpaces_
)
422 while (lineEnd
> lineStart
&& std::isspace(input
[lineEnd
- 1]))
428 const size_t lineLength
= lineEnd
- lineStart
;
431 return std::string();
433 std::string
result(indent
, ' ');
434 result
.append(input
, lineStart
, lineLength
);
435 if (bContinuation
&& settings_
.continuationChar_
!= '\0')
437 result
.append(1, ' ');
438 result
.append(1, settings_
.continuationChar_
);
443 std::string
TextLineWrapper::wrapToString(const std::string
& input
) const
446 size_t lineStart
= 0;
447 size_t length
= input
.length();
448 while (lineStart
< length
)
450 size_t nextLineStart
= findNextLine(input
, lineStart
);
451 result
.append(formatLine(input
, lineStart
, nextLineStart
));
452 if (nextLineStart
< length
|| (nextLineStart
== length
&& input
[length
- 1] == '\n'))
456 lineStart
= nextLineStart
;
461 std::vector
<std::string
> TextLineWrapper::wrapToVector(const std::string
& input
) const
463 std::vector
<std::string
> result
;
464 size_t lineStart
= 0;
465 size_t length
= input
.length();
466 while (lineStart
< length
)
468 size_t nextLineStart
= findNextLine(input
, lineStart
);
469 result
.push_back(formatLine(input
, lineStart
, nextLineStart
));
470 lineStart
= nextLineStart
;