/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 2011,2012,2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 * \brief
 * Implements functions and classes in stringutil.h.
 *
 * \author Teemu Murtola <teemu.murtola@gmail.com>
 * \ingroup module_utility
 */
#include "stringutil.h"

#include <cctype>
#include <cstdarg>
#include <cstddef>
#include <cstdio>
#include <cstring>

#include <algorithm>
#include <string>
#include <vector>

#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/gmxassert.h"
// Counts the words in a NUL-terminated string, where a word is a maximal
// run of alphanumeric characters (as classified by std::isalnum).
// s must not be null, since it is passed to std::strlen().
std::size_t
countWords(const char *s)
{
    std::size_t       nWords = 0;
    // Use length variable to avoid N^2 complexity when executing strlen(s) every iteration
    const std::size_t length = std::strlen(s);

    for (std::size_t i = 0; i < length; i++)
    {
        // If we found a new word, increase counter and step through the word.
        // The cast is required: passing a negative char to std::isalnum is
        // undefined behavior.
        if (std::isalnum(static_cast<unsigned char>(s[i])) != 0)
        {
            ++nWords;
            // If we hit string end, '\0' is not alphanumerical
            while (std::isalnum(static_cast<unsigned char>(s[i])) != 0)
            {
                // This might increment i to the string end, and then the outer
                // loop will increment i one unit beyond that, but since
                // we compare to the string length in the outer loop this is fine.
                i++;
            }
        }
    }
    return nWords;
}
// Counts the words in a std::string by forwarding to the C-string overload.
std::size_t
countWords(const std::string &str)
{
    // Under our beautiful C++ interface hides an ugly c-string implementation :-)
    return countWords(str.c_str());
}
96 bool endsWith(const char *str
, const char *suffix
)
98 if (isNullOrEmpty(suffix
))
102 const size_t strLength
= std::strlen(str
);
103 const size_t suffixLength
= std::strlen(suffix
);
104 return (strLength
>= suffixLength
105 && std::strcmp(&str
[strLength
- suffixLength
], suffix
) == 0);
108 std::string
stripSuffixIfPresent(const std::string
&str
, const char *suffix
)
110 if (suffix
!= nullptr)
112 size_t suffixLength
= std::strlen(suffix
);
113 if (suffixLength
> 0 && endsWith(str
, suffix
))
115 return str
.substr(0, str
.length() - suffixLength
);
// Returns a copy of str with leading and trailing whitespace
// (as classified by std::isspace) removed.
std::string
stripString(const std::string &str)
{
    std::string::const_iterator start = str.begin();
    std::string::const_iterator end   = str.end();
    // The unsigned char casts are required: passing a negative char to
    // std::isspace is undefined behavior.
    while (start != end && std::isspace(static_cast<unsigned char>(*start)) != 0)
    {
        ++start;
    }
    while (start != end && std::isspace(static_cast<unsigned char>(*(end - 1))) != 0)
    {
        --end;
    }
    return std::string(start, end);
}
136 std::string
formatString(gmx_fmtstr
const char *fmt
, ...)
140 std::string result
= formatStringV(fmt
, ap
);
// Formats a string printf-style from a va_list.
//
// The output is first written into a 1024-byte stack buffer. If it does not
// fit, the call is retried with a heap buffer: sized exactly from the
// vsnprintf() return value when that is non-negative, doubled otherwise.
// ap itself is not consumed; each attempt works on a va_copy of it.
std::string
formatStringV(const char *fmt, va_list ap)
{
    va_list           ap_copy;
    char              staticBuf[1024];
    int               length = 1024;
    std::vector<char> dynamicBuf;
    char             *buf = staticBuf;

    // TODO: There may be a better way of doing this on Windows, Microsoft
    // provides their own way of doing things...
    // NOTE(review): if vsnprintf() keeps returning a negative value, this
    // loop keeps doubling the buffer; confirm whether a cap is wanted.
    while (true)
    {
        va_copy(ap_copy, ap);
        const int n = vsnprintf(buf, length, fmt, ap_copy);
        va_end(ap_copy);
        if (n > -1 && n < length)
        {
            std::string result(buf);
            return result;
        }
        if (n > -1)
        {
            // The return value is the required length without the terminator.
            length = n + 1;
        }
        else
        {
            length *= 2;
        }
        dynamicBuf.resize(length);
        buf = dynamicBuf.data();
    }
}
// Splits str into tokens separated by runs of whitespace (as classified by
// std::isspace). Leading and trailing whitespace produce no empty tokens.
std::vector<std::string> splitString(const std::string &str)
{
    std::vector<std::string>          result;
    std::string::const_iterator       currPos = str.begin();
    const std::string::const_iterator end     = str.end();
    while (currPos != end)
    {
        // Skip whitespace before the next token. The unsigned char casts are
        // required: a negative char passed to std::isspace is undefined behavior.
        while (currPos != end && std::isspace(static_cast<unsigned char>(*currPos)) != 0)
        {
            ++currPos;
        }
        const std::string::const_iterator startPos = currPos;
        while (currPos != end && std::isspace(static_cast<unsigned char>(*currPos)) == 0)
        {
            ++currPos;
        }
        // Only trailing whitespace remained if no token start was found.
        if (startPos != end)
        {
            result.emplace_back(startPos, currPos);
        }
    }
    return result;
}
// Splits str at every occurrence of delim.
// Empty fields are kept, including one after a trailing delimiter.
// An empty input yields an empty vector.
std::vector<std::string> splitDelimitedString(const std::string &str, char delim)
{
    std::vector<std::string> result;
    size_t                   currPos = 0;
    const size_t             len     = str.length();
    if (len > 0)
    {
        size_t nextDelim;
        do
        {
            nextDelim = str.find(delim, currPos);
            // When no delimiter is left, nextDelim is npos and substr()
            // clamps the count to the rest of the string.
            result.push_back(str.substr(currPos, nextDelim - currPos));
            currPos = nextDelim < len ? nextDelim + 1 : len;
        }
        // The second condition adds an empty field after a trailing delimiter.
        while (currPos < len || nextDelim < len);
    }
    return result;
}
221 std::vector
<std::string
> splitAndTrimDelimitedString(const std::string
&str
, char delim
)
223 std::vector
<std::string
> result
;
225 result
= splitDelimitedString(str
, delim
);
226 std::transform(result
.begin(), result
.end(), result
.begin(), stripString
);
/*! \brief
 * Helper function to identify word boundaries for replaceAllWords().
 *
 * \returns  `true` if the character is considered part of a word.
 *
 * Word characters are alphanumeric characters, '-' and '_'.
 *
 * \ingroup module_utility
 */
bool isWordChar(char c)
{
    // The cast is required: passing a negative char to std::isalnum is
    // undefined behavior.
    return (std::isalnum(static_cast<unsigned char>(c)) != 0) || c == '-' || c == '_';
}
246 * Common implementation for string replacement functions.
248 * \param[in] input Input string.
249 * \param[in] from String to find.
250 * \param[in] to String to use to replace \p from.
251 * \param[in] bWholeWords Whether to only consider matches to whole words.
252 * \returns \p input with all occurrences of \p from replaced with \p to.
253 * \throws std::bad_alloc if out of memory.
255 * \ingroup module_utility
258 replaceInternal(const std::string
&input
, const char *from
, const char *to
,
261 GMX_RELEASE_ASSERT(from
!= nullptr && to
!= nullptr,
262 "Replacement strings must not be NULL");
263 size_t matchLength
= std::strlen(from
);
266 size_t matchPos
= input
.find(from
);
267 while (matchPos
< input
.length())
269 size_t matchEnd
= matchPos
+ matchLength
;
272 if (!((matchPos
== 0 || !isWordChar(input
[matchPos
-1]))
273 && (matchEnd
== input
.length() || !isWordChar(input
[matchEnd
]))))
275 matchPos
= input
.find(from
, matchPos
+ 1);
280 result
.append(input
, inputPos
, matchPos
- inputPos
);
283 matchPos
= input
.find(from
, inputPos
);
285 result
.append(input
, inputPos
, matchPos
- inputPos
);
292 replaceAll(const std::string
&input
, const char *from
, const char *to
)
294 return replaceInternal(input
, from
, to
, false);
298 replaceAll(const std::string
&input
, const std::string
&from
,
299 const std::string
&to
)
301 return replaceInternal(input
, from
.c_str(), to
.c_str(), false);
305 replaceAllWords(const std::string
&input
, const char *from
, const char *to
)
307 return replaceInternal(input
, from
, to
, true);
311 replaceAllWords(const std::string
&input
, const std::string
&from
,
312 const std::string
&to
)
314 return replaceInternal(input
, from
.c_str(), to
.c_str(), true);
// Returns whether source and target have the same length and are equal
// when compared character by character after std::tolower().
bool equalCaseInsensitive(const std::string &source, const std::string &target)
{
    return source.length() == target.length() &&
           std::equal(source.begin(), source.end(), target.begin(),
                      [](const char &s, const char &t)
                      {
                          // The casts are required: passing a negative char
                          // to std::tolower is undefined behavior.
                          return std::tolower(static_cast<unsigned char>(s))
                          == std::tolower(static_cast<unsigned char>(t));
                      });
}
// Case-insensitive comparison of at most maxLengthOfComparison leading
// characters.
//
// If source is shorter than maxLengthOfComparison, the strings must have
// equal length and match completely. Otherwise target must contain at least
// maxLengthOfComparison characters, and only that many are compared.
bool equalCaseInsensitive(const std::string &source,
                          const std::string &target,
                          size_t             maxLengthOfComparison)
{
    std::string::const_iterator comparisonEnd;
    if (source.length() < maxLengthOfComparison)
    {
        if (source.length() != target.length())
        {
            return false;
        }
        comparisonEnd = source.end();
    }
    else
    {
        if (maxLengthOfComparison > target.length())
        {
            return false;
        }
        comparisonEnd = source.begin() + maxLengthOfComparison;
    }
    return std::equal(source.begin(), comparisonEnd, target.begin(),
                      [](const char &s, const char &t)
                      {
                          // The casts are required: passing a negative char
                          // to std::tolower is undefined behavior.
                          return std::tolower(static_cast<unsigned char>(s))
                          == std::tolower(static_cast<unsigned char>(t));
                      });
}
/********************************************************************
 * TextLineWrapperSettings
 */
355 TextLineWrapperSettings::TextLineWrapperSettings()
356 : maxLength_(0), indent_(0), firstLineIndent_(-1),
357 bKeepFinalSpaces_(false), continuationChar_('\0')
/********************************************************************
 * TextLineWrapper
 */
366 bool TextLineWrapper::isTrivial() const
368 return settings_
.lineLength() == 0 && settings_
.indent() == 0
369 && settings_
.firstLineIndent_
<= 0;
373 TextLineWrapper::findNextLine(const char *input
, size_t lineStart
) const
375 size_t inputLength
= std::strlen(input
);
376 bool bFirstLine
= (lineStart
== 0 || input
[lineStart
- 1] == '\n');
377 // Ignore leading whitespace if necessary.
380 lineStart
+= std::strspn(input
+ lineStart
, " ");
381 if (lineStart
>= inputLength
)
387 int indent
= (bFirstLine
? settings_
.firstLineIndent() : settings_
.indent());
388 size_t lastAllowedBreakPoint
389 = (settings_
.lineLength() > 0
390 ? std::min(lineStart
+ settings_
.lineLength() - indent
, inputLength
)
392 // Ignore trailing whitespace.
393 lastAllowedBreakPoint
+= std::strspn(input
+ lastAllowedBreakPoint
, " ");
394 size_t lineEnd
= lineStart
;
397 const char *nextBreakPtr
= std::strpbrk(input
+ lineEnd
, " \n");
399 = (nextBreakPtr
!= nullptr ? nextBreakPtr
- input
: inputLength
);
400 if (nextBreak
> lastAllowedBreakPoint
&& lineEnd
> lineStart
)
404 lineEnd
= nextBreak
+ 1;
406 while (lineEnd
< lastAllowedBreakPoint
&& input
[lineEnd
- 1] != '\n');
407 return (lineEnd
< inputLength
? lineEnd
: inputLength
);
411 TextLineWrapper::findNextLine(const std::string
&input
, size_t lineStart
) const
413 return findNextLine(input
.c_str(), lineStart
);
417 TextLineWrapper::formatLine(const std::string
&input
,
418 size_t lineStart
, size_t lineEnd
) const
420 size_t inputLength
= input
.length();
421 bool bFirstLine
= (lineStart
== 0 || input
[lineStart
- 1] == '\n');
422 // Strip leading whitespace if necessary.
425 lineStart
= input
.find_first_not_of(' ', lineStart
);
426 if (lineStart
>= inputLength
)
428 return std::string();
431 int indent
= (bFirstLine
? settings_
.firstLineIndent() : settings_
.indent());
432 bool bContinuation
= (lineEnd
< inputLength
&& input
[lineEnd
- 1] != '\n');
433 // Remove explicit line breaks in input
434 // (the returned line should not contain line breaks).
435 while (lineEnd
> lineStart
&& input
[lineEnd
- 1] == '\n')
439 // Strip trailing whitespace, unless they are explicit in the input and it
440 // has been requested to keep them.
441 if (bContinuation
|| !settings_
.bKeepFinalSpaces_
)
443 while (lineEnd
> lineStart
&& std::isspace(input
[lineEnd
- 1]))
449 const size_t lineLength
= lineEnd
- lineStart
;
452 return std::string();
454 std::string
result(indent
, ' ');
455 result
.append(input
, lineStart
, lineLength
);
456 if (bContinuation
&& settings_
.continuationChar_
!= '\0')
458 result
.append(1, ' ');
459 result
.append(1, settings_
.continuationChar_
);
465 TextLineWrapper::wrapToString(const std::string
&input
) const
468 size_t lineStart
= 0;
469 size_t length
= input
.length();
470 while (lineStart
< length
)
472 size_t nextLineStart
= findNextLine(input
, lineStart
);
473 result
.append(formatLine(input
, lineStart
, nextLineStart
));
474 if (nextLineStart
< length
475 || (nextLineStart
== length
&& input
[length
- 1] == '\n'))
479 lineStart
= nextLineStart
;
484 std::vector
<std::string
>
485 TextLineWrapper::wrapToVector(const std::string
&input
) const
487 std::vector
<std::string
> result
;
488 size_t lineStart
= 0;
489 size_t length
= input
.length();
490 while (lineStart
< length
)
492 size_t nextLineStart
= findNextLine(input
, lineStart
);
493 result
.push_back(formatLine(input
, lineStart
, nextLineStart
));
494 lineStart
= nextLineStart
;