1 //===-- Args.cpp ----------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "lldb/Utility/Args.h"
10 #include "lldb/Utility/FileSpec.h"
11 #include "lldb/Utility/Stream.h"
12 #include "lldb/Utility/StringList.h"
13 #include "llvm/ADT/StringSwitch.h"
16 using namespace lldb_private
;
18 // A helper function for argument parsing.
19 // Parses the initial part of the first argument using normal double quote
20 // rules: backslash escapes the double quote and itself. The parsed string is
21 // appended to the second argument. The function returns the unparsed portion
22 // of the string, starting at the closing quote.
// `quoted` is the text to scan; `result` receives the unescaped characters.
23 static llvm::StringRef
ParseDoubleQuotes(llvm::StringRef quoted
,
24 std::string
&result
) {
25 // Inside double quotes, '\' and '"' are special.
26 static const char *k_escapable_characters
= "\"\\";
28 // Skip over regular characters and append them.
// When no special character is left, find_first_of returns npos and the
// two substr calls below consume the whole remainder, leaving `quoted` empty.
29 size_t regular
= quoted
.find_first_of(k_escapable_characters
);
30 result
+= quoted
.substr(0, regular
);
31 quoted
= quoted
.substr(regular
);
33 // If we have reached the end of string or the closing quote, we're done.
34 if (quoted
.empty() || quoted
.front() == '"')
37 // We have found a backslash.
38 quoted
= quoted
.drop_front();
41 // A lone backslash at the end of string, let's just append it.
46 // If the character after the backslash is not an allowed escapable
47 // character, we leave the character sequence untouched.
48 if (strchr(k_escapable_characters
, quoted
.front()) == nullptr)
// Copy the character that followed the backslash and step past it.
51 result
+= quoted
.front();
52 quoted
= quoted
.drop_front();
// Computes a count for `argv`. Callers in this file use the returned value as
// the number of elements when wrapping `argv` in an llvm::ArrayRef.
// NOTE(review): presumably `argv` must be terminated by a null pointer --
// confirm against the function body.
58 static size_t ArgvToArgc(const char **argv
) {
67 // Trims all whitespace that can separate command line arguments from the left
68 // side of the string.
// On output, `shift` holds the number of characters that were trimmed, computed
// as the distance between the trimmed string's data pointer and the original.
// Only the space and tab characters are treated as separators.
69 static llvm::StringRef
ltrimForArgs(llvm::StringRef str
, size_t &shift
) {
70 static const char *k_space_separators
= " \t";
71 llvm::StringRef result
= str
.ltrim(k_space_separators
);
72 shift
= result
.data() - str
.data();
76 // A helper function for SetCommandString. Parses a single argument from the
77 // command string, processing quotes and backslashes in a shell-like manner.
78 // The function returns a tuple consisting of the parsed argument, the quote
79 // char used, and the unparsed portion of the string starting at the first
80 // unquoted, unescaped whitespace character.
81 static std::tuple
<std::string
, char, llvm::StringRef
>
82 ParseSingleArgument(llvm::StringRef command
) {
83 // Argument can be split into multiple discontiguous pieces, for example:
85 // this would result in a single argument "Hello World" (without the quotes)
86 // since the quotes would be removed and there is no space between the
90 // Since we can have multiple quotes that form a single command in a command
91 // like: "Hello "world'!' (which will make a single argument "Hello world!")
92 // we remember the first quote character we encounter and use that for the
// '\0' means that no quote character has been seen yet.
94 char first_quote_char
= '\0';
96 bool arg_complete
= false;
98 // Skip over regular characters and append them.
// The scan stops at whitespace, any of the three quote characters, or a
// backslash.
99 size_t regular
= command
.find_first_of(" \t\r\"'`\\");
100 arg
+= command
.substr(0, regular
);
101 command
= command
.substr(regular
);
// `special` is the character the scan stopped on; consume it.
106 char special
= command
.front();
107 command
= command
.drop_front();
110 if (command
.empty()) {
115 // If the character after the backslash is not an allowed escapable
116 // character, we leave the character sequence untouched.
117 if (strchr(" \t\\'\"`", command
.front()) == nullptr)
// Append the character that followed the backslash and step past it.
120 arg
+= command
.front();
121 command
= command
.drop_front();
128 // We are not inside any quotes, we just found a space after an argument.
136 // We found the start of a quote scope.
// Only the first quote character encountered is recorded.
137 if (first_quote_char
== '\0')
138 first_quote_char
= special
;
// ParseDoubleQuotes appends the unescaped text to `arg` and returns the
// remainder starting at the closing quote.
141 command
= ParseDoubleQuotes(command
, arg
);
143 // For single quotes, we simply skip ahead to the matching quote
144 // character (or the end of the string).
145 size_t quoted
= command
.find(special
);
146 arg
+= command
.substr(0, quoted
);
147 command
= command
.substr(quoted
);
150 // If we found a closing quote, skip it.
151 if (!command
.empty())
152 command
= command
.drop_front();
156 } while (!arg_complete
);
// Result: the parsed argument, the first quote character seen (or '\0'), and
// the unparsed remainder of the command.
158 return std::make_tuple(arg
, first_quote_char
, command
);
// Constructs an entry holding a copy of `str` in a newly allocated buffer,
// together with the quote character and the optional column.
161 Args::ArgEntry::ArgEntry(llvm::StringRef str
, char quote
,
162 std::optional
<uint16_t> column
)
163 : quote(quote
), column(column
) {
164 size_t size
= str
.size();
// One char more than the string length is allocated.
// NOTE(review): presumably the extra char holds a terminating NUL -- the
// store is not among the lines shown here; confirm.
165 ptr
.reset(new char[size
+ 1]);
// A StringRef may carry a null data pointer; copy from an empty literal in
// that case so memcpy never receives a null source.
167 ::memcpy(data(), str
.data() ? str
.data() : "", size
);
// Constructs an argument list by parsing `command` with SetCommandString.
172 Args::Args(llvm::StringRef command
) { SetCommandString(command
); }
// Copy constructor, implemented by delegating to the copy-assignment operator.
174 Args::Args(const Args
&rhs
) { *this = rhs
; }
// Constructs from a StringList: starts from a default-constructed Args and
// visits every string in `list`.
// NOTE(review): presumably each string is appended as one argument -- the loop
// body is not among the lines shown here; confirm.
176 Args::Args(const StringList
&list
) : Args() {
177 for (const std::string
&arg
: list
)
// Constructs from an array of string references: starts from a
// default-constructed Args and visits every element of `args`.
// NOTE(review): presumably each element is appended as one argument -- the
// loop body is not among the lines shown here; confirm.
181 Args::Args(llvm::ArrayRef
<llvm::StringRef
> args
) : Args() {
182 for (llvm::StringRef arg
: args
)
// Copy assignment. Every entry of `rhs` is re-created in m_entries from its
// text, quote character and column, and m_argv receives a pointer to the new
// entry's data. m_argv is terminated with a nullptr afterwards.
186 Args
&Args::operator=(const Args
&rhs
) {
191 for (auto &entry
: rhs
.m_entries
) {
192 m_entries
.emplace_back(entry
.ref(), entry
.quote
, entry
.column
);
// Point at the entry just created in this object, not at rhs's storage.
193 m_argv
.push_back(m_entries
.back().data());
195 m_argv
.push_back(nullptr);
// The destructor is compiler-generated.
200 Args::~Args() = default;
// Writes the arguments to the stream `s`, one line per entry, each formatted
// as label_name[index]="text". A final line of the form label_name[index]=NULL
// is written after the entries.
202 void Args::Dump(Stream
&s
, const char *label_name
) const {
207 for (auto &entry
: m_entries
) {
209 s
.Format("{0}[{1}]=\"{2}\"\n", label_name
, i
++, entry
.ref());
211 s
.Format("{0}[{1}]=NULL\n", label_name
, i
);
// Builds `command` from the text of the stored entries, visiting them in
// order. Returns true when there is at least one entry.
215 bool Args::GetCommandString(std::string
&command
) const {
218 for (size_t i
= 0; i
< m_entries
.size(); ++i
) {
221 char quote
= m_entries
[i
].quote
;
224 command
+= m_entries
[i
].ref();
229 return !m_entries
.empty();
// Builds `command` from the stored entries, visiting them in order. An entry
// that has a quote character is emitted with that character before and after
// its text; other entries are emitted as plain text. Returns true when there
// is at least one entry.
232 bool Args::GetQuotedCommandString(std::string
&command
) const {
235 for (size_t i
= 0; i
< m_entries
.size(); ++i
) {
// A non-zero quote char means the entry was quoted.
239 if (m_entries
[i
].quote
) {
240 command
+= m_entries
[i
].quote
;
241 command
+= m_entries
[i
].ref();
242 command
+= m_entries
[i
].quote
;
244 command
+= m_entries
[i
].ref();
248 return !m_entries
.empty();
// Parses `command` into individual arguments. Leading separators are trimmed,
// then ParseSingleArgument is called repeatedly until the string is used up.
// Each parsed argument is stored in m_entries together with its quote
// character and column, and a pointer to its data is added to m_argv. m_argv
// is terminated with a nullptr at the end.
251 void Args::SetCommandString(llvm::StringRef command
) {
257 command
= ltrimForArgs(command
, shift
);
261 while (!command
.empty()) {
// Remember where this argument started so the consumed length can be
// measured after parsing.
262 const char *prev
= command
.data();
263 std::tie(arg
, quote
, command
) = ParseSingleArgument(command
);
264 m_entries
.emplace_back(arg
, quote
, column
);
265 m_argv
.push_back(m_entries
.back().data());
266 command
= ltrimForArgs(command
, shift
);
// Advance the column by everything consumed in this iteration: the argument
// itself plus the separators trimmed after it.
268 column
+= command
.data() - prev
;
270 m_argv
.push_back(nullptr);
// Looks up the argument at position `idx`, after a bounds check against the
// size of m_argv.
273 const char *Args::GetArgumentAtIndex(size_t idx
) const {
274 if (idx
< m_argv
.size())
// Returns a pointer to the argv array, or nullptr when m_argv holds a single
// element only. m_argv must not be empty (asserted).
279 char **Args::GetArgumentVector() {
280 assert(!m_argv
.empty());
281 // TODO: functions like execve and posix_spawnp exhibit undefined behavior
282 // when argv or envp is null. So the code below is actually wrong. However,
283 // other code in LLDB depends on it being null. The code has been acting
284 // this way for some time, so it makes sense to leave it this way until
285 // someone has the time to come along and fix it.
286 return (m_argv
.size() > 1) ? m_argv
.data() : nullptr;
// Const variant of the argv accessor: when m_argv holds more than one element
// its data is exposed as `const char **`. m_argv must not be empty (asserted).
// NOTE(review): the alternative branch of the conditional is not among the
// lines shown here; presumably it yields nullptr like GetArgumentVector --
// confirm.
289 const char **Args::GetConstArgumentVector() const {
290 assert(!m_argv
.empty());
291 return (m_argv
.size() > 1) ? const_cast<const char **>(m_argv
.data())
296 // Don't pop the last NULL terminator from the argv array
297 if (m_entries
.empty())
299 m_argv
.erase(m_argv
.begin());
300 m_entries
.erase(m_entries
.begin());
// Inserts `arg_str` with quote character `quote_char` as the first argument,
// by delegating to InsertArgumentAtIndex with index 0.
303 void Args::Unshift(llvm::StringRef arg_str
, char quote_char
) {
304 InsertArgumentAtIndex(0, arg_str
, quote_char
);
// Appends a copy of every entry in `rhs` (text, quote character and column)
// to this object, adding a pointer to each new entry's data to m_argv, and
// finishes by pushing a terminating nullptr onto m_argv.
307 void Args::AppendArguments(const Args
&rhs
) {
// Invariant on entry: m_argv has one element more than m_entries, and that
// last element is the nullptr terminator.
308 assert(m_argv
.size() == m_entries
.size() + 1);
309 assert(m_argv
.back() == nullptr);
311 for (auto &entry
: rhs
.m_entries
) {
312 m_entries
.emplace_back(entry
.ref(), entry
.quote
, entry
.column
);
313 m_argv
.push_back(m_entries
.back().data());
315 m_argv
.push_back(nullptr);
// Appends the strings of `argv` to this object. The number of strings is
// obtained from ArgvToArgc. Each string becomes an entry with no quote
// character and no column, and m_argv is terminated with a nullptr afterwards.
318 void Args::AppendArguments(const char **argv
) {
319 size_t argc
= ArgvToArgc(argv
);
// Invariant on entry: m_argv has one element more than m_entries, and that
// last element is the nullptr terminator.
321 assert(m_argv
.size() == m_entries
.size() + 1);
322 assert(m_argv
.back() == nullptr);
324 for (auto arg
: llvm::ArrayRef(argv
, argc
)) {
325 m_entries
.emplace_back(arg
, '\0', std::nullopt
);
326 m_argv
.push_back(m_entries
.back().data());
329 m_argv
.push_back(nullptr);
// Adds `arg_str` with quote character `quote_char` after the existing
// arguments, by inserting at index GetArgumentCount().
332 void Args::AppendArgument(llvm::StringRef arg_str
, char quote_char
) {
333 InsertArgumentAtIndex(GetArgumentCount(), arg_str
, quote_char
);
// Inserts a new entry built from `arg_str` and `quote_char` (with no column)
// at position `idx` of m_entries, and inserts a pointer to its data at the
// same position of m_argv. An `idx` equal to the number of entries is
// accepted; larger values are checked for before inserting.
336 void Args::InsertArgumentAtIndex(size_t idx
, llvm::StringRef arg_str
,
// Invariant on entry: m_argv has one element more than m_entries, and that
// last element is the nullptr terminator.
338 assert(m_argv
.size() == m_entries
.size() + 1);
339 assert(m_argv
.back() == nullptr);
341 if (idx
> m_entries
.size())
343 m_entries
.emplace(m_entries
.begin() + idx
, arg_str
, quote_char
, std::nullopt
);
344 m_argv
.insert(m_argv
.begin() + idx
, m_entries
[idx
].data());
// Replaces the entry at position `idx` with a new one built from `arg_str`
// and `quote_char` (with no column), and updates m_argv at the same position
// to point at the new entry's data. Out-of-range indices are checked for
// before replacing.
347 void Args::ReplaceArgumentAtIndex(size_t idx
, llvm::StringRef arg_str
,
// Invariant on entry: m_argv has one element more than m_entries, and that
// last element is the nullptr terminator.
349 assert(m_argv
.size() == m_entries
.size() + 1);
350 assert(m_argv
.back() == nullptr);
352 if (idx
>= m_entries
.size())
355 m_entries
[idx
] = ArgEntry(arg_str
, quote_char
, std::nullopt
);
// The old pointer referred to the replaced entry; refresh it.
356 m_argv
[idx
] = m_entries
[idx
].data();
// Removes the argument at position `idx` from both m_argv and m_entries.
// Out-of-range indices are checked for before erasing.
359 void Args::DeleteArgumentAtIndex(size_t idx
) {
360 if (idx
>= m_entries
.size())
363 m_argv
.erase(m_argv
.begin() + idx
);
364 m_entries
.erase(m_entries
.begin() + idx
);
// Fills this object from the first `argc` strings of `argv`. m_entries is
// resized to `argc` and m_argv to `argc + 1`, then every string is stored as
// an entry (with no column) and m_argv is pointed at the entry's data.
367 void Args::SetArguments(size_t argc
, const char **argv
) {
370 auto args
= llvm::ArrayRef(argv
, argc
);
371 m_entries
.resize(argc
);
// One slot more than the number of arguments.
372 m_argv
.resize(argc
+ 1);
373 for (size_t i
= 0; i
< args
.size(); ++i
) {
// The first character of the argument is tested against the three quote
// characters (single quote, double quote, backtick) to determine `quote`.
375 ((args
[i
][0] == '\'') || (args
[i
][0] == '"') || (args
[i
][0] == '`'))
379 m_entries
[i
] = ArgEntry(args
[i
], quote
, std::nullopt
);
380 m_argv
[i
] = m_entries
[i
].data();
// Convenience overload: obtains the argument count from ArgvToArgc and
// forwards to SetArguments(size_t, const char **).
384 void Args::SetArguments(const char **argv
) {
385 SetArguments(ArgvToArgc(argv
), argv
);
391 m_argv
.push_back(nullptr);
// Builds a copy of `unsafe_arg` in which every character that is special to
// the given shell is preceded by a backslash. The set of special characters
// is selected by comparing the filename of `shell` against a table of known
// shells; when there is no match, only space, single quote and double quote
// are escaped.
394 std::string
Args::GetShellSafeArgument(const FileSpec
&shell
,
395 llvm::StringRef unsafe_arg
) {
// Pairs a shell's basename with the characters that need escaping for it.
396 struct ShellDescriptor
{
397 llvm::StringRef m_basename
;
398 llvm::StringRef m_escapables
;
401 static ShellDescriptor g_Shells
[] = {{"bash", " '\"<>()&;"},
402 {"fish", " '\"<>()&\\|;"},
403 {"tcsh", " '\"<>()&;"},
404 {"zsh", " '\"<>()&;\\|"},
405 {"sh", " '\"<>()&;"}};
// Fallback set, used unless a table entry matches below.
408 llvm::StringRef escapables
= " '\"";
410 auto basename
= shell
.GetFilename().GetStringRef();
411 if (!basename
.empty()) {
412 for (const auto &Shell
: g_Shells
) {
413 if (Shell
.m_basename
== basename
) {
414 escapables
= Shell
.m_escapables
;
420 std::string safe_arg
;
// The result is at least as long as the input, so reserve that much up front.
421 safe_arg
.reserve(unsafe_arg
.size());
422 // Add a \ before every character that needs to be escaped.
423 for (char c
: unsafe_arg
) {
424 if (escapables
.contains(c
))
425 safe_arg
.push_back('\\');
426 safe_arg
.push_back(c
);
// Converts an encoding name to an lldb::Encoding value. The recognized names
// are "uint", "sint", "ieee754" and "vector"; any other string yields
// `fail_value`.
431 lldb::Encoding
Args::StringToEncoding(llvm::StringRef s
,
432 lldb::Encoding fail_value
) {
433 return llvm::StringSwitch
<lldb::Encoding
>(s
)
434 .Case("uint", eEncodingUint
)
435 .Case("sint", eEncodingSint
)
436 .Case("ieee754", eEncodingIEEE754
)
437 .Case("vector", eEncodingVector
)
438 .Default(fail_value
);
// Converts a generic register name to its LLDB_REGNUM_GENERIC_* constant.
// Recognized names are pc, sp, fp, ra (alias lr), flags, arg1 through arg8,
// and tp. Names that match none of these map to LLDB_INVALID_REGNUM.
441 uint32_t Args::StringToGenericRegister(llvm::StringRef s
) {
443 return LLDB_INVALID_REGNUM
;
444 uint32_t result
= llvm::StringSwitch
<uint32_t>(s
)
445 .Case("pc", LLDB_REGNUM_GENERIC_PC
)
446 .Case("sp", LLDB_REGNUM_GENERIC_SP
)
447 .Case("fp", LLDB_REGNUM_GENERIC_FP
)
// "ra" and "lr" are two spellings of the same generic register.
448 .Cases("ra", "lr", LLDB_REGNUM_GENERIC_RA
)
449 .Case("flags", LLDB_REGNUM_GENERIC_FLAGS
)
450 .Case("arg1", LLDB_REGNUM_GENERIC_ARG1
)
451 .Case("arg2", LLDB_REGNUM_GENERIC_ARG2
)
452 .Case("arg3", LLDB_REGNUM_GENERIC_ARG3
)
453 .Case("arg4", LLDB_REGNUM_GENERIC_ARG4
)
454 .Case("arg5", LLDB_REGNUM_GENERIC_ARG5
)
455 .Case("arg6", LLDB_REGNUM_GENERIC_ARG6
)
456 .Case("arg7", LLDB_REGNUM_GENERIC_ARG7
)
457 .Case("arg8", LLDB_REGNUM_GENERIC_ARG8
)
458 .Case("tp", LLDB_REGNUM_GENERIC_TP
)
459 .Default(LLDB_INVALID_REGNUM
);
// Copies the NUL-terminated string `src` into `dst`, translating backslash
// escape sequences along the way. Runs of characters that contain no
// backslash are appended unchanged. Octal and hexadecimal escapes are
// converted with strtoul and appended as a single char when the value fits
// into a byte.
463 void Args::EncodeEscapeSequences(const char *src
, std::string
&dst
) {
466 for (const char *p
= src
; *p
!= '\0'; ++p
) {
// Length of the leading run of `p` that contains no backslash.
467 size_t non_special_chars
= ::strcspn(p
, "\\");
468 if (non_special_chars
> 0) {
469 dst
.append(p
, non_special_chars
);
470 p
+= non_special_chars
;
476 ++p
; // skip the slash
509 // 1 to 3 octal chars
511 // Make a string that can hold onto the initial zero char, up to 3
512 // octal digits, and a terminating NULL.
513 char oct_str
[5] = {'\0', '\0', '\0', '\0', '\0'};
// At most four characters are scanned, matching the buffer layout described
// above (initial zero char plus up to three octal digits).
516 for (i
= 0; (p
[i
] >= '0' && p
[i
] <= '7') && i
< 4; ++i
)
519 // We don't want to consume the last octal character since the main
520 // for loop will do this for us, so we advance p by one less than i
521 // (even if i is zero)
523 unsigned long octal_value
= ::strtoul(oct_str
, nullptr, 8);
// Only values that fit into one byte are emitted.
524 if (octal_value
<= UINT8_MAX
) {
525 dst
.append(1, static_cast<char>(octal_value
));
531 // hex number in the format
532 if (isxdigit(p
[1])) {
535 // Make a string that can hold onto two hex chars plus a
537 char hex_str
[3] = {*p
, '\0', '\0'};
538 if (isxdigit(p
[1])) {
539 ++p
; // Skip the first of the two hex chars
543 unsigned long hex_value
= strtoul(hex_str
, nullptr, 16);
// Only values that fit into one byte are emitted.
544 if (hex_value
<= UINT8_MAX
)
545 dst
.append(1, static_cast<char>(hex_value
));
552 // Just desensitize any other character by just printing what came
// Walks the NUL-terminated string `src` and writes a representation of it to
// `dst`. Each character is first tested with llvm::isPrint; a character that
// reaches the fallback case is written as an octal number.
562 void Args::ExpandEscapedCharacters(const char *src
, std::string
&dst
) {
565 for (const char *p
= src
; *p
!= '\0'; ++p
) {
566 if (llvm::isPrint(*p
))
601 // Just encode as octal
// NOTE(review): `*p` is a plain char passed through varargs to "%o". On
// targets where char is signed, a byte >= 0x80 would be promoted to a
// negative int -- confirm the output is as intended for such bytes.
604 snprintf(octal_str
, sizeof(octal_str
), "%o", *p
);
605 dst
.append(octal_str
);
// Escapes `arg` for use in an LLDB command. The set of characters to escape
// is selected by a switch on `quote_char`; the argument is then scanned and
// each character is looked up in that set with strchr.
613 std::string
Args::EscapeLLDBCommandArgument(const std::string
&arg
,
615 const char *chars_to_escape
= nullptr;
616 switch (quote_char
) {
// Two escape sets are visible here: one made of space, tab, backslash and the
// three quote characters, and one made of dollar, double quote, backtick and
// backslash.
618 chars_to_escape
= " \t\\'\"`";
621 chars_to_escape
= "$\"`\\";
627 assert(false && "Unhandled quote character");
632 res
.reserve(arg
.size());
634 if (::strchr(chars_to_escape
, c
))
// Constructs the object by parsing `arg_string` with SetFromString.
641 OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string
) {
642 SetFromString(arg_string
);
// Splits `arg_string` into an argument prefix and a raw suffix.
//
// If the string, after trimming leading separators, does not start with a
// dash, the whole original string becomes the suffix. Otherwise arguments are
// parsed one at a time until an unquoted "--" is found: the text before it is
// parsed into m_args and stored in m_arg_string, the text up to and including
// it is stored in m_arg_string_with_delimiter, and the text after it becomes
// the suffix. If no such delimiter is found, the whole original string
// becomes the suffix.
645 void OptionsWithRaw::SetFromString(llvm::StringRef arg_string
) {
// Keep the untouched input; prefix lengths below are measured against it.
646 const llvm::StringRef original_args
= arg_string
;
649 arg_string
= ltrimForArgs(arg_string
, shift
);
653 // If the string doesn't start with a dash, we just have no options and just
655 if (!arg_string
.starts_with("-")) {
656 m_suffix
= std::string(original_args
);
660 bool found_suffix
= false;
661 while (!arg_string
.empty()) {
662 // The length of the prefix before parsing.
663 std::size_t prev_prefix_length
= original_args
.size() - arg_string
.size();
665 // Parse the next argument from the remaining string.
666 std::tie(arg
, quote
, arg_string
) = ParseSingleArgument(arg_string
);
668 // If we get an unquoted '--' argument, then we reached the suffix part
// A temporary entry is built so that IsQuoted() can be consulted.
670 Args::ArgEntry
entry(arg
, quote
, std::nullopt
);
671 if (!entry
.IsQuoted() && arg
== "--") {
672 // The remaining line is the raw suffix, and the line we parsed so far
673 // needs to be interpreted as arguments.
675 m_suffix
= std::string(arg_string
);
678 // The length of the prefix after parsing.
679 std::size_t prefix_length
= original_args
.size() - arg_string
.size();
681 // Take the string we know contains all the arguments and actually parse
682 // it as proper arguments.
683 llvm::StringRef prefix
= original_args
.take_front(prev_prefix_length
);
684 m_args
= Args(prefix
);
685 m_arg_string
= prefix
;
687 // We also record the part of the string that contains the arguments plus
689 m_arg_string_with_delimiter
= original_args
.take_front(prefix_length
);
691 // As the rest of the string became the raw suffix, we are done here.
695 arg_string
= ltrimForArgs(arg_string
, shift
);
698 // If we didn't find a suffix delimiter, the whole string is the raw suffix.
700 m_suffix
= std::string(original_args
);