1 //===-- Args.cpp ----------------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "lldb/Utility/Args.h"
10 #include "lldb/Utility/FileSpec.h"
11 #include "lldb/Utility/Stream.h"
12 #include "lldb/Utility/StringList.h"
13 #include "llvm/ADT/StringSwitch.h"
16 using namespace lldb_private
;
18 // A helper function for argument parsing.
19 // Parses the initial part of the first argument using normal double quote
20 // rules: backslash escapes the double quote and itself. The parsed string is
21 // appended to the second argument. The function returns the unparsed portion
22 // of the string, starting at the closing quote.
23 static llvm::StringRef
ParseDoubleQuotes(llvm::StringRef quoted
,
24 std::string
&result
) {
25 // Inside double quotes, '\' and '"' are special.
26 static const char *k_escapable_characters
= "\"\\";
28 // Skip over regular characters and append them.
29 size_t regular
= quoted
.find_first_of(k_escapable_characters
);
30 result
+= quoted
.substr(0, regular
);
31 quoted
= quoted
.substr(regular
);
33 // If we have reached the end of string or the closing quote, we're done.
34 if (quoted
.empty() || quoted
.front() == '"')
37 // We have found a backslash.
38 quoted
= quoted
.drop_front();
41 // A lone backslash at the end of string, let's just append it.
46 // If the character after the backslash is not an allowed escapable
47 // character, we leave the character sequence untouched.
48 if (strchr(k_escapable_characters
, quoted
.front()) == nullptr)
51 result
+= quoted
.front();
52 quoted
= quoted
.drop_front();
58 static size_t ArgvToArgc(const char **argv
) {
67 // Trims all whitespace that can separate command line arguments from the left
68 // side of the string.
69 static llvm::StringRef
ltrimForArgs(llvm::StringRef str
) {
70 static const char *k_space_separators
= " \t";
71 return str
.ltrim(k_space_separators
);
74 // A helper function for SetCommandString. Parses a single argument from the
75 // command string, processing quotes and backslashes in a shell-like manner.
76 // The function returns a tuple consisting of the parsed argument, the quote
77 // char used, and the unparsed portion of the string starting at the first
78 // unquoted, unescaped whitespace character.
79 static std::tuple
<std::string
, char, llvm::StringRef
>
80 ParseSingleArgument(llvm::StringRef command
) {
81 // Argument can be split into multiple discontiguous pieces, for example:
83 // this would result in a single argument "Hello World" (without the quotes)
84 // since the quotes would be removed and there is no space between the
88 // Since we can have multiple quotes that form a single command in a command
89 // like: "Hello "world'!' (which will make a single argument "Hello world!")
90 // we remember the first quote character we encounter and use that for the
92 char first_quote_char
= '\0';
94 bool arg_complete
= false;
96 // Skip over regular characters and append them.
97 size_t regular
= command
.find_first_of(" \t\r\"'`\\");
98 arg
+= command
.substr(0, regular
);
99 command
= command
.substr(regular
);
104 char special
= command
.front();
105 command
= command
.drop_front();
108 if (command
.empty()) {
113 // If the character after the backslash is not an allowed escapable
114 // character, we leave the character sequence untouched.
115 if (strchr(" \t\\'\"`", command
.front()) == nullptr)
118 arg
+= command
.front();
119 command
= command
.drop_front();
126 // We are not inside any quotes, we just found a space after an argument.
134 // We found the start of a quote scope.
135 if (first_quote_char
== '\0')
136 first_quote_char
= special
;
139 command
= ParseDoubleQuotes(command
, arg
);
141 // For single quotes, we simply skip ahead to the matching quote
142 // character (or the end of the string).
143 size_t quoted
= command
.find(special
);
144 arg
+= command
.substr(0, quoted
);
145 command
= command
.substr(quoted
);
148 // If we found a closing quote, skip it.
149 if (!command
.empty())
150 command
= command
.drop_front();
154 } while (!arg_complete
);
156 return std::make_tuple(arg
, first_quote_char
, command
);
159 Args::ArgEntry::ArgEntry(llvm::StringRef str
, char quote
) : quote(quote
) {
160 size_t size
= str
.size();
161 ptr
.reset(new char[size
+ 1]);
163 ::memcpy(data(), str
.data() ? str
.data() : "", size
);
168 Args::Args(llvm::StringRef command
) { SetCommandString(command
); }
170 Args::Args(const Args
&rhs
) { *this = rhs
; }
172 Args::Args(const StringList
&list
) : Args() {
173 for (const std::string
&arg
: list
)
177 Args::Args(llvm::ArrayRef
<llvm::StringRef
> args
) : Args() {
178 for (llvm::StringRef arg
: args
)
182 Args
&Args::operator=(const Args
&rhs
) {
187 for (auto &entry
: rhs
.m_entries
) {
188 m_entries
.emplace_back(entry
.ref(), entry
.quote
);
189 m_argv
.push_back(m_entries
.back().data());
191 m_argv
.push_back(nullptr);
196 Args::~Args() = default;
198 void Args::Dump(Stream
&s
, const char *label_name
) const {
203 for (auto &entry
: m_entries
) {
205 s
.Format("{0}[{1}]=\"{2}\"\n", label_name
, i
++, entry
.ref());
207 s
.Format("{0}[{1}]=NULL\n", label_name
, i
);
211 bool Args::GetCommandString(std::string
&command
) const {
214 for (size_t i
= 0; i
< m_entries
.size(); ++i
) {
217 char quote
= m_entries
[i
].quote
;
220 command
+= m_entries
[i
].ref();
225 return !m_entries
.empty();
228 bool Args::GetQuotedCommandString(std::string
&command
) const {
231 for (size_t i
= 0; i
< m_entries
.size(); ++i
) {
235 if (m_entries
[i
].quote
) {
236 command
+= m_entries
[i
].quote
;
237 command
+= m_entries
[i
].ref();
238 command
+= m_entries
[i
].quote
;
240 command
+= m_entries
[i
].ref();
244 return !m_entries
.empty();
247 void Args::SetCommandString(llvm::StringRef command
) {
251 command
= ltrimForArgs(command
);
254 while (!command
.empty()) {
255 std::tie(arg
, quote
, command
) = ParseSingleArgument(command
);
256 m_entries
.emplace_back(arg
, quote
);
257 m_argv
.push_back(m_entries
.back().data());
258 command
= ltrimForArgs(command
);
260 m_argv
.push_back(nullptr);
263 const char *Args::GetArgumentAtIndex(size_t idx
) const {
264 if (idx
< m_argv
.size())
269 char **Args::GetArgumentVector() {
270 assert(!m_argv
.empty());
271 // TODO: functions like execve and posix_spawnp exhibit undefined behavior
272 // when argv or envp is null. So the code below is actually wrong. However,
273 // other code in LLDB depends on it being null. The code has been acting
274 // this way for some time, so it makes sense to leave it this way until
275 // someone has the time to come along and fix it.
276 return (m_argv
.size() > 1) ? m_argv
.data() : nullptr;
279 const char **Args::GetConstArgumentVector() const {
280 assert(!m_argv
.empty());
281 return (m_argv
.size() > 1) ? const_cast<const char **>(m_argv
.data())
286 // Don't pop the last NULL terminator from the argv array
287 if (m_entries
.empty())
289 m_argv
.erase(m_argv
.begin());
290 m_entries
.erase(m_entries
.begin());
293 void Args::Unshift(llvm::StringRef arg_str
, char quote_char
) {
294 InsertArgumentAtIndex(0, arg_str
, quote_char
);
297 void Args::AppendArguments(const Args
&rhs
) {
298 assert(m_argv
.size() == m_entries
.size() + 1);
299 assert(m_argv
.back() == nullptr);
301 for (auto &entry
: rhs
.m_entries
) {
302 m_entries
.emplace_back(entry
.ref(), entry
.quote
);
303 m_argv
.push_back(m_entries
.back().data());
305 m_argv
.push_back(nullptr);
308 void Args::AppendArguments(const char **argv
) {
309 size_t argc
= ArgvToArgc(argv
);
311 assert(m_argv
.size() == m_entries
.size() + 1);
312 assert(m_argv
.back() == nullptr);
314 for (auto arg
: llvm::ArrayRef(argv
, argc
)) {
315 m_entries
.emplace_back(arg
, '\0');
316 m_argv
.push_back(m_entries
.back().data());
319 m_argv
.push_back(nullptr);
322 void Args::AppendArgument(llvm::StringRef arg_str
, char quote_char
) {
323 InsertArgumentAtIndex(GetArgumentCount(), arg_str
, quote_char
);
326 void Args::InsertArgumentAtIndex(size_t idx
, llvm::StringRef arg_str
,
328 assert(m_argv
.size() == m_entries
.size() + 1);
329 assert(m_argv
.back() == nullptr);
331 if (idx
> m_entries
.size())
333 m_entries
.emplace(m_entries
.begin() + idx
, arg_str
, quote_char
);
334 m_argv
.insert(m_argv
.begin() + idx
, m_entries
[idx
].data());
337 void Args::ReplaceArgumentAtIndex(size_t idx
, llvm::StringRef arg_str
,
339 assert(m_argv
.size() == m_entries
.size() + 1);
340 assert(m_argv
.back() == nullptr);
342 if (idx
>= m_entries
.size())
345 m_entries
[idx
] = ArgEntry(arg_str
, quote_char
);
346 m_argv
[idx
] = m_entries
[idx
].data();
349 void Args::DeleteArgumentAtIndex(size_t idx
) {
350 if (idx
>= m_entries
.size())
353 m_argv
.erase(m_argv
.begin() + idx
);
354 m_entries
.erase(m_entries
.begin() + idx
);
357 void Args::SetArguments(size_t argc
, const char **argv
) {
360 auto args
= llvm::ArrayRef(argv
, argc
);
361 m_entries
.resize(argc
);
362 m_argv
.resize(argc
+ 1);
363 for (size_t i
= 0; i
< args
.size(); ++i
) {
365 ((args
[i
][0] == '\'') || (args
[i
][0] == '"') || (args
[i
][0] == '`'))
369 m_entries
[i
] = ArgEntry(args
[i
], quote
);
370 m_argv
[i
] = m_entries
[i
].data();
374 void Args::SetArguments(const char **argv
) {
375 SetArguments(ArgvToArgc(argv
), argv
);
381 m_argv
.push_back(nullptr);
384 std::string
Args::GetShellSafeArgument(const FileSpec
&shell
,
385 llvm::StringRef unsafe_arg
) {
386 struct ShellDescriptor
{
387 llvm::StringRef m_basename
;
388 llvm::StringRef m_escapables
;
391 static ShellDescriptor g_Shells
[] = {{"bash", " '\"<>()&;"},
392 {"fish", " '\"<>()&\\|;"},
393 {"tcsh", " '\"<>()&;"},
394 {"zsh", " '\"<>()&;\\|"},
395 {"sh", " '\"<>()&;"}};
398 llvm::StringRef escapables
= " '\"";
400 auto basename
= shell
.GetFilename().GetStringRef();
401 if (!basename
.empty()) {
402 for (const auto &Shell
: g_Shells
) {
403 if (Shell
.m_basename
== basename
) {
404 escapables
= Shell
.m_escapables
;
410 std::string safe_arg
;
411 safe_arg
.reserve(unsafe_arg
.size());
412 // Add a \ before every character that needs to be escaped.
413 for (char c
: unsafe_arg
) {
414 if (escapables
.contains(c
))
415 safe_arg
.push_back('\\');
416 safe_arg
.push_back(c
);
421 lldb::Encoding
Args::StringToEncoding(llvm::StringRef s
,
422 lldb::Encoding fail_value
) {
423 return llvm::StringSwitch
<lldb::Encoding
>(s
)
424 .Case("uint", eEncodingUint
)
425 .Case("sint", eEncodingSint
)
426 .Case("ieee754", eEncodingIEEE754
)
427 .Case("vector", eEncodingVector
)
428 .Default(fail_value
);
431 uint32_t Args::StringToGenericRegister(llvm::StringRef s
) {
433 return LLDB_INVALID_REGNUM
;
434 uint32_t result
= llvm::StringSwitch
<uint32_t>(s
)
435 .Case("pc", LLDB_REGNUM_GENERIC_PC
)
436 .Case("sp", LLDB_REGNUM_GENERIC_SP
)
437 .Case("fp", LLDB_REGNUM_GENERIC_FP
)
438 .Cases("ra", "lr", LLDB_REGNUM_GENERIC_RA
)
439 .Case("flags", LLDB_REGNUM_GENERIC_FLAGS
)
440 .Case("arg1", LLDB_REGNUM_GENERIC_ARG1
)
441 .Case("arg2", LLDB_REGNUM_GENERIC_ARG2
)
442 .Case("arg3", LLDB_REGNUM_GENERIC_ARG3
)
443 .Case("arg4", LLDB_REGNUM_GENERIC_ARG4
)
444 .Case("arg5", LLDB_REGNUM_GENERIC_ARG5
)
445 .Case("arg6", LLDB_REGNUM_GENERIC_ARG6
)
446 .Case("arg7", LLDB_REGNUM_GENERIC_ARG7
)
447 .Case("arg8", LLDB_REGNUM_GENERIC_ARG8
)
448 .Case("tp", LLDB_REGNUM_GENERIC_TP
)
449 .Default(LLDB_INVALID_REGNUM
);
453 void Args::EncodeEscapeSequences(const char *src
, std::string
&dst
) {
456 for (const char *p
= src
; *p
!= '\0'; ++p
) {
457 size_t non_special_chars
= ::strcspn(p
, "\\");
458 if (non_special_chars
> 0) {
459 dst
.append(p
, non_special_chars
);
460 p
+= non_special_chars
;
466 ++p
; // skip the slash
499 // 1 to 3 octal chars
501 // Make a string that can hold onto the initial zero char, up to 3
502 // octal digits, and a terminating NULL.
503 char oct_str
[5] = {'\0', '\0', '\0', '\0', '\0'};
506 for (i
= 0; (p
[i
] >= '0' && p
[i
] <= '7') && i
< 4; ++i
)
509 // We don't want to consume the last octal character since the main
510 // for loop will do this for us, so we advance p by one less than i
511 // (even if i is zero)
513 unsigned long octal_value
= ::strtoul(oct_str
, nullptr, 8);
514 if (octal_value
<= UINT8_MAX
) {
515 dst
.append(1, static_cast<char>(octal_value
));
521 // hex number in the format
522 if (isxdigit(p
[1])) {
525 // Make a string that can hold onto two hex chars plus a
527 char hex_str
[3] = {*p
, '\0', '\0'};
528 if (isxdigit(p
[1])) {
529 ++p
; // Skip the first of the two hex chars
533 unsigned long hex_value
= strtoul(hex_str
, nullptr, 16);
534 if (hex_value
<= UINT8_MAX
)
535 dst
.append(1, static_cast<char>(hex_value
));
542 // Just desensitize any other character by just printing what came
552 void Args::ExpandEscapedCharacters(const char *src
, std::string
&dst
) {
555 for (const char *p
= src
; *p
!= '\0'; ++p
) {
556 if (llvm::isPrint(*p
))
591 // Just encode as octal
594 snprintf(octal_str
, sizeof(octal_str
), "%o", *p
);
595 dst
.append(octal_str
);
603 std::string
Args::EscapeLLDBCommandArgument(const std::string
&arg
,
605 const char *chars_to_escape
= nullptr;
606 switch (quote_char
) {
608 chars_to_escape
= " \t\\'\"`";
611 chars_to_escape
= "$\"`\\";
617 assert(false && "Unhandled quote character");
622 res
.reserve(arg
.size());
624 if (::strchr(chars_to_escape
, c
))
631 OptionsWithRaw::OptionsWithRaw(llvm::StringRef arg_string
) {
632 SetFromString(arg_string
);
635 void OptionsWithRaw::SetFromString(llvm::StringRef arg_string
) {
636 const llvm::StringRef original_args
= arg_string
;
638 arg_string
= ltrimForArgs(arg_string
);
642 // If the string doesn't start with a dash, we just have no options and just
644 if (!arg_string
.startswith("-")) {
645 m_suffix
= std::string(original_args
);
649 bool found_suffix
= false;
650 while (!arg_string
.empty()) {
651 // The length of the prefix before parsing.
652 std::size_t prev_prefix_length
= original_args
.size() - arg_string
.size();
654 // Parse the next argument from the remaining string.
655 std::tie(arg
, quote
, arg_string
) = ParseSingleArgument(arg_string
);
657 // If we get an unquoted '--' argument, then we reached the suffix part
659 Args::ArgEntry
entry(arg
, quote
);
660 if (!entry
.IsQuoted() && arg
== "--") {
661 // The remaining line is the raw suffix, and the line we parsed so far
662 // needs to be interpreted as arguments.
664 m_suffix
= std::string(arg_string
);
667 // The length of the prefix after parsing.
668 std::size_t prefix_length
= original_args
.size() - arg_string
.size();
670 // Take the string we know contains all the arguments and actually parse
671 // it as proper arguments.
672 llvm::StringRef prefix
= original_args
.take_front(prev_prefix_length
);
673 m_args
= Args(prefix
);
674 m_arg_string
= prefix
;
676 // We also record the part of the string that contains the arguments plus
678 m_arg_string_with_delimiter
= original_args
.take_front(prefix_length
);
680 // As the rest of the string became the raw suffix, we are done here.
684 arg_string
= ltrimForArgs(arg_string
);
687 // If we didn't find a suffix delimiter, the whole string is the raw suffix.
689 m_suffix
= std::string(original_args
);