1 //===-- StringPrinter.cpp ----------------------------------------*- C++
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
8 //===----------------------------------------------------------------------===//
10 #include "lldb/DataFormatters/StringPrinter.h"
12 #include "lldb/Core/Debugger.h"
13 #include "lldb/Core/ValueObject.h"
14 #include "lldb/Target/Language.h"
15 #include "lldb/Target/Process.h"
16 #include "lldb/Target/Target.h"
17 #include "lldb/Utility/Status.h"
19 #include "llvm/Support/ConvertUTF.h"
26 using namespace lldb_private
;
27 using namespace lldb_private::formatters
;
29 // This template is declared for every StringElementType value, but it is only
30 // specialized for the element types we care about. That is deliberate: using
// it with an unsupported element type produces a linker error instead of
// silently compiling.
31 template <lldb_private::formatters::StringPrinter::StringElementType type
>
32 static StringPrinter::StringPrinterBufferPointer
<>
33 GetPrintableImpl(uint8_t *buffer
, uint8_t *buffer_end
, uint8_t *&next
);
35 // Mimic isprint() for Unicode code points. The code point is tested against
// the ranges of control and formatting characters listed below (presumably the
// ones that are reported as non-printable).
36 static bool isprint(char32_t codepoint
) {
37 if (codepoint
<= 0x1F || codepoint
== 0x7F) // C0 control characters, plus DEL
41 if (codepoint
>= 0x80 && codepoint
<= 0x9F) // C1 control characters
45 if (codepoint
== 0x2028 || codepoint
== 0x2029) // line separator / paragraph separator
49 if (codepoint
== 0x200E || codepoint
== 0x200F ||
50 (codepoint
>= 0x202A &&
51 codepoint
<= 0x202E)) // bidirectional text control characters
55 if (codepoint
>= 0xFFF9 &&
56 codepoint
<= 0xFFFF) // interlinear annotation characters and the specials block
64 StringPrinter::StringPrinterBufferPointer
<>
65 GetPrintableImpl
<StringPrinter::StringElementType::ASCII
>(uint8_t *buffer
,
68 StringPrinter::StringPrinterBufferPointer
<> retval
= {nullptr};
102 if (isprint(*buffer
))
103 retval
= {buffer
, 1};
105 uint8_t *data
= new uint8_t[5];
106 sprintf((char *)data
, "\\x%02x", *buffer
);
107 retval
= {data
, 4, [](const uint8_t *c
) { delete[] c
; }};
// Decode a two-byte UTF-8 sequence into a code point.
//
// c0 is the lead byte (its 0xC0 marker is subtracted away) and c1 is the
// continuation byte (its 0x80 marker is subtracted away). No validation is
// done: the caller must already know that this is a two-byte sequence.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1) {
  const int lead_payload = c0 - 0xC0;
  const int tail_payload = c1 - 0x80;
  // A continuation byte carries six payload bits, hence the factor of 64.
  return lead_payload * 64 + tail_payload;
}
// Decode a three-byte UTF-8 sequence into a code point.
//
// c0 is the lead byte (0xE0 marker) and c1/c2 are the continuation bytes
// (0x80 marker each). No validation is done: the caller must already know that
// this is a three-byte sequence.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
                                       unsigned char c2) {
  const int lead_payload = c0 - 0xE0;
  const int mid_payload = c1 - 0x80;
  const int tail_payload = c2 - 0x80;
  // Every continuation byte carries six payload bits, so each step shifts the
  // accumulated value up by a factor of 64.
  return (lead_payload * 64 + mid_payload) * 64 + tail_payload;
}
// Decode a four-byte UTF-8 sequence into a code point.
//
// c0 is the lead byte (its 0xF0 marker is subtracted away); c1, c2 and c3 are
// the continuation bytes in stream order (0x80 marker each). No validation is
// done: the caller must already know that this is a four-byte sequence.
static char32_t ConvertUTF8ToCodePoint(unsigned char c0, unsigned char c1,
                                       unsigned char c2, unsigned char c3) {
  // Every continuation byte carries six payload bits, so the weights are
  // 64^3, 64^2, 64 and 1. The 64^2 term belongs to c1; feeding c2 into it
  // would drop c1 entirely and yield the wrong code point.
  return (c0 - 240) * 262144 + (c1 - 128) * 4096 + (c2 - 128) * 64 +
         (c3 - 128);
}
129 StringPrinter::StringPrinterBufferPointer
<>
130 GetPrintableImpl
<StringPrinter::StringElementType::UTF8
>(uint8_t *buffer
,
133 StringPrinter::StringPrinterBufferPointer
<> retval
{nullptr};
135 unsigned utf8_encoded_len
= llvm::getNumBytesForUTF8(*buffer
);
137 if (1u + std::distance(buffer
, buffer_end
) < utf8_encoded_len
) {
138 // I don't have enough bytes - print whatever I have left
139 retval
= {buffer
, static_cast<size_t>(1 + buffer_end
- buffer
)};
140 next
= buffer_end
+ 1;
144 char32_t codepoint
= 0;
145 switch (utf8_encoded_len
) {
147 // this is just an ASCII byte - ask ASCII
148 return GetPrintableImpl
<StringPrinter::StringElementType::ASCII
>(
149 buffer
, buffer_end
, next
);
151 codepoint
= ConvertUTF8ToCodePoint((unsigned char)*buffer
,
152 (unsigned char)*(buffer
+ 1));
155 codepoint
= ConvertUTF8ToCodePoint((unsigned char)*buffer
,
156 (unsigned char)*(buffer
+ 1),
157 (unsigned char)*(buffer
+ 2));
160 codepoint
= ConvertUTF8ToCodePoint(
161 (unsigned char)*buffer
, (unsigned char)*(buffer
+ 1),
162 (unsigned char)*(buffer
+ 2), (unsigned char)*(buffer
+ 3));
165 // this is probably some bogus non-character thing just print it as-is and
166 // hope to sync up again soon
167 retval
= {buffer
, 1};
199 retval
= {"\\\"", 2};
202 retval
= {"\\\\", 2};
205 if (isprint(codepoint
))
206 retval
= {buffer
, utf8_encoded_len
};
208 uint8_t *data
= new uint8_t[11];
209 sprintf((char *)data
, "\\U%08x", (unsigned)codepoint
);
210 retval
= {data
, 10, [](const uint8_t *c
) { delete[] c
; }};
215 next
= buffer
+ utf8_encoded_len
;
219 // this should not happen - but just in case.. try to resync at some point
220 retval
= {buffer
, 1};
225 // Given a sequence of bytes, this function returns the bytes to actually print
226 // (with their length) and stores the following unscanned position in next.
228 static StringPrinter::StringPrinterBufferPointer
<>
229 GetPrintable(StringPrinter::StringElementType type
, uint8_t *buffer
,
230 uint8_t *buffer_end
, uint8_t *&next
) {
235 case StringPrinter::StringElementType::ASCII
:
236 return GetPrintableImpl
<StringPrinter::StringElementType::ASCII
>(
237 buffer
, buffer_end
, next
);
238 case StringPrinter::StringElementType::UTF8
:
239 return GetPrintableImpl
<StringPrinter::StringElementType::UTF8
>(
240 buffer
, buffer_end
, next
);
246 StringPrinter::EscapingHelper
247 StringPrinter::GetDefaultEscapingHelper(GetPrintableElementType elem_type
) {
249 case GetPrintableElementType::UTF8
:
250 return [](uint8_t *buffer
, uint8_t *buffer_end
,
251 uint8_t *&next
) -> StringPrinter::StringPrinterBufferPointer
<> {
252 return GetPrintable(StringPrinter::StringElementType::UTF8
, buffer
,
255 case GetPrintableElementType::ASCII
:
256 return [](uint8_t *buffer
, uint8_t *buffer_end
,
257 uint8_t *&next
) -> StringPrinter::StringPrinterBufferPointer
<> {
258 return GetPrintable(StringPrinter::StringElementType::ASCII
, buffer
,
262 llvm_unreachable("bad element type");
265 // use this call if you already have an LLDB-side buffer for the data
266 template <typename SourceDataType
>
267 static bool DumpUTFBufferToStream(
268 llvm::ConversionResult (*ConvertFunction
)(const SourceDataType
**,
269 const SourceDataType
*,
270 llvm::UTF8
**, llvm::UTF8
*,
271 llvm::ConversionFlags
),
272 const StringPrinter::ReadBufferAndDumpToStreamOptions
&dump_options
) {
273 Stream
&stream(*dump_options
.GetStream());
274 if (dump_options
.GetPrefixToken() != nullptr)
275 stream
.Printf("%s", dump_options
.GetPrefixToken());
276 if (dump_options
.GetQuote() != 0)
277 stream
.Printf("%c", dump_options
.GetQuote());
278 auto data(dump_options
.GetData());
279 auto source_size(dump_options
.GetSourceSize());
280 if (data
.GetByteSize() && data
.GetDataStart() && data
.GetDataEnd()) {
281 const int bufferSPSize
= data
.GetByteSize();
282 if (dump_options
.GetSourceSize() == 0) {
283 const int origin_encoding
= 8 * sizeof(SourceDataType
);
284 source_size
= bufferSPSize
/ (origin_encoding
/ 4);
287 const SourceDataType
*data_ptr
=
288 (const SourceDataType
*)data
.GetDataStart();
289 const SourceDataType
*data_end_ptr
= data_ptr
+ source_size
;
291 const bool zero_is_terminator
= dump_options
.GetBinaryZeroIsTerminator();
293 if (zero_is_terminator
) {
294 while (data_ptr
< data_end_ptr
) {
296 data_end_ptr
= data_ptr
;
302 data_ptr
= (const SourceDataType
*)data
.GetDataStart();
305 lldb::DataBufferSP utf8_data_buffer_sp
;
306 llvm::UTF8
*utf8_data_ptr
= nullptr;
307 llvm::UTF8
*utf8_data_end_ptr
= nullptr;
309 if (ConvertFunction
) {
310 utf8_data_buffer_sp
=
311 std::make_shared
<DataBufferHeap
>(4 * bufferSPSize
, 0);
312 utf8_data_ptr
= (llvm::UTF8
*)utf8_data_buffer_sp
->GetBytes();
313 utf8_data_end_ptr
= utf8_data_ptr
+ utf8_data_buffer_sp
->GetByteSize();
314 ConvertFunction(&data_ptr
, data_end_ptr
, &utf8_data_ptr
,
315 utf8_data_end_ptr
, llvm::lenientConversion
);
316 if (!zero_is_terminator
)
317 utf8_data_end_ptr
= utf8_data_ptr
;
318 // needed because the ConvertFunction will change the value of the
321 (llvm::UTF8
*)utf8_data_buffer_sp
->GetBytes();
323 // just copy the pointers - the cast is necessary to make the compiler
324 // happy but this should only happen if we are reading UTF8 data
325 utf8_data_ptr
= const_cast<llvm::UTF8
*>(
326 reinterpret_cast<const llvm::UTF8
*>(data_ptr
));
327 utf8_data_end_ptr
= const_cast<llvm::UTF8
*>(
328 reinterpret_cast<const llvm::UTF8
*>(data_end_ptr
));
331 const bool escape_non_printables
= dump_options
.GetEscapeNonPrintables();
332 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback
;
333 if (escape_non_printables
) {
334 if (Language
*language
= Language::FindPlugin(dump_options
.GetLanguage()))
335 escaping_callback
= language
->GetStringPrinterEscapingHelper(
336 lldb_private::formatters::StringPrinter::GetPrintableElementType::
340 lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
341 lldb_private::formatters::StringPrinter::
342 GetPrintableElementType::UTF8
);
345 // since we tend to accept partial data (and even partially malformed data)
346 // we might end up with no NULL terminator before the end_ptr hence we need
347 // to take a slower route and ensure we stay within boundaries
348 for (; utf8_data_ptr
< utf8_data_end_ptr
;) {
349 if (zero_is_terminator
&& !*utf8_data_ptr
)
352 if (escape_non_printables
) {
353 uint8_t *next_data
= nullptr;
355 escaping_callback(utf8_data_ptr
, utf8_data_end_ptr
, next_data
);
356 auto printable_bytes
= printable
.GetBytes();
357 auto printable_size
= printable
.GetSize();
358 if (!printable_bytes
|| !next_data
) {
359 // GetPrintable() failed on us - print one byte in a desperate resync
361 printable_bytes
= utf8_data_ptr
;
363 next_data
= utf8_data_ptr
+ 1;
365 for (unsigned c
= 0; c
< printable_size
; c
++)
366 stream
.Printf("%c", *(printable_bytes
+ c
));
367 utf8_data_ptr
= (uint8_t *)next_data
;
369 stream
.Printf("%c", *utf8_data_ptr
);
374 if (dump_options
.GetQuote() != 0)
375 stream
.Printf("%c", dump_options
.GetQuote());
376 if (dump_options
.GetSuffixToken() != nullptr)
377 stream
.Printf("%s", dump_options
.GetSuffixToken());
378 if (dump_options
.GetIsTruncated())
379 stream
.Printf("...");
383 lldb_private::formatters::StringPrinter::ReadStringAndDumpToStreamOptions::
384 ReadStringAndDumpToStreamOptions(ValueObject
&valobj
)
385 : ReadStringAndDumpToStreamOptions() {
386 SetEscapeNonPrintables(
387 valobj
.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
390 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
391 ReadBufferAndDumpToStreamOptions(ValueObject
&valobj
)
392 : ReadBufferAndDumpToStreamOptions() {
393 SetEscapeNonPrintables(
394 valobj
.GetTargetSP()->GetDebugger().GetEscapeNonPrintables());
397 lldb_private::formatters::StringPrinter::ReadBufferAndDumpToStreamOptions::
398 ReadBufferAndDumpToStreamOptions(
399 const ReadStringAndDumpToStreamOptions
&options
)
400 : ReadBufferAndDumpToStreamOptions() {
401 SetStream(options
.GetStream());
402 SetPrefixToken(options
.GetPrefixToken());
403 SetSuffixToken(options
.GetSuffixToken());
404 SetQuote(options
.GetQuote());
405 SetEscapeNonPrintables(options
.GetEscapeNonPrintables());
406 SetBinaryZeroIsTerminator(options
.GetBinaryZeroIsTerminator());
407 SetLanguage(options
.GetLanguage());
410 namespace lldb_private
{
412 namespace formatters
{
415 bool StringPrinter::ReadStringAndDumpToStream
<
416 StringPrinter::StringElementType::ASCII
>(
417 const ReadStringAndDumpToStreamOptions
&options
) {
418 assert(options
.GetStream() && "need a Stream to print the string to");
421 ProcessSP
process_sp(options
.GetProcessSP());
423 if (process_sp
.get() == nullptr || options
.GetLocation() == 0)
427 const auto max_size
= process_sp
->GetTarget().GetMaximumSizeOfStringSummary();
428 bool is_truncated
= false;
430 if (options
.GetSourceSize() == 0)
432 else if (!options
.GetIgnoreMaxLength()) {
433 size
= options
.GetSourceSize();
434 if (size
> max_size
) {
439 size
= options
.GetSourceSize();
441 lldb::DataBufferSP
buffer_sp(new DataBufferHeap(size
, 0));
443 process_sp
->ReadCStringFromMemory(
444 options
.GetLocation(), (char *)buffer_sp
->GetBytes(), size
, my_error
);
449 const char *prefix_token
= options
.GetPrefixToken();
450 char quote
= options
.GetQuote();
452 if (prefix_token
!= nullptr)
453 options
.GetStream()->Printf("%s%c", prefix_token
, quote
);
455 options
.GetStream()->Printf("%c", quote
);
457 uint8_t *data_end
= buffer_sp
->GetBytes() + buffer_sp
->GetByteSize();
459 const bool escape_non_printables
= options
.GetEscapeNonPrintables();
460 lldb_private::formatters::StringPrinter::EscapingHelper escaping_callback
;
461 if (escape_non_printables
) {
462 if (Language
*language
= Language::FindPlugin(options
.GetLanguage()))
463 escaping_callback
= language
->GetStringPrinterEscapingHelper(
464 lldb_private::formatters::StringPrinter::GetPrintableElementType::
468 lldb_private::formatters::StringPrinter::GetDefaultEscapingHelper(
469 lldb_private::formatters::StringPrinter::GetPrintableElementType::
473 // since we tend to accept partial data (and even partially malformed data)
474 // we might end up with no NULL terminator before the end_ptr hence we need
475 // to take a slower route and ensure we stay within boundaries
476 for (uint8_t *data
= buffer_sp
->GetBytes(); *data
&& (data
< data_end
);) {
477 if (escape_non_printables
) {
478 uint8_t *next_data
= nullptr;
479 auto printable
= escaping_callback(data
, data_end
, next_data
);
480 auto printable_bytes
= printable
.GetBytes();
481 auto printable_size
= printable
.GetSize();
482 if (!printable_bytes
|| !next_data
) {
483 // GetPrintable() failed on us - print one byte in a desperate resync
485 printable_bytes
= data
;
487 next_data
= data
+ 1;
489 for (unsigned c
= 0; c
< printable_size
; c
++)
490 options
.GetStream()->Printf("%c", *(printable_bytes
+ c
));
491 data
= (uint8_t *)next_data
;
493 options
.GetStream()->Printf("%c", *data
);
498 const char *suffix_token
= options
.GetSuffixToken();
500 if (suffix_token
!= nullptr)
501 options
.GetStream()->Printf("%c%s", quote
, suffix_token
);
503 options
.GetStream()->Printf("%c", quote
);
506 options
.GetStream()->Printf("...");
511 template <typename SourceDataType
>
512 static bool ReadUTFBufferAndDumpToStream(
513 const StringPrinter::ReadStringAndDumpToStreamOptions
&options
,
514 llvm::ConversionResult (*ConvertFunction
)(const SourceDataType
**,
515 const SourceDataType
*,
516 llvm::UTF8
**, llvm::UTF8
*,
517 llvm::ConversionFlags
)) {
518 assert(options
.GetStream() && "need a Stream to print the string to");
520 if (options
.GetLocation() == 0 ||
521 options
.GetLocation() == LLDB_INVALID_ADDRESS
)
524 lldb::ProcessSP
process_sp(options
.GetProcessSP());
529 const int type_width
= sizeof(SourceDataType
);
530 const int origin_encoding
= 8 * type_width
;
531 if (origin_encoding
!= 8 && origin_encoding
!= 16 && origin_encoding
!= 32)
533 // if not UTF8, I need a conversion function to return proper UTF8
534 if (origin_encoding
!= 8 && !ConvertFunction
)
537 if (!options
.GetStream())
540 uint32_t sourceSize
= options
.GetSourceSize();
541 bool needs_zero_terminator
= options
.GetNeedsZeroTermination();
543 bool is_truncated
= false;
544 const auto max_size
= process_sp
->GetTarget().GetMaximumSizeOfStringSummary();
547 sourceSize
= max_size
;
548 needs_zero_terminator
= true;
549 } else if (!options
.GetIgnoreMaxLength()) {
550 if (sourceSize
> max_size
) {
551 sourceSize
= max_size
;
556 const int bufferSPSize
= sourceSize
* type_width
;
558 lldb::DataBufferSP
buffer_sp(new DataBufferHeap(bufferSPSize
, 0));
560 if (!buffer_sp
->GetBytes())
564 char *buffer
= reinterpret_cast<char *>(buffer_sp
->GetBytes());
566 if (needs_zero_terminator
)
567 process_sp
->ReadStringFromMemory(options
.GetLocation(), buffer
,
568 bufferSPSize
, error
, type_width
);
570 process_sp
->ReadMemoryFromInferior(options
.GetLocation(),
571 (char *)buffer_sp
->GetBytes(),
572 bufferSPSize
, error
);
575 options
.GetStream()->Printf("unable to read data");
579 DataExtractor
data(buffer_sp
, process_sp
->GetByteOrder(),
580 process_sp
->GetAddressByteSize());
582 StringPrinter::ReadBufferAndDumpToStreamOptions
dump_options(options
);
583 dump_options
.SetData(data
);
584 dump_options
.SetSourceSize(sourceSize
);
585 dump_options
.SetIsTruncated(is_truncated
);
587 return DumpUTFBufferToStream(ConvertFunction
, dump_options
);
591 bool StringPrinter::ReadStringAndDumpToStream
<
592 StringPrinter::StringElementType::UTF8
>(
593 const ReadStringAndDumpToStreamOptions
&options
) {
594 return ReadUTFBufferAndDumpToStream
<llvm::UTF8
>(options
, nullptr);
598 bool StringPrinter::ReadStringAndDumpToStream
<
599 StringPrinter::StringElementType::UTF16
>(
600 const ReadStringAndDumpToStreamOptions
&options
) {
601 return ReadUTFBufferAndDumpToStream
<llvm::UTF16
>(options
,
602 llvm::ConvertUTF16toUTF8
);
606 bool StringPrinter::ReadStringAndDumpToStream
<
607 StringPrinter::StringElementType::UTF32
>(
608 const ReadStringAndDumpToStreamOptions
&options
) {
609 return ReadUTFBufferAndDumpToStream
<llvm::UTF32
>(options
,
610 llvm::ConvertUTF32toUTF8
);
614 bool StringPrinter::ReadBufferAndDumpToStream
<
615 StringPrinter::StringElementType::UTF8
>(
616 const ReadBufferAndDumpToStreamOptions
&options
) {
617 assert(options
.GetStream() && "need a Stream to print the string to");
619 return DumpUTFBufferToStream
<llvm::UTF8
>(nullptr, options
);
623 bool StringPrinter::ReadBufferAndDumpToStream
<
624 StringPrinter::StringElementType::ASCII
>(
625 const ReadBufferAndDumpToStreamOptions
&options
) {
626 // treat ASCII the same as UTF8
627 // FIXME: can we optimize ASCII some more?
628 return ReadBufferAndDumpToStream
<StringElementType::UTF8
>(options
);
632 bool StringPrinter::ReadBufferAndDumpToStream
<
633 StringPrinter::StringElementType::UTF16
>(
634 const ReadBufferAndDumpToStreamOptions
&options
) {
635 assert(options
.GetStream() && "need a Stream to print the string to");
637 return DumpUTFBufferToStream(llvm::ConvertUTF16toUTF8
, options
);
641 bool StringPrinter::ReadBufferAndDumpToStream
<
642 StringPrinter::StringElementType::UTF32
>(
643 const ReadBufferAndDumpToStreamOptions
&options
) {
644 assert(options
.GetStream() && "need a Stream to print the string to");
646 return DumpUTFBufferToStream(llvm::ConvertUTF32toUTF8
, options
);
649 } // namespace formatters
651 } // namespace lldb_private