1 /* General utility routines for GDB/Python.
3 Copyright (C) 2008-2024 Free Software Foundation, Inc.
5 This file is part of GDB.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
23 #include "python-internal.h"
25 /* Converts a Python 8-bit string to a unicode string object. Assumes the
26 8-bit string is in the host charset. If an error occurs during conversion,
27 returns NULL with a python exception set.
29 As an added bonus, the function accepts a unicode string and returns it
30 right away, so callers don't need to check which kind of string they've
31 got. In Python 3, all strings are Unicode, so this case is always the one that applies.
34 If the given object is not one of the mentioned string types, NULL is
35 returned, with the TypeError python exception set. */
37 python_string_to_unicode (PyObject
*obj
)
39 PyObject
*unicode_str
;
41 /* If obj is already a unicode string, just return it.
42 I wish life was always that simple... */
43 if (PyUnicode_Check (obj
))
50 PyErr_SetString (PyExc_TypeError
,
51 _("Expected a string object."));
55 return gdbpy_ref
<> (unicode_str
);
58 /* Returns a newly allocated string with the contents of the given unicode
59 string object converted to CHARSET. If an error occurs during the
60 conversion, NULL will be returned and a python exception will be set. */
62 static gdb::unique_xmalloc_ptr
<char>
63 unicode_to_encoded_string (PyObject
*unicode_str
, const char *charset
)
65 /* Translate string to named charset. */
66 gdbpy_ref
<> string (PyUnicode_AsEncodedString (unicode_str
, charset
, NULL
));
70 return gdb::unique_xmalloc_ptr
<char>
71 (xstrdup (PyBytes_AsString (string
.get ())));
74 /* Returns a PyObject with the contents of the given unicode string
75 object converted to a named charset. If an error occurs during
76 the conversion, NULL will be returned and a python exception will be set. */
79 unicode_to_encoded_python_string (PyObject
*unicode_str
, const char *charset
)
81 /* Translate string to named charset. */
82 return gdbpy_ref
<> (PyUnicode_AsEncodedString (unicode_str
, charset
, NULL
));
85 /* Returns a newly allocated string with the contents of the given
86 unicode string object converted to the target's charset. If an
87 error occurs during the conversion, NULL will be returned and a
88 python exception will be set. */
89 gdb::unique_xmalloc_ptr
<char>
90 unicode_to_target_string (PyObject
*unicode_str
)
92 return (unicode_to_encoded_string
94 target_charset (gdbpy_enter::get_gdbarch ())));
97 /* Returns a PyObject with the contents of the given unicode string
98 object converted to the target's charset. If an error occurs
99 during the conversion, NULL will be returned and a python exception will be set. */
102 unicode_to_target_python_string (PyObject
*unicode_str
)
104 return (unicode_to_encoded_python_string
106 target_charset (gdbpy_enter::get_gdbarch ())));
109 /* Converts a python string (8-bit or unicode) to a target string in
110 the target's charset. Returns NULL on error, with a python exception set. */
112 gdb::unique_xmalloc_ptr
<char>
113 python_string_to_target_string (PyObject
*obj
)
115 gdbpy_ref
<> str
= python_string_to_unicode (obj
);
119 return unicode_to_target_string (str
.get ());
122 /* Converts a python string (8-bit or unicode) to a target string in the
123 target's charset. Returns NULL on error, with a python exception set.
126 In Python 3, the returned object is a "bytes" object (not a string). */
128 python_string_to_target_python_string (PyObject
*obj
)
130 gdbpy_ref
<> str
= python_string_to_unicode (obj
);
134 return unicode_to_target_python_string (str
.get ());
137 /* Converts a python string (8-bit or unicode) to a string in
138 the host's charset. Returns NULL on error, with a python exception set. */
140 gdb::unique_xmalloc_ptr
<char>
141 python_string_to_host_string (PyObject
*obj
)
143 gdbpy_ref
<> str
= python_string_to_unicode (obj
);
147 return unicode_to_encoded_string (str
.get (), host_charset ());
150 /* Convert a host string to a python string. */
153 host_string_to_python_string (const char *str
)
155 return gdbpy_ref
<> (PyUnicode_Decode (str
, strlen (str
), host_charset (),
159 /* Return true if OBJ is a Python string or unicode object, false otherwise. */
163 gdbpy_is_string (PyObject
*obj
)
165 return PyUnicode_Check (obj
);
168 /* Return the string representation of OBJ, i.e., str (obj).
169 If the result is NULL a python error occurred, the caller must clear it. */
171 gdb::unique_xmalloc_ptr
<char>
172 gdbpy_obj_to_string (PyObject
*obj
)
174 gdbpy_ref
<> str_obj (PyObject_Str (obj
));
177 return python_string_to_host_string (str_obj
.get ());
182 /* See python-internal.h. */
184 gdb::unique_xmalloc_ptr
<char>
185 gdbpy_err_fetch::to_string () const
187 /* There are a few cases to consider.
189 value is a string when PyErr_SetString is used.
190 value is not a string when raise "foo" is used, instead it is None and type is "foo".
192 So the algorithm we use is to print `str (value)' if it's not
193 None, otherwise we print `str (type)'.
194 Using str (aka PyObject_Str) will fetch the error message from
195 gdb.GdbError ("message"). */
197 gdbpy_ref
<> value
= this->value ();
198 if (value
.get () != nullptr && value
.get () != Py_None
)
199 return gdbpy_obj_to_string (value
.get ());
201 return gdbpy_obj_to_string (this->type ().get ());
204 /* See python-internal.h. */
206 gdb::unique_xmalloc_ptr
<char>
207 gdbpy_err_fetch::type_to_string () const
209 return gdbpy_obj_to_string (this->type ().get ());
212 /* Convert a GDB exception to the appropriate Python exception.
214 This sets the Python error indicator. */
217 gdbpy_convert_exception (const struct gdb_exception
&exception
)
221 if (exception
.reason
== RETURN_QUIT
)
222 exc_class
= PyExc_KeyboardInterrupt
;
223 else if (exception
.reason
== RETURN_FORCED_QUIT
)
224 quit_force (NULL
, 0);
225 else if (exception
.error
== MEMORY_ERROR
)
226 exc_class
= gdbpy_gdb_memory_error
;
228 exc_class
= gdbpy_gdb_error
;
230 PyErr_Format (exc_class
, "%s", exception
.what ());
233 /* Converts OBJ to a CORE_ADDR value.
235 Returns 0 on success or -1 on failure, with a Python exception set. */
239 get_addr_from_python (PyObject
*obj
, CORE_ADDR
*addr
)
241 if (gdbpy_is_value_object (obj
))
246 *addr
= value_as_address (value_object_to_value (obj
));
248 catch (const gdb_exception
&except
)
250 return gdbpy_handle_gdb_exception (-1, except
);
255 gdbpy_ref
<> num (PyNumber_Long (obj
));
261 val
= gdb_py_long_as_ulongest (num
.get ());
262 if (PyErr_Occurred ())
265 if (sizeof (val
) > sizeof (CORE_ADDR
) && ((CORE_ADDR
) val
) != val
)
267 PyErr_SetString (PyExc_ValueError
,
268 _("Overflow converting to address."));
278 /* Convert a LONGEST to a Python integer object. PyLong_FromLongLong is
279 used when LONGEST is wider than long, PyLong_FromLong otherwise. */
282 gdb_py_object_from_longest (LONGEST l
)
284 if (sizeof (l
) > sizeof (long))
285 return gdbpy_ref
<> (PyLong_FromLongLong (l
));
286 return gdbpy_ref
<> (PyLong_FromLong (l
));
289 /* Convert a ULONGEST to a Python integer object. PyLong_FromUnsignedLongLong
290 is used when ULONGEST is wider than unsigned long, PyLong_FromUnsignedLong otherwise. */
293 gdb_py_object_from_ulongest (ULONGEST l
)
295 if (sizeof (l
) > sizeof (unsigned long))
296 return gdbpy_ref
<> (PyLong_FromUnsignedLongLong (l
));
297 return gdbpy_ref
<> (PyLong_FromUnsignedLong (l
));
300 /* Like PyLong_AsLong, but returns 0 on failure, 1 on success, and puts
301 the value into an out parameter. */
304 gdb_py_int_as_long (PyObject
*obj
, long *result
)
306 *result
= PyLong_AsLong (obj
);
307 return ! (*result
== -1 && PyErr_Occurred ());
312 /* Generic implementation of the __dict__ attribute for objects that
313 have a dictionary. The CLOSURE argument should be the type object.
314 This only handles positive values for tp_dictoffset. */
317 gdb_py_generic_dict (PyObject
*self
, void *closure
)
320 PyTypeObject
*type_obj
= (PyTypeObject
*) closure
;
323 raw_ptr
= (char *) self
+ type_obj
->tp_dictoffset
;
324 result
= * (PyObject
**) raw_ptr
;
330 /* Like PyModule_AddObject, but does not steal a reference to OBJECT. */
334 gdb_pymodule_addobject (PyObject
*module
, const char *name
, PyObject
*object
)
339 result
= PyModule_AddObject (module
, name
, object
);
345 /* See python-internal.h. */
348 gdbpy_error (const char *fmt
, ...)
352 std::string str
= string_vprintf (fmt
, ap
);
355 const char *msg
= str
.c_str ();
356 if (msg
!= nullptr && *msg
!= '\0')
357 error (_("Error occurred in Python: %s"), msg
);
359 error (_("Error occurred in Python."));
362 /* Handle a Python exception when the special gdb.GdbError treatment
363 is desired. This should only be called when an exception is set.
364 If the exception is a gdb.GdbError, throw a gdb exception with the
365 exception text. For other exceptions, print the Python stack and
366 then throw a gdb exception. */
369 gdbpy_handle_exception ()
371 gdbpy_err_fetch fetched_error
;
372 gdb::unique_xmalloc_ptr
<char> msg
= fetched_error
.to_string ();
376 /* An error occurred computing the string representation of the
377 error message. This is rare, but we should inform the user. */
378 gdb_printf (_("An error occurred in Python "
379 "and then another occurred computing the "
380 "error message.\n"));
381 gdbpy_print_stack ();
384 /* Don't print the stack for gdb.GdbError exceptions.
385 It is generally used to flag user errors.
387 We also don't want to print "Error occurred in Python command"
388 for user errors. However, a missing message for gdb.GdbError
389 exceptions is arguably a bug, so we flag it as such. */
391 if (fetched_error
.type_matches (PyExc_KeyboardInterrupt
))
393 else if (fetched_error
.type_matches (PyExc_SystemExit
))
395 gdbpy_ref
<> value
= fetched_error
.value ();
396 gdbpy_ref
<> code (PyObject_GetAttrString (value
.get (), "code"));
399 if (code
.get () == Py_None
)
401 /* CODE == None: exit status is 0. */
404 else if (code
.get () != nullptr && PyLong_Check (code
.get ()))
406 /* CODE == integer: exit status is aforementioned integer. */
407 exit_arg
= PyLong_AsLong (code
.get ());
411 if (code
.get () == nullptr)
412 gdbpy_print_stack ();
414 /* Otherwise: exit status is 1, print code to stderr. */
416 gdb_printf (gdb_stderr
, "%s\n", msg
.get ());
420 quit_force (&exit_arg
, 0);
422 else if (! fetched_error
.type_matches (gdbpy_gdberror_exc
)
423 || msg
== NULL
|| *msg
== '\0')
425 fetched_error
.restore ();
426 gdbpy_print_stack ();
427 if (msg
!= NULL
&& *msg
!= '\0')
428 error (_("Error occurred in Python: %s"), msg
.get ());
430 error (_("Error occurred in Python."));
433 error ("%s", msg
.get ());
436 /* See python-internal.h. */
438 gdb::unique_xmalloc_ptr
<char>
439 gdbpy_fix_doc_string_indentation (gdb::unique_xmalloc_ptr
<char> doc
)
441 /* A structure used to track the white-space information on each line of DOC. */
443 struct line_whitespace
445 /* Constructor. OFFSET is the offset from the start of DOC, WS_COUNT
446 is the number of whitespace characters starting at OFFSET. */
447 line_whitespace (size_t offset
, int ws_count
)
449 m_ws_count (ws_count
)
452 /* The offset from the start of DOC. */
453 size_t offset () const
456 /* The number of white-space characters at the start of this line. */
458 { return m_ws_count
; }
461 /* The offset from the start of DOC to the first character of this line. */
465 /* White space count on this line, the first character of this
466 whitespace is at OFFSET. */
470 /* Count the number of white-space characters starting at TXT. We
471 currently only count true single space characters, things like tabs,
472 newlines, etc are not counted. */
473 auto count_whitespace
= [] (const char *txt
) -> int
486 /* In MIN_WHITESPACE we track the smallest number of whitespace
487 characters seen at the start of a line (that has actual content), this
488 is the number of characters that we can delete off all lines without
489 altering the relative indentation of all lines in DOC.
491 The first line often has no indentation, but instead starts immediately
492 after the 3-quotes marker within the Python doc string, so, if the
493 first line has zero white-space then we just ignore it, and don't set
494 MIN_WHITESPACE to zero.
496 Lines without any content should (ideally) have no white-space at
497 all, but if they do then they might have an artificially low number
498 (user left a single stray space at the start of an otherwise blank
499 line), we don't consider lines without content when updating the
500 MIN_WHITESPACE value. */
501 std::optional
<int> min_whitespace
;
503 /* The index into WS_INFO at which the processing of DOC can be
504 considered "all done", that is, after this point there are no further
505 lines with useful content and we should just stop. */
506 std::optional
<size_t> all_done_idx
;
508 /* White-space information for each line in DOC. */
509 std::vector
<line_whitespace
> ws_info
;
511 /* Now look through DOC and collect the required information. */
512 const char *tmp
= doc
.get ();
515 /* Add an entry for the offset to the start of this line, and how
516 much white-space there is at the start of this line. */
517 size_t offset
= tmp
- doc
.get ();
518 int ws_count
= count_whitespace (tmp
);
519 ws_info
.emplace_back (offset
, ws_count
);
521 /* Skip over the white-space. */
524 /* Remember where the content of this line starts, and skip forward
525 to either the end of this line (newline) or the end of the DOC
526 string (null character), whichever comes first. */
527 const char *content_start
= tmp
;
528 while (*tmp
!= '\0' && *tmp
!= '\n')
531 /* If this is not the first line, and if this line has some content,
532 then update MIN_WHITESPACE, this reflects the smallest number of
533 whitespace characters we can delete from all lines without
534 impacting the relative indentation of all the lines of DOC. */
535 if (offset
> 0 && tmp
> content_start
)
537 if (!min_whitespace
.has_value ())
538 min_whitespace
= ws_count
;
540 min_whitespace
= std::min (*min_whitespace
, ws_count
);
543 /* Each time we encounter a line that has some content we update
544 ALL_DONE_IDX to be the index of the next line. If the last lines
545 of DOC don't contain any content then ALL_DONE_IDX will be left
546 pointing at an earlier line. When we rewrite DOC, when we reach
547 ALL_DONE_IDX then we can stop, this allows us to trim any blank
548 lines from the end of DOC. */
549 if (tmp
> content_start
)
550 all_done_idx
= ws_info
.size ();
552 /* If we reached a newline then skip forward to the start of the next
553 line. The other possibility at this point is that we're at the
554 very end of the DOC string (null terminator). */
559 /* We found no lines with content, fail safe by just returning the
560 original documentation string. */
561 if (!all_done_idx
.has_value () || !min_whitespace
.has_value ())
564 /* Setup DST and SRC, both pointing into the DOC string. We're going to
565 rewrite DOC in-place, as we only ever make DOC shorter (by removing
566 white-space), thus we know this will not overflow. */
567 char *dst
= doc
.get ();
568 char *src
= doc
.get ();
570 /* Array indices used with DST, SRC, and WS_INFO respectively. */
571 size_t dst_offset
= 0;
572 size_t src_offset
= 0;
573 size_t ws_info_offset
= 0;
575 /* Now, walk over the source string, this is the original DOC. */
576 while (src
[src_offset
] != '\0')
578 /* If we are at the start of the next line (in WS_INFO), then we may
579 need to skip some white-space characters. */
580 if (src_offset
== ws_info
[ws_info_offset
].offset ())
582 /* If a line has leading white-space then we need to skip over
583 some number of characters now. */
584 if (ws_info
[ws_info_offset
].ws () > 0)
586 /* If the line is entirely white-space then we skip all of
587 the white-space, the next character to copy will be the
588 newline or null character. Otherwise, we skip just
589 some portion of the leading white-space. */
590 if (src
[src_offset
+ ws_info
[ws_info_offset
].ws ()] == '\n'
591 || src
[src_offset
+ ws_info
[ws_info_offset
].ws ()] == '\0')
592 src_offset
+= ws_info
[ws_info_offset
].ws ();
594 src_offset
+= std::min (*min_whitespace
,
595 ws_info
[ws_info_offset
].ws ());
597 /* If we skipped white-space, and are now at the end of the
598 input, then we're done. */
599 if (src
[src_offset
] == '\0')
602 if (ws_info_offset
< (ws_info
.size () - 1))
604 if (ws_info_offset
> *all_done_idx
)
608 /* Don't copy a newline to the start of the DST string, this would
609 result in a leading blank line. But in all other cases, copy the
610 next character into the destination string. */
611 if ((dst_offset
> 0 || src
[src_offset
] != '\n'))
613 dst
[dst_offset
] = src
[src_offset
];
617 /* Move to the next source character. */
621 /* Remove the trailing newline character(s), and ensure we have a null
622 terminator in place. */
623 while (dst_offset
> 1 && dst
[dst_offset
- 1] == '\n')
625 dst
[dst_offset
] = '\0';
630 /* See python-internal.h. */
633 gdb_py_invalid_object_repr (PyObject
*self
)
635 return PyUnicode_FromFormat ("<%s (invalid)>", Py_TYPE (self
)->tp_name
);