gdb/python/py-utils.c

   1 /* General utility routines for GDB/Python.
   2
   3    Copyright (C) 2008-2024 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "top.h"
  21 #include "charset.h"
  22 #include "value.h"
  23 #include "python-internal.h"
  24
  25 /* Converts a Python 8-bit string to a unicode string object.  Assumes the
  26    8-bit string is in the host charset.  If an error occurs during conversion,
  27    returns NULL with a python exception set.
  28
  29    As an added bonus, the functions accepts a unicode string and returns it
  30    right away, so callers don't need to check which kind of string they've
  31    got.  In Python 3, all strings are Unicode so this case is always the
  32    one that applies.
  33
  34    If the given object is not one of the mentioned string types, NULL is
  35    returned, with the TypeError python exception set.  */
  36 gdbpy_ref<>
  37 python_string_to_unicode (PyObject *obj)
  38 {
  39   PyObject *unicode_str;
  40
  41   /* If obj is already a unicode string, just return it.
  42      I wish life was always that simple...  */
  43   if (PyUnicode_Check (obj))
  44     {
  45       unicode_str = obj;
  46       Py_INCREF (obj);
  47     }
  48   else
  49     {
  50       PyErr_SetString (PyExc_TypeError,
  51                        _("Expected a string object."));
  52       unicode_str = NULL;
  53     }
  54
  55   return gdbpy_ref<> (unicode_str);
  56 }
  57
  58 /* Returns a newly allocated string with the contents of the given unicode
  59    string object converted to CHARSET.  If an error occurs during the
  60    conversion, NULL will be returned and a python exception will be
  61    set.  */
  62 static gdb::unique_xmalloc_ptr<char>
  63 unicode_to_encoded_string (PyObject *unicode_str, const char *charset)
  64 {
  65   /* Translate string to named charset.  */
  66   gdbpy_ref<> string (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
  67   if (string == NULL)
  68     return NULL;
  69
  70   return gdb::unique_xmalloc_ptr<char>
  71     (xstrdup (PyBytes_AsString (string.get ())));
  72 }
  73
  74 /* Returns a PyObject with the contents of the given unicode string
  75    object converted to a named charset.  If an error occurs during
  76    the conversion, NULL will be returned and a python exception will
  77    be set.  */
  78 static gdbpy_ref<>
  79 unicode_to_encoded_python_string (PyObject *unicode_str, const char *charset)
  80 {
  81   /* Translate string to named charset.  */
  82   return gdbpy_ref<> (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
  83 }
  84
  85 /* Returns a newly allocated string with the contents of the given
  86    unicode string object converted to the target's charset.  If an
  87    error occurs during the conversion, NULL will be returned and a
  88    python exception will be set.  */
  89 gdb::unique_xmalloc_ptr<char>
  90 unicode_to_target_string (PyObject *unicode_str)
  91 {
  92   return (unicode_to_encoded_string
  93           (unicode_str,
  94            target_charset (gdbpy_enter::get_gdbarch ())));
  95 }
  96
  97 /* Returns a PyObject with the contents of the given unicode string
  98    object converted to the target's charset.  If an error occurs
  99    during the conversion, NULL will be returned and a python exception
 100    will be set.  */
 101 static gdbpy_ref<>
 102 unicode_to_target_python_string (PyObject *unicode_str)
 103 {
 104   return (unicode_to_encoded_python_string
 105           (unicode_str,
 106            target_charset (gdbpy_enter::get_gdbarch ())));
 107 }
 108
 109 /* Converts a python string (8-bit or unicode) to a target string in
 110    the target's charset.  Returns NULL on error, with a python
 111    exception set.  */
 112 gdb::unique_xmalloc_ptr<char>
 113 python_string_to_target_string (PyObject *obj)
 114 {
 115   gdbpy_ref<> str = python_string_to_unicode (obj);
 116   if (str == NULL)
 117     return NULL;
 118
 119   return unicode_to_target_string (str.get ());
 120 }
 121
 122 /* Converts a python string (8-bit or unicode) to a target string in the
 123    target's charset.  Returns NULL on error, with a python exception
 124    set.
 125
 126    In Python 3, the returned object is a "bytes" object (not a string).  */
 127 gdbpy_ref<>
 128 python_string_to_target_python_string (PyObject *obj)
 129 {
 130   gdbpy_ref<> str = python_string_to_unicode (obj);
 131   if (str == NULL)
 132     return str;
 133
 134   return unicode_to_target_python_string (str.get ());
 135 }
 136
 137 /* Converts a python string (8-bit or unicode) to a target string in
 138    the host's charset.  Returns NULL on error, with a python exception
 139    set.  */
 140 gdb::unique_xmalloc_ptr<char>
 141 python_string_to_host_string (PyObject *obj)
 142 {
 143   gdbpy_ref<> str = python_string_to_unicode (obj);
 144   if (str == NULL)
 145     return NULL;
 146
 147   return unicode_to_encoded_string (str.get (), host_charset ());
 148 }
 149
 150 /* Convert a host string to a python string.  */
 151
 152 gdbpy_ref<>
 153 host_string_to_python_string (const char *str)
 154 {
 155   return gdbpy_ref<> (PyUnicode_Decode (str, strlen (str), host_charset (),
 156                                         NULL));
 157 }
 158
 159 /* Return true if OBJ is a Python string or unicode object, false
 160    otherwise.  */
 161
 162 int
 163 gdbpy_is_string (PyObject *obj)
 164 {
 165   return PyUnicode_Check (obj);
 166 }
 167
 168 /* Return the string representation of OBJ, i.e., str (obj).
 169    If the result is NULL a python error occurred, the caller must clear it.  */
 170
 171 gdb::unique_xmalloc_ptr<char>
 172 gdbpy_obj_to_string (PyObject *obj)
 173 {
 174   gdbpy_ref<> str_obj (PyObject_Str (obj));
 175
 176   if (str_obj != NULL)
 177     return python_string_to_host_string (str_obj.get ());
 178
 179   return NULL;
 180 }
 181
 182 /* See python-internal.h.  */
 183
 184 gdb::unique_xmalloc_ptr<char>
 185 gdbpy_err_fetch::to_string () const
 186 {
 187   /* There are a few cases to consider.
 188      For example:
 189      value is a string when PyErr_SetString is used.
 190      value is not a string when raise "foo" is used, instead it is None
 191      and type is "foo".
 192      So the algorithm we use is to print `str (value)' if it's not
 193      None, otherwise we print `str (type)'.
 194      Using str (aka PyObject_Str) will fetch the error message from
 195      gdb.GdbError ("message").  */
 196
 197   gdbpy_ref<> value = this->value ();
 198   if (value.get () != nullptr && value.get () != Py_None)
 199     return gdbpy_obj_to_string (value.get ());
 200   else
 201     return gdbpy_obj_to_string (this->type ().get ());
 202 }
 203
 204 /* See python-internal.h.  */
 205
 206 gdb::unique_xmalloc_ptr<char>
 207 gdbpy_err_fetch::type_to_string () const
 208 {
 209   return gdbpy_obj_to_string (this->type ().get ());
 210 }
 211
 212 /* Convert a GDB exception to the appropriate Python exception.
 213
 214    This sets the Python error indicator.  */
 215
 216 void
 217 gdbpy_convert_exception (const struct gdb_exception &exception)
 218 {
 219   PyObject *exc_class;
 220
 221   if (exception.reason == RETURN_QUIT)
 222     exc_class = PyExc_KeyboardInterrupt;
 223   else if (exception.reason == RETURN_FORCED_QUIT)
 224     quit_force (NULL, 0);
 225   else if (exception.error == MEMORY_ERROR)
 226     exc_class = gdbpy_gdb_memory_error;
 227   else
 228     exc_class = gdbpy_gdb_error;
 229
 230   PyErr_Format (exc_class, "%s", exception.what ());
 231 }
 232
 233 /* Converts OBJ to a CORE_ADDR value.
 234
 235    Returns 0 on success or -1 on failure, with a Python exception set.
 236 */
 237
 238 int
 239 get_addr_from_python (PyObject *obj, CORE_ADDR *addr)
 240 {
 241   if (gdbpy_is_value_object (obj))
 242     {
 243
 244       try
 245         {
 246           *addr = value_as_address (value_object_to_value (obj));
 247         }
 248       catch (const gdb_exception &except)
 249         {
 250           return gdbpy_handle_gdb_exception (-1, except);
 251         }
 252     }
 253   else
 254     {
 255       gdbpy_ref<> num (PyNumber_Long (obj));
 256       gdb_py_ulongest val;
 257
 258       if (num == NULL)
 259         return -1;
 260
 261       val = gdb_py_long_as_ulongest (num.get ());
 262       if (PyErr_Occurred ())
 263         return -1;
 264
 265       if (sizeof (val) > sizeof (CORE_ADDR) && ((CORE_ADDR) val) != val)
 266         {
 267           PyErr_SetString (PyExc_ValueError,
 268                            _("Overflow converting to address."));
 269           return -1;
 270         }
 271
 272       *addr = val;
 273     }
 274
 275   return 0;
 276 }
 277
 278 /* Convert a LONGEST to the appropriate Python object -- either an
 279    integer object or a long object, depending on its value.  */
 280
 281 gdbpy_ref<>
 282 gdb_py_object_from_longest (LONGEST l)
 283 {
 284   if (sizeof (l) > sizeof (long))
 285     return gdbpy_ref<> (PyLong_FromLongLong (l));
 286   return gdbpy_ref<> (PyLong_FromLong (l));
 287 }
 288
 289 /* Convert a ULONGEST to the appropriate Python object -- either an
 290    integer object or a long object, depending on its value.  */
 291
 292 gdbpy_ref<>
 293 gdb_py_object_from_ulongest (ULONGEST l)
 294 {
 295   if (sizeof (l) > sizeof (unsigned long))
 296     return gdbpy_ref<> (PyLong_FromUnsignedLongLong (l));
 297   return gdbpy_ref<> (PyLong_FromUnsignedLong (l));
 298 }
 299
 300 /* Like PyLong_AsLong, but returns 0 on failure, 1 on success, and puts
 301    the value into an out parameter.  */
 302
 303 int
 304 gdb_py_int_as_long (PyObject *obj, long *result)
 305 {
 306   *result = PyLong_AsLong (obj);
 307   return ! (*result == -1 && PyErr_Occurred ());
 308 }
 309
 310 \f
 311
 312 /* Generic implementation of the __dict__ attribute for objects that
 313    have a dictionary.  The CLOSURE argument should be the type object.
 314    This only handles positive values for tp_dictoffset.  */
 315
 316 PyObject *
 317 gdb_py_generic_dict (PyObject *self, void *closure)
 318 {
 319   PyObject *result;
 320   PyTypeObject *type_obj = (PyTypeObject *) closure;
 321   char *raw_ptr;
 322
 323   raw_ptr = (char *) self + type_obj->tp_dictoffset;
 324   result = * (PyObject **) raw_ptr;
 325
 326   Py_INCREF (result);
 327   return result;
 328 }
 329
 330 /* Like PyModule_AddObject, but does not steal a reference to
 331    OBJECT.  */
 332
 333 int
 334 gdb_pymodule_addobject (PyObject *module, const char *name, PyObject *object)
 335 {
 336   int result;
 337
 338   Py_INCREF (object);
 339   result = PyModule_AddObject (module, name, object);
 340   if (result < 0)
 341     Py_DECREF (object);
 342   return result;
 343 }
 344
 345 /* See python-internal.h.  */
 346
 347 void
 348 gdbpy_error (const char *fmt, ...)
 349 {
 350   va_list ap;
 351   va_start (ap, fmt);
 352   std::string str = string_vprintf (fmt, ap);
 353   va_end (ap);
 354
 355   const char *msg = str.c_str ();
 356   if (msg != nullptr && *msg != '\0')
 357     error (_("Error occurred in Python: %s"), msg);
 358   else
 359     error (_("Error occurred in Python."));
 360 }
 361
 362 /* Handle a Python exception when the special gdb.GdbError treatment
 363    is desired.  This should only be called when an exception is set.
 364    If the exception is a gdb.GdbError, throw a gdb exception with the
 365    exception text.  For other exceptions, print the Python stack and
 366    then throw a gdb exception.  */
 367
 368 void
 369 gdbpy_handle_exception ()
 370 {
 371   gdbpy_err_fetch fetched_error;
 372   gdb::unique_xmalloc_ptr<char> msg = fetched_error.to_string ();
 373
 374   if (msg == NULL)
 375     {
 376       /* An error occurred computing the string representation of the
 377          error message.  This is rare, but we should inform the user.  */
 378       gdb_printf (_("An error occurred in Python "
 379                     "and then another occurred computing the "
 380                     "error message.\n"));
 381       gdbpy_print_stack ();
 382     }
 383
 384   /* Don't print the stack for gdb.GdbError exceptions.
 385      It is generally used to flag user errors.
 386
 387      We also don't want to print "Error occurred in Python command"
 388      for user errors.  However, a missing message for gdb.GdbError
 389      exceptions is arguably a bug, so we flag it as such.  */
 390
 391   if (fetched_error.type_matches (PyExc_KeyboardInterrupt))
 392     throw_quit ("Quit");
 393   else if (fetched_error.type_matches (PyExc_SystemExit))
 394     {
 395       gdbpy_ref<> value = fetched_error.value ();
 396       gdbpy_ref<> code (PyObject_GetAttrString (value.get (), "code"));
 397       int exit_arg;
 398
 399       if (code.get () == Py_None)
 400         {
 401           /* CODE == None: exit status is 0.  */
 402           exit_arg = 0;
 403         }
 404       else if (code.get () != nullptr && PyLong_Check (code.get ()))
 405         {
 406           /* CODE == integer: exit status is aforementioned integer.  */
 407           exit_arg = PyLong_AsLong (code.get ());
 408         }
 409       else
 410         {
 411           if (code.get () == nullptr)
 412             gdbpy_print_stack ();
 413
 414           /* Otherwise: exit status is 1, print code to stderr.  */
 415           if (msg != nullptr)
 416             gdb_printf (gdb_stderr, "%s\n", msg.get ());
 417           exit_arg = 1;
 418         }
 419
 420       quit_force (&exit_arg, 0);
 421     }
 422   else if (! fetched_error.type_matches (gdbpy_gdberror_exc)
 423            || msg == NULL || *msg == '\0')
 424     {
 425       fetched_error.restore ();
 426       gdbpy_print_stack ();
 427       if (msg != NULL && *msg != '\0')
 428         error (_("Error occurred in Python: %s"), msg.get ());
 429       else
 430         error (_("Error occurred in Python."));
 431     }
 432   else
 433     error ("%s", msg.get ());
 434 }
 435
 436 /* See python-internal.h.  */
 437
 438 gdb::unique_xmalloc_ptr<char>
 439 gdbpy_fix_doc_string_indentation (gdb::unique_xmalloc_ptr<char> doc)
 440 {
 441   /* A structure used to track the white-space information on each line of
 442      DOC.  */
 443   struct line_whitespace
 444   {
 445     /* Constructor.  OFFSET is the offset from the start of DOC, WS_COUNT
 446        is the number of whitespace characters starting at OFFSET.  */
 447     line_whitespace (size_t offset, int ws_count)
 448       : m_offset (offset),
 449         m_ws_count (ws_count)
 450     { /* Nothing.  */ }
 451
 452     /* The offset from the start of DOC.  */
 453     size_t offset () const
 454     { return m_offset; }
 455
 456     /* The number of white-space characters at the start of this line.  */
 457     int ws () const
 458     { return m_ws_count; }
 459
 460   private:
 461     /* The offset from the start of DOC to the first character of this
 462        line.  */
 463     size_t m_offset;
 464
 465     /* White space count on this line, the first character of this
 466        whitespace is at OFFSET.  */
 467     int m_ws_count;
 468   };
 469
 470   /* Count the number of white-space character starting at TXT.  We
 471      currently only count true single space characters, things like tabs,
 472      newlines, etc are not counted.  */
 473   auto count_whitespace = [] (const char *txt) -> int
 474   {
 475     int count = 0;
 476
 477     while (*txt == ' ')
 478       {
 479         ++txt;
 480         ++count;
 481       }
 482
 483     return count;
 484   };
 485
 486   /* In MIN_WHITESPACE we track the smallest number of whitespace
 487      characters seen at the start of a line (that has actual content), this
 488      is the number of characters that we can delete off all lines without
 489      altering the relative indentation of all lines in DOC.
 490
 491      The first line often has no indentation, but instead starts immediates
 492      after the 3-quotes marker within the Python doc string, so, if the
 493      first line has zero white-space then we just ignore it, and don't set
 494      MIN_WHITESPACE to zero.
 495
 496      Lines without any content should (ideally) have no white-space at
 497      all, but if they do then they might have an artificially low number
 498      (user left a single stray space at the start of an otherwise blank
 499      line), we don't consider lines without content when updating the
 500      MIN_WHITESPACE value.  */
 501   std::optional<int> min_whitespace;
 502
 503   /* The index into WS_INFO at which the processing of DOC can be
 504      considered "all done", that is, after this point there are no further
 505      lines with useful content and we should just stop.  */
 506   std::optional<size_t> all_done_idx;
 507
 508   /* White-space information for each line in DOC.  */
 509   std::vector<line_whitespace> ws_info;
 510
 511   /* Now look through DOC and collect the required information.  */
 512   const char *tmp = doc.get ();
 513   while (*tmp != '\0')
 514     {
 515       /* Add an entry for the offset to the start of this line, and how
 516          much white-space there is at the start of this line.  */
 517       size_t offset = tmp - doc.get ();
 518       int ws_count = count_whitespace (tmp);
 519       ws_info.emplace_back (offset, ws_count);
 520
 521       /* Skip over the white-space.  */
 522       tmp += ws_count;
 523
 524       /* Remember where the content of this line starts, and skip forward
 525          to either the end of this line (newline) or the end of the DOC
 526          string (null character), whichever comes first.  */
 527       const char *content_start = tmp;
 528       while (*tmp != '\0' && *tmp != '\n')
 529         ++tmp;
 530
 531       /* If this is not the first line, and if this line has some content,
 532          then update MIN_WHITESPACE, this reflects the smallest number of
 533          whitespace characters we can delete from all lines without
 534          impacting the relative indentation of all the lines of DOC.  */
 535       if (offset > 0 && tmp > content_start)
 536         {
 537           if (!min_whitespace.has_value ())
 538             min_whitespace = ws_count;
 539           else
 540             min_whitespace = std::min (*min_whitespace, ws_count);
 541         }
 542
 543       /* Each time we encounter a line that has some content we update
 544          ALL_DONE_IDX to be the index of the next line.  If the last lines
 545          of DOC don't contain any content then ALL_DONE_IDX will be left
 546          pointing at an earlier line.  When we rewrite DOC, when we reach
 547          ALL_DONE_IDX then we can stop, the allows us to trim any blank
 548          lines from the end of DOC.  */
 549       if (tmp > content_start)
 550         all_done_idx = ws_info.size ();
 551
 552       /* If we reached a newline then skip forward to the start of the next
 553          line.  The other possibility at this point is that we're at the
 554          very end of the DOC string (null terminator).  */
 555       if (*tmp == '\n')
 556         ++tmp;
 557     }
 558
 559   /* We found no lines with content, fail safe by just returning the
 560      original documentation string.  */
 561   if (!all_done_idx.has_value () || !min_whitespace.has_value ())
 562     return doc;
 563
 564   /* Setup DST and SRC, both pointing into the DOC string.  We're going to
 565      rewrite DOC in-place, as we only ever make DOC shorter (by removing
 566      white-space), thus we know this will not overflow.  */
 567   char *dst = doc.get ();
 568   char *src = doc.get ();
 569
 570   /* Array indices used with DST, SRC, and WS_INFO respectively.  */
 571   size_t dst_offset = 0;
 572   size_t src_offset = 0;
 573   size_t ws_info_offset = 0;
 574
 575   /* Now, walk over the source string, this is the original DOC.  */
 576   while (src[src_offset] != '\0')
 577     {
 578       /* If we are at the start of the next line (in WS_INFO), then we may
 579          need to skip some white-space characters.  */
 580       if (src_offset == ws_info[ws_info_offset].offset ())
 581         {
 582           /* If a line has leading white-space then we need to skip over
 583              some number of characters now.  */
 584           if (ws_info[ws_info_offset].ws () > 0)
 585             {
 586               /* If the line is entirely white-space then we skip all of
 587                  the white-space, the next character to copy will be the
 588                  newline or null character.  Otherwise, we skip the just
 589                  some portion of the leading white-space.  */
 590               if (src[src_offset + ws_info[ws_info_offset].ws ()] == '\n'
 591                   || src[src_offset + ws_info[ws_info_offset].ws ()] == '\0')
 592                 src_offset += ws_info[ws_info_offset].ws ();
 593               else
 594                 src_offset += std::min (*min_whitespace,
 595                                         ws_info[ws_info_offset].ws ());
 596
 597               /* If we skipped white-space, and are now at the end of the
 598                  input, then we're done.  */
 599               if (src[src_offset] == '\0')
 600                 break;
 601             }
 602           if (ws_info_offset < (ws_info.size () - 1))
 603             ++ws_info_offset;
 604           if (ws_info_offset > *all_done_idx)
 605             break;
 606         }
 607
 608       /* Don't copy a newline to the start of the DST string, this would
 609          result in a leading blank line.  But in all other cases, copy the
 610          next character into the destination string.  */
 611       if ((dst_offset > 0 || src[src_offset] != '\n'))
 612         {
 613           dst[dst_offset] = src[src_offset];
 614           ++dst_offset;
 615         }
 616
 617       /* Move to the next source character.  */
 618       ++src_offset;
 619     }
 620
 621   /* Remove the trailing newline character(s), and ensure we have a null
 622      terminator in place.  */
 623   while (dst_offset > 1 && dst[dst_offset - 1] == '\n')
 624     --dst_offset;
 625   dst[dst_offset] = '\0';
 626
 627   return doc;
 628 }
 629
 630 /* See python-internal.h.  */
 631
 632 PyObject *
 633 gdb_py_invalid_object_repr (PyObject *self)
 634 {
 635   return PyUnicode_FromFormat ("<%s (invalid)>", Py_TYPE (self)->tp_name);
 636 }