gdb/python/py-utils.c

   1 /* General utility routines for GDB/Python.
   2
   3    Copyright (C) 2008-2023 Free Software Foundation, Inc.
   4
   5    This file is part of GDB.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #include "defs.h"
  21 #include "charset.h"
  22 #include "value.h"
  23 #include "python-internal.h"
  24
  25 /* Converts a Python 8-bit string to a unicode string object.  Assumes the
  26    8-bit string is in the host charset.  If an error occurs during conversion,
  27    returns NULL with a python exception set.
  28
  29    As an added bonus, the functions accepts a unicode string and returns it
  30    right away, so callers don't need to check which kind of string they've
  31    got.  In Python 3, all strings are Unicode so this case is always the
  32    one that applies.
  33
  34    If the given object is not one of the mentioned string types, NULL is
  35    returned, with the TypeError python exception set.  */
  36 gdbpy_ref<>
  37 python_string_to_unicode (PyObject *obj)
  38 {
  39   PyObject *unicode_str;
  40
  41   /* If obj is already a unicode string, just return it.
  42      I wish life was always that simple...  */
  43   if (PyUnicode_Check (obj))
  44     {
  45       unicode_str = obj;
  46       Py_INCREF (obj);
  47     }
  48   else
  49     {
  50       PyErr_SetString (PyExc_TypeError,
  51                        _("Expected a string object."));
  52       unicode_str = NULL;
  53     }
  54
  55   return gdbpy_ref<> (unicode_str);
  56 }
  57
  58 /* Returns a newly allocated string with the contents of the given unicode
  59    string object converted to CHARSET.  If an error occurs during the
  60    conversion, NULL will be returned and a python exception will be
  61    set.  */
  62 static gdb::unique_xmalloc_ptr<char>
  63 unicode_to_encoded_string (PyObject *unicode_str, const char *charset)
  64 {
  65   /* Translate string to named charset.  */
  66   gdbpy_ref<> string (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
  67   if (string == NULL)
  68     return NULL;
  69
  70   return gdb::unique_xmalloc_ptr<char>
  71     (xstrdup (PyBytes_AsString (string.get ())));
  72 }
  73
  74 /* Returns a PyObject with the contents of the given unicode string
  75    object converted to a named charset.  If an error occurs during
  76    the conversion, NULL will be returned and a python exception will
  77    be set.  */
  78 static gdbpy_ref<>
  79 unicode_to_encoded_python_string (PyObject *unicode_str, const char *charset)
  80 {
  81   /* Translate string to named charset.  */
  82   return gdbpy_ref<> (PyUnicode_AsEncodedString (unicode_str, charset, NULL));
  83 }
  84
  85 /* Returns a newly allocated string with the contents of the given
  86    unicode string object converted to the target's charset.  If an
  87    error occurs during the conversion, NULL will be returned and a
  88    python exception will be set.  */
  89 gdb::unique_xmalloc_ptr<char>
  90 unicode_to_target_string (PyObject *unicode_str)
  91 {
  92   return (unicode_to_encoded_string
  93           (unicode_str,
  94            target_charset (gdbpy_enter::get_gdbarch ())));
  95 }
  96
  97 /* Returns a PyObject with the contents of the given unicode string
  98    object converted to the target's charset.  If an error occurs
  99    during the conversion, NULL will be returned and a python exception
 100    will be set.  */
 101 static gdbpy_ref<>
 102 unicode_to_target_python_string (PyObject *unicode_str)
 103 {
 104   return (unicode_to_encoded_python_string
 105           (unicode_str,
 106            target_charset (gdbpy_enter::get_gdbarch ())));
 107 }
 108
 109 /* Converts a python string (8-bit or unicode) to a target string in
 110    the target's charset.  Returns NULL on error, with a python
 111    exception set.  */
 112 gdb::unique_xmalloc_ptr<char>
 113 python_string_to_target_string (PyObject *obj)
 114 {
 115   gdbpy_ref<> str = python_string_to_unicode (obj);
 116   if (str == NULL)
 117     return NULL;
 118
 119   return unicode_to_target_string (str.get ());
 120 }
 121
 122 /* Converts a python string (8-bit or unicode) to a target string in the
 123    target's charset.  Returns NULL on error, with a python exception
 124    set.
 125
 126    In Python 3, the returned object is a "bytes" object (not a string).  */
 127 gdbpy_ref<>
 128 python_string_to_target_python_string (PyObject *obj)
 129 {
 130   gdbpy_ref<> str = python_string_to_unicode (obj);
 131   if (str == NULL)
 132     return str;
 133
 134   return unicode_to_target_python_string (str.get ());
 135 }
 136
 137 /* Converts a python string (8-bit or unicode) to a target string in
 138    the host's charset.  Returns NULL on error, with a python exception
 139    set.  */
 140 gdb::unique_xmalloc_ptr<char>
 141 python_string_to_host_string (PyObject *obj)
 142 {
 143   gdbpy_ref<> str = python_string_to_unicode (obj);
 144   if (str == NULL)
 145     return NULL;
 146
 147   return unicode_to_encoded_string (str.get (), host_charset ());
 148 }
 149
 150 /* Convert a host string to a python string.  */
 151
 152 gdbpy_ref<>
 153 host_string_to_python_string (const char *str)
 154 {
 155   return gdbpy_ref<> (PyUnicode_Decode (str, strlen (str), host_charset (),
 156                                         NULL));
 157 }
 158
 159 /* Return true if OBJ is a Python string or unicode object, false
 160    otherwise.  */
 161
 162 int
 163 gdbpy_is_string (PyObject *obj)
 164 {
 165   return PyUnicode_Check (obj);
 166 }
 167
 168 /* Return the string representation of OBJ, i.e., str (obj).
 169    If the result is NULL a python error occurred, the caller must clear it.  */
 170
 171 gdb::unique_xmalloc_ptr<char>
 172 gdbpy_obj_to_string (PyObject *obj)
 173 {
 174   gdbpy_ref<> str_obj (PyObject_Str (obj));
 175
 176   if (str_obj != NULL)
 177     return python_string_to_host_string (str_obj.get ());
 178
 179   return NULL;
 180 }
 181
 182 /* See python-internal.h.  */
 183
 184 gdb::unique_xmalloc_ptr<char>
 185 gdbpy_err_fetch::to_string () const
 186 {
 187   /* There are a few cases to consider.
 188      For example:
 189      value is a string when PyErr_SetString is used.
 190      value is not a string when raise "foo" is used, instead it is None
 191      and type is "foo".
 192      So the algorithm we use is to print `str (value)' if it's not
 193      None, otherwise we print `str (type)'.
 194      Using str (aka PyObject_Str) will fetch the error message from
 195      gdb.GdbError ("message").  */
 196
 197   if (m_error_value.get () != nullptr && m_error_value.get () != Py_None)
 198     return gdbpy_obj_to_string (m_error_value.get ());
 199   else
 200     return gdbpy_obj_to_string (m_error_type.get ());
 201 }
 202
 203 /* See python-internal.h.  */
 204
 205 gdb::unique_xmalloc_ptr<char>
 206 gdbpy_err_fetch::type_to_string () const
 207 {
 208   return gdbpy_obj_to_string (m_error_type.get ());
 209 }
 210
 211 /* Convert a GDB exception to the appropriate Python exception.
 212
 213    This sets the Python error indicator.  */
 214
 215 void
 216 gdbpy_convert_exception (const struct gdb_exception &exception)
 217 {
 218   PyObject *exc_class;
 219
 220   if (exception.reason == RETURN_QUIT)
 221     exc_class = PyExc_KeyboardInterrupt;
 222   else if (exception.error == MEMORY_ERROR)
 223     exc_class = gdbpy_gdb_memory_error;
 224   else
 225     exc_class = gdbpy_gdb_error;
 226
 227   PyErr_Format (exc_class, "%s", exception.what ());
 228 }
 229
 230 /* Converts OBJ to a CORE_ADDR value.
 231
 232    Returns 0 on success or -1 on failure, with a Python exception set.
 233 */
 234
 235 int
 236 get_addr_from_python (PyObject *obj, CORE_ADDR *addr)
 237 {
 238   if (gdbpy_is_value_object (obj))
 239     {
 240
 241       try
 242         {
 243           *addr = value_as_address (value_object_to_value (obj));
 244         }
 245       catch (const gdb_exception &except)
 246         {
 247           GDB_PY_SET_HANDLE_EXCEPTION (except);
 248         }
 249     }
 250   else
 251     {
 252       gdbpy_ref<> num (PyNumber_Long (obj));
 253       gdb_py_ulongest val;
 254
 255       if (num == NULL)
 256         return -1;
 257
 258       val = gdb_py_long_as_ulongest (num.get ());
 259       if (PyErr_Occurred ())
 260         return -1;
 261
 262       if (sizeof (val) > sizeof (CORE_ADDR) && ((CORE_ADDR) val) != val)
 263         {
 264           PyErr_SetString (PyExc_ValueError,
 265                            _("Overflow converting to address."));
 266           return -1;
 267         }
 268
 269       *addr = val;
 270     }
 271
 272   return 0;
 273 }
 274
 275 /* Convert a LONGEST to the appropriate Python object -- either an
 276    integer object or a long object, depending on its value.  */
 277
 278 gdbpy_ref<>
 279 gdb_py_object_from_longest (LONGEST l)
 280 {
 281   if (sizeof (l) > sizeof (long))
 282     return gdbpy_ref<> (PyLong_FromLongLong (l));
 283   return gdbpy_ref<> (PyLong_FromLong (l));
 284 }
 285
 286 /* Convert a ULONGEST to the appropriate Python object -- either an
 287    integer object or a long object, depending on its value.  */
 288
 289 gdbpy_ref<>
 290 gdb_py_object_from_ulongest (ULONGEST l)
 291 {
 292   if (sizeof (l) > sizeof (unsigned long))
 293     return gdbpy_ref<> (PyLong_FromUnsignedLongLong (l));
 294   return gdbpy_ref<> (PyLong_FromUnsignedLong (l));
 295 }
 296
 297 /* Like PyLong_AsLong, but returns 0 on failure, 1 on success, and puts
 298    the value into an out parameter.  */
 299
 300 int
 301 gdb_py_int_as_long (PyObject *obj, long *result)
 302 {
 303   *result = PyLong_AsLong (obj);
 304   return ! (*result == -1 && PyErr_Occurred ());
 305 }
 306
 307 \f
 308
 309 /* Generic implementation of the __dict__ attribute for objects that
 310    have a dictionary.  The CLOSURE argument should be the type object.
 311    This only handles positive values for tp_dictoffset.  */
 312
 313 PyObject *
 314 gdb_py_generic_dict (PyObject *self, void *closure)
 315 {
 316   PyObject *result;
 317   PyTypeObject *type_obj = (PyTypeObject *) closure;
 318   char *raw_ptr;
 319
 320   raw_ptr = (char *) self + type_obj->tp_dictoffset;
 321   result = * (PyObject **) raw_ptr;
 322
 323   Py_INCREF (result);
 324   return result;
 325 }
 326
 327 /* Like PyModule_AddObject, but does not steal a reference to
 328    OBJECT.  */
 329
 330 int
 331 gdb_pymodule_addobject (PyObject *module, const char *name, PyObject *object)
 332 {
 333   int result;
 334
 335   Py_INCREF (object);
 336   result = PyModule_AddObject (module, name, object);
 337   if (result < 0)
 338     Py_DECREF (object);
 339   return result;
 340 }
 341
 342 /* See python-internal.h.  */
 343
 344 void
 345 gdbpy_error (const char *fmt, ...)
 346 {
 347   va_list ap;
 348   va_start (ap, fmt);
 349   std::string str = string_vprintf (fmt, ap);
 350   va_end (ap);
 351
 352   const char *msg = str.c_str ();
 353   if (msg != nullptr && *msg != '\0')
 354     error (_("Error occurred in Python: %s"), msg);
 355   else
 356     error (_("Error occurred in Python."));
 357 }
 358
 359 /* Handle a Python exception when the special gdb.GdbError treatment
 360    is desired.  This should only be called when an exception is set.
 361    If the exception is a gdb.GdbError, throw a gdb exception with the
 362    exception text.  For other exceptions, print the Python stack and
 363    then throw a gdb exception.  */
 364
 365 void
 366 gdbpy_handle_exception ()
 367 {
 368   gdbpy_err_fetch fetched_error;
 369   gdb::unique_xmalloc_ptr<char> msg = fetched_error.to_string ();
 370
 371   if (msg == NULL)
 372     {
 373       /* An error occurred computing the string representation of the
 374          error message.  This is rare, but we should inform the user.  */
 375       gdb_printf (_("An error occurred in Python "
 376                     "and then another occurred computing the "
 377                     "error message.\n"));
 378       gdbpy_print_stack ();
 379     }
 380
 381   /* Don't print the stack for gdb.GdbError exceptions.
 382      It is generally used to flag user errors.
 383
 384      We also don't want to print "Error occurred in Python command"
 385      for user errors.  However, a missing message for gdb.GdbError
 386      exceptions is arguably a bug, so we flag it as such.  */
 387
 388   if (fetched_error.type_matches (PyExc_KeyboardInterrupt))
 389     throw_quit ("Quit");
 390   else if (! fetched_error.type_matches (gdbpy_gdberror_exc)
 391            || msg == NULL || *msg == '\0')
 392     {
 393       fetched_error.restore ();
 394       gdbpy_print_stack ();
 395       if (msg != NULL && *msg != '\0')
 396         error (_("Error occurred in Python: %s"), msg.get ());
 397       else
 398         error (_("Error occurred in Python."));
 399     }
 400   else
 401     error ("%s", msg.get ());
 402 }
 403
/* See python-internal.h.

   Summary of what the implementation below does: DOC is rewritten in
   place and then returned.  The smallest leading-space count found on
   any non-first line with content is removed from the start of every
   line that has content (a line with fewer leading spaces than that
   just loses the ones it has).  Lines made up only of spaces lose all
   of them.  Leading blank lines and trailing newlines are dropped.
   Only ' ' counts as white-space; tabs are left alone.

   If no line after the first has any content, DOC is returned
   unmodified.  DOC.get () is dereferenced without a check, so DOC
   must hold a string.  */

gdb::unique_xmalloc_ptr<char>
gdbpy_fix_doc_string_indentation (gdb::unique_xmalloc_ptr<char> doc)
{
  /* A structure used to track the white-space information on each line of
     DOC.  */
  struct line_whitespace
  {
    /* Constructor.  OFFSET is the offset from the start of DOC, WS_COUNT
       is the number of whitespace characters starting at OFFSET.  */
    line_whitespace (size_t offset, int ws_count)
      : m_offset (offset),
        m_ws_count (ws_count)
    { /* Nothing.  */ }

    /* The offset from the start of DOC.  */
    size_t offset () const
    { return m_offset; }

    /* The number of white-space characters at the start of this line.  */
    int ws () const
    { return m_ws_count; }

  private:
    /* The offset from the start of DOC to the first character of this
       line.  */
    size_t m_offset;

    /* White space count on this line, the first character of this
       whitespace is at OFFSET.  */
    int m_ws_count;
  };

  /* Count the number of white-space characters starting at TXT.  We
     currently only count true single space characters, things like tabs,
     newlines, etc are not counted.  */
  auto count_whitespace = [] (const char *txt) -> int
  {
    int count = 0;

    while (*txt == ' ')
      {
        ++txt;
        ++count;
      }

    return count;
  };

  /* In MIN_WHITESPACE we track the smallest number of whitespace
     characters seen at the start of a line (that has actual content), this
     is the number of characters that we can delete off all lines without
     altering the relative indentation of all lines in DOC.

     The first line often has no indentation, but instead starts
     immediately after the 3-quotes marker within the Python doc string,
     so the first line is never used to update MIN_WHITESPACE, whatever
     its indentation.

     Lines without any content should (ideally) have no white-space at
     all, but if they do then they might have an artificially low number
     (user left a single stray space at the start of an otherwise blank
     line), we don't consider lines without content when updating the
     MIN_WHITESPACE value.  */
  gdb::optional<int> min_whitespace;

  /* The index into WS_INFO at which the processing of DOC can be
     considered "all done", that is, after this point there are no further
     lines with useful content and we should just stop.  */
  gdb::optional<size_t> all_done_idx;

  /* White-space information for each line in DOC.  */
  std::vector<line_whitespace> ws_info;

  /* Now look through DOC and collect the required information.  Each
     iteration of this loop consumes exactly one line of DOC.  */
  const char *tmp = doc.get ();
  while (*tmp != '\0')
    {
      /* Add an entry for the offset to the start of this line, and how
         much white-space there is at the start of this line.  */
      size_t offset = tmp - doc.get ();
      int ws_count = count_whitespace (tmp);
      ws_info.emplace_back (offset, ws_count);

      /* Skip over the white-space.  */
      tmp += ws_count;

      /* Remember where the content of this line starts, and skip forward
         to either the end of this line (newline) or the end of the DOC
         string (null character), whichever comes first.  */
      const char *content_start = tmp;
      while (*tmp != '\0' && *tmp != '\n')
        ++tmp;

      /* If this is not the first line (only the first line has OFFSET
         equal to zero), and if this line has some content, then update
         MIN_WHITESPACE, this reflects the smallest number of whitespace
         characters we can delete from all lines without impacting the
         relative indentation of all the lines of DOC.  */
      if (offset > 0 && tmp > content_start)
        {
          if (!min_whitespace.has_value ())
            min_whitespace = ws_count;
          else
            min_whitespace = std::min (*min_whitespace, ws_count);
        }

      /* Each time we encounter a line that has some content we update
         ALL_DONE_IDX to be the index of the next line.  If the last lines
         of DOC don't contain any content then ALL_DONE_IDX will be left
         pointing at an earlier line.  When we rewrite DOC, when we reach
         ALL_DONE_IDX then we can stop, this allows us to trim any blank
         lines from the end of DOC.  */
      if (tmp > content_start)
        all_done_idx = ws_info.size ();

      /* If we reached a newline then skip forward to the start of the next
         line.  The other possibility at this point is that we're at the
         very end of the DOC string (null terminator).  */
      if (*tmp == '\n')
        ++tmp;
    }

  /* Fail safe by just returning the original documentation string when
     there is nothing to base the rewrite on: either no line has content,
     or no line other than the first has content (MIN_WHITESPACE is only
     ever set from non-first lines).  */
  if (!all_done_idx.has_value () || !min_whitespace.has_value ())
    return doc;

  /* Setup DST and SRC, both pointing into the DOC string.  We're going to
     rewrite DOC in-place, as we only ever make DOC shorter (by removing
     white-space), thus we know this will not overflow.  */
  char *dst = doc.get ();
  char *src = doc.get ();

  /* Array indices used with DST, SRC, and WS_INFO respectively.  */
  size_t dst_offset = 0;
  size_t src_offset = 0;
  size_t ws_info_offset = 0;

  /* Now, walk over the source string, this is the original DOC.  */
  while (src[src_offset] != '\0')
    {
      /* If we are at the start of the next line (in WS_INFO), then we may
         need to skip some white-space characters.  */
      if (src_offset == ws_info[ws_info_offset].offset ())
        {
          /* If a line has leading white-space then we need to skip over
             some number of characters now.  */
          if (ws_info[ws_info_offset].ws () > 0)
            {
              /* If the line is entirely white-space then we skip all of
                 the white-space, the next character to copy will be the
                 newline or null character.  Otherwise, we skip just
                 some portion of the leading white-space.  */
              if (src[src_offset + ws_info[ws_info_offset].ws ()] == '\n'
                  || src[src_offset + ws_info[ws_info_offset].ws ()] == '\0')
                src_offset += ws_info[ws_info_offset].ws ();
              else
                src_offset += std::min (*min_whitespace,
                                        ws_info[ws_info_offset].ws ());

              /* If we skipped white-space, and are now at the end of the
                 input, then we're done.  */
              if (src[src_offset] == '\0')
                break;
            }

          /* Move on to the entry for the next line, but never step past
             the last entry, so the WS_INFO lookups above stay within
             bounds.  */
          if (ws_info_offset < (ws_info.size () - 1))
            ++ws_info_offset;

          /* Once we are past ALL_DONE_IDX there is no further content
             in DOC, so stop copying.  */
          if (ws_info_offset > *all_done_idx)
            break;
        }

      /* Don't copy a newline to the start of the DST string, this would
         result in a leading blank line.  But in all other cases, copy the
         next character into the destination string.  */
      if ((dst_offset > 0 || src[src_offset] != '\n'))
        {
          dst[dst_offset] = src[src_offset];
          ++dst_offset;
        }

      /* Move to the next source character.  */
      ++src_offset;
    }

  /* Remove the trailing newline character(s), and ensure we have a null
     terminator in place.  A newline is never copied to index zero of DST
     (see above), so there is no need to look at that character here.  */
  while (dst_offset > 1 && dst[dst_offset - 1] == '\n')
    --dst_offset;
  dst[dst_offset] = '\0';

  return doc;
}