1 //===-- PythonDataObjects.h--------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 // !! FIXME FIXME FIXME !!
12 // Python APIs nearly all can return an exception. They do this
13 // by returning NULL, or -1, or some such value and setting
14 // the exception state with PyErr_Set*(). Exceptions must be
15 // handled before further python API functions are called. Failure
16 // to do so will result in asserts on debug builds of python.
// It will also sometimes, but not usually, result in crashes of
// release builds.
20 // Nearly all the code in this header does not handle python exceptions
21 // correctly. It should all be converted to return Expected<> or
22 // Error types to capture the exception.
24 // Everything in this file except functions that return Error or
25 // Expected<> is considered deprecated and should not be
26 // used in new code. If you need to use it, fix it first.
29 // TODOs for this file
31 // * Make all methods safe for exceptions.
33 // * Eliminate method signatures that must translate exceptions into
34 // empty objects or NULLs. Almost everything here should return
35 // Expected<>. It should be acceptable for certain operations that
36 // can never fail to assert instead, such as the creation of
37 // PythonString from a string literal.
39 // * Eliminate Reset(), and make all non-default constructors private.
40 // Python objects should be created with Retain<> or Take<>, and they
41 // should be assigned with operator=
43 // * Eliminate default constructors, make python objects always
44 // nonnull, and use optionals where necessary.
48 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
49 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H
51 #include "lldb/Host/Config.h"
53 #if LLDB_ENABLE_PYTHON
55 // LLDB Python header must be included first
56 #include "lldb-python.h"
58 #include "lldb/Host/File.h"
59 #include "lldb/Utility/StructuredData.h"
61 #include "llvm/ADT/ArrayRef.h"
63 namespace lldb_private
{
70 class PythonDictionary
;
72 class PythonException
;
77 m_state
= PyGILState_Ensure();
78 assert(!PyErr_Occurred());
80 ~GIL() { PyGILState_Release(m_state
); }
83 PyGILState_STATE m_state
;
86 enum class PyObjectType
{
/// Describes who owns the reference behind a raw PyObject* that is being
/// handed to one of the wrapper classes.
enum class PyRefType {
  /// We are not given ownership of the incoming PyObject.
  /// We cannot safely hold it without calling Py_INCREF.
  Borrowed,
  /// We have ownership of the incoming PyObject.  We should
  /// not call Py_INCREF.
  Owned
};
// Take a reference that you already own, and turn it into
// a PythonObject.
113 // Most python API methods will return a +1 reference
114 // if they succeed or NULL if and only if
115 // they set an exception. Use this to collect such return
116 // values, after checking for NULL.
// If T is not just PythonObject, then obj must already be
// checked to be of the correct type.
120 template <typename T
> T
Take(PyObject
*obj
) {
122 assert(!PyErr_Occurred());
123 T
thing(PyRefType::Owned
, obj
);
124 assert(thing
.IsValid());
// Retain a reference you have borrowed, and turn it into
// a PythonObject.
131 // A minority of python APIs return a borrowed reference
132 // instead of a +1. They will also return NULL if and only
133 // if they set an exception. Use this to collect such return
134 // values, after checking for NULL.
// If T is not just PythonObject, then obj must already be
// checked to be of the correct type.
138 template <typename T
> T
Retain(PyObject
*obj
) {
140 assert(!PyErr_Occurred());
141 T
thing(PyRefType::Borrowed
, obj
);
142 assert(thing
.IsValid());
146 // This class can be used like a utility function to convert from
147 // a llvm-friendly Twine into a null-terminated const char *,
148 // which is the form python C APIs want their strings in.
151 // const llvm::Twine &some_twine;
152 // PyFoo_Bar(x, y, z, NullTerminated(some_twine));
154 // Why a class instead of a function? If the twine isn't already null
155 // terminated, it will need a temporary buffer to copy the string
// into.   We need that buffer to stick around for the lifetime of the
// statement.
158 class NullTerminated
{
160 llvm::SmallString
<32> storage
;
163 NullTerminated(const llvm::Twine
&twine
) {
164 llvm::StringRef ref
= twine
.toNullTerminatedStringRef(storage
);
167 operator const char *() { return str
; }
170 inline llvm::Error
nullDeref() {
171 return llvm::createStringError(llvm::inconvertibleErrorCode(),
172 "A NULL PyObject* was dereferenced");
175 inline llvm::Error
exception(const char *s
= nullptr) {
176 return llvm::make_error
<PythonException
>(s
);
179 inline llvm::Error
keyError() {
180 return llvm::createStringError(llvm::inconvertibleErrorCode(),
/// Identity helper: returns \p s unchanged.
///
/// NOTE(review): the name suggests this once adapted const strings for
/// Python 2 C APIs; in this form it performs no cast at all.
inline const char *py2_const_cast(const char *s) {
  return s;
}
/// Selector passed to the `explicit X(PyInitialValue)` constructors of the
/// container wrappers declared in this header.
enum class PyInitialValue {
  Invalid,
  Empty
};
188 // DOC: https://docs.python.org/3/c-api/arg.html#building-values
189 template <typename T
, typename Enable
= void> struct PythonFormat
;
/// Base for PythonFormat specializations whose C++ value is handed to the
/// Python C API as-is.
///
/// \tparam T the C++ argument type.
/// \tparam F the format character associated with T.
template <typename T, char F> struct PassthroughFormat {
  /// Format character for this type.
  static constexpr char format = F;
  /// Returns \p t unchanged.
  static constexpr T get(T t) { return t; }
};
196 template <> struct PythonFormat
<char *> : PassthroughFormat
<char *, 's'> {};
197 template <> struct PythonFormat
<char> : PassthroughFormat
<char, 'b'> {};
199 struct PythonFormat
<unsigned char> : PassthroughFormat
<unsigned char, 'B'> {};
200 template <> struct PythonFormat
<short> : PassthroughFormat
<short, 'h'> {};
202 struct PythonFormat
<unsigned short> : PassthroughFormat
<unsigned short, 'H'> {};
203 template <> struct PythonFormat
<int> : PassthroughFormat
<int, 'i'> {};
204 template <> struct PythonFormat
<bool> : PassthroughFormat
<bool, 'p'> {};
206 struct PythonFormat
<unsigned int> : PassthroughFormat
<unsigned int, 'I'> {};
207 template <> struct PythonFormat
<long> : PassthroughFormat
<long, 'l'> {};
209 struct PythonFormat
<unsigned long> : PassthroughFormat
<unsigned long, 'k'> {};
211 struct PythonFormat
<long long> : PassthroughFormat
<long long, 'L'> {};
213 struct PythonFormat
<unsigned long long>
214 : PassthroughFormat
<unsigned long long, 'K'> {};
216 struct PythonFormat
<PyObject
*> : PassthroughFormat
<PyObject
*, 'O'> {};
218 template <typename T
>
220 T
, typename
std::enable_if
<std::is_base_of
<PythonObject
, T
>::value
>::type
> {
221 static constexpr char format
= 'O';
222 static auto get(const T
&value
) { return value
.get(); }
227 PythonObject() = default;
229 PythonObject(PyRefType type
, PyObject
*py_obj
) {
// If this is a borrowed reference, we need to convert it to
// an owned reference by incrementing it.  If it is an owned
// reference (for example the caller allocated it with PyDict_New()),
// then we must *not* increment it.
235 if (m_py_obj
&& Py_IsInitialized() && type
== PyRefType::Borrowed
)
236 Py_XINCREF(m_py_obj
);
239 PythonObject(const PythonObject
&rhs
)
240 : PythonObject(PyRefType::Borrowed
, rhs
.m_py_obj
) {}
242 PythonObject(PythonObject
&&rhs
) {
243 m_py_obj
= rhs
.m_py_obj
;
244 rhs
.m_py_obj
= nullptr;
247 ~PythonObject() { Reset(); }
253 _PyObject_Dump(m_py_obj
);
258 void Dump(Stream
&strm
) const;
260 PyObject
*get() const { return m_py_obj
; }
262 PyObject
*release() {
263 PyObject
*result
= m_py_obj
;
268 PythonObject
&operator=(PythonObject other
) {
270 m_py_obj
= std::exchange(other
.m_py_obj
, nullptr);
274 PyObjectType
GetObjectType() const;
276 PythonString
Repr() const;
278 PythonString
Str() const;
280 static PythonObject
ResolveNameWithDictionary(llvm::StringRef name
,
281 const PythonDictionary
&dict
);
283 template <typename T
>
284 static T
ResolveNameWithDictionary(llvm::StringRef name
,
285 const PythonDictionary
&dict
) {
286 return ResolveNameWithDictionary(name
, dict
).AsType
<T
>();
289 PythonObject
ResolveName(llvm::StringRef name
) const;
291 template <typename T
> T
ResolveName(llvm::StringRef name
) const {
292 return ResolveName(name
).AsType
<T
>();
295 bool HasAttribute(llvm::StringRef attribute
) const;
297 PythonObject
GetAttributeValue(llvm::StringRef attribute
) const;
299 bool IsNone() const { return m_py_obj
== Py_None
; }
301 bool IsValid() const { return m_py_obj
!= nullptr; }
303 bool IsAllocated() const { return IsValid() && !IsNone(); }
305 explicit operator bool() const { return IsValid() && !IsNone(); }
307 template <typename T
> T
AsType() const {
308 if (!T::Check(m_py_obj
))
310 return T(PyRefType::Borrowed
, m_py_obj
);
313 StructuredData::ObjectSP
CreateStructuredObject() const;
315 template <typename
... T
>
316 llvm::Expected
<PythonObject
> CallMethod(const char *name
,
317 const T
&... t
) const {
318 const char format
[] = {'(', PythonFormat
<T
>::format
..., ')', 0};
320 PyObject_CallMethod(m_py_obj
, py2_const_cast(name
),
321 py2_const_cast(format
), PythonFormat
<T
>::get(t
)...);
324 return python::Take
<PythonObject
>(obj
);
327 template <typename
... T
>
328 llvm::Expected
<PythonObject
> Call(const T
&... t
) const {
329 const char format
[] = {'(', PythonFormat
<T
>::format
..., ')', 0};
330 PyObject
*obj
= PyObject_CallFunction(m_py_obj
, py2_const_cast(format
),
331 PythonFormat
<T
>::get(t
)...);
334 return python::Take
<PythonObject
>(obj
);
337 llvm::Expected
<PythonObject
> GetAttribute(const llvm::Twine
&name
) const {
340 PyObject
*obj
= PyObject_GetAttrString(m_py_obj
, NullTerminated(name
));
343 return python::Take
<PythonObject
>(obj
);
346 llvm::Expected
<PythonObject
> GetType() const {
349 PyObject
*obj
= PyObject_Type(m_py_obj
);
352 return python::Take
<PythonObject
>(obj
);
355 llvm::Expected
<bool> IsTrue() {
358 int r
= PyObject_IsTrue(m_py_obj
);
364 llvm::Expected
<long long> AsLongLong() const;
366 llvm::Expected
<unsigned long long> AsUnsignedLongLong() const;
368 // wraps on overflow, instead of raising an error.
369 llvm::Expected
<unsigned long long> AsModuloUnsignedLongLong() const;
371 llvm::Expected
<bool> IsInstance(const PythonObject
&cls
) {
372 if (!m_py_obj
|| !cls
.IsValid())
374 int r
= PyObject_IsInstance(m_py_obj
, cls
.get());
381 PyObject
*m_py_obj
= nullptr;
385 // This is why C++ needs monads.
386 template <typename T
> llvm::Expected
<T
> As(llvm::Expected
<PythonObject
> &&obj
) {
388 return obj
.takeError();
389 if (!T::Check(obj
.get().get()))
390 return llvm::createStringError(llvm::inconvertibleErrorCode(),
392 return T(PyRefType::Borrowed
, std::move(obj
.get().get()));
395 template <> llvm::Expected
<bool> As
<bool>(llvm::Expected
<PythonObject
> &&obj
);
398 llvm::Expected
<long long> As
<long long>(llvm::Expected
<PythonObject
> &&obj
);
401 llvm::Expected
<unsigned long long>
402 As
<unsigned long long>(llvm::Expected
<PythonObject
> &&obj
);
405 llvm::Expected
<std::string
> As
<std::string
>(llvm::Expected
<PythonObject
> &&obj
);
408 template <class T
> class TypedPythonObject
: public PythonObject
{
410 TypedPythonObject(PyRefType type
, PyObject
*py_obj
) {
413 if (T::Check(py_obj
))
414 PythonObject::operator=(PythonObject(type
, py_obj
));
415 else if (type
== PyRefType::Owned
)
419 TypedPythonObject() = default;
422 class PythonBytes
: public TypedPythonObject
<PythonBytes
> {
424 using TypedPythonObject::TypedPythonObject
;
425 explicit PythonBytes(llvm::ArrayRef
<uint8_t> bytes
);
426 PythonBytes(const uint8_t *bytes
, size_t length
);
428 static bool Check(PyObject
*py_obj
);
430 llvm::ArrayRef
<uint8_t> GetBytes() const;
432 size_t GetSize() const;
434 void SetBytes(llvm::ArrayRef
<uint8_t> stringbytes
);
436 StructuredData::StringSP
CreateStructuredString() const;
439 class PythonByteArray
: public TypedPythonObject
<PythonByteArray
> {
441 using TypedPythonObject::TypedPythonObject
;
442 explicit PythonByteArray(llvm::ArrayRef
<uint8_t> bytes
);
443 PythonByteArray(const uint8_t *bytes
, size_t length
);
444 PythonByteArray(const PythonBytes
&object
);
446 static bool Check(PyObject
*py_obj
);
448 llvm::ArrayRef
<uint8_t> GetBytes() const;
450 size_t GetSize() const;
452 void SetBytes(llvm::ArrayRef
<uint8_t> stringbytes
);
454 StructuredData::StringSP
CreateStructuredString() const;
457 class PythonString
: public TypedPythonObject
<PythonString
> {
459 using TypedPythonObject::TypedPythonObject
;
460 static llvm::Expected
<PythonString
> FromUTF8(llvm::StringRef string
);
462 PythonString() : TypedPythonObject() {} // MSVC requires this for some reason
464 explicit PythonString(llvm::StringRef string
); // safe, null on error
466 static bool Check(PyObject
*py_obj
);
468 llvm::StringRef
GetString() const; // safe, empty string on error
470 llvm::Expected
<llvm::StringRef
> AsUTF8() const;
472 size_t GetSize() const;
474 void SetString(llvm::StringRef string
); // safe, null on error
476 StructuredData::StringSP
CreateStructuredString() const;
479 class PythonInteger
: public TypedPythonObject
<PythonInteger
> {
481 using TypedPythonObject::TypedPythonObject
;
483 PythonInteger() : TypedPythonObject() {} // MSVC requires this for some reason
485 explicit PythonInteger(int64_t value
);
487 static bool Check(PyObject
*py_obj
);
489 void SetInteger(int64_t value
);
491 StructuredData::IntegerSP
CreateStructuredInteger() const;
493 StructuredData::UnsignedIntegerSP
CreateStructuredUnsignedInteger() const;
495 StructuredData::SignedIntegerSP
CreateStructuredSignedInteger() const;
498 class PythonBoolean
: public TypedPythonObject
<PythonBoolean
> {
500 using TypedPythonObject::TypedPythonObject
;
502 explicit PythonBoolean(bool value
);
504 static bool Check(PyObject
*py_obj
);
506 bool GetValue() const;
508 void SetValue(bool value
);
510 StructuredData::BooleanSP
CreateStructuredBoolean() const;
513 class PythonList
: public TypedPythonObject
<PythonList
> {
515 using TypedPythonObject::TypedPythonObject
;
517 PythonList() : TypedPythonObject() {} // MSVC requires this for some reason
519 explicit PythonList(PyInitialValue value
);
520 explicit PythonList(int list_size
);
522 static bool Check(PyObject
*py_obj
);
524 uint32_t GetSize() const;
526 PythonObject
GetItemAtIndex(uint32_t index
) const;
528 void SetItemAtIndex(uint32_t index
, const PythonObject
&object
);
530 void AppendItem(const PythonObject
&object
);
532 StructuredData::ArraySP
CreateStructuredArray() const;
535 class PythonTuple
: public TypedPythonObject
<PythonTuple
> {
537 using TypedPythonObject::TypedPythonObject
;
539 explicit PythonTuple(PyInitialValue value
);
540 explicit PythonTuple(int tuple_size
);
541 PythonTuple(std::initializer_list
<PythonObject
> objects
);
542 PythonTuple(std::initializer_list
<PyObject
*> objects
);
544 static bool Check(PyObject
*py_obj
);
546 uint32_t GetSize() const;
548 PythonObject
GetItemAtIndex(uint32_t index
) const;
550 void SetItemAtIndex(uint32_t index
, const PythonObject
&object
);
552 StructuredData::ArraySP
CreateStructuredArray() const;
555 class PythonDictionary
: public TypedPythonObject
<PythonDictionary
> {
557 using TypedPythonObject::TypedPythonObject
;
559 PythonDictionary() : TypedPythonObject() {} // MSVC requires this for some reason
561 explicit PythonDictionary(PyInitialValue value
);
563 static bool Check(PyObject
*py_obj
);
565 uint32_t GetSize() const;
567 PythonList
GetKeys() const;
569 PythonObject
GetItemForKey(const PythonObject
&key
) const; // DEPRECATED
570 void SetItemForKey(const PythonObject
&key
,
571 const PythonObject
&value
); // DEPRECATED
573 llvm::Expected
<PythonObject
> GetItem(const PythonObject
&key
) const;
574 llvm::Expected
<PythonObject
> GetItem(const llvm::Twine
&key
) const;
575 llvm::Error
SetItem(const PythonObject
&key
, const PythonObject
&value
) const;
576 llvm::Error
SetItem(const llvm::Twine
&key
, const PythonObject
&value
) const;
578 StructuredData::DictionarySP
CreateStructuredDictionary() const;
581 class PythonModule
: public TypedPythonObject
<PythonModule
> {
583 using TypedPythonObject::TypedPythonObject
;
585 static bool Check(PyObject
*py_obj
);
587 static PythonModule
BuiltinsModule();
589 static PythonModule
MainModule();
591 static PythonModule
AddModule(llvm::StringRef module
);
593 // safe, returns invalid on error;
594 static PythonModule
ImportModule(llvm::StringRef name
) {
595 std::string s
= std::string(name
);
596 auto mod
= Import(s
.c_str());
598 llvm::consumeError(mod
.takeError());
599 return PythonModule();
601 return std::move(mod
.get());
604 static llvm::Expected
<PythonModule
> Import(const llvm::Twine
&name
);
606 llvm::Expected
<PythonObject
> Get(const llvm::Twine
&name
);
608 PythonDictionary
GetDictionary() const;
611 class PythonCallable
: public TypedPythonObject
<PythonCallable
> {
613 using TypedPythonObject::TypedPythonObject
;
/* the largest number of positional arguments this callable
 * can accept, or UNBOUNDED, i.e. UINT_MAX, if it's a varargs
 * function and can accept an arbitrary number */
619 unsigned max_positional_args
;
620 static constexpr unsigned UNBOUNDED
= UINT_MAX
; // FIXME c++17 inline
623 static bool Check(PyObject
*py_obj
);
625 llvm::Expected
<ArgInfo
> GetArgInfo() const;
627 PythonObject
operator()();
629 PythonObject
operator()(std::initializer_list
<PyObject
*> args
);
631 PythonObject
operator()(std::initializer_list
<PythonObject
> args
);
633 template <typename Arg
, typename
... Args
>
634 PythonObject
operator()(const Arg
&arg
, Args
... args
) {
635 return operator()({arg
, args
...});
639 class PythonFile
: public TypedPythonObject
<PythonFile
> {
641 using TypedPythonObject::TypedPythonObject
;
643 PythonFile() : TypedPythonObject() {} // MSVC requires this for some reason
645 static bool Check(PyObject
*py_obj
);
647 static llvm::Expected
<PythonFile
> FromFile(File
&file
,
648 const char *mode
= nullptr);
650 llvm::Expected
<lldb::FileSP
> ConvertToFile(bool borrowed
= false);
651 llvm::Expected
<lldb::FileSP
>
652 ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed
= false);
655 class PythonException
: public llvm::ErrorInfo
<PythonException
> {
657 PyObject
*m_exception_type
, *m_exception
, *m_traceback
;
658 PyObject
*m_repr_bytes
;
662 const char *toCString() const;
663 PythonException(const char *caller
= nullptr);
665 ~PythonException() override
;
666 void log(llvm::raw_ostream
&OS
) const override
;
667 std::error_code
convertToErrorCode() const override
;
668 bool Matches(PyObject
*exc
) const;
669 std::string
ReadBacktrace() const;
672 // This extracts the underlying T out of an Expected<T> and returns it.
673 // If the Expected is an Error instead of a T, that error will be converted
674 // into a python exception, and this will return a default-constructed T.
// This is appropriate for use right at the boundary of python calling into
// C++, such as in a SWIG typemap.   In such a context you should simply
// check if the returned T is valid, and if it is not, return a NULL back
// to python.   This will result in the Error being raised as an exception
// from python code's point of view.
684 // Expected<Foo *> efoop = some_cpp_function();
685 // Foo *foop = unwrapOrSetPythonException(efoop);
688 // do_something(*foop);
690 // If the Error returned was itself created because a python exception was
691 // raised when C++ code called into python, then the original exception
692 // will be restored. Otherwise a simple string exception will be raised.
693 template <typename T
> T
unwrapOrSetPythonException(llvm::Expected
<T
> expected
) {
695 return expected
.get();
696 llvm::handleAllErrors(
697 expected
.takeError(), [](PythonException
&E
) { E
.Restore(); },
698 [](const llvm::ErrorInfoBase
&E
) {
699 PyErr_SetString(PyExc_Exception
, E
.message().c_str());
// This is only here to help incrementally migrate old, exception-unsafe
// code.
706 template <typename T
> T
unwrapIgnoringErrors(llvm::Expected
<T
> expected
) {
708 return std::move(expected
.get());
709 llvm::consumeError(expected
.takeError());
713 llvm::Expected
<PythonObject
> runStringOneLine(const llvm::Twine
&string
,
714 const PythonDictionary
&globals
,
715 const PythonDictionary
&locals
);
717 llvm::Expected
<PythonObject
> runStringMultiLine(const llvm::Twine
&string
,
718 const PythonDictionary
&globals
,
719 const PythonDictionary
&locals
);
721 // Sometimes the best way to interact with a python interpreter is
722 // to run some python code. You construct a PythonScript with
723 // script string. The script assigns some function to `_function_`
724 // and you get a C++ callable object that calls the python function.
728 // const char script[] = R"(
733 // Expected<PythonObject> cpp_foo_wrapper(PythonObject x, PythonObject y) {
734 // // no need to synchronize access to this global, we already have the GIL
735 // static PythonScript foo(script)
740 PythonCallable function
;
745 PythonScript(const char *script
) : script(script
), function() {}
747 template <typename
... Args
>
748 llvm::Expected
<PythonObject
> operator()(Args
&&... args
) {
749 if (llvm::Error error
= Init())
750 return std::move(error
);
751 return function
.Call(std::forward
<Args
>(args
)...);
755 class StructuredPythonObject
: public StructuredData::Generic
{
757 StructuredPythonObject() : StructuredData::Generic() {}
759 // Take ownership of the object we received.
760 StructuredPythonObject(PythonObject obj
)
761 : StructuredData::Generic(obj
.release()) {}
763 ~StructuredPythonObject() override
{
764 // Hand ownership back to a (temporary) PythonObject instance and let it
765 // take care of releasing it.
766 PythonObject(PyRefType::Owned
, static_cast<PyObject
*>(GetValue()));
769 bool IsValid() const override
{ return GetValue() && GetValue() != Py_None
; }
771 void Serialize(llvm::json::OStream
&s
) const override
;
774 StructuredPythonObject(const StructuredPythonObject
&) = delete;
775 const StructuredPythonObject
&
776 operator=(const StructuredPythonObject
&) = delete;
779 } // namespace python
780 } // namespace lldb_private
784 #endif // LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_PYTHONDATAOBJECTS_H