stem/response/__init__.py

   1 # Copyright 2012-2020, Damian Johnson and The Tor Project
   2 # See LICENSE for licensing information
   3
   4 """
   5 Parses replies from the control socket.
   6
   7 **Module Overview:**
   8
   9 ::
  10
  11   convert - translates a ControlMessage into a particular response subclass
  12
  13   ControlMessage - Message that's read from the control socket.
  14     |- SingleLineResponse - Simple tor response only including a single line of information.
  15     |
  16     |- from_str - provides a ControlMessage for the given string
  17     |- is_ok - response had a 250 status
  18     |- content - provides the parsed message content
  19     +- raw_content - unparsed socket data
  20
  21   ControlLine - String subclass with methods for parsing controller responses.
  22     |- remainder - provides the unparsed content
  23     |- is_empty - checks if the remaining content is empty
  24     |- is_next_quoted - checks if the next entry is a quoted value
  25     |- is_next_mapping - checks if the next entry is a KEY=VALUE mapping
  26     |- peek_key - provides the key of the next entry
  27     |- pop - removes and returns the next entry
  28     +- pop_mapping - removes and returns the next entry as a KEY=VALUE mapping
  29 """
  30
  31 import codecs
  32 import io
  33 import re
  34 import time
  35 import threading
  36
  37 import stem.socket
  38 import stem.util
  39 import stem.util.str_tools
  40
  41 from typing import Any, Iterator, List, Optional, Sequence, Tuple, Union
  42
  43 __all__ = [
  44   'add_onion',
  45   'events',
  46   'getinfo',
  47   'getconf',
  48   'onion_client_auth',
  49   'protocolinfo',
  50   'authchallenge',
  51   'convert',
  52   'ControlMessage',
  53   'ControlLine',
  54   'SingleLineResponse',
  55 ]
  56
  57 KEY_ARG = re.compile('^(\\S+)=')
  58
  59
  60 def convert(response_type: str, message: 'stem.response.ControlMessage', **kwargs: Any) -> None:
  61   """
  62   Converts a :class:`~stem.response.ControlMessage` into a particular kind of
  63   tor response. This does an in-place conversion of the message from being a
  64   :class:`~stem.response.ControlMessage` to a subclass for its response type.
  65   Recognized types include...
  66
  67   =========================== =====
  68   response_type               Class
  69   =========================== =====
  70   **ADD_ONION**               :class:`stem.response.add_onion.AddOnionResponse`
  71   **AUTHCHALLENGE**           :class:`stem.response.authchallenge.AuthChallengeResponse`
  72   **EVENT**                   :class:`stem.response.events.Event` subclass
  73   **GETCONF**                 :class:`stem.response.getconf.GetConfResponse`
  74   **GETINFO**                 :class:`stem.response.getinfo.GetInfoResponse`
  75   **MAPADDRESS**              :class:`stem.response.mapaddress.MapAddressResponse`
  76   **ONION_CLIENT_AUTH_VIEW**  :class:`stem.response.onion_client_auth.OnionClientAuthViewResponse`
  77   **PROTOCOLINFO**            :class:`stem.response.protocolinfo.ProtocolInfoResponse`
  78   **SINGLELINE**              :class:`stem.response.SingleLineResponse`
  79   =========================== =====
  80
  81   :param response_type: type of tor response to convert to
  82   :param message: message to be converted
  83   :param kwargs: optional keyword arguments to be passed to the parser method
  84
  85   :raises:
  86     * :class:`stem.ProtocolError` the message isn't a proper response of
  87       that type
  88     * :class:`stem.InvalidArguments` the arguments given as input are
  89       invalid, this is can only be raised if the response_type is: **GETINFO**,
  90       **GETCONF**
  91     * :class:`stem.InvalidRequest` the arguments given as input are
  92       invalid, this is can only be raised if the response_type is:
  93       **MAPADDRESS**
  94     * :class:`stem.OperationFailed` if the action the event represents failed,
  95       this is can only be raised if the response_type is: **MAPADDRESS**
  96     * **TypeError** if argument isn't a :class:`~stem.response.ControlMessage`
  97       or response_type isn't supported
  98   """
  99
 100   import stem.response.add_onion
 101   import stem.response.authchallenge
 102   import stem.response.events
 103   import stem.response.getinfo
 104   import stem.response.getconf
 105   import stem.response.mapaddress
 106   import stem.response.onion_client_auth
 107   import stem.response.protocolinfo
 108
 109   if not isinstance(message, ControlMessage):
 110     raise TypeError('Only able to convert stem.response.ControlMessage instances')
 111
 112   response_types = {
 113     'ADD_ONION': stem.response.add_onion.AddOnionResponse,
 114     'AUTHCHALLENGE': stem.response.authchallenge.AuthChallengeResponse,
 115     'EVENT': stem.response.events.Event,
 116     'GETCONF': stem.response.getconf.GetConfResponse,
 117     'GETINFO': stem.response.getinfo.GetInfoResponse,
 118     'MAPADDRESS': stem.response.mapaddress.MapAddressResponse,
 119     'ONION_CLIENT_AUTH_VIEW': stem.response.onion_client_auth.OnionClientAuthViewResponse,
 120     'PROTOCOLINFO': stem.response.protocolinfo.ProtocolInfoResponse,
 121     'SINGLELINE': SingleLineResponse,
 122   }
 123
 124   try:
 125     response_class = response_types[response_type]
 126   except TypeError:
 127     raise TypeError('Unsupported response type: %s' % response_type)
 128
 129   message.__class__ = response_class
 130   message._parse_message(**kwargs)  # type: ignore
 131
 132
 133 # TODO: These aliases are for type hint compatibility. We should refactor how
 134 # message conversion is performed to avoid this headache.
 135
 136 def _convert_to_single_line(message: 'stem.response.ControlMessage', **kwargs: Any) -> 'stem.response.SingleLineResponse':
 137   stem.response.convert('SINGLELINE', message)
 138   return message  # type: ignore
 139
 140
 141 def _convert_to_event(message: 'stem.response.ControlMessage', **kwargs: Any) -> 'stem.response.events.Event':
 142   stem.response.convert('EVENT', message)
 143   return message  # type: ignore
 144
 145
 146 def _convert_to_getinfo(message: 'stem.response.ControlMessage', **kwargs: Any) -> 'stem.response.getinfo.GetInfoResponse':
 147   stem.response.convert('GETINFO', message)
 148   return message  # type: ignore
 149
 150
 151 def _convert_to_getconf(message: 'stem.response.ControlMessage', **kwargs: Any) -> 'stem.response.getconf.GetConfResponse':
 152   stem.response.convert('GETCONF', message)
 153   return message  # type: ignore
 154
 155
 156 def _convert_to_add_onion(message: 'stem.response.ControlMessage', **kwargs: Any) -> 'stem.response.add_onion.AddOnionResponse':
 157   stem.response.convert('ADD_ONION', message)
 158   return message  # type: ignore
 159
 160
 161 def _convert_to_onion_client_auth_view(message: 'stem.response.ControlMessage', **kwargs: Any) -> 'stem.response.onion_client_auth.OnionClientAuthViewResponse':
 162   stem.response.convert('ONION_CLIENT_AUTH_VIEW', message)
 163   return message  # type: ignore
 164
 165
 166 def _convert_to_mapaddress(message: 'stem.response.ControlMessage', **kwargs: Any) -> 'stem.response.mapaddress.MapAddressResponse':
 167   stem.response.convert('MAPADDRESS', message)
 168   return message  # type: ignore
 169
 170
 171 class ControlMessage(object):
 172   """
 173   Message from the control socket. This is iterable and can be stringified for
 174   individual message components stripped of protocol formatting. Messages are
 175   never empty.
 176
 177   :var int arrived_at: unix timestamp for when the message arrived
 178
 179   .. versionchanged:: 1.7.0
 180      Implemented equality and hashing.
 181
 182   .. versionchanged:: 1.8.0
 183      Moved **arrived_at** from the Event class up to this base ControlMessage.
 184   """
 185
 186   @staticmethod
 187   def from_str(content: Union[str, bytes], msg_type: Optional[str] = None, normalize: bool = False, **kwargs: Any) -> 'stem.response.ControlMessage':
 188     """
 189     Provides a ControlMessage for the given content.
 190
 191     .. versionadded:: 1.1.0
 192
 193     .. versionchanged:: 1.6.0
 194        Added the normalize argument.
 195
 196     :param content: message to construct the message from
 197     :param msg_type: type of tor reply to parse the content as
 198     :param normalize: ensures expected carriage return and ending newline
 199       are present
 200     :param kwargs: optional keyword arguments to be passed to the parser method
 201
 202     :returns: stem.response.ControlMessage instance
 203     """
 204
 205     if isinstance(content, str):
 206       content = stem.util.str_tools._to_bytes(content)
 207
 208     if normalize:
 209       if not content.endswith(b'\n'):
 210         content += b'\n'
 211
 212       content = re.sub(b'([\r]?)\n', b'\r\n', content)
 213
 214     msg = stem.socket.recv_message_from_bytes_io(io.BytesIO(stem.util.str_tools._to_bytes(content)), arrived_at = kwargs.pop('arrived_at', None))
 215
 216     if msg_type is not None:
 217       convert(msg_type, msg, **kwargs)
 218
 219     return msg
 220
 221   def __init__(self, parsed_content: Sequence[Tuple[str, str, bytes]], raw_content: bytes, arrived_at: Optional[float] = None) -> None:
 222     if not parsed_content:
 223       raise ValueError("ControlMessages can't be empty")
 224
 225     # TODO: Change arrived_at to a float (can't yet because it causes Event
 226     # equality checks to fail - events include arrived_at within their hash
 227     # whereas ControlMessages don't).
 228
 229     self.arrived_at = int(arrived_at if arrived_at else time.time())
 230
 231     self._parsed_content = parsed_content
 232     self._raw_content = raw_content
 233     self._str = None  # type: Optional[str]
 234     self._hash = stem.util._hash_attr(self, '_raw_content')
 235
 236   def is_ok(self) -> bool:
 237     """
 238     Checks if any of our lines have a 2xx response.
 239
 240     :returns: **True** if any lines have a 2xx response code, **False** otherwise
 241     """
 242
 243     for code, _, _ in self._parsed_content:
 244       if code.isdigit() and (200 <= int(code) < 300):
 245         return True
 246
 247     return False
 248
 249   # TODO: drop this alias when we provide better type support
 250
 251   def _content_bytes(self) -> List[Tuple[str, str, bytes]]:
 252     return self.content(get_bytes = True)  # type: ignore
 253
 254   def content(self, get_bytes: bool = False) -> List[Tuple[str, str, str]]:
 255     """
 256     Provides the parsed message content. These are entries of the form...
 257
 258     ::
 259
 260       (status_code, divider, content)
 261
 262     **status_code**
 263       Three character code for the type of response (defined in section 4 of
 264       the control-spec).
 265
 266     **divider**
 267       Single character to indicate if this is mid-reply, data, or an end to the
 268       message (defined in section 2.3 of the control-spec).
 269
 270     **content**
 271       The following content is the actual payload of the line.
 272
 273     For data entries the content is the full multi-line payload with newline
 274     linebreaks and leading periods unescaped.
 275
 276     The **status_code** and **divider** are both strings (**bytes** in python
 277     2.x and **unicode** in python 3.x). The **content** however is **bytes** if
 278     **get_bytes** is **True**.
 279
 280     .. versionchanged:: 1.1.0
 281        Added the get_bytes argument.
 282
 283     :param get_bytes: provides **bytes** for the **content** rather than a **str**
 284
 285     :returns: **list** of (str, str, str) tuples for the components of this message
 286     """
 287
 288     if not get_bytes:
 289       return [(code, div, stem.util.str_tools._to_unicode(content)) for (code, div, content) in self._parsed_content]
 290     else:
 291       return list(self._parsed_content)  # type: ignore
 292
 293   def raw_content(self, get_bytes: bool = False) -> Union[str, bytes]:
 294     """
 295     Provides the unparsed content read from the control socket.
 296
 297     .. versionchanged:: 1.1.0
 298        Added the get_bytes argument.
 299
 300     :param get_bytes: if **True** then this provides **bytes** rather than a **str**
 301
 302     :returns: **str** of the socket data used to generate this message
 303     """
 304
 305     if not get_bytes:
 306       return stem.util.str_tools._to_unicode(self._raw_content)
 307     else:
 308       return self._raw_content
 309
 310   def _parse_message(self) -> None:
 311     raise NotImplementedError('Implemented by subclasses')
 312
 313   def __str__(self) -> str:
 314     """
 315     Content of the message, stripped of status code and divider protocol
 316     formatting.
 317     """
 318
 319     if self._str is None:
 320       self._str = '\n'.join(list(self))
 321
 322     return self._str
 323
 324   def __iter__(self) -> Iterator['stem.response.ControlLine']:
 325     """
 326     Provides :class:`~stem.response.ControlLine` instances for the content of
 327     the message. This is stripped of status codes and dividers, for instance...
 328
 329     ::
 330
 331       250+info/names=
 332       desc/id/* -- Router descriptors by ID.
 333       desc/name/* -- Router descriptors by nickname.
 334       .
 335       250 OK
 336
 337     Would provide two entries...
 338
 339     ::
 340
 341       1st - "info/names=
 342              desc/id/* -- Router descriptors by ID.
 343              desc/name/* -- Router descriptors by nickname."
 344       2nd - "OK"
 345     """
 346
 347     for _, _, content in self._parsed_content:
 348       yield ControlLine(stem.util.str_tools._to_unicode(content))
 349
 350   def __len__(self) -> int:
 351     """
 352     :returns: number of ControlLines
 353     """
 354
 355     return len(self._parsed_content)
 356
 357   def __getitem__(self, index: int) -> 'stem.response.ControlLine':
 358     """
 359     :returns: :class:`~stem.response.ControlLine` at the index
 360     """
 361
 362     content = self._parsed_content[index][2]
 363     content = stem.util.str_tools._to_unicode(content)
 364
 365     return ControlLine(content)
 366
 367   def __hash__(self) -> int:
 368     return self._hash
 369
 370   def __eq__(self, other: Any) -> bool:
 371     return hash(self) == hash(other) if isinstance(other, ControlMessage) else False
 372
 373   def __ne__(self, other: Any) -> bool:
 374     return not self == other
 375
 376
 377 class ControlLine(str):
 378   """
 379   String subclass that represents a line of controller output. This behaves as
 380   a normal string with additional methods for parsing and popping entries from
 381   a space delimited series of elements like a stack.
 382
 383   None of these additional methods effect ourselves as a string (which is still
 384   immutable). All methods are thread safe.
 385   """
 386
 387   def __new__(self, value: str) -> 'stem.response.ControlLine':
 388     return str.__new__(self, value)  # type: ignore
 389
 390   def __init__(self, value: str) -> None:
 391     self._remainder = value
 392     self._remainder_lock = threading.RLock()
 393
 394   def remainder(self) -> str:
 395     """
 396     Provides our unparsed content. This is an empty string after we've popped
 397     all entries.
 398
 399     :returns: **str** of the unparsed content
 400     """
 401
 402     return self._remainder
 403
 404   def is_empty(self) -> bool:
 405     """
 406     Checks if we have further content to pop or not.
 407
 408     :returns: **True** if we have additional content, **False** otherwise
 409     """
 410
 411     return self._remainder == ''
 412
 413   def is_next_quoted(self, escaped: bool = False) -> bool:
 414     """
 415     Checks if our next entry is a quoted value or not.
 416
 417     :param escaped: unescapes the string
 418
 419     :returns: **True** if the next entry can be parsed as a quoted value, **False** otherwise
 420     """
 421
 422     start_quote, end_quote = _get_quote_indices(self._remainder, escaped)
 423     return start_quote == 0 and end_quote != -1
 424
 425   def is_next_mapping(self, key: Optional[str] = None, quoted: bool = False, escaped: bool = False) -> bool:
 426     """
 427     Checks if our next entry is a KEY=VALUE mapping or not.
 428
 429     :param key: checks that the key matches this value, skipping the check if **None**
 430     :param quoted: checks that the mapping is to a quoted value
 431     :param escaped: unescapes the string
 432
 433     :returns: **True** if the next entry can be parsed as a key=value mapping,
 434       **False** otherwise
 435     """
 436
 437     remainder = self._remainder  # temp copy to avoid locking
 438     key_match = KEY_ARG.match(remainder)
 439
 440     if key_match:
 441       if key and key != key_match.groups()[0]:
 442         return False
 443
 444       if quoted:
 445         # checks that we have a quoted value and that it comes after the 'key='
 446         start_quote, end_quote = _get_quote_indices(remainder, escaped)
 447         return start_quote == key_match.end() and end_quote != -1
 448       else:
 449         return True  # we just needed to check for the key
 450     else:
 451       return False  # doesn't start with a key
 452
 453   def peek_key(self) -> str:
 454     """
 455     Provides the key of the next entry, providing **None** if it isn't a
 456     key/value mapping.
 457
 458     :returns: **str** with the next entry's key
 459     """
 460
 461     remainder = self._remainder
 462     key_match = KEY_ARG.match(remainder)
 463
 464     if key_match:
 465       return key_match.groups()[0]
 466     else:
 467       return None
 468
 469   def pop(self, quoted: bool = False, escaped: bool = False) -> str:
 470     """
 471     Parses the next space separated entry, removing it and the space from our
 472     remaining content. Examples...
 473
 474     ::
 475
 476       >>> line = ControlLine("\\"We're all mad here.\\" says the grinning cat.")
 477       >>> print line.pop(True)
 478         "We're all mad here."
 479       >>> print line.pop()
 480         "says"
 481       >>> print line.remainder()
 482         "the grinning cat."
 483
 484       >>> line = ControlLine("\\"this has a \\\\\\" and \\\\\\\\ in it\\" foo=bar more_data")
 485       >>> print line.pop(True, True)
 486         "this has a \\" and \\\\ in it"
 487
 488     :param quoted: parses the next entry as a quoted value, removing the quotes
 489     :param escaped: unescapes the string
 490
 491     :returns: **str** of the next space separated entry
 492
 493     :raises:
 494       * **ValueError** if quoted is True without the value being quoted
 495       * **IndexError** if we don't have any remaining content left to parse
 496     """
 497
 498     with self._remainder_lock:
 499       next_entry, remainder = _parse_entry(self._remainder, quoted, escaped, False)
 500       self._remainder = remainder
 501       return next_entry  # type: ignore
 502
 503   # TODO: drop this alias when we provide better type support
 504
 505   def _pop_mapping_bytes(self, quoted: bool = False, escaped: bool = False) -> Tuple[str, bytes]:
 506     return self.pop_mapping(quoted, escaped, get_bytes = True)  # type: ignore
 507
 508   def pop_mapping(self, quoted: bool = False, escaped: bool = False, get_bytes: bool = False) -> Tuple[str, str]:
 509     """
 510     Parses the next space separated entry as a KEY=VALUE mapping, removing it
 511     and the space from our remaining content.
 512
 513     .. versionchanged:: 1.6.0
 514        Added the get_bytes argument.
 515
 516     :param quoted: parses the value as being quoted, removing the quotes
 517     :param escaped: unescapes the string
 518     :param get_bytes: provides **bytes** for the **value** rather than a **str**
 519
 520     :returns: **tuple** of the form (key, value)
 521
 522     :raises: **ValueError** if this isn't a KEY=VALUE mapping or if quoted is
 523       **True** without the value being quoted
 524     :raises: **IndexError** if there's nothing to parse from the line
 525     """
 526
 527     with self._remainder_lock:
 528       if self.is_empty():
 529         raise IndexError('no remaining content to parse')
 530
 531       key_match = KEY_ARG.match(self._remainder)
 532
 533       if not key_match:
 534         raise ValueError("the next entry isn't a KEY=VALUE mapping: " + self._remainder)
 535
 536       # parse off the key
 537       key = key_match.groups()[0]
 538       remainder = self._remainder[key_match.end():]
 539
 540       next_entry, remainder = _parse_entry(remainder, quoted, escaped, get_bytes)
 541       self._remainder = remainder
 542       return (key, next_entry)  # type: ignore
 543
 544
 545 def _parse_entry(line: str, quoted: bool, escaped: bool, get_bytes: bool) -> Tuple[Union[str, bytes], str]:
 546   """
 547   Parses the next entry from the given space separated content.
 548
 549   :param line: content to be parsed
 550   :param quoted: parses the next entry as a quoted value, removing the quotes
 551   :param escaped: unescapes the string
 552   :param get_bytes: provides **bytes** for the entry rather than a **str**
 553
 554   :returns: **tuple** of the form (entry, remainder)
 555
 556   :raises:
 557     * **ValueError** if quoted is True without the next value being quoted
 558     * **IndexError** if there's nothing to parse from the line
 559   """
 560
 561   if line == '':
 562     raise IndexError('no remaining content to parse')
 563
 564   next_entry, remainder = '', line
 565
 566   if quoted:
 567     # validate and parse the quoted value
 568     start_quote, end_quote = _get_quote_indices(remainder, escaped)
 569
 570     if start_quote != 0 or end_quote == -1:
 571       raise ValueError("the next entry isn't a quoted value: " + line)
 572
 573     next_entry, remainder = remainder[1:end_quote], remainder[end_quote + 1:]
 574   else:
 575     # non-quoted value, just need to check if there's more data afterward
 576     if ' ' in remainder:
 577       next_entry, remainder = remainder.split(' ', 1)
 578     else:
 579       next_entry, remainder = remainder, ''
 580
 581   if escaped:
 582     # Tor does escaping in its 'esc_for_log' function of 'common/util.c'. It's
 583     # hard to tell what controller functions use this in practice, but direct
 584     # users are...
 585     #
 586     #   * 'COOKIEFILE' field of PROTOCOLINFO responses
 587     #   * logged messages about bugs
 588     #   * the 'getinfo_helper_listeners' function of control.c
 589     #
 590     # Ideally we'd use "next_entry.decode('string_escape')" but it was removed
 591     # in python 3.x and 'unicode_escape' isn't quite the same...
 592     #
 593     #   https://stackoverflow.com/questions/14820429/how-do-i-decodestring-escape-in-python3
 594
 595     next_entry = codecs.escape_decode(next_entry)[0]  # type: ignore
 596
 597     if not get_bytes:
 598       next_entry = stem.util.str_tools._to_unicode(next_entry)  # normalize back to str
 599
 600   if get_bytes:
 601     return (stem.util.str_tools._to_bytes(next_entry), remainder.lstrip())
 602   else:
 603     return (next_entry, remainder.lstrip())
 604
 605
 606 def _get_quote_indices(line: str, escaped: bool) -> Tuple[int, int]:
 607   """
 608   Provides the indices of the next two quotes in the given content.
 609
 610   :param line: content to be parsed
 611   :param escaped: unescapes the string
 612
 613   :returns: **tuple** of two ints, indices being -1 if a quote doesn't exist
 614   """
 615
 616   indices, quote_index = [], -1
 617
 618   for _ in range(2):
 619     quote_index = line.find('"', quote_index + 1)
 620
 621     # if we have escapes then we need to skip any r'\"' entries
 622     if escaped:
 623       # skip check if index is -1 (no match) or 0 (first character)
 624       while quote_index >= 1 and line[quote_index - 1] == '\\':
 625         quote_index = line.find('"', quote_index + 1)
 626
 627     indices.append(quote_index)
 628
 629   return tuple(indices)  # type: ignore
 630
 631
 632 class SingleLineResponse(ControlMessage):
 633   """
 634   Reply to a request that performs an action rather than querying data. These
 635   requests only contain a single line, which is 'OK' if successful, and a
 636   description of the problem if not.
 637
 638   :var str code: status code for our line
 639   :var str message: content of the line
 640   """
 641
 642   def is_ok(self, strict: bool = False) -> bool:
 643     """
 644     Checks if the response code is "250". If strict is **True** then this
 645     checks if the response is "250 OK"
 646
 647     :param strict: checks for a "250 OK" message if **True**
 648
 649     :returns:
 650       * If strict is **False**: **True** if the response code is "250", **False** otherwise
 651       * If strict is **True**: **True** if the response is "250 OK", **False** otherwise
 652     """
 653
 654     if strict:
 655       return self.content()[0] == ('250', ' ', 'OK')
 656
 657     return self.content()[0][0] == '250'
 658
 659   def _parse_message(self) -> None:
 660     content = self.content()
 661
 662     if len(content) > 1:
 663       raise stem.ProtocolError('Received multi-line response')
 664     elif len(content) == 0:
 665       raise stem.ProtocolError('Received empty response')
 666     else:
 667       code, _, msg = content[0]
 668
 669       self.code = stem.util.str_tools._to_unicode(code)
 670       self.message = stem.util.str_tools._to_unicode(msg)