Lib/io.py

   1 """The io module provides the Python interfaces to stream handling. The
   2 builtin open function is defined in this module.
   3
   4 At the top of the I/O hierarchy is the abstract base class IOBase. It
   5 defines the basic interface to a stream. Note, however, that there is no
   6 separation between reading and writing to streams; implementations are
   7 allowed to throw an IOError if they do not support a given operation.
   8
   9 Extending IOBase is RawIOBase which deals simply with the reading and
  10 writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
  11 an interface to OS files.
  12
  13 BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
  14 subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
  15 streams that are readable, writable, and both respectively.
  16 BufferedRandom provides a buffered interface to random access
  17 streams. BytesIO is a simple stream of in-memory bytes.
  18
  19 Another IOBase subclass, TextIOBase, deals with the encoding and decoding
  20 of streams into text. TextIOWrapper, which extends it, is a buffered text
  21 interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
  22 is an in-memory stream for text.
  23
  24 Argument names are not part of the specification, and only the arguments
  25 of open() are intended to be used as keyword arguments.
  26
  27 data:
  28
  29 DEFAULT_BUFFER_SIZE
  30
  31    An int containing the default buffer size used by the module's buffered
  32    I/O classes. open() uses the file's blksize (as obtained by os.stat) if
  33    possible.
  34 """
  35 # New I/O library conforming to PEP 3116.
  36
  37 # This is a prototype; hopefully eventually some of this will be
  38 # reimplemented in C.
  39
  40 # XXX edge cases when switching between reading/writing
  41 # XXX need to support 1 meaning line-buffered
  42 # XXX whenever an argument is None, use the default value
  43 # XXX read/write ops should check readable/writable
  44 # XXX buffered readinto should work with arbitrary buffer objects
  45 # XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
  46 # XXX check writable, readable and seekable in appropriate places
  47
  48
# Authors of this pure-Python prototype of the I/O library.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Names exported by "from io import *".
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
  57
  58 import os
  59 import abc
  60 import sys
  61 import codecs
  62 import _fileio
  63 import warnings
  64
# Fallback buffer size for the buffered I/O classes.  open() prefers the
# file's st_blksize whenever os.fstat() can report one.
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes
  67
  68
  69 class BlockingIOError(IOError):
  70
  71     """Exception raised when I/O would block on a non-blocking I/O stream."""
  72
  73     def __init__(self, errno, strerror, characters_written=0):
  74         IOError.__init__(self, errno, strerror)
  75         self.characters_written = characters_written
  76
  77
  78 def open(file, mode="r", buffering=None, encoding=None, errors=None,
  79          newline=None, closefd=True):
  80
  81     r"""Open file and return a stream. If the file cannot be opened, an IOError is
  82     raised.
  83
  84     file is either a string giving the name (and the path if the file
  85     isn't in the current working directory) of the file to be opened or an
  86     integer file descriptor of the file to be wrapped. (If a file
  87     descriptor is given, it is closed when the returned I/O object is
  88     closed, unless closefd is set to False.)
  89
  90     mode is an optional string that specifies the mode in which the file
  91     is opened. It defaults to 'r' which means open for reading in text
  92     mode.  Other common values are 'w' for writing (truncating the file if
  93     it already exists), and 'a' for appending (which on some Unix systems,
  94     means that all writes append to the end of the file regardless of the
  95     current seek position). In text mode, if encoding is not specified the
  96     encoding used is platform dependent. (For reading and writing raw
  97     bytes use binary mode and leave encoding unspecified.) The available
  98     modes are:
  99
 100     ========= ===============================================================
 101     Character Meaning
 102     --------- ---------------------------------------------------------------
 103     'r'       open for reading (default)
 104     'w'       open for writing, truncating the file first
 105     'a'       open for writing, appending to the end of the file if it exists
 106     'b'       binary mode
 107     't'       text mode (default)
 108     '+'       open a disk file for updating (reading and writing)
 109     'U'       universal newline mode (for backwards compatibility; unneeded
 110               for new code)
 111     ========= ===============================================================
 112
 113     The default mode is 'rt' (open for reading text). For binary random
 114     access, the mode 'w+b' opens and truncates the file to 0 bytes, while
 115     'r+b' opens the file without truncation.
 116
 117     Python distinguishes between files opened in binary and text modes,
 118     even when the underlying operating system doesn't. Files opened in
 119     binary mode (appending 'b' to the mode argument) return contents as
 120     bytes objects without any decoding. In text mode (the default, or when
 121     't' is appended to the mode argument), the contents of the file are
 122     returned as strings, the bytes having been first decoded using a
 123     platform-dependent encoding or using the specified encoding if given.
 124
 125     buffering is an optional integer used to set the buffering policy. By
 126     default full buffering is on. Pass 0 to switch buffering off (only
 127     allowed in binary mode), 1 to set line buffering, and an integer > 1
 128     for full buffering.
 129
 130     encoding is the name of the encoding used to decode or encode the
 131     file. This should only be used in text mode. The default encoding is
 132     platform dependent, but any encoding supported by Python can be
 133     passed.  See the codecs module for the list of supported encodings.
 134
 135     errors is an optional string that specifies how encoding errors are to
 136     be handled---this argument should not be used in binary mode. Pass
 137     'strict' to raise a ValueError exception if there is an encoding error
 138     (the default of None has the same effect), or pass 'ignore' to ignore
 139     errors. (Note that ignoring encoding errors can lead to data loss.)
 140     See the documentation for codecs.register for a list of the permitted
 141     encoding error strings.
 142
 143     newline controls how universal newlines works (it only applies to text
 144     mode). It can be None, '', '\n', '\r', and '\r\n'.  It works as
 145     follows:
 146
 147     * On input, if newline is None, universal newlines mode is
 148       enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
 149       these are translated into '\n' before being returned to the
 150       caller. If it is '', universal newline mode is enabled, but line
 151       endings are returned to the caller untranslated. If it has any of
 152       the other legal values, input lines are only terminated by the given
 153       string, and the line ending is returned to the caller untranslated.
 154
 155     * On output, if newline is None, any '\n' characters written are
 156       translated to the system default line separator, os.linesep. If
 157       newline is '', no translation takes place. If newline is any of the
 158       other legal values, any '\n' characters written are translated to
 159       the given string.
 160
 161     If closefd is False, the underlying file descriptor will be kept open
 162     when the file is closed. This does not work when a file name is given
 163     and must be True in that case.
 164
 165     open() returns a file object whose type depends on the mode, and
 166     through which the standard file operations such as reading and writing
 167     are performed. When open() is used to open a file in a text mode ('w',
 168     'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
 169     a file in a binary mode, the returned class varies: in read binary
 170     mode, it returns a BufferedReader; in write binary and append binary
 171     modes, it returns a BufferedWriter, and in read/write mode, it returns
 172     a BufferedRandom.
 173
 174     It is also possible to use a string or bytearray as a file for both
 175     reading and writing. For strings StringIO can be used like a file
 176     opened in a text mode, and for bytes a BytesIO can be used like a file
 177     opened in a binary mode.
 178     """
 179     if not isinstance(file, (str, int)):
 180         raise TypeError("invalid file: %r" % file)
 181     if not isinstance(mode, str):
 182         raise TypeError("invalid mode: %r" % mode)
 183     if buffering is not None and not isinstance(buffering, int):
 184         raise TypeError("invalid buffering: %r" % buffering)
 185     if encoding is not None and not isinstance(encoding, str):
 186         raise TypeError("invalid encoding: %r" % encoding)
 187     if errors is not None and not isinstance(errors, str):
 188         raise TypeError("invalid errors: %r" % errors)
 189     modes = set(mode)
 190     if modes - set("arwb+tU") or len(mode) > len(modes):
 191         raise ValueError("invalid mode: %r" % mode)
 192     reading = "r" in modes
 193     writing = "w" in modes
 194     appending = "a" in modes
 195     updating = "+" in modes
 196     text = "t" in modes
 197     binary = "b" in modes
 198     if "U" in modes:
 199         if writing or appending:
 200             raise ValueError("can't use U and writing mode at once")
 201         reading = True
 202     if text and binary:
 203         raise ValueError("can't have text and binary mode at once")
 204     if reading + writing + appending > 1:
 205         raise ValueError("can't have read/write/append mode at once")
 206     if not (reading or writing or appending):
 207         raise ValueError("must have exactly one of read/write/append mode")
 208     if binary and encoding is not None:
 209         raise ValueError("binary mode doesn't take an encoding argument")
 210     if binary and errors is not None:
 211         raise ValueError("binary mode doesn't take an errors argument")
 212     if binary and newline is not None:
 213         raise ValueError("binary mode doesn't take a newline argument")
 214     raw = FileIO(file,
 215                  (reading and "r" or "") +
 216                  (writing and "w" or "") +
 217                  (appending and "a" or "") +
 218                  (updating and "+" or ""),
 219                  closefd)
 220     if buffering is None:
 221         buffering = -1
 222     line_buffering = False
 223     if buffering == 1 or buffering < 0 and raw.isatty():
 224         buffering = -1
 225         line_buffering = True
 226     if buffering < 0:
 227         buffering = DEFAULT_BUFFER_SIZE
 228         try:
 229             bs = os.fstat(raw.fileno()).st_blksize
 230         except (os.error, AttributeError):
 231             pass
 232         else:
 233             if bs > 1:
 234                 buffering = bs
 235     if buffering < 0:
 236         raise ValueError("invalid buffering size")
 237     if buffering == 0:
 238         if binary:
 239             raw._name = file
 240             raw._mode = mode
 241             return raw
 242         raise ValueError("can't have unbuffered text I/O")
 243     if updating:
 244         buffer = BufferedRandom(raw, buffering)
 245     elif writing or appending:
 246         buffer = BufferedWriter(raw, buffering)
 247     elif reading:
 248         buffer = BufferedReader(raw, buffering)
 249     else:
 250         raise ValueError("unknown mode: %r" % mode)
 251     if binary:
 252         buffer.name = file
 253         buffer.mode = mode
 254         return buffer
 255     text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
 256     text.name = file
 257     text.mode = mode
 258     return text
 259
 260 class _DocDescriptor:
 261     """Helper for builtins.open.__doc__
 262     """
 263     def __get__(self, obj, typ):
 264         return (
 265             "open(file, mode='r', buffering=None, encoding=None, "
 266                  "errors=None, newline=None, closefd=True)\n\n" +
 267             open.__doc__)
 268
 269 class OpenWrapper:
 270     """Wrapper for builtins.open
 271
 272     Trick so that open won't become a bound method when stored
 273     as a class variable (as dumbdbm does).
 274
 275     See initstdio() in Python/pythonrun.c.
 276     """
 277     __doc__ = _DocDescriptor()
 278
 279     def __new__(cls, *args, **kwargs):
 280         return open(*args, **kwargs)
 281
 282
 283 class UnsupportedOperation(ValueError, IOError):
 284     pass
 285
 286
 287 class IOBase(metaclass=abc.ABCMeta):
 288
 289     """The abstract base class for all I/O classes, acting on streams of
 290     bytes. There is no public constructor.
 291
 292     This class provides dummy implementations for many methods that
 293     derived classes can override selectively; the default implementations
 294     represent a file that cannot be read, written or seeked.
 295
 296     Even though IOBase does not declare read, readinto, or write because
 297     their signatures will vary, implementations and clients should
 298     consider those methods part of the interface. Also, implementations
 299     may raise a IOError when operations they do not support are called.
 300
 301     The basic type used for binary data read from or written to a file is
 302     bytes. bytearrays are accepted too, and in some cases (such as
 303     readinto) needed. Text I/O classes work with str data.
 304
 305     Note that calling any method (even inquiries) on a closed stream is
 306     undefined. Implementations may raise IOError in this case.
 307
 308     IOBase (and its subclasses) support the iterator protocol, meaning
 309     that an IOBase object can be iterated over yielding the lines in a
 310     stream.
 311
 312     IOBase also supports the :keyword:`with` statement. In this example,
 313     fp is closed after the suite of the with statment is complete:
 314
 315     with open('spam.txt', 'r') as fp:
 316         fp.write('Spam and eggs!')
 317     """
 318
 319     ### Internal ###
 320
 321     def _unsupported(self, name: str) -> IOError:
 322         """Internal: raise an exception for unsupported operations."""
 323         raise UnsupportedOperation("%s.%s() not supported" %
 324                                    (self.__class__.__name__, name))
 325
 326     ### Positioning ###
 327
 328     def seek(self, pos: int, whence: int = 0) -> int:
 329         """Change stream position.
 330
 331         Change the stream position to byte offset offset. offset is
 332         interpreted relative to the position indicated by whence.  Values
 333         for whence are:
 334
 335         * 0 -- start of stream (the default); offset should be zero or positive
 336         * 1 -- current stream position; offset may be negative
 337         * 2 -- end of stream; offset is usually negative
 338
 339         Return the new absolute position.
 340         """
 341         self._unsupported("seek")
 342
 343     def tell(self) -> int:
 344         """Return current stream position."""
 345         return self.seek(0, 1)
 346
 347     def truncate(self, pos: int = None) -> int:
 348         """Truncate file to size bytes.
 349
 350         Size defaults to the current IO position as reported by tell().  Return
 351         the new size.
 352         """
 353         self._unsupported("truncate")
 354
 355     ### Flush and close ###
 356
 357     def flush(self) -> None:
 358         """Flush write buffers, if applicable.
 359
 360         This is not implemented for read-only and non-blocking streams.
 361         """
 362         # XXX Should this return the number of bytes written???
 363
 364     __closed = False
 365
 366     def close(self) -> None:
 367         """Flush and close the IO object.
 368
 369         This method has no effect if the file is already closed.
 370         """
 371         if not self.__closed:
 372             try:
 373                 self.flush()
 374             except IOError:
 375                 pass  # If flush() fails, just give up
 376             self.__closed = True
 377
 378     def __del__(self) -> None:
 379         """Destructor.  Calls close()."""
 380         # The try/except block is in case this is called at program
 381         # exit time, when it's possible that globals have already been
 382         # deleted, and then the close() call might fail.  Since
 383         # there's nothing we can do about such failures and they annoy
 384         # the end users, we suppress the traceback.
 385         try:
 386             self.close()
 387         except:
 388             pass
 389
 390     ### Inquiries ###
 391
 392     def seekable(self) -> bool:
 393         """Return whether object supports random access.
 394
 395         If False, seek(), tell() and truncate() will raise IOError.
 396         This method may need to do a test seek().
 397         """
 398         return False
 399
 400     def _checkSeekable(self, msg=None):
 401         """Internal: raise an IOError if file is not seekable
 402         """
 403         if not self.seekable():
 404             raise IOError("File or stream is not seekable."
 405                           if msg is None else msg)
 406
 407
 408     def readable(self) -> bool:
 409         """Return whether object was opened for reading.
 410
 411         If False, read() will raise IOError.
 412         """
 413         return False
 414
 415     def _checkReadable(self, msg=None):
 416         """Internal: raise an IOError if file is not readable
 417         """
 418         if not self.readable():
 419             raise IOError("File or stream is not readable."
 420                           if msg is None else msg)
 421
 422     def writable(self) -> bool:
 423         """Return whether object was opened for writing.
 424
 425         If False, write() and truncate() will raise IOError.
 426         """
 427         return False
 428
 429     def _checkWritable(self, msg=None):
 430         """Internal: raise an IOError if file is not writable
 431         """
 432         if not self.writable():
 433             raise IOError("File or stream is not writable."
 434                           if msg is None else msg)
 435
 436     @property
 437     def closed(self):
 438         """closed: bool.  True iff the file has been closed.
 439
 440         For backwards compatibility, this is a property, not a predicate.
 441         """
 442         return self.__closed
 443
 444     def _checkClosed(self, msg=None):
 445         """Internal: raise an ValueError if file is closed
 446         """
 447         if self.closed:
 448             raise ValueError("I/O operation on closed file."
 449                              if msg is None else msg)
 450
 451     ### Context manager ###
 452
 453     def __enter__(self) -> "IOBase":  # That's a forward reference
 454         """Context management protocol.  Returns self."""
 455         self._checkClosed()
 456         return self
 457
 458     def __exit__(self, *args) -> None:
 459         """Context management protocol.  Calls close()"""
 460         self.close()
 461
 462     ### Lower-level APIs ###
 463
 464     # XXX Should these be present even if unimplemented?
 465
 466     def fileno(self) -> int:
 467         """Returns underlying file descriptor if one exists.
 468
 469         An IOError is raised if the IO object does not use a file descriptor.
 470         """
 471         self._unsupported("fileno")
 472
 473     def isatty(self) -> bool:
 474         """Return whether this is an 'interactive' stream.
 475
 476         Return False if it can't be determined.
 477         """
 478         self._checkClosed()
 479         return False
 480
 481     ### Readline[s] and writelines ###
 482
 483     def readline(self, limit: int = -1) -> bytes:
 484         r"""Read and return a line from the stream.
 485
 486         If limit is specified, at most limit bytes will be read.
 487
 488         The line terminator is always b'\n' for binary files; for text
 489         files, the newlines argument to open can be used to select the line
 490         terminator(s) recognized.
 491         """
 492         # For backwards compatibility, a (slowish) readline().
 493         self._checkClosed()
 494         if hasattr(self, "peek"):
 495             def nreadahead():
 496                 readahead = self.peek(1)
 497                 if not readahead:
 498                     return 1
 499                 n = (readahead.find(b"\n") + 1) or len(readahead)
 500                 if limit >= 0:
 501                     n = min(n, limit)
 502                 return n
 503         else:
 504             def nreadahead():
 505                 return 1
 506         if limit is None:
 507             limit = -1
 508         res = bytearray()
 509         while limit < 0 or len(res) < limit:
 510             b = self.read(nreadahead())
 511             if not b:
 512                 break
 513             res += b
 514             if res.endswith(b"\n"):
 515                 break
 516         return bytes(res)
 517
 518     def __iter__(self):
 519         self._checkClosed()
 520         return self
 521
 522     def __next__(self):
 523         line = self.readline()
 524         if not line:
 525             raise StopIteration
 526         return line
 527
 528     def readlines(self, hint=None):
 529         """Return a list of lines from the stream.
 530
 531         hint can be specified to control the number of lines read: no more
 532         lines will be read if the total size (in bytes/characters) of all
 533         lines so far exceeds hint.
 534         """
 535         if hint is None or hint <= 0:
 536             return list(self)
 537         n = 0
 538         lines = []
 539         for line in self:
 540             lines.append(line)
 541             n += len(line)
 542             if n >= hint:
 543                 break
 544         return lines
 545
 546     def writelines(self, lines):
 547         self._checkClosed()
 548         for line in lines:
 549             self.write(line)
 550
 551
 552 class RawIOBase(IOBase):
 553
 554     """Base class for raw binary I/O."""
 555
 556     # The read() method is implemented by calling readinto(); derived
 557     # classes that want to support read() only need to implement
 558     # readinto() as a primitive operation.  In general, readinto() can be
 559     # more efficient than read().
 560
 561     # (It would be tempting to also provide an implementation of
 562     # readinto() in terms of read(), in case the latter is a more suitable
 563     # primitive operation, but that would lead to nasty recursion in case
 564     # a subclass doesn't implement either.)
 565
 566     def read(self, n: int = -1) -> bytes:
 567         """Read and return up to n bytes.
 568
 569         Returns an empty bytes object on EOF, or None if the object is
 570         set not to block and has no data to read.
 571         """
 572         if n is None:
 573             n = -1
 574         if n < 0:
 575             return self.readall()
 576         b = bytearray(n.__index__())
 577         n = self.readinto(b)
 578         del b[n:]
 579         return bytes(b)
 580
 581     def readall(self):
 582         """Read until EOF, using multiple read() call."""
 583         res = bytearray()
 584         while True:
 585             data = self.read(DEFAULT_BUFFER_SIZE)
 586             if not data:
 587                 break
 588             res += data
 589         return bytes(res)
 590
 591     def readinto(self, b: bytearray) -> int:
 592         """Read up to len(b) bytes into b.
 593
 594         Returns number of bytes read (0 for EOF), or None if the object
 595         is set not to block as has no data to read.
 596         """
 597         self._unsupported("readinto")
 598
 599     def write(self, b: bytes) -> int:
 600         """Write the given buffer to the IO stream.
 601
 602         Returns the number of bytes written, which may be less than len(b).
 603         """
 604         self._unsupported("write")
 605
 606
 607 class FileIO(_fileio._FileIO, RawIOBase):
 608
 609     """Raw I/O implementation for OS files."""
 610
 611     # This multiply inherits from _FileIO and RawIOBase to make
 612     # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
 613     # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
 614     # to do since _fileio.c is written in C).
 615
 616     def close(self):
 617         _fileio._FileIO.close(self)
 618         RawIOBase.close(self)
 619
 620     @property
 621     def name(self):
 622         return self._name
 623
 624     # XXX(gb): _FileIO already has a mode property
 625     @property
 626     def mode(self):
 627         return self._mode
 628
 629
 630 class BufferedIOBase(IOBase):
 631
 632     """Base class for buffered IO objects.
 633
 634     The main difference with RawIOBase is that the read() method
 635     supports omitting the size argument, and does not have a default
 636     implementation that defers to readinto().
 637
 638     In addition, read(), readinto() and write() may raise
 639     BlockingIOError if the underlying raw stream is in non-blocking
 640     mode and not ready; unlike their raw counterparts, they will never
 641     return None.
 642
 643     A typical implementation should not inherit from a RawIOBase
 644     implementation, but wrap one.
 645     """
 646
 647     def read(self, n: int = None) -> bytes:
 648         """Read and return up to n bytes.
 649
 650         If the argument is omitted, None, or negative, reads and
 651         returns all data until EOF.
 652
 653         If the argument is positive, and the underlying raw stream is
 654         not 'interactive', multiple raw reads may be issued to satisfy
 655         the byte count (unless EOF is reached first).  But for
 656         interactive raw streams (XXX and for pipes?), at most one raw
 657         read will be issued, and a short result does not imply that
 658         EOF is imminent.
 659
 660         Returns an empty bytes array on EOF.
 661
 662         Raises BlockingIOError if the underlying raw stream has no
 663         data at the moment.
 664         """
 665         self._unsupported("read")
 666
 667     def readinto(self, b: bytearray) -> int:
 668         """Read up to len(b) bytes into b.
 669
 670         Like read(), this may issue multiple reads to the underlying raw
 671         stream, unless the latter is 'interactive'.
 672
 673         Returns the number of bytes read (0 for EOF).
 674
 675         Raises BlockingIOError if the underlying raw stream has no
 676         data at the moment.
 677         """
 678         # XXX This ought to work with anything that supports the buffer API
 679         data = self.read(len(b))
 680         n = len(data)
 681         try:
 682             b[:n] = data
 683         except TypeError as err:
 684             import array
 685             if not isinstance(b, array.array):
 686                 raise err
 687             b[:n] = array.array('b', data)
 688         return n
 689
 690     def write(self, b: bytes) -> int:
 691         """Write the given buffer to the IO stream.
 692
 693         Return the number of bytes written, which is never less than
 694         len(b).
 695
 696         Raises BlockingIOError if the buffer is full and the
 697         underlying raw stream cannot accept more data at the moment.
 698         """
 699         self._unsupported("write")
 700
 701
 702 class _BufferedIOMixin(BufferedIOBase):
 703
 704     """A mixin implementation of BufferedIOBase with an underlying raw stream.
 705
 706     This passes most requests on to the underlying raw stream.  It
 707     does *not* provide implementations of read(), readinto() or
 708     write().
 709     """
 710
 711     def __init__(self, raw):
 712         self.raw = raw
 713
 714     ### Positioning ###
 715
 716     def seek(self, pos, whence=0):
 717         return self.raw.seek(pos, whence)
 718
 719     def tell(self):
 720         return self.raw.tell()
 721
 722     def truncate(self, pos=None):
 723         # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
 724         # and a flush may be necessary to synch both views of the current
 725         # file state.
 726         self.flush()
 727
 728         if pos is None:
 729             pos = self.tell()
 730         # XXX: Should seek() be used, instead of passing the position
 731         # XXX  directly to truncate?
 732         return self.raw.truncate(pos)
 733
 734     ### Flush and close ###
 735
 736     def flush(self):
 737         self.raw.flush()
 738
 739     def close(self):
 740         if not self.closed:
 741             try:
 742                 self.flush()
 743             except IOError:
 744                 pass  # If flush() fails, just give up
 745             self.raw.close()
 746
 747     ### Inquiries ###
 748
 749     def seekable(self):
 750         return self.raw.seekable()
 751
 752     def readable(self):
 753         return self.raw.readable()
 754
 755     def writable(self):
 756         return self.raw.writable()
 757
 758     @property
 759     def closed(self):
 760         return self.raw.closed
 761
 762     ### Lower-level APIs ###
 763
 764     def fileno(self):
 765         return self.raw.fileno()
 766
 767     def isatty(self):
 768         return self.raw.isatty()
 769
 770
 771 class _BytesIO(BufferedIOBase):
 772
 773     """Buffered I/O implementation using an in-memory bytes buffer."""
 774
 775     def __init__(self, initial_bytes=None):
 776         buf = bytearray()
 777         if initial_bytes is not None:
 778             buf += initial_bytes
 779         self._buffer = buf
 780         self._pos = 0
 781
 782     def getvalue(self):
 783         """Return the bytes value (contents) of the buffer
 784         """
 785         if self.closed:
 786             raise ValueError("getvalue on closed file")
 787         return bytes(self._buffer)
 788
 789     def read(self, n=None):
 790         if self.closed:
 791             raise ValueError("read from closed file")
 792         if n is None:
 793             n = -1
 794         if n < 0:
 795             n = len(self._buffer)
 796         if len(self._buffer) <= self._pos:
 797             return b""
 798         newpos = min(len(self._buffer), self._pos + n)
 799         b = self._buffer[self._pos : newpos]
 800         self._pos = newpos
 801         return bytes(b)
 802
 803     def read1(self, n):
 804         """This is the same as read.
 805         """
 806         return self.read(n)
 807
 808     def write(self, b):
 809         if self.closed:
 810             raise ValueError("write to closed file")
 811         if isinstance(b, str):
 812             raise TypeError("can't write str to binary stream")
 813         n = len(b)
 814         if n == 0:
 815             return 0
 816         newpos = self._pos + n
 817         if newpos > len(self._buffer):
 818             # Inserts null bytes between the current end of the file
 819             # and the new write position.
 820             padding = b'\x00' * (newpos - len(self._buffer) - n)
 821             self._buffer[self._pos:newpos - n] = padding
 822         self._buffer[self._pos:newpos] = b
 823         self._pos = newpos
 824         return n
 825
 826     def seek(self, pos, whence=0):
 827         if self.closed:
 828             raise ValueError("seek on closed file")
 829         try:
 830             pos = pos.__index__()
 831         except AttributeError as err:
 832             raise TypeError("an integer is required") from err
 833         if whence == 0:
 834             if pos < 0:
 835                 raise ValueError("negative seek position %r" % (pos,))
 836             self._pos = max(0, pos)
 837         elif whence == 1:
 838             self._pos = max(0, self._pos + pos)
 839         elif whence == 2:
 840             self._pos = max(0, len(self._buffer) + pos)
 841         else:
 842             raise ValueError("invalid whence value")
 843         return self._pos
 844
 845     def tell(self):
 846         if self.closed:
 847             raise ValueError("tell on closed file")
 848         return self._pos
 849
 850     def truncate(self, pos=None):
 851         if self.closed:
 852             raise ValueError("truncate on closed file")
 853         if pos is None:
 854             pos = self._pos
 855         elif pos < 0:
 856             raise ValueError("negative truncate position %r" % (pos,))
 857         del self._buffer[pos:]
 858         return self.seek(pos)
 859
 860     def readable(self):
 861         return True
 862
 863     def writable(self):
 864         return True
 865
 866     def seekable(self):
 867         return True
 868
 869 # Use the faster implementation of BytesIO if available
 870 try:
 871     import _bytesio
 872
 873     class BytesIO(_bytesio._BytesIO, BufferedIOBase):
 874         __doc__ = _bytesio._BytesIO.__doc__
 875
 876 except ImportError:
 877     BytesIO = _BytesIO
 878
 879
 880 class BufferedReader(_BufferedIOMixin):
 881
 882     """BufferedReader(raw[, buffer_size])
 883
 884     A buffer for a readable, sequential BaseRawIO object.
 885
 886     The constructor creates a BufferedReader for the given readable raw
 887     stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
 888     is used.
 889     """
 890
 891     def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
 892         """Create a new buffered reader using the given readable raw IO object.
 893         """
 894         raw._checkReadable()
 895         _BufferedIOMixin.__init__(self, raw)
 896         self._read_buf = b""
 897         self.buffer_size = buffer_size
 898
 899     def read(self, n=None):
 900         """Read n bytes.
 901
 902         Returns exactly n bytes of data unless the underlying raw IO
 903         stream reaches EOF or if the call would block in non-blocking
 904         mode. If n is negative, read until EOF or until read() would
 905         block.
 906         """
 907         if n is None:
 908             n = -1
 909         nodata_val = b""
 910         while n < 0 or len(self._read_buf) < n:
 911             to_read = max(self.buffer_size,
 912                           n if n is not None else 2*len(self._read_buf))
 913             current = self.raw.read(to_read)
 914             if current in (b"", None):
 915                 nodata_val = current
 916                 break
 917             self._read_buf += current
 918         if self._read_buf:
 919             if n < 0:
 920                 n = len(self._read_buf)
 921             out = self._read_buf[:n]
 922             self._read_buf = self._read_buf[n:]
 923         else:
 924             out = nodata_val
 925         return out
 926
 927     def peek(self, n=0):
 928         """Returns buffered bytes without advancing the position.
 929
 930         The argument indicates a desired minimal number of bytes; we
 931         do at most one raw read to satisfy it.  We never return more
 932         than self.buffer_size.
 933         """
 934         want = min(n, self.buffer_size)
 935         have = len(self._read_buf)
 936         if have < want:
 937             to_read = self.buffer_size - have
 938             current = self.raw.read(to_read)
 939             if current:
 940                 self._read_buf += current
 941         return self._read_buf
 942
 943     def read1(self, n):
 944         """Reads up to n bytes, with at most one read() system call."""
 945         # Returns up to n bytes.  If at least one byte is buffered, we
 946         # only return buffered bytes.  Otherwise, we do one raw read.
 947         if n <= 0:
 948             return b""
 949         self.peek(1)
 950         return self.read(min(n, len(self._read_buf)))
 951
 952     def tell(self):
 953         return self.raw.tell() - len(self._read_buf)
 954
 955     def seek(self, pos, whence=0):
 956         if whence == 1:
 957             pos -= len(self._read_buf)
 958         pos = self.raw.seek(pos, whence)
 959         self._read_buf = b""
 960         return pos
 961
 962
 963 class BufferedWriter(_BufferedIOMixin):
 964
 965     """A buffer for a writeable sequential RawIO object.
 966
 967     The constructor creates a BufferedWriter for the given writeable raw
 968     stream. If the buffer_size is not given, it defaults to
 969     DEAFULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
 970     twice the buffer size.
 971     """
 972
 973     def __init__(self, raw,
 974                  buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
 975         raw._checkWritable()
 976         _BufferedIOMixin.__init__(self, raw)
 977         self.buffer_size = buffer_size
 978         self.max_buffer_size = (2*buffer_size
 979                                 if max_buffer_size is None
 980                                 else max_buffer_size)
 981         self._write_buf = bytearray()
 982
 983     def write(self, b):
 984         if self.closed:
 985             raise ValueError("write to closed file")
 986         if isinstance(b, str):
 987             raise TypeError("can't write str to binary stream")
 988         # XXX we can implement some more tricks to try and avoid partial writes
 989         if len(self._write_buf) > self.buffer_size:
 990             # We're full, so let's pre-flush the buffer
 991             try:
 992                 self.flush()
 993             except BlockingIOError as e:
 994                 # We can't accept anything else.
 995                 # XXX Why not just let the exception pass through?
 996                 raise BlockingIOError(e.errno, e.strerror, 0)
 997         before = len(self._write_buf)
 998         self._write_buf.extend(b)
 999         written = len(self._write_buf) - before
1000         if len(self._write_buf) > self.buffer_size:
1001             try:
1002                 self.flush()
1003             except BlockingIOError as e:
1004                 if (len(self._write_buf) > self.max_buffer_size):
1005                     # We've hit max_buffer_size. We have to accept a partial
1006                     # write and cut back our buffer.
1007                     overage = len(self._write_buf) - self.max_buffer_size
1008                     self._write_buf = self._write_buf[:self.max_buffer_size]
1009                     raise BlockingIOError(e.errno, e.strerror, overage)
1010         return written
1011
1012     def truncate(self, pos=None):
1013         self.flush()
1014         if pos is None:
1015             pos = self.raw.tell()
1016         return self.raw.truncate(pos)
1017
1018     def flush(self):
1019         if self.closed:
1020             raise ValueError("flush of closed file")
1021         written = 0
1022         try:
1023             while self._write_buf:
1024                 n = self.raw.write(self._write_buf)
1025                 del self._write_buf[:n]
1026                 written += n
1027         except BlockingIOError as e:
1028             n = e.characters_written
1029             del self._write_buf[:n]
1030             written += n
1031             raise BlockingIOError(e.errno, e.strerror, written)
1032
1033     def tell(self):
1034         return self.raw.tell() + len(self._write_buf)
1035
1036     def seek(self, pos, whence=0):
1037         self.flush()
1038         return self.raw.seek(pos, whence)
1039
1040
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None means read until EOF."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer side."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader side."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader side."""
        return self.reader.read1(n)

    def readable(self):
        """Delegate to the reader side."""
        return self.reader.readable()

    def writable(self):
        """Delegate to the writer side."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer side is closed."""
        # ``closed`` is a property on the writer, not a method.
        return self.writer.closed
1105
1106
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the position reported by the raw seek.
        """
        self.flush()
        if whence == 1:
            # The raw stream is len(_read_buf) bytes ahead of the logical
            # position because of read-ahead; compensate so a relative
            # seek is relative to what the caller has actually consumed.
            pos -= len(self._read_buf)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        self._read_buf = b""
        return pos

    def tell(self):
        """Return the logical position, accounting for either buffer."""
        if (self._write_buf):
            return self.raw.tell() + len(self._write_buf)
        else:
            return self.raw.tell() - len(self._read_buf)

    def truncate(self, pos=None):
        """Truncate at pos (default: current logical position)."""
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        self.seek(pos)
        return BufferedWriter.truncate(self)

    def read(self, n=None):
        """Flush pending writes, then read as BufferedReader does."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then readinto as BufferedReader does."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek as BufferedReader does."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then read1 as BufferedReader does."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Discard read-ahead (rewinding raw), then buffer the write."""
        if self._read_buf:
            self.raw.seek(-len(self._read_buf), 1) # Undo readahead
            self._read_buf = b""
        return BufferedWriter.write(self, b)
1167
1168
class TextIOBase(IOBase):

    """Abstract base for text-mode streams.

    Defines the character- and line-oriented part of the stream
    interface. There is no readinto method because Python's character
    strings are immutable. There is no public constructor.

    Every I/O method here just reports the operation as unsupported
    via self._unsupported(); subclasses supply real behaviour.
    """

    def read(self, n: int = -1) -> str:
        """Read at most n characters from the stream.

        Reads from the underlying buffer until n characters are
        available or EOF is hit. A negative or omitted n means read
        until EOF.
        """
        self._unsupported("read")

    def write(self, s: str) -> int:
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos: int = None) -> int:
        """Truncate the stream's size to pos."""
        self._unsupported("truncate")

    def readline(self) -> str:
        """Read up to and including the next newline, or to EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Always None here; subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.
        Always None here; subclasses should override.
        """
        return None
1215
1216
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.  It wraps
    another incremental decoder, translating \r\n and \r into \n.  It also
    records the types of newlines encountered.  When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.

    A trailing \r is held back until the next call so that a \r\n pair
    split across two chunks is still seen as one newline.  The held-back
    \r is tracked as a decoded character (self.pendingcr), never as a raw
    byte: splicing b'\r' into the input would corrupt encodings in which
    \r is not the single byte 0x0D (e.g. UTF-16 and UTF-32).
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        # Bitmask of the newline kinds seen so far (_LF, _CR, _CRLF).
        self.seennl = 0
        # True while a decoded "\r" is being withheld from the caller.
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode input with the wrapped decoder and process newlines.

        Returns the decoded text, with \\r\\n and \\r replaced by \\n when
        translate is true.  Unless final is true, a trailing \\r is
        withheld and prepended to the output of the next call.
        """
        output = self.decoder.decode(input, final=final)

        # Re-attach the \r withheld by the previous call, once there is
        # something to attach it to (or no more input will come).
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        """Return (buffered_bytes, flags) for the wrapped decoder.

        The pending-\\r marker is stored in the lowest bit of flags; the
        wrapped decoder's own flags are shifted left by one bit.
        """
        buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously returned by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the decoder."""
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple."""
        # Indexed by the seennl bitmask (_LF | _CR | _CRLF).
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]
1294
1295
1296 class TextIOWrapper(TextIOBase):
1297
1298     r"""Character and line based layer over a BufferedIOBase object, buffer.
1299
1300     encoding gives the name of the encoding that the stream will be
1301     decoded or encoded with. It defaults to locale.getpreferredencoding.
1302
1303     errors determines the strictness of encoding and decoding (see the
1304     codecs.register) and defaults to "strict".
1305
1306     newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1307     handling of line endings. If it is None, universal newlines is
1308     enabled.  With this enabled, on input, the lines endings '\n', '\r',
1309     or '\r\n' are translated to '\n' before being returned to the
1310     caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
1312     legal values, that newline becomes the newline when the file is read
1313     and it is returned untranslated. On output, '\n' is converted to the
1314     newline.
1315
1316     If line_buffering is True, a call to flush is implied when a call to
1317     write contains a newline character.
1318     """
1319
1320     _CHUNK_SIZE = 128
1321
1322     def __init__(self, buffer, encoding=None, errors=None, newline=None,
1323                  line_buffering=False):
1324         if newline not in (None, "", "\n", "\r", "\r\n"):
1325             raise ValueError("illegal newline value: %r" % (newline,))
1326         if encoding is None:
1327             try:
1328                 encoding = os.device_encoding(buffer.fileno())
1329             except (AttributeError, UnsupportedOperation):
1330                 pass
1331             if encoding is None:
1332                 try:
1333                     import locale
1334                 except ImportError:
1335                     # Importing locale may fail if Python is being built
1336                     encoding = "ascii"
1337                 else:
1338                     encoding = locale.getpreferredencoding()
1339
1340         if not isinstance(encoding, str):
1341             raise ValueError("invalid encoding: %r" % encoding)
1342
1343         if errors is None:
1344             errors = "strict"
1345         else:
1346             if not isinstance(errors, str):
1347                 raise ValueError("invalid errors: %r" % errors)
1348
1349         self.buffer = buffer
1350         self._line_buffering = line_buffering
1351         self._encoding = encoding
1352         self._errors = errors
1353         self._readuniversal = not newline
1354         self._readtranslate = newline is None
1355         self._readnl = newline
1356         self._writetranslate = newline != ''
1357         self._writenl = newline or os.linesep
1358         self._encoder = None
1359         self._decoder = None
1360         self._decoded_chars = ''  # buffer for text returned from decoder
1361         self._decoded_chars_used = 0  # offset into _decoded_chars for read()
1362         self._snapshot = None  # info for reconstructing decoder state
1363         self._seekable = self._telling = self.buffer.seekable()
1364
1365     # self._snapshot is either None, or a tuple (dec_flags, next_input)
1366     # where dec_flags is the second (integer) item of the decoder state
1367     # and next_input is the chunk of input bytes that comes next after the
1368     # snapshot point.  We use this to reconstruct decoder states in tell().
1369
1370     # Naming convention:
1371     #   - "bytes_..." for integer variables that count input bytes
1372     #   - "chars_..." for integer variables that count decoded characters
1373
1374     @property
1375     def encoding(self):
1376         return self._encoding
1377
1378     @property
1379     def errors(self):
1380         return self._errors
1381
1382     @property
1383     def line_buffering(self):
1384         return self._line_buffering
1385
1386     def seekable(self):
1387         return self._seekable
1388
1389     def readable(self):
1390         return self.buffer.readable()
1391
1392     def writable(self):
1393         return self.buffer.writable()
1394
1395     def flush(self):
1396         self.buffer.flush()
1397         self._telling = self._seekable
1398
1399     def close(self):
1400         try:
1401             self.flush()
1402         except:
1403             pass  # If flush() fails, just give up
1404         self.buffer.close()
1405
1406     @property
1407     def closed(self):
1408         return self.buffer.closed
1409
1410     def fileno(self):
1411         return self.buffer.fileno()
1412
1413     def isatty(self):
1414         return self.buffer.isatty()
1415
1416     def write(self, s: str):
1417         if self.closed:
1418             raise ValueError("write to closed file")
1419         if not isinstance(s, str):
1420             raise TypeError("can't write %s to text stream" %
1421                             s.__class__.__name__)
1422         length = len(s)
1423         haslf = (self._writetranslate or self._line_buffering) and "\n" in s
1424         if haslf and self._writetranslate and self._writenl != "\n":
1425             s = s.replace("\n", self._writenl)
1426         encoder = self._encoder or self._get_encoder()
1427         # XXX What if we were just reading?
1428         b = encoder.encode(s)
1429         self.buffer.write(b)
1430         if self._line_buffering and (haslf or "\r" in s):
1431             self.flush()
1432         self._snapshot = None
1433         if self._decoder:
1434             self._decoder.reset()
1435         return length
1436
1437     def _get_encoder(self):
1438         make_encoder = codecs.getincrementalencoder(self._encoding)
1439         self._encoder = make_encoder(self._errors)
1440         return self._encoder
1441
1442     def _get_decoder(self):
1443         make_decoder = codecs.getincrementaldecoder(self._encoding)
1444         decoder = make_decoder(self._errors)
1445         if self._readuniversal:
1446             decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
1447         self._decoder = decoder
1448         return decoder
1449
1450     # The following three methods implement an ADT for _decoded_chars.
1451     # Text returned from the decoder is buffered here until the client
1452     # requests it by calling our read() or readline() method.
1453     def _set_decoded_chars(self, chars):
1454         """Set the _decoded_chars buffer."""
1455         self._decoded_chars = chars
1456         self._decoded_chars_used = 0
1457
1458     def _get_decoded_chars(self, n=None):
1459         """Advance into the _decoded_chars buffer."""
1460         offset = self._decoded_chars_used
1461         if n is None:
1462             chars = self._decoded_chars[offset:]
1463         else:
1464             chars = self._decoded_chars[offset:offset + n]
1465         self._decoded_chars_used += len(chars)
1466         return chars
1467
1468     def _rewind_decoded_chars(self, n):
1469         """Rewind the _decoded_chars buffer."""
1470         if self._decoded_chars_used < n:
1471             raise AssertionError("rewind decoded_chars out of bounds")
1472         self._decoded_chars_used -= n
1473
1474     def _read_chunk(self):
1475         """
1476         Read and decode the next chunk of data from the BufferedReader.
1477         """
1478
1479         # The return value is True unless EOF was reached.  The decoded
1480         # string is placed in self._decoded_chars (replacing its previous
1481         # value).  The entire input chunk is sent to the decoder, though
1482         # some of it may remain buffered in the decoder, yet to be
1483         # converted.
1484
1485         if self._decoder is None:
1486             raise ValueError("no decoder")
1487
1488         if self._telling:
1489             # To prepare for tell(), we need to snapshot a point in the
1490             # file where the decoder's input buffer is empty.
1491
1492             dec_buffer, dec_flags = self._decoder.getstate()
1493             # Given this, we know there was a valid snapshot point
1494             # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1495
1496         # Read a chunk, decode it, and put the result in self._decoded_chars.
1497         input_chunk = self.buffer.read1(self._CHUNK_SIZE)
1498         eof = not input_chunk
1499         self._set_decoded_chars(self._decoder.decode(input_chunk, eof))
1500
1501         if self._telling:
1502             # At the snapshot point, len(dec_buffer) bytes before the read,
1503             # the next input to be decoded is dec_buffer + input_chunk.
1504             self._snapshot = (dec_flags, dec_buffer + input_chunk)
1505
1506         return not eof
1507
1508     def _pack_cookie(self, position, dec_flags=0,
1509                            bytes_to_feed=0, need_eof=0, chars_to_skip=0):
1510         # The meaning of a tell() cookie is: seek to position, set the
1511         # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
1512         # into the decoder with need_eof as the EOF flag, then skip
1513         # chars_to_skip characters of the decoded result.  For most simple
1514         # decoders, tell() will often just give a byte offset in the file.
1515         return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
1516                (chars_to_skip<<192) | bool(need_eof)<<256)
1517
1518     def _unpack_cookie(self, bigint):
1519         rest, position = divmod(bigint, 1<<64)
1520         rest, dec_flags = divmod(rest, 1<<64)
1521         rest, bytes_to_feed = divmod(rest, 1<<64)
1522         need_eof, chars_to_skip = divmod(rest, 1<<64)
1523         return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
1524
    def tell(self):
        """Return an integer cookie describing the current text position.

        The underlying buffer is flushed first.  When there is no decoder
        or no snapshot, the cookie is simply the buffer's byte position.
        Otherwise it is built with _pack_cookie() from a byte position
        plus the decoder information seek() needs to get back here.

        Raises IOError if the stream is not seekable, if telling is
        disabled, or if the position cannot be reconstructed from the
        snapshot.  The decoder's state is saved before the replay below
        and restored afterwards, even on error.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            next_byte = bytearray(1)
            # Each iteration stores the next input byte into next_byte[0].
            for next_byte[0] in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Put the decoder back the way we found it.
            decoder.setstate(saved_state)
1587
1588     def truncate(self, pos=None):
1589         self.flush()
1590         if pos is None:
1591             pos = self.tell()
1592         self.seek(pos)
1593         return self.buffer.truncate()
1594
1595     def seek(self, cookie, whence=0):
1596         if self.closed:
1597             raise ValueError("tell on closed file")
1598         if not self._seekable:
1599             raise IOError("underlying stream is not seekable")
1600         if whence == 1: # seek relative to current position
1601             if cookie != 0:
1602                 raise IOError("can't do nonzero cur-relative seeks")
1603             # Seeking to the current position should attempt to
1604             # sync the underlying buffer with the current position.
1605             whence = 0
1606             cookie = self.tell()
1607         if whence == 2: # seek relative to end of file
1608             if cookie != 0:
1609                 raise IOError("can't do nonzero end-relative seeks")
1610             self.flush()
1611             position = self.buffer.seek(0, 2)
1612             self._set_decoded_chars('')
1613             self._snapshot = None
1614             if self._decoder:
1615                 self._decoder.reset()
1616             return position
1617         if whence != 0:
1618             raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
1619                              (whence,))
1620         if cookie < 0:
1621             raise ValueError("negative seek position %r" % (cookie,))
1622         self.flush()
1623
1624         # The strategy of seek() is to go back to the safe start point
1625         # and replay the effect of read(chars_to_skip) from there.
1626         start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
1627             self._unpack_cookie(cookie)
1628
1629         # Seek back to the safe start point.
1630         self.buffer.seek(start_pos)
1631         self._set_decoded_chars('')
1632         self._snapshot = None
1633
1634         # Restore the decoder to its state from the safe start point.
1635         if self._decoder or dec_flags or chars_to_skip:
1636             self._decoder = self._decoder or self._get_decoder()
1637             self._decoder.setstate((b'', dec_flags))
1638             self._snapshot = (dec_flags, b'')
1639
1640         if chars_to_skip:
1641             # Just like _read_chunk, feed the decoder and save a snapshot.
1642             input_chunk = self.buffer.read(bytes_to_feed)
1643             self._set_decoded_chars(
1644                 self._decoder.decode(input_chunk, need_eof))
1645             self._snapshot = (dec_flags, input_chunk)
1646
1647             # Skip chars_to_skip of the decoded characters.
1648             if len(self._decoded_chars) < chars_to_skip:
1649                 raise IOError("can't restore logical file position")
1650             self._decoded_chars_used = chars_to_skip
1651
1652         return cookie
1653
1654     def read(self, n=None):
1655         if n is None:
1656             n = -1
1657         decoder = self._decoder or self._get_decoder()
1658         if n < 0:
1659             # Read everything.
1660             result = (self._get_decoded_chars() +
1661                       decoder.decode(self.buffer.read(), final=True))
1662             self._set_decoded_chars('')
1663             self._snapshot = None
1664             return result
1665         else:
1666             # Keep reading chunks until we have n characters to return.
1667             eof = False
1668             result = self._get_decoded_chars(n)
1669             while len(result) < n and not eof:
1670                 eof = not self._read_chunk()
1671                 result += self._get_decoded_chars(n - len(result))
1672             return result
1673
1674     def __next__(self):
1675         self._telling = False
1676         line = self.readline()
1677         if not line:
1678             self._snapshot = None
1679             self._telling = self._seekable
1680             raise StopIteration
1681         return line
1682
1683     def readline(self, limit=None):
1684         if self.closed:
1685             raise ValueError("read from closed file")
1686         if limit is None:
1687             limit = -1
1688
1689         # Grab all the decoded text (we will rewind any extra bits later).
1690         line = self._get_decoded_chars()
1691
1692         start = 0
1693         decoder = self._decoder or self._get_decoder()
1694
1695         pos = endpos = None
1696         while True:
1697             if self._readtranslate:
1698                 # Newlines are already translated, only search for \n
1699                 pos = line.find('\n', start)
1700                 if pos >= 0:
1701                     endpos = pos + 1
1702                     break
1703                 else:
1704                     start = len(line)
1705
1706             elif self._readuniversal:
1707                 # Universal newline search. Find any of \r, \r\n, \n
1708                 # The decoder ensures that \r\n are not split in two pieces
1709
1710                 # In C we'd look for these in parallel of course.
1711                 nlpos = line.find("\n", start)
1712                 crpos = line.find("\r", start)
1713                 if crpos == -1:
1714                     if nlpos == -1:
1715                         # Nothing found
1716                         start = len(line)
1717                     else:
1718                         # Found \n
1719                         endpos = nlpos + 1
1720                         break
1721                 elif nlpos == -1:
1722                     # Found lone \r
1723                     endpos = crpos + 1
1724                     break
1725                 elif nlpos < crpos:
1726                     # Found \n
1727                     endpos = nlpos + 1
1728                     break
1729                 elif nlpos == crpos + 1:
1730                     # Found \r\n
1731                     endpos = crpos + 2
1732                     break
1733                 else:
1734                     # Found \r
1735                     endpos = crpos + 1
1736                     break
1737             else:
1738                 # non-universal
1739                 pos = line.find(self._readnl)
1740                 if pos >= 0:
1741                     endpos = pos + len(self._readnl)
1742                     break
1743
1744             if limit >= 0 and len(line) >= limit:
1745                 endpos = limit  # reached length limit
1746                 break
1747
1748             # No line ending seen yet - get more data
1749             more_line = ''
1750             while self._read_chunk():
1751                 if self._decoded_chars:
1752                     break
1753             if self._decoded_chars:
1754                 line += self._get_decoded_chars()
1755             else:
1756                 # end of file
1757                 self._set_decoded_chars('')
1758                 self._snapshot = None
1759                 return line
1760
1761         if limit >= 0 and endpos > limit:
1762             endpos = limit  # don't exceed limit
1763
1764         # Rewind _decoded_chars to just after the line ending we found.
1765         self._rewind_decoded_chars(len(line) - endpos)
1766         return line[:endpos]
1767
1768     @property
1769     def newlines(self):
1770         return self._decoder.newlines if self._decoder else None
1771
class StringIO(TextIOWrapper):
    """A text stream held entirely in memory.

    The stream is a TextIOWrapper over a fresh BytesIO.  initial_value
    supplies the starting contents; encoding, errors and newline are
    forwarded unchanged to TextIOWrapper's constructor.
    """

    # XXX This is really slow, but fully functional

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        """Create the stream and, if initial_value is truthy, write it
        (converted with str() when it is not already a str) and move
        back to the start.
        """
        super(StringIO, self).__init__(
            BytesIO(), encoding=encoding, errors=errors, newline=newline)
        if not initial_value:
            return
        text = initial_value
        if not isinstance(text, str):
            text = str(text)
        self.write(text)
        # Go back to the beginning so the initial contents can be read.
        self.seek(0)

    def getvalue(self):
        """Return the whole contents of the stream as a str.

        Pending output is flushed, then the bytes held by the
        underlying buffer are decoded with the stream's encoding and
        error handler.
        """
        self.flush()
        raw = self.buffer.getvalue()
        return raw.decode(self._encoding, self._errors)