Objects/fileobject.c

   1 /* File object implementation */
   2
   3 #include "Python.h"
   4 #include "structmember.h"
   5
   6 #ifndef DONT_HAVE_SYS_TYPES_H
   7 #include <sys/types.h>
   8 #endif /* DONT_HAVE_SYS_TYPES_H */
   9
  10 #ifdef MS_WINDOWS
  11 #define fileno _fileno
  12 /* can simulate truncate with Win32 API functions; see file_truncate */
  13 #define HAVE_FTRUNCATE
  14 #define WIN32_LEAN_AND_MEAN
  15 #include <windows.h>
  16 #endif
  17
  18 #ifdef _MSC_VER
  19 /* Need GetVersion to see if on NT so safe to use _wfopen */
  20 #define WIN32_LEAN_AND_MEAN
  21 #include <windows.h>
  22 #endif /* _MSC_VER */
  23
  24 #ifdef macintosh
  25 #ifdef USE_GUSI
  26 #define HAVE_FTRUNCATE
  27 #endif
  28 #endif
  29
  30 #ifdef __MWERKS__
  31 /* Mwerks fopen() doesn't always set errno */
  32 #define NO_FOPEN_ERRNO
  33 #endif
  34
  35 #if defined(PYOS_OS2) && defined(PYCC_GCC)
  36 #include <io.h>
  37 #endif
  38
  39 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
  40
  41 #ifndef DONT_HAVE_ERRNO_H
  42 #include <errno.h>
  43 #endif
  44
  45 #ifdef HAVE_GETC_UNLOCKED
  46 #define GETC(f) getc_unlocked(f)
  47 #define FLOCKFILE(f) flockfile(f)
  48 #define FUNLOCKFILE(f) funlockfile(f)
  49 #else
  50 #define GETC(f) getc(f)
  51 #define FLOCKFILE(f)
  52 #define FUNLOCKFILE(f)
  53 #endif
  54
  55 #ifdef WITH_UNIVERSAL_NEWLINES
  56 /* Bits in f_newlinetypes */
  57 #define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
  58 #define NEWLINE_CR 1            /* \r newline seen */
  59 #define NEWLINE_LF 2            /* \n newline seen */
  60 #define NEWLINE_CRLF 4          /* \r\n newline seen */
  61 #endif
  62
  63 FILE *
  64 PyFile_AsFile(PyObject *f)
  65 {
  66         if (f == NULL || !PyFile_Check(f))
  67                 return NULL;
  68         else
  69                 return ((PyFileObject *)f)->f_fp;
  70 }
  71
  72 PyObject *
  73 PyFile_Name(PyObject *f)
  74 {
  75         if (f == NULL || !PyFile_Check(f))
  76                 return NULL;
  77         else
  78                 return ((PyFileObject *)f)->f_name;
  79 }
  80
  81 /* On Unix, fopen will succeed for directories.
  82    In Python, there should be no file objects referring to
  83    directories, so we need a check.  */
  84
  85 static PyFileObject*
  86 dircheck(PyFileObject* f)
  87 {
  88 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
  89         struct stat buf;
  90         if (f->f_fp == NULL)
  91                 return f;
  92         if (fstat(fileno(f->f_fp), &buf) == 0 &&
  93             S_ISDIR(buf.st_mode)) {
  94 #ifdef HAVE_STRERROR
  95                 char *msg = strerror(EISDIR);
  96 #else
  97                 char *msg = "Is a directory";
  98 #endif
  99                 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
 100                                                       EISDIR, msg);
 101                 PyErr_SetObject(PyExc_IOError, exc);
 102                 Py_XDECREF(exc);
 103                 return NULL;
 104         }
 105 #endif
 106         return f;
 107 }
 108
 109
 110 static PyObject *
 111 fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
 112                  int (*close)(FILE *), PyObject *wname)
 113 {
 114         assert(f != NULL);
 115         assert(PyFile_Check(f));
 116         assert(f->f_fp == NULL);
 117
 118         Py_DECREF(f->f_name);
 119         Py_DECREF(f->f_mode);
 120         Py_DECREF(f->f_encoding);
 121 #ifdef Py_USING_UNICODE
 122         if (wname)
 123                 f->f_name = PyUnicode_FromObject(wname);
 124         else
 125 #endif
 126                 f->f_name = PyString_FromString(name);
 127         f->f_mode = PyString_FromString(mode);
 128
 129         f->f_close = close;
 130         f->f_softspace = 0;
 131         f->f_binary = strchr(mode,'b') != NULL;
 132         f->f_buf = NULL;
 133 #ifdef WITH_UNIVERSAL_NEWLINES
 134         f->f_univ_newline = (strchr(mode, 'U') != NULL);
 135         f->f_newlinetypes = NEWLINE_UNKNOWN;
 136         f->f_skipnextlf = 0;
 137 #endif
 138         Py_INCREF(Py_None);
 139         f->f_encoding = Py_None;
 140
 141         if (f->f_name == NULL || f->f_mode == NULL)
 142                 return NULL;
 143         f->f_fp = fp;
 144         f = dircheck(f);
 145         return (PyObject *) f;
 146 }
 147
 148 static PyObject *
 149 open_the_file(PyFileObject *f, char *name, char *mode)
 150 {
 151         assert(f != NULL);
 152         assert(PyFile_Check(f));
 153 #ifdef MS_WINDOWS
 154         /* windows ignores the passed name in order to support Unicode */
 155         assert(f->f_name != NULL);
 156 #else
 157         assert(name != NULL);
 158 #endif
 159         assert(mode != NULL);
 160         assert(f->f_fp == NULL);
 161
 162         /* rexec.py can't stop a user from getting the file() constructor --
 163            all they have to do is get *any* file object f, and then do
 164            type(f).  Here we prevent them from doing damage with it. */
 165         if (PyEval_GetRestricted()) {
 166                 PyErr_SetString(PyExc_IOError,
 167                 "file() constructor not accessible in restricted mode");
 168                 return NULL;
 169         }
 170         errno = 0;
 171 #ifdef HAVE_FOPENRF
 172         if (*mode == '*') {
 173                 FILE *fopenRF();
 174                 f->f_fp = fopenRF(name, mode+1);
 175         }
 176         else
 177 #endif
 178         {
 179 #ifdef WITH_UNIVERSAL_NEWLINES
 180                 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
 181                         mode = "rb";
 182 #else
 183                 /* Compatibility: specifying U in a Python without universal
 184                 ** newlines is allowed, and the file is opened as a normal text
 185                 ** file.
 186                 */
 187                 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
 188                         mode = "r";
 189 #endif
 190 #ifdef MS_WINDOWS
 191                 if (PyUnicode_Check(f->f_name)) {
 192                         PyObject *wmode;
 193                         wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
 194                         if (f->f_name && wmode) {
 195                                 Py_BEGIN_ALLOW_THREADS
 196                                 /* PyUnicode_AS_UNICODE OK without thread
 197                                    lock as it is a simple dereference. */
 198                                 f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
 199                                                   PyUnicode_AS_UNICODE(wmode));
 200                                 Py_END_ALLOW_THREADS
 201                         }
 202                         Py_XDECREF(wmode);
 203                 }
 204 #endif
 205                 if (NULL == f->f_fp && NULL != name) {
 206                         Py_BEGIN_ALLOW_THREADS
 207                         f->f_fp = fopen(name, mode);
 208                         Py_END_ALLOW_THREADS
 209                 }
 210         }
 211         if (f->f_fp == NULL) {
 212 #ifdef NO_FOPEN_ERRNO
 213                 /* Metroworks only, wich does not always sets errno */
 214                 if (errno == 0) {
 215                         PyObject *v;
 216                         v = Py_BuildValue("(is)", 0, "Cannot open file");
 217                         if (v != NULL) {
 218                                 PyErr_SetObject(PyExc_IOError, v);
 219                                 Py_DECREF(v);
 220                         }
 221                         return NULL;
 222                 }
 223 #endif
 224 #ifdef _MSC_VER
 225                 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
 226                  * across all Windows flavors.  When it sets EINVAL varies
 227                  * across Windows flavors, the exact conditions aren't
 228                  * documented, and the answer lies in the OS's implementation
 229                  * of Win32's CreateFile function (whose source is secret).
 230                  * Seems the best we can do is map EINVAL to ENOENT.
 231                  */
 232                 if (errno == 0) /* bad mode string */
 233                         errno = EINVAL;
 234                 else if (errno == EINVAL) /* unknown, but not a mode string */
 235                         errno = ENOENT;
 236 #endif
 237                 if (errno == EINVAL)
 238                         PyErr_Format(PyExc_IOError, "invalid mode: %s",
 239                                      mode);
 240                 else
 241 #ifdef MS_WINDOWS
 242                         PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
 243 #else
 244                         PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
 245 #endif /* MS_WINDOWS */
 246                 f = NULL;
 247         }
 248         if (f != NULL)
 249                 f = dircheck(f);
 250         return (PyObject *)f;
 251 }
 252
 253 PyObject *
 254 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
 255 {
 256         PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
 257                                                              NULL, NULL);
 258         if (f != NULL) {
 259                 if (fill_file_fields(f, fp, name, mode, close, NULL) == NULL) {
 260                         Py_DECREF(f);
 261                         f = NULL;
 262                 }
 263         }
 264         return (PyObject *) f;
 265 }
 266
 267 PyObject *
 268 PyFile_FromString(char *name, char *mode)
 269 {
 270         extern int fclose(FILE *);
 271         PyFileObject *f;
 272
 273         f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
 274         if (f != NULL) {
 275                 if (open_the_file(f, name, mode) == NULL) {
 276                         Py_DECREF(f);
 277                         f = NULL;
 278                 }
 279         }
 280         return (PyObject *)f;
 281 }
 282
 283 void
 284 PyFile_SetBufSize(PyObject *f, int bufsize)
 285 {
 286         PyFileObject *file = (PyFileObject *)f;
 287         if (bufsize >= 0) {
 288                 int type;
 289                 switch (bufsize) {
 290                 case 0:
 291                         type = _IONBF;
 292                         break;
 293 #ifdef HAVE_SETVBUF
 294                 case 1:
 295                         type = _IOLBF;
 296                         bufsize = BUFSIZ;
 297                         break;
 298 #endif
 299                 default:
 300                         type = _IOFBF;
 301 #ifndef HAVE_SETVBUF
 302                         bufsize = BUFSIZ;
 303 #endif
 304                         break;
 305                 }
 306                 fflush(file->f_fp);
 307                 if (type == _IONBF) {
 308                         PyMem_Free(file->f_setbuf);
 309                         file->f_setbuf = NULL;
 310                 } else {
 311                         file->f_setbuf = PyMem_Realloc(file->f_setbuf, bufsize);
 312                 }
 313 #ifdef HAVE_SETVBUF
 314                 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
 315 #else /* !HAVE_SETVBUF */
 316                 setbuf(file->f_fp, file->f_setbuf);
 317 #endif /* !HAVE_SETVBUF */
 318         }
 319 }
 320
 321 /* Set the encoding used to output Unicode strings.
 322    Returh 1 on success, 0 on failure. */
 323
 324 int
 325 PyFile_SetEncoding(PyObject *f, const char *enc)
 326 {
 327         PyFileObject *file = (PyFileObject*)f;
 328         PyObject *str = PyString_FromString(enc);
 329         if (!str)
 330                 return 0;
 331         Py_DECREF(file->f_encoding);
 332         file->f_encoding = str;
 333         return 1;
 334 }
 335
 336 static PyObject *
 337 err_closed(void)
 338 {
 339         PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
 340         return NULL;
 341 }
 342
 343 static void drop_readahead(PyFileObject *);
 344
 345 /* Methods */
 346
 347 static void
 348 file_dealloc(PyFileObject *f)
 349 {
 350         if (f->f_fp != NULL && f->f_close != NULL) {
 351                 Py_BEGIN_ALLOW_THREADS
 352                 (*f->f_close)(f->f_fp);
 353                 Py_END_ALLOW_THREADS
 354         }
 355         PyMem_Free(f->f_setbuf);
 356         Py_XDECREF(f->f_name);
 357         Py_XDECREF(f->f_mode);
 358         Py_XDECREF(f->f_encoding);
 359         drop_readahead(f);
 360         f->ob_type->tp_free((PyObject *)f);
 361 }
 362
 363 static PyObject *
 364 file_repr(PyFileObject *f)
 365 {
 366         if (PyUnicode_Check(f->f_name)) {
 367 #ifdef Py_USING_UNICODE
 368                 PyObject *ret = NULL;
 369                 PyObject *name;
 370                 name = PyUnicode_AsUnicodeEscapeString(f->f_name);
 371                 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
 372                                    f->f_fp == NULL ? "closed" : "open",
 373                                    PyString_AsString(name),
 374                                    PyString_AsString(f->f_mode),
 375                                    f);
 376                 Py_XDECREF(name);
 377                 return ret;
 378 #endif
 379         } else {
 380                 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
 381                                    f->f_fp == NULL ? "closed" : "open",
 382                                    PyString_AsString(f->f_name),
 383                                    PyString_AsString(f->f_mode),
 384                                    f);
 385         }
 386 }
 387
 388 static PyObject *
 389 file_close(PyFileObject *f)
 390 {
 391         int sts = 0;
 392         if (f->f_fp != NULL) {
 393                 if (f->f_close != NULL) {
 394                         Py_BEGIN_ALLOW_THREADS
 395                         errno = 0;
 396                         sts = (*f->f_close)(f->f_fp);
 397                         Py_END_ALLOW_THREADS
 398                 }
 399                 f->f_fp = NULL;
 400         }
 401         PyMem_Free(f->f_setbuf);
 402         f->f_setbuf = NULL;
 403         if (sts == EOF)
 404                 return PyErr_SetFromErrno(PyExc_IOError);
 405         if (sts != 0)
 406                 return PyInt_FromLong((long)sts);
 407         Py_INCREF(Py_None);
 408         return Py_None;
 409 }
 410
 411
 412 /* Our very own off_t-like type, 64-bit if possible */
 413 #if !defined(HAVE_LARGEFILE_SUPPORT)
 414 typedef off_t Py_off_t;
 415 #elif SIZEOF_OFF_T >= 8
 416 typedef off_t Py_off_t;
 417 #elif SIZEOF_FPOS_T >= 8
 418 typedef fpos_t Py_off_t;
 419 #else
 420 #error "Large file support, but neither off_t nor fpos_t is large enough."
 421 #endif
 422
 423
 424 /* a portable fseek() function
 425    return 0 on success, non-zero on failure (with errno set) */
 426 static int
 427 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
 428 {
 429 #if !defined(HAVE_LARGEFILE_SUPPORT)
 430         return fseek(fp, offset, whence);
 431 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
 432         return fseeko(fp, offset, whence);
 433 #elif defined(HAVE_FSEEK64)
 434         return fseek64(fp, offset, whence);
 435 #elif defined(__BEOS__)
 436         return _fseek(fp, offset, whence);
 437 #elif SIZEOF_FPOS_T >= 8
 438         /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
 439            and fgetpos() to implement fseek()*/
 440         fpos_t pos;
 441         switch (whence) {
 442         case SEEK_END:
 443 #ifdef MS_WINDOWS
 444                 fflush(fp);
 445                 if (_lseeki64(fileno(fp), 0, 2) == -1)
 446                         return -1;
 447 #else
 448                 if (fseek(fp, 0, SEEK_END) != 0)
 449                         return -1;
 450 #endif
 451                 /* fall through */
 452         case SEEK_CUR:
 453                 if (fgetpos(fp, &pos) != 0)
 454                         return -1;
 455                 offset += pos;
 456                 break;
 457         /* case SEEK_SET: break; */
 458         }
 459         return fsetpos(fp, &offset);
 460 #else
 461 #error "Large file support, but no way to fseek."
 462 #endif
 463 }
 464
 465
 466 /* a portable ftell() function
 467    Return -1 on failure with errno set appropriately, current file
 468    position on success */
 469 static Py_off_t
 470 _portable_ftell(FILE* fp)
 471 {
 472 #if !defined(HAVE_LARGEFILE_SUPPORT)
 473         return ftell(fp);
 474 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
 475         return ftello(fp);
 476 #elif defined(HAVE_FTELL64)
 477         return ftell64(fp);
 478 #elif SIZEOF_FPOS_T >= 8
 479         fpos_t pos;
 480         if (fgetpos(fp, &pos) != 0)
 481                 return -1;
 482         return pos;
 483 #else
 484 #error "Large file support, but no way to ftell."
 485 #endif
 486 }
 487
 488
 489 static PyObject *
 490 file_seek(PyFileObject *f, PyObject *args)
 491 {
 492         int whence;
 493         int ret;
 494         Py_off_t offset;
 495         PyObject *offobj;
 496
 497         if (f->f_fp == NULL)
 498                 return err_closed();
 499         drop_readahead(f);
 500         whence = 0;
 501         if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
 502                 return NULL;
 503 #if !defined(HAVE_LARGEFILE_SUPPORT)
 504         offset = PyInt_AsLong(offobj);
 505 #else
 506         offset = PyLong_Check(offobj) ?
 507                 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
 508 #endif
 509         if (PyErr_Occurred())
 510                 return NULL;
 511
 512         Py_BEGIN_ALLOW_THREADS
 513         errno = 0;
 514         ret = _portable_fseek(f->f_fp, offset, whence);
 515         Py_END_ALLOW_THREADS
 516
 517         if (ret != 0) {
 518                 PyErr_SetFromErrno(PyExc_IOError);
 519                 clearerr(f->f_fp);
 520                 return NULL;
 521         }
 522 #ifdef WITH_UNIVERSAL_NEWLINES
 523         f->f_skipnextlf = 0;
 524 #endif
 525         Py_INCREF(Py_None);
 526         return Py_None;
 527 }
 528
 529
 530 #ifdef HAVE_FTRUNCATE
 531 static PyObject *
 532 file_truncate(PyFileObject *f, PyObject *args)
 533 {
 534         int ret;
 535         Py_off_t newsize;
 536         PyObject *newsizeobj;
 537
 538         if (f->f_fp == NULL)
 539                 return err_closed();
 540         newsizeobj = NULL;
 541         if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
 542                 return NULL;
 543
 544         /* Set newsize to current postion if newsizeobj NULL, else to the
 545            specified value. */
 546         if (newsizeobj != NULL) {
 547 #if !defined(HAVE_LARGEFILE_SUPPORT)
 548                 newsize = PyInt_AsLong(newsizeobj);
 549 #else
 550                 newsize = PyLong_Check(newsizeobj) ?
 551                                 PyLong_AsLongLong(newsizeobj) :
 552                                 PyInt_AsLong(newsizeobj);
 553 #endif
 554                 if (PyErr_Occurred())
 555                         return NULL;
 556         }
 557         else {
 558                 /* Default to current position. */
 559                 Py_BEGIN_ALLOW_THREADS
 560                 errno = 0;
 561                 newsize = _portable_ftell(f->f_fp);
 562                 Py_END_ALLOW_THREADS
 563                 if (newsize == -1)
 564                         goto onioerror;
 565         }
 566
 567         /* Flush the file. */
 568         Py_BEGIN_ALLOW_THREADS
 569         errno = 0;
 570         ret = fflush(f->f_fp);
 571         Py_END_ALLOW_THREADS
 572         if (ret != 0)
 573                 goto onioerror;
 574
 575 #ifdef MS_WINDOWS
 576         /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
 577            so don't even try using it. */
 578         {
 579                 Py_off_t current;       /* current file position */
 580                 HANDLE hFile;
 581                 int error;
 582
 583                 /* current <- current file postion. */
 584                 if (newsizeobj == NULL)
 585                         current = newsize;
 586                 else {
 587                         Py_BEGIN_ALLOW_THREADS
 588                         errno = 0;
 589                         current = _portable_ftell(f->f_fp);
 590                         Py_END_ALLOW_THREADS
 591                         if (current == -1)
 592                                 goto onioerror;
 593                 }
 594
 595                 /* Move to newsize. */
 596                 if (current != newsize) {
 597                         Py_BEGIN_ALLOW_THREADS
 598                         errno = 0;
 599                         error = _portable_fseek(f->f_fp, newsize, SEEK_SET)
 600                                 != 0;
 601                         Py_END_ALLOW_THREADS
 602                         if (error)
 603                                 goto onioerror;
 604                 }
 605
 606                 /* Truncate.  Note that this may grow the file! */
 607                 Py_BEGIN_ALLOW_THREADS
 608                 errno = 0;
 609                 hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
 610                 error = hFile == (HANDLE)-1;
 611                 if (!error) {
 612                         error = SetEndOfFile(hFile) == 0;
 613                         if (error)
 614                                 errno = EACCES;
 615                 }
 616                 Py_END_ALLOW_THREADS
 617                 if (error)
 618                         goto onioerror;
 619
 620                 /* Restore original file position. */
 621                 if (current != newsize) {
 622                         Py_BEGIN_ALLOW_THREADS
 623                         errno = 0;
 624                         error = _portable_fseek(f->f_fp, current, SEEK_SET)
 625                                 != 0;
 626                         Py_END_ALLOW_THREADS
 627                         if (error)
 628                                 goto onioerror;
 629                 }
 630         }
 631 #else
 632         Py_BEGIN_ALLOW_THREADS
 633         errno = 0;
 634         ret = ftruncate(fileno(f->f_fp), newsize);
 635         Py_END_ALLOW_THREADS
 636         if (ret != 0) goto onioerror;
 637 #endif /* !MS_WINDOWS */
 638
 639         Py_INCREF(Py_None);
 640         return Py_None;
 641
 642 onioerror:
 643         PyErr_SetFromErrno(PyExc_IOError);
 644         clearerr(f->f_fp);
 645         return NULL;
 646 }
 647 #endif /* HAVE_FTRUNCATE */
 648
 649 static PyObject *
 650 file_tell(PyFileObject *f)
 651 {
 652         Py_off_t pos;
 653
 654         if (f->f_fp == NULL)
 655                 return err_closed();
 656         Py_BEGIN_ALLOW_THREADS
 657         errno = 0;
 658         pos = _portable_ftell(f->f_fp);
 659         Py_END_ALLOW_THREADS
 660         if (pos == -1) {
 661                 PyErr_SetFromErrno(PyExc_IOError);
 662                 clearerr(f->f_fp);
 663                 return NULL;
 664         }
 665 #ifdef WITH_UNIVERSAL_NEWLINES
 666         if (f->f_skipnextlf) {
 667                 int c;
 668                 c = GETC(f->f_fp);
 669                 if (c == '\n') {
 670                         pos++;
 671                         f->f_skipnextlf = 0;
 672                 } else if (c != EOF) ungetc(c, f->f_fp);
 673         }
 674 #endif
 675 #if !defined(HAVE_LARGEFILE_SUPPORT)
 676         return PyInt_FromLong(pos);
 677 #else
 678         return PyLong_FromLongLong(pos);
 679 #endif
 680 }
 681
 682 static PyObject *
 683 file_fileno(PyFileObject *f)
 684 {
 685         if (f->f_fp == NULL)
 686                 return err_closed();
 687         return PyInt_FromLong((long) fileno(f->f_fp));
 688 }
 689
 690 static PyObject *
 691 file_flush(PyFileObject *f)
 692 {
 693         int res;
 694
 695         if (f->f_fp == NULL)
 696                 return err_closed();
 697         Py_BEGIN_ALLOW_THREADS
 698         errno = 0;
 699         res = fflush(f->f_fp);
 700         Py_END_ALLOW_THREADS
 701         if (res != 0) {
 702                 PyErr_SetFromErrno(PyExc_IOError);
 703                 clearerr(f->f_fp);
 704                 return NULL;
 705         }
 706         Py_INCREF(Py_None);
 707         return Py_None;
 708 }
 709
 710 static PyObject *
 711 file_isatty(PyFileObject *f)
 712 {
 713         long res;
 714         if (f->f_fp == NULL)
 715                 return err_closed();
 716         Py_BEGIN_ALLOW_THREADS
 717         res = isatty((int)fileno(f->f_fp));
 718         Py_END_ALLOW_THREADS
 719         return PyBool_FromLong(res);
 720 }
 721
 722
 723 #if BUFSIZ < 8192
 724 #define SMALLCHUNK 8192
 725 #else
 726 #define SMALLCHUNK BUFSIZ
 727 #endif
 728
 729 #if SIZEOF_INT < 4
 730 #define BIGCHUNK  (512 * 32)
 731 #else
 732 #define BIGCHUNK  (512 * 1024)
 733 #endif
 734
 735 static size_t
 736 new_buffersize(PyFileObject *f, size_t currentsize)
 737 {
 738 #ifdef HAVE_FSTAT
 739         off_t pos, end;
 740         struct stat st;
 741         if (fstat(fileno(f->f_fp), &st) == 0) {
 742                 end = st.st_size;
 743                 /* The following is not a bug: we really need to call lseek()
 744                    *and* ftell().  The reason is that some stdio libraries
 745                    mistakenly flush their buffer when ftell() is called and
 746                    the lseek() call it makes fails, thereby throwing away
 747                    data that cannot be recovered in any way.  To avoid this,
 748                    we first test lseek(), and only call ftell() if lseek()
 749                    works.  We can't use the lseek() value either, because we
 750                    need to take the amount of buffered data into account.
 751                    (Yet another reason why stdio stinks. :-) */
 752 #ifdef USE_GUSI2
 753                 pos = lseek(fileno(f->f_fp), 1L, SEEK_CUR);
 754                 pos = lseek(fileno(f->f_fp), -1L, SEEK_CUR);
 755 #else
 756                 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
 757 #endif
 758                 if (pos >= 0) {
 759                         pos = ftell(f->f_fp);
 760                 }
 761                 if (pos < 0)
 762                         clearerr(f->f_fp);
 763                 if (end > pos && pos >= 0)
 764                         return currentsize + end - pos + 1;
 765                 /* Add 1 so if the file were to grow we'd notice. */
 766         }
 767 #endif
 768         if (currentsize > SMALLCHUNK) {
 769                 /* Keep doubling until we reach BIGCHUNK;
 770                    then keep adding BIGCHUNK. */
 771                 if (currentsize <= BIGCHUNK)
 772                         return currentsize + currentsize;
 773                 else
 774                         return currentsize + BIGCHUNK;
 775         }
 776         return currentsize + SMALLCHUNK;
 777 }
 778
 779 #if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
 780 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
 781 #else
 782 #ifdef EWOULDBLOCK
 783 #define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
 784 #else
 785 #ifdef EAGAIN
 786 #define BLOCKED_ERRNO(x) ((x) == EAGAIN)
 787 #else
 788 #define BLOCKED_ERRNO(x) 0
 789 #endif
 790 #endif
 791 #endif
 792
 793 static PyObject *
 794 file_read(PyFileObject *f, PyObject *args)
 795 {
 796         long bytesrequested = -1;
 797         size_t bytesread, buffersize, chunksize;
 798         PyObject *v;
 799
 800         if (f->f_fp == NULL)
 801                 return err_closed();
 802         if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
 803                 return NULL;
 804         if (bytesrequested < 0)
 805                 buffersize = new_buffersize(f, (size_t)0);
 806         else
 807                 buffersize = bytesrequested;
 808         if (buffersize > INT_MAX) {
 809                 PyErr_SetString(PyExc_OverflowError,
 810         "requested number of bytes is more than a Python string can hold");
 811                 return NULL;
 812         }
 813         v = PyString_FromStringAndSize((char *)NULL, buffersize);
 814         if (v == NULL)
 815                 return NULL;
 816         bytesread = 0;
 817         for (;;) {
 818                 Py_BEGIN_ALLOW_THREADS
 819                 errno = 0;
 820                 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
 821                           buffersize - bytesread, f->f_fp, (PyObject *)f);
 822                 Py_END_ALLOW_THREADS
 823                 if (chunksize == 0) {
 824                         if (!ferror(f->f_fp))
 825                                 break;
 826                         clearerr(f->f_fp);
 827                         /* When in non-blocking mode, data shouldn't
 828                          * be discarded if a blocking signal was
 829                          * received. That will also happen if
 830                          * chunksize != 0, but bytesread < buffersize. */
 831                         if (bytesread > 0 && BLOCKED_ERRNO(errno))
 832                                 break;
 833                         PyErr_SetFromErrno(PyExc_IOError);
 834                         Py_DECREF(v);
 835                         return NULL;
 836                 }
 837                 bytesread += chunksize;
 838                 if (bytesread < buffersize) {
 839                         clearerr(f->f_fp);
 840                         break;
 841                 }
 842                 if (bytesrequested < 0) {
 843                         buffersize = new_buffersize(f, buffersize);
 844                         if (_PyString_Resize(&v, buffersize) < 0)
 845                                 return NULL;
 846                 } else {
 847                         /* Got what was requested. */
 848                         break;
 849                 }
 850         }
 851         if (bytesread != buffersize)
 852                 _PyString_Resize(&v, bytesread);
 853         return v;
 854 }
 855
 856 static PyObject *
 857 file_readinto(PyFileObject *f, PyObject *args)
 858 {
 859         char *ptr;
 860         int ntodo;
 861         size_t ndone, nnow;
 862
 863         if (f->f_fp == NULL)
 864                 return err_closed();
 865         if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
 866                 return NULL;
 867         ndone = 0;
 868         while (ntodo > 0) {
 869                 Py_BEGIN_ALLOW_THREADS
 870                 errno = 0;
 871                 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
 872                                                 (PyObject *)f);
 873                 Py_END_ALLOW_THREADS
 874                 if (nnow == 0) {
 875                         if (!ferror(f->f_fp))
 876                                 break;
 877                         PyErr_SetFromErrno(PyExc_IOError);
 878                         clearerr(f->f_fp);
 879                         return NULL;
 880                 }
 881                 ndone += nnow;
 882                 ntodo -= nnow;
 883         }
 884         return PyInt_FromLong((long)ndone);
 885 }
 886
 887 /**************************************************************************
 888 Routine to get next line using platform fgets().
 889
 890 Under MSVC 6:
 891
 892 + MS threadsafe getc is very slow (multiple layers of function calls before+
 893   after each character, to lock+unlock the stream).
 894 + The stream-locking functions are MS-internal -- can't access them from user
 895   code.
 896 + There's nothing Tim could find in the MS C or platform SDK libraries that
 897   can worm around this.
 898 + MS fgets locks/unlocks only once per line; it's the only hook we have.
 899
 900 So we use fgets for speed(!), despite that it's painful.
 901
 902 MS realloc is also slow.
 903
 904 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
 905 have):
 906         Linux           a wash
 907         Solaris         a wash
 908         Tru64 Unix      getline_via_fgets significantly faster
 909
 910 CAUTION:  The C std isn't clear about this:  in those cases where fgets
 911 writes something into the buffer, can it write into any position beyond the
 912 required trailing null byte?  MSVC 6 fgets does not, and no platform is (yet)
 913 known on which it does; and it would be a strange way to code fgets. Still,
 914 getline_via_fgets may not work correctly if it does.  The std test
 915 test_bufio.py should fail if platform fgets() routinely writes beyond the
 916 trailing null byte.  #define DONT_USE_FGETS_IN_GETLINE to disable this code.
 917 **************************************************************************/
 918
 919 /* Use this routine if told to, or by default on non-getc_unlocked()
 920  * platforms unless told not to.  Yikes!  Let's spell that out:
 921  * On a platform with getc_unlocked():
 922  *     By default, use getc_unlocked().
 923  *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 924  * On a platform without getc_unlocked():
 925  *     By default, use fgets().
 926  *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 927  */
 928 #if !defined(USE_FGETS_IN_GETLINE) && !defined(HAVE_GETC_UNLOCKED)
 929 #define USE_FGETS_IN_GETLINE
 930 #endif
 931
 932 #if defined(DONT_USE_FGETS_IN_GETLINE) && defined(USE_FGETS_IN_GETLINE)
 933 #undef USE_FGETS_IN_GETLINE
 934 #endif
 935
 936 #ifdef USE_FGETS_IN_GETLINE
 937 static PyObject*
 938 getline_via_fgets(FILE *fp)
 939 {
             /* Read the next line from fp with fgets() and return it as a new
              * string object; a trailing '\n' is kept when the line has one.
              * When fgets() returns NULL, the bytes gathered so far (possibly
              * none) are returned.  NULL is returned if PyErr_CheckSignals()
              * reports an error, if the string object cannot be created or
              * resized, or if the line would need more than INT_MAX bytes
              * (OverflowError).
              */
 940 /* INITBUFSIZE is the maximum line length that lets us get away with the fast
 941  * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 942  * to fill this much of the buffer with a known value in order to figure out
 943  * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 944  * than "most" lines, we waste time filling unused buffer slots.  100 is
 945  * surely adequate for most peoples' email archives, chewing over source code,
 946  * etc -- "regular old text files".
 947  * MAXBUFSIZE is the maximum line length that lets us get away with the less
 948  * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 949  * cautions about boosting that.  300 was chosen because the worst real-life
 950  * text-crunching job reported on Python-Dev was a mail-log crawler where over
 951  * half the lines were 254 chars.
 952  */
 953 #define INITBUFSIZE 100
 954 #define MAXBUFSIZE 300
 955         char* p;        /* temp */
 956         char buf[MAXBUFSIZE];
 957         PyObject* v;    /* the string object result */
 958         char* pvfree;   /* address of next free slot */
 959         char* pvend;    /* address one beyond last free slot */
 960         size_t nfree;   /* # of free buffer slots; pvend-pvfree */
 961         size_t total_v_size;  /* total # of slots in buffer */
 962         size_t increment;       /* amount to increment the buffer */
 963
 964         /* Optimize for normal case:  avoid _PyString_Resize if at all
 965          * possible via first reading into stack buffer "buf".
 966          */
 967         total_v_size = INITBUFSIZE;     /* start small and pray */
 968         pvfree = buf;
 969         for (;;) {
 970                 Py_BEGIN_ALLOW_THREADS
 971                 pvend = buf + total_v_size;
 972                 nfree = pvend - pvfree;
                     /* Fill the unused space with '\n' so that the memchr()
                      * below can work out how far fgets() wrote.
                      */
 973                 memset(pvfree, '\n', nfree);
 974                 p = fgets(pvfree, nfree, fp);
 975                 Py_END_ALLOW_THREADS
 976
 977                 if (p == NULL) {
                             /* NOTE(review): ferror() is not consulted, so a
                              * read error is handled exactly like EOF here --
                              * confirm that this is intended.
                              */
 978                         clearerr(fp);
 979                         if (PyErr_CheckSignals())
 980                                 return NULL;
                             /* Return what the earlier pass (if any) left in
                              * buf; pvfree still marks the end of that data.
                              */
 981                         v = PyString_FromStringAndSize(buf, pvfree - buf);
 982                         return v;
 983                 }
 984                 /* fgets read *something* */
 985                 p = memchr(pvfree, '\n', nfree);
 986                 if (p != NULL) {
 987                         /* Did the \n come from fgets or from us?
 988                          * Since fgets stops at the first \n, and then writes
 989                          * \0, if it's from fgets a \0 must be next.  But if
 990                          * that's so, it could not have come from us, since
 991                          * the \n's we filled the buffer with have only more
 992                          * \n's to the right.
 993                          */
 994                         if (p+1 < pvend && *(p+1) == '\0') {
 995                                 /* It's from fgets:  we win!  In particular,
 996                                  * we haven't done any mallocs yet, and can
 997                                  * build the final result on the first try.
 998                                  */
 999                                 ++p;    /* include \n from fgets */
1000                         }
1001                         else {
1002                                 /* Must be from us:  fgets didn't fill the
1003                                  * buffer and didn't find a newline, so it
1004                                  * must be the last and newline-free line of
1005                                  * the file.
1006                                  */
1007                                 assert(p > pvfree && *(p-1) == '\0');
1008                                 --p;    /* don't include \0 from fgets */
1009                         }
1010                         v = PyString_FromStringAndSize(buf, p - buf);
1011                         return v;
1012                 }
1013                 /* yuck:  fgets overwrote all the newlines, i.e. the entire
1014                  * buffer.  So this line isn't over yet, or maybe it is but
1015                  * we're exactly at EOF.  If we haven't already, try using the
1016                  * rest of the stack buffer.
1017                  */
1018                 assert(*(pvend-1) == '\0');
1019                 if (pvfree == buf) {
1020                         pvfree = pvend - 1;     /* overwrite trailing null */
1021                         total_v_size = MAXBUFSIZE;
1022                 }
1023                 else
1024                         break;
1025         }
1026
1027         /* The stack buffer isn't big enough; malloc a string object and read
1028          * into its buffer.
1029          */
             /* The string object starts out at twice the stack buffer's size. */
1030         total_v_size = MAXBUFSIZE << 1;
1031         v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
1032         if (v == NULL)
1033                 return v;
1034         /* copy over everything except the last null byte */
1035         memcpy(BUF(v), buf, MAXBUFSIZE-1);
1036         pvfree = BUF(v) + MAXBUFSIZE - 1;
1037
1038         /* Keep reading stuff into v; if it ever ends successfully, break
1039          * after setting p one beyond the end of the line.  The code here is
1040          * very much like the code above, except reads into v's buffer; see
1041          * the code above for detailed comments about the logic.
1042          */
1043         for (;;) {
1044                 Py_BEGIN_ALLOW_THREADS
1045                 pvend = BUF(v) + total_v_size;
1046                 nfree = pvend - pvfree;
1047                 memset(pvfree, '\n', nfree);
1048                 p = fgets(pvfree, nfree, fp);
1049                 Py_END_ALLOW_THREADS
1050
1051                 if (p == NULL) {
1052                         clearerr(fp);
1053                         if (PyErr_CheckSignals()) {
1054                                 Py_DECREF(v);
1055                                 return NULL;
1056                         }
                             /* Nothing new was read: the line ends at pvfree. */
1057                         p = pvfree;
1058                         break;
1059                 }
1060                 p = memchr(pvfree, '\n', nfree);
1061                 if (p != NULL) {
1062                         if (p+1 < pvend && *(p+1) == '\0') {
1063                                 /* \n came from fgets */
1064                                 ++p;
1065                                 break;
1066                         }
1067                         /* \n came from us; last line of file, no newline */
1068                         assert(p > pvfree && *(p-1) == '\0');
1069                         --p;
1070                         break;
1071                 }
1072                 /* expand buffer and try again */
1073                 assert(*(pvend-1) == '\0');
1074                 increment = total_v_size >> 2;  /* mild exponential growth */
1075                 total_v_size += increment;
1076                 if (total_v_size > INT_MAX) {
1077                         PyErr_SetString(PyExc_OverflowError,
1078                             "line is longer than a Python string can hold");
1079                         Py_DECREF(v);
1080                         return NULL;
1081                 }
                     /* No Py_DECREF on failure: _PyString_Resize is documented
                      * to release the old object and set v to NULL itself.
                      */
1082                 if (_PyString_Resize(&v, (int)total_v_size) < 0)
1083                         return NULL;
1084                 /* overwrite the trailing null byte */
1085                 pvfree = BUF(v) + (total_v_size - increment - 1);
1086         }
             /* Trim v to the bytes actually used; p is one past the last one. */
1087         if (BUF(v) + total_v_size != p)
1088                 _PyString_Resize(&v, p - BUF(v));
1089         return v;
1090 #undef INITBUFSIZE
1091 #undef MAXBUFSIZE
1092 }
1093 #endif  /* ifdef USE_FGETS_IN_GETLINE */
1094
1095 /* Internal routine to get a line.
1096    Size argument interpretation:
1097    > 0: max length;
1098    <= 0: read arbitrary line
1099 */
1100
1101 static PyObject *
1102 get_line(PyFileObject *f, int n)
1103 {
             /* Read one line from f->f_fp into a new string object.  The buffer
              * starts at n bytes (100 when n <= 0) and, for n <= 0 only, grows
              * until a newline or EOF is reached.  Returns NULL on a read error
              * (IOError), when PyErr_CheckSignals() reports an error, if the
              * line would exceed INT_MAX bytes (OverflowError), or if creating
              * or resizing the string fails.  f->f_fp is used here without a
              * NULL check.
              */
1104         FILE *fp = f->f_fp;
1105         int c;
1106         char *buf, *end;
1107         size_t total_v_size;    /* total # of slots in buffer */
1108         size_t used_v_size;     /* # used slots in buffer */
1109         size_t increment;       /* amount to increment the buffer */
1110         PyObject *v;
1111 #ifdef WITH_UNIVERSAL_NEWLINES
             /* Work on local copies of the newline state; they are written
              * back to f after every pass of the read loop below.
              */
1112         int newlinetypes = f->f_newlinetypes;
1113         int skipnextlf = f->f_skipnextlf;
1114         int univ_newline = f->f_univ_newline;
1115 #endif
1116
             /* An unbounded read that needs no universal-newline translation
              * is handed to the fgets()-based reader when that is compiled in.
              */
1117 #if defined(USE_FGETS_IN_GETLINE)
1118 #ifdef WITH_UNIVERSAL_NEWLINES
1119         if (n <= 0 && !univ_newline )
1120 #else
1121         if (n <= 0)
1122 #endif
1123                 return getline_via_fgets(fp);
1124 #endif
1125         total_v_size = n > 0 ? n : 100;
1126         v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1127         if (v == NULL)
1128                 return NULL;
1129         buf = BUF(v);
1130         end = buf + total_v_size;
1131
             /* Each pass fills the free part of the buffer; the loop ends on
              * a newline, on EOF, or (for n > 0) when the buffer is full.
              */
1132         for (;;) {
1133                 Py_BEGIN_ALLOW_THREADS
1134                 FLOCKFILE(fp);
1135 #ifdef WITH_UNIVERSAL_NEWLINES
1136                 if (univ_newline) {
1137                         c = 'x'; /* Shut up gcc warning */
1138                         while ( buf != end && (c = GETC(fp)) != EOF ) {
                                     /* skipnextlf is set once a '\r' has been
                                      * read and stored as '\n' (see below); the
                                      * flag is kept in f->f_skipnextlf between
                                      * calls.
                                      */
1139                                 if (skipnextlf ) {
1140                                         skipnextlf = 0;
1141                                         if (c == '\n') {
1142                                                 /* Seeing a \n here with
1143                                                  * skipnextlf true means we
1144                                                  * saw a \r before.
1145                                                  */
1146                                                 newlinetypes |= NEWLINE_CRLF;
                                                     /* Drop this '\n' and go
                                                      * on with the character
                                                      * after it.
                                                      */
1147                                                 c = GETC(fp);
1148                                                 if (c == EOF) break;
1149                                         } else {
1150                                                 newlinetypes |= NEWLINE_CR;
1151                                         }
1152                                 }
1153                                 if (c == '\r') {
1154                                         skipnextlf = 1;
1155                                         c = '\n';
1156                                 } else if ( c == '\n')
1157                                         newlinetypes |= NEWLINE_LF;
1158                                 *buf++ = c;
1159                                 if (c == '\n') break;
1160                         }
                             /* EOF while a '\r' was still pending: record it
                              * as a lone CR.
                              */
1161                         if ( c == EOF && skipnextlf )
1162                                 newlinetypes |= NEWLINE_CR;
1163                 } else /* If not universal newlines use the normal loop */
1164 #endif
1165                 while ((c = GETC(fp)) != EOF &&
1166                        (*buf++ = c) != '\n' &&
1167                         buf != end)
1168                         ;
1169                 FUNLOCKFILE(fp);
1170                 Py_END_ALLOW_THREADS
1171 #ifdef WITH_UNIVERSAL_NEWLINES
1172                 f->f_newlinetypes = newlinetypes;
1173                 f->f_skipnextlf = skipnextlf;
1174 #endif
                     /* The line is complete once a newline has been stored. */
1175                 if (c == '\n')
1176                         break;
1177                 if (c == EOF) {
1178                         if (ferror(fp)) {
1179                                 PyErr_SetFromErrno(PyExc_IOError);
1180                                 clearerr(fp);
1181                                 Py_DECREF(v);
1182                                 return NULL;
1183                         }
1184                         clearerr(fp);
1185                         if (PyErr_CheckSignals()) {
1186                                 Py_DECREF(v);
1187                                 return NULL;
1188                         }
1189                         break;
1190                 }
1191                 /* Must be because buf == end */
                     /* With a size limit the full buffer is the answer;
                      * otherwise enlarge it and keep reading.
                      */
1192                 if (n > 0)
1193                         break;
1194                 used_v_size = total_v_size;
1195                 increment = total_v_size >> 2; /* mild exponential growth */
1196                 total_v_size += increment;
1197                 if (total_v_size > INT_MAX) {
1198                         PyErr_SetString(PyExc_OverflowError,
1199                             "line is longer than a Python string can hold");
1200                         Py_DECREF(v);
1201                         return NULL;
1202                 }
                     /* No Py_DECREF on failure: _PyString_Resize is documented
                      * to release the old object and set v to NULL itself.
                      */
1203                 if (_PyString_Resize(&v, total_v_size) < 0)
1204                         return NULL;
                     /* The resize may have moved the data; recompute both
                      * pointers from the new buffer.
                      */
1205                 buf = BUF(v) + used_v_size;
1206                 end = BUF(v) + total_v_size;
1207         }
1208
             /* Trim the string to the bytes actually stored. */
1209         used_v_size = buf - BUF(v);
1210         if (used_v_size != total_v_size)
1211                 _PyString_Resize(&v, used_v_size);
1212         return v;
1213 }
1214
1215 /* External C interface */
1216
1217 PyObject *
1218 PyFile_GetLine(PyObject *f, int n)
1219 {
1220         PyObject *result;
1221
1222         if (f == NULL) {
1223                 PyErr_BadInternalCall();
1224                 return NULL;
1225         }
1226
1227         if (PyFile_Check(f)) {
1228                 if (((PyFileObject*)f)->f_fp == NULL)
1229                         return err_closed();
1230                 result = get_line((PyFileObject *)f, n);
1231         }
1232         else {
1233                 PyObject *reader;
1234                 PyObject *args;
1235
1236                 reader = PyObject_GetAttrString(f, "readline");
1237                 if (reader == NULL)
1238                         return NULL;
1239                 if (n <= 0)
1240                         args = Py_BuildValue("()");
1241                 else
1242                         args = Py_BuildValue("(i)", n);
1243                 if (args == NULL) {
1244                         Py_DECREF(reader);
1245                         return NULL;
1246                 }
1247                 result = PyEval_CallObject(reader, args);
1248                 Py_DECREF(reader);
1249                 Py_DECREF(args);
1250                 if (result != NULL && !PyString_Check(result) &&
1251                     !PyUnicode_Check(result)) {
1252                         Py_DECREF(result);
1253                         result = NULL;
1254                         PyErr_SetString(PyExc_TypeError,
1255                                    "object.readline() returned non-string");
1256                 }
1257         }
1258
1259         if (n < 0 && result != NULL && PyString_Check(result)) {
1260                 char *s = PyString_AS_STRING(result);
1261                 int len = PyString_GET_SIZE(result);
1262                 if (len == 0) {
1263                         Py_DECREF(result);
1264                         result = NULL;
1265                         PyErr_SetString(PyExc_EOFError,
1266                                         "EOF when reading a line");
1267                 }
1268                 else if (s[len-1] == '\n') {
1269                         if (result->ob_refcnt == 1)
1270                                 _PyString_Resize(&result, len-1);
1271                         else {
1272                                 PyObject *v;
1273                                 v = PyString_FromStringAndSize(s, len-1);
1274                                 Py_DECREF(result);
1275                                 result = v;
1276                         }
1277                 }
1278         }
1279 #ifdef Py_USING_UNICODE
1280         if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1281                 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1282                 int len = PyUnicode_GET_SIZE(result);
1283                 if (len == 0) {
1284                         Py_DECREF(result);
1285                         result = NULL;
1286                         PyErr_SetString(PyExc_EOFError,
1287                                         "EOF when reading a line");
1288                 }
1289                 else if (s[len-1] == '\n') {
1290                         if (result->ob_refcnt == 1)
1291                                 PyUnicode_Resize(&result, len-1);
1292                         else {
1293                                 PyObject *v;
1294                                 v = PyUnicode_FromUnicode(s, len-1);
1295                                 Py_DECREF(result);
1296                                 result = v;
1297                         }
1298                 }
1299         }
1300 #endif
1301         return result;
1302 }
1303
1304 /* Python method */
1305
1306 static PyObject *
1307 file_readline(PyFileObject *f, PyObject *args)
1308 {
1309         int n = -1;
1310
1311         if (f->f_fp == NULL)
1312                 return err_closed();
1313         if (!PyArg_ParseTuple(args, "|i:readline", &n))
1314                 return NULL;
1315         if (n == 0)
1316                 return PyString_FromString("");
1317         if (n < 0)
1318                 n = 0;
1319         return get_line(f, n);
1320 }
1321
1322 static PyObject *
1323 file_readlines(PyFileObject *f, PyObject *args)
1324 {
             /* file.readlines([sizehint]): read the file in chunks and return a
              * list of the lines found, each keeping its '\n'.  Chunks go into
              * small_buffer first; a line that does not fit moves the data into
              * big_buffer, a string object whose size doubles each time it
              * fills up.  With sizehint > 0 reading stops once at least
              * sizehint bytes have been read, after the line in progress has
              * been completed.  Returns NULL with an exception set on failure.
              */
1325         long sizehint = 0;
1326         PyObject *list;
1327         PyObject *line;
1328         char small_buffer[SMALLCHUNK];
1329         char *buffer = small_buffer;
1330         size_t buffersize = SMALLCHUNK;
1331         PyObject *big_buffer = NULL;
1332         size_t nfilled = 0;
1333         size_t nread;
1334         size_t totalread = 0;
1335         char *p, *q, *end;
1336         int err;
1337         int shortread = 0;
1338
1339         if (f->f_fp == NULL)
1340                 return err_closed();
1341         if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1342                 return NULL;
1343         if ((list = PyList_New(0)) == NULL)
1344                 return NULL;
1345         for (;;) {
                     /* After a read that came up short, don't read again:
                      * behave as if this read had returned nothing.
                      */
1346                 if (shortread)
1347                         nread = 0;
1348                 else {
1349                         Py_BEGIN_ALLOW_THREADS
1350                         errno = 0;
1351                         nread = Py_UniversalNewlineFread(buffer+nfilled,
1352                                 buffersize-nfilled, f->f_fp, (PyObject *)f);
1353                         Py_END_ALLOW_THREADS
1354                         shortread = (nread < buffersize-nfilled);
1355                 }
1356                 if (nread == 0) {
                             /* Clearing sizehint keeps the partial-line code
                              * after the loop from calling get_line().
                              */
1357                         sizehint = 0;
1358                         if (!ferror(f->f_fp))
1359                                 break;
1360                         PyErr_SetFromErrno(PyExc_IOError);
1361                         clearerr(f->f_fp);
                       /* Failure exit for the whole function, also entered by
                        * goto: drop the list and leave through cleanup.
                        */
1362                   error:
1363                         Py_DECREF(list);
1364                         list = NULL;
1365                         goto cleanup;
1366                 }
1367                 totalread += nread;
                     /* Only the bytes just read can hold a new '\n'. */
1368                 p = memchr(buffer+nfilled, '\n', nread);
1369                 if (p == NULL) {
1370                         /* Need a larger buffer to fit this line */
1371                         nfilled += nread;
1372                         buffersize *= 2;
1373                         if (buffersize > INT_MAX) {
1374                                 PyErr_SetString(PyExc_OverflowError,
1375                             "line is longer than a Python string can hold");
1376                                 goto error;
1377                         }
1378                         if (big_buffer == NULL) {
1379                                 /* Create the big buffer */
1380                                 big_buffer = PyString_FromStringAndSize(
1381                                         NULL, buffersize);
1382                                 if (big_buffer == NULL)
1383                                         goto error;
1384                                 buffer = PyString_AS_STRING(big_buffer);
1385                                 memcpy(buffer, small_buffer, nfilled);
1386                         }
1387                         else {
1388                                 /* Grow the big buffer */
1389                                 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1390                                         goto error;
1391                                 buffer = PyString_AS_STRING(big_buffer);
1392                         }
1393                         continue;
1394                 }
1395                 end = buffer+nfilled+nread;
1396                 q = buffer;
1397                 do {
1398                         /* Process complete lines */
                             /* q is the start of the line and p its '\n';
                              * stepping p past the '\n' keeps it in the line.
                              */
1399                         p++;
1400                         line = PyString_FromStringAndSize(q, p-q);
1401                         if (line == NULL)
1402                                 goto error;
1403                         err = PyList_Append(list, line);
1404                         Py_DECREF(line);
1405                         if (err != 0)
1406                                 goto error;
1407                         q = p;
1408                         p = memchr(q, '\n', end-q);
1409                 } while (p != NULL);
1410                 /* Move the remaining incomplete line to the start */
1411                 nfilled = end-q;
1412                 memmove(buffer, q, nfilled);
1413                 if (sizehint > 0)
1414                         if (totalread >= (size_t)sizehint)
1415                                 break;
1416         }
1417         if (nfilled != 0) {
1418                 /* Partial last line */
1419                 line = PyString_FromStringAndSize(buffer, nfilled);
1420                 if (line == NULL)
1421                         goto error;
1422                 if (sizehint > 0) {
1423                         /* Need to complete the last line */
1424                         PyObject *rest = get_line(f, 0);
1425                         if (rest == NULL) {
1426                                 Py_DECREF(line);
1427                                 goto error;
1428                         }
1429                         PyString_Concat(&line, rest);
1430                         Py_DECREF(rest);
1431                         if (line == NULL)
1432                                 goto error;
1433                 }
1434                 err = PyList_Append(list, line);
1435                 Py_DECREF(line);
1436                 if (err != 0)
1437                         goto error;
1438         }
       /* Common exit: list is the result, or NULL after a failure. */
1439   cleanup:
1440         Py_XDECREF(big_buffer);
1441         return list;
1442 }
1443
1444 static PyObject *
1445 file_write(PyFileObject *f, PyObject *args)
1446 {
1447         char *s;
1448         int n, n2;
1449         if (f->f_fp == NULL)
1450                 return err_closed();
1451         if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1452                 return NULL;
1453         f->f_softspace = 0;
1454         Py_BEGIN_ALLOW_THREADS
1455         errno = 0;
1456         n2 = fwrite(s, 1, n, f->f_fp);
1457         Py_END_ALLOW_THREADS
1458         if (n2 != n) {
1459                 PyErr_SetFromErrno(PyExc_IOError);
1460                 clearerr(f->f_fp);
1461                 return NULL;
1462         }
1463         Py_INCREF(Py_None);
1464         return Py_None;
1465 }
1466
1467 static PyObject *
1468 file_writelines(PyFileObject *f, PyObject *seq)
1469 {
             /* file.writelines(seq): write every item of seq to the file and
              * return None.  Items are handled CHUNKSIZE at a time.  An item
              * that is not a string is replaced by a string built from its
              * buffer contents (TypeError if that fails).  IOError is raised
              * when fwrite() writes fewer bytes than asked.  Returns NULL with
              * an exception set on failure.
              */
1470 #define CHUNKSIZE 1000
1471         PyObject *list, *line;
1472         PyObject *it;   /* iter(seq) */
1473         PyObject *result;
1474         int i, j, index, len, nwritten, islist;
1475
1476         assert(seq != NULL);
1477         if (f->f_fp == NULL)
1478                 return err_closed();
1479
1480         result = NULL;
1481         list = NULL;
1482         islist = PyList_Check(seq);
1483         if  (islist)
1484                 it = NULL;
1485         else {
1486                 it = PyObject_GetIter(seq);
1487                 if (it == NULL) {
                             /* Whatever PyObject_GetIter raised is replaced
                              * by this TypeError.
                              */
1488                         PyErr_SetString(PyExc_TypeError,
1489                                 "writelines() requires an iterable argument");
1490                         return NULL;
1491                 }
1492                 /* From here on, fail by going to error, to reclaim "it". */
                     /* Scratch list of CHUNKSIZE slots, refilled on each pass. */
1493                 list = PyList_New(CHUNKSIZE);
1494                 if (list == NULL)
1495                         goto error;
1496         }
1497
1498         /* Strategy: slurp CHUNKSIZE lines into a private list,
1499            checking that they are all strings, then write that list
1500            without holding the interpreter lock, then come back for more. */
1501         for (index = 0; ; index += CHUNKSIZE) {
1502                 if (islist) {
                             /* Take the next CHUNKSIZE items as a fresh list
                              * object, releasing the previous slice.
                              */
1503                         Py_XDECREF(list);
1504                         list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1505                         if (list == NULL)
1506                                 goto error;
1507                         j = PyList_GET_SIZE(list);
1508                 }
1509                 else {
1510                         for (j = 0; j < CHUNKSIZE; j++) {
1511                                 line = PyIter_Next(it);
1512                                 if (line == NULL) {
                                             /* NULL without an exception
                                              * means the iterator is done.
                                              */
1513                                         if (PyErr_Occurred())
1514                                                 goto error;
1515                                         break;
1516                                 }
1517                                 PyList_SetItem(list, j, line);
1518                         }
1519                 }
                     /* j is the number of items in this chunk. */
1520                 if (j == 0)
1521                         break;
1522
1523                 /* Check that all entries are indeed strings. If not,
1524                    apply the same rules as for file.write() and
1525                    convert the results to strings. This is slow, but
1526                    seems to be the only way since all conversion APIs
1527                    could potentially execute Python code. */
1528                 for (i = 0; i < j; i++) {
1529                         PyObject *v = PyList_GET_ITEM(list, i);
1530                         if (!PyString_Check(v)) {
1531                                 const char *buffer;
1532                                 int len;
                                     /* NOTE(review): whenever the left operand
                                      * is false -- the file is not binary, or
                                      * PyObject_AsReadBuffer returned 0 -- the
                                      * || goes on to call
                                      * PyObject_AsCharBuffer, so for a binary
                                      * file the item has to satisfy both
                                      * calls.  Confirm this is intended.
                                      */
1533                                 if (((f->f_binary &&
1534                                       PyObject_AsReadBuffer(v,
1535                                               (const void**)&buffer,
1536                                                             &len)) ||
1537                                      PyObject_AsCharBuffer(v,
1538                                                            &buffer,
1539                                                            &len))) {
1540                                         PyErr_SetString(PyExc_TypeError,
1541                         "writelines() argument must be a sequence of strings");
1542                                         goto error;
1543                                 }
1544                                 line = PyString_FromStringAndSize(buffer,
1545                                                                   len);
1546                                 if (line == NULL)
1547                                         goto error;
                                     /* Release the old item by hand before
                                      * storing the new one with
                                      * PyList_SET_ITEM.
                                      */
1548                                 Py_DECREF(v);
1549                                 PyList_SET_ITEM(list, i, line);
1550                         }
1551                 }
1552
1553                 /* Since we are releasing the global lock, the
1554                    following code may *not* execute Python code. */
1555                 Py_BEGIN_ALLOW_THREADS
1556                 f->f_softspace = 0;
1557                 errno = 0;
1558                 for (i = 0; i < j; i++) {
1559                         line = PyList_GET_ITEM(list, i);
1560                         len = PyString_GET_SIZE(line);
1561                         nwritten = fwrite(PyString_AS_STRING(line),
1562                                           1, len, f->f_fp);
1563                         if (nwritten != len) {
                                     /* Take the interpreter lock back before
                                      * raising; the goto below skips
                                      * Py_END_ALLOW_THREADS.
                                      */
1564                                 Py_BLOCK_THREADS
1565                                 PyErr_SetFromErrno(PyExc_IOError);
1566                                 clearerr(f->f_fp);
1567                                 goto error;
1568                         }
1569                 }
1570                 Py_END_ALLOW_THREADS
1571
                     /* A chunk that is not full was the last one. */
1572                 if (j < CHUNKSIZE)
1573                         break;
1574         }
1575
1576         Py_INCREF(Py_None);
1577         result = Py_None;
       /* Shared exit for success and failure; result is still NULL on failure. */
1578   error:
1579         Py_XDECREF(list);
1580         Py_XDECREF(it);
1581         return result;
1582 #undef CHUNKSIZE
1583 }
1584
1585 static PyObject *
1586 file_getiter(PyFileObject *f)
1587 {
1588         if (f->f_fp == NULL)
1589                 return err_closed();
1590         Py_INCREF(f);
1591         return (PyObject *)f;
1592 }
1593
1594 PyDoc_STRVAR(readline_doc,      /* help text for readline() */
1595 "readline([size]) -> next line from the file, as a string.\n"
1596 "\n"
1597 "Retain newline.  A non-negative size argument limits the maximum\n"
1598 "number of bytes to return (an incomplete line may be returned then).\n"
1599 "Return an empty string at EOF.");
1600
1601 PyDoc_STRVAR(read_doc,          /* help text for read() */
1602 "read([size]) -> read at most size bytes, returned as a string.\n"
1603 "\n"
1604 "If the size argument is negative or omitted, read until EOF is reached.\n"
1605 "Notice that when in non-blocking mode, less data than what was requested\n"
1606 "may be returned, even if no size parameter was given.");
1607
1608 PyDoc_STRVAR(write_doc,         /* help text for write() */
1609 "write(str) -> None.  Write string str to file.\n"
1610 "\n"
1611 "Note that due to buffering, flush() or close() may be needed before\n"
1612 "the file on disk reflects the data written.");
1613
1614 PyDoc_STRVAR(fileno_doc,        /* help text for fileno() */
1615 "fileno() -> integer \"file descriptor\".\n"
1616 "\n"
1617 "This is needed for lower-level file interfaces, such as os.read().");
1618
1619 PyDoc_STRVAR(seek_doc,          /* help text for seek() */
1620 "seek(offset[, whence]) -> None.  Move to new file position.\n"
1621 "\n"
1622 "Argument offset is a byte count.  Optional argument whence defaults to\n"
1623 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1624 "(move relative to current position, positive or negative), and 2 (move\n"
1625 "relative to end of file, usually negative, although many platforms allow\n"
1626 "seeking beyond the end of a file).  If the file is opened in text mode,\n"
1627 "only offsets returned by tell() are legal.  Use of other offsets causes\n"
     /* The "\n" ending the next line closes the paragraph, so that the
      * stand-alone "\n" after it produces a blank line.
      */
1628 "undefined behavior.\n"
1629 "\n"
1630 "Note that not all file objects are seekable.");
1631
1632 #ifdef HAVE_FTRUNCATE
1633 PyDoc_STRVAR(truncate_doc,
1634 "truncate([size]) -> None.  Truncate the file to at most size bytes.\n"
1635 "\n"
1636 "Size defaults to the current file position, as returned by tell().");
1637 #endif
1638
1639 PyDoc_STRVAR(tell_doc,
1640 "tell() -> current file position, an integer (may be a long integer).");
1641
1642 PyDoc_STRVAR(readinto_doc,
1643 "readinto() -> Undocumented.  Don't use this; it may go away.");
1644
1645 PyDoc_STRVAR(readlines_doc,
1646 "readlines([size]) -> list of strings, each a line from the file.\n"
1647 "\n"
1648 "Call readline() repeatedly and return a list of the lines so read.\n"
1649 "The optional size argument, if given, is an approximate bound on the\n"
1650 "total number of bytes in the lines returned.");
1651
1652 PyDoc_STRVAR(xreadlines_doc,
1653 "xreadlines() -> returns self.\n"
1654 "\n"
1655 "For backward compatibility. File objects now include the performance\n"
1656 "optimizations previously implemented in the xreadlines module.");
1657
1658 PyDoc_STRVAR(writelines_doc,
1659 "writelines(sequence_of_strings) -> None.  Write the strings to the file.\n"
1660 "\n"
1661 "Note that newlines are not added.  The sequence can be any iterable object\n"
1662 "producing strings. This is equivalent to calling write() for each string.");
1663
1664 PyDoc_STRVAR(flush_doc,
1665 "flush() -> None.  Flush the internal I/O buffer.");
1666
1667 PyDoc_STRVAR(close_doc,
1668 "close() -> None or (perhaps) an integer.  Close the file.\n"
1669 "\n"
1670 "Sets data attribute .closed to True.  A closed file cannot be used for\n"
1671 "further I/O operations.  close() may be called more than once without\n"
1672 "error.  Some kinds of file objects (for example, opened by popen())\n"
1673 "may return an exit status upon closing.");
1674
1675 PyDoc_STRVAR(isatty_doc,
1676 "isatty() -> true or false.  True if the file is connected to a tty device.");
1677
1678 static PyMethodDef file_methods[] = {
1679         {"readline",  (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1680         {"read",      (PyCFunction)file_read,     METH_VARARGS, read_doc},
1681         {"write",     (PyCFunction)file_write,    METH_VARARGS, write_doc},
1682         {"fileno",    (PyCFunction)file_fileno,   METH_NOARGS,  fileno_doc},
1683         {"seek",      (PyCFunction)file_seek,     METH_VARARGS, seek_doc},
1684 #ifdef HAVE_FTRUNCATE
1685         {"truncate",  (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1686 #endif
1687         {"tell",      (PyCFunction)file_tell,     METH_NOARGS,  tell_doc},
1688         {"readinto",  (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1689         {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
1690         {"xreadlines",(PyCFunction)file_getiter,  METH_NOARGS, xreadlines_doc},
1691         {"writelines",(PyCFunction)file_writelines, METH_O,    writelines_doc},
1692         {"flush",     (PyCFunction)file_flush,    METH_NOARGS,  flush_doc},
1693         {"close",     (PyCFunction)file_close,    METH_NOARGS,  close_doc},
1694         {"isatty",    (PyCFunction)file_isatty,   METH_NOARGS,  isatty_doc},
1695         {NULL,        NULL}             /* sentinel */
1696 };
1697
1698 #define OFF(x) offsetof(PyFileObject, x)
1699
1700 static PyMemberDef file_memberlist[] = {
1701         {"softspace",   T_INT,          OFF(f_softspace), 0,
1702          "flag indicating that a space needs to be printed; used by print"},
1703         {"mode",        T_OBJECT,       OFF(f_mode),    RO,
1704          "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1705         {"name",        T_OBJECT,       OFF(f_name),    RO,
1706          "file name"},
1707         {"encoding",    T_OBJECT,       OFF(f_encoding),        RO,
1708          "file encoding"},
1709         /* getattr(f, "closed") is implemented without this table */
1710         {NULL}  /* Sentinel */
1711 };
1712
1713 static PyObject *
1714 get_closed(PyFileObject *f, void *closure)
1715 {
1716         return PyBool_FromLong((long)(f->f_fp == 0));
1717 }
#ifdef WITH_UNIVERSAL_NEWLINES
/* Getter for the "newlines" attribute.  Translates the f_newlinetypes bit
   set into a Python value:
     - no bits set        -> None
     - exactly one bit    -> that newline as a string
     - two or three bits  -> a tuple of the newline strings, always in the
                             order "\r", "\n", "\r\n"
   Any bit outside NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF raises SystemError.
   The closure argument is unused. */
static PyObject *
get_newlines(PyFileObject *f, void *closure)
{
        const int known_bits = NEWLINE_CR | NEWLINE_LF | NEWLINE_CRLF;
        char *seen[3];
        int nseen = 0;

        if (f->f_newlinetypes & ~known_bits) {
                PyErr_Format(PyExc_SystemError,
                             "Unknown newlines value 0x%x\n",
                             f->f_newlinetypes);
                return NULL;
        }

        /* Collect the newline kinds seen so far, in canonical order. */
        if (f->f_newlinetypes & NEWLINE_CR)
                seen[nseen++] = "\r";
        if (f->f_newlinetypes & NEWLINE_LF)
                seen[nseen++] = "\n";
        if (f->f_newlinetypes & NEWLINE_CRLF)
                seen[nseen++] = "\r\n";

        switch (nseen) {
        case 0:
                Py_INCREF(Py_None);
                return Py_None;
        case 1:
                return PyString_FromString(seen[0]);
        case 2:
                return Py_BuildValue("(ss)", seen[0], seen[1]);
        default:
                return Py_BuildValue("(sss)", seen[0], seen[1], seen[2]);
        }
}
#endif
1748
1749 static PyGetSetDef file_getsetlist[] = {
1750         {"closed", (getter)get_closed, NULL, "True if the file is closed"},
1751 #ifdef WITH_UNIVERSAL_NEWLINES
1752         {"newlines", (getter)get_newlines, NULL,
1753          "end-of-line convention used in this file"},
1754 #endif
1755         {0},
1756 };
1757
1758 static void
1759 drop_readahead(PyFileObject *f)
1760 {
1761         if (f->f_buf != NULL) {
1762                 PyMem_Free(f->f_buf);
1763                 f->f_buf = NULL;
1764         }
1765 }
1766
1767 /* Make sure that file has a readahead buffer with at least one byte
1768    (unless at EOF) and no more than bufsize.  Returns negative value on
1769    error */
1770 static int
1771 readahead(PyFileObject *f, int bufsize)
1772 {
1773         int chunksize;
1774
1775         if (f->f_buf != NULL) {
1776                 if( (f->f_bufend - f->f_bufptr) >= 1)
1777                         return 0;
1778                 else
1779                         drop_readahead(f);
1780         }
1781         if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
1782                 return -1;
1783         }
1784         Py_BEGIN_ALLOW_THREADS
1785         errno = 0;
1786         chunksize = Py_UniversalNewlineFread(
1787                 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1788         Py_END_ALLOW_THREADS
1789         if (chunksize == 0) {
1790                 if (ferror(f->f_fp)) {
1791                         PyErr_SetFromErrno(PyExc_IOError);
1792                         clearerr(f->f_fp);
1793                         drop_readahead(f);
1794                         return -1;
1795                 }
1796         }
1797         f->f_bufptr = f->f_buf;
1798         f->f_bufend = f->f_buf + chunksize;
1799         return 0;
1800 }
1801
1802 /* Used by file_iternext.  The returned string will start with 'skip'
1803    uninitialized bytes followed by the remainder of the line. Don't be
1804    horrified by the recursive call: maximum recursion depth is limited by
1805    logarithmic buffer growth to about 50 even when reading a 1gb line. */
1806
1807 static PyStringObject *
1808 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1809 {
1810         PyStringObject* s;
1811         char *bufptr;
1812         char *buf;
1813         int len;
1814
1815         if (f->f_buf == NULL)
1816                 if (readahead(f, bufsize) < 0)
1817                         return NULL;
1818
1819         len = f->f_bufend - f->f_bufptr;
1820         if (len == 0)
1821                 return (PyStringObject *)
1822                         PyString_FromStringAndSize(NULL, skip);
1823         bufptr = memchr(f->f_bufptr, '\n', len);
1824         if (bufptr != NULL) {
1825                 bufptr++;                       /* Count the '\n' */
1826                 len = bufptr - f->f_bufptr;
1827                 s = (PyStringObject *)
1828                         PyString_FromStringAndSize(NULL, skip+len);
1829                 if (s == NULL)
1830                         return NULL;
1831                 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1832                 f->f_bufptr = bufptr;
1833                 if (bufptr == f->f_bufend)
1834                         drop_readahead(f);
1835         } else {
1836                 bufptr = f->f_bufptr;
1837                 buf = f->f_buf;
1838                 f->f_buf = NULL;        /* Force new readahead buffer */
1839                 s = readahead_get_line_skip(
1840                         f, skip+len, bufsize + (bufsize>>2) );
1841                 if (s == NULL) {
1842                         PyMem_Free(buf);
1843                         return NULL;
1844                 }
1845                 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1846                 PyMem_Free(buf);
1847         }
1848         return s;
1849 }
1850
1851 /* A larger buffer size may actually decrease performance. */
1852 #define READAHEAD_BUFSIZE 8192
1853
1854 static PyObject *
1855 file_iternext(PyFileObject *f)
1856 {
1857         PyStringObject* l;
1858
1859         if (f->f_fp == NULL)
1860                 return err_closed();
1861
1862         l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1863         if (l == NULL || PyString_GET_SIZE(l) == 0) {
1864                 Py_XDECREF(l);
1865                 return NULL;
1866         }
1867         return (PyObject *)l;
1868 }
1869
1870
1871 static PyObject *
1872 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1873 {
1874         PyObject *self;
1875         static PyObject *not_yet_string;
1876
1877         assert(type != NULL && type->tp_alloc != NULL);
1878
1879         if (not_yet_string == NULL) {
1880                 not_yet_string = PyString_FromString("<uninitialized file>");
1881                 if (not_yet_string == NULL)
1882                         return NULL;
1883         }
1884
1885         self = type->tp_alloc(type, 0);
1886         if (self != NULL) {
1887                 /* Always fill in the name and mode, so that nobody else
1888                    needs to special-case NULLs there. */
1889                 Py_INCREF(not_yet_string);
1890                 ((PyFileObject *)self)->f_name = not_yet_string;
1891                 Py_INCREF(not_yet_string);
1892                 ((PyFileObject *)self)->f_mode = not_yet_string;
1893                 Py_INCREF(Py_None);
1894                 ((PyFileObject *)self)->f_encoding = Py_None;
1895         }
1896         return self;
1897 }
1898
1899 static int
1900 file_init(PyObject *self, PyObject *args, PyObject *kwds)
1901 {
1902         PyFileObject *foself = (PyFileObject *)self;
1903         int ret = 0;
1904         static char *kwlist[] = {"name", "mode", "buffering", 0};
1905         char *name = NULL;
1906         char *mode = "r";
1907         int bufsize = -1;
1908         int wideargument = 0;
1909
1910         assert(PyFile_Check(self));
1911         if (foself->f_fp != NULL) {
1912                 /* Have to close the existing file first. */
1913                 PyObject *closeresult = file_close(foself);
1914                 if (closeresult == NULL)
1915                         return -1;
1916                 Py_DECREF(closeresult);
1917         }
1918
1919 #ifdef Py_WIN_WIDE_FILENAMES
1920         if (GetVersion() < 0x80000000) {    /* On NT, so wide API available */
1921                 PyObject *po;
1922                 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
1923                                                 kwlist, &po, &mode, &bufsize)) {
1924                         wideargument = 1;
1925                         if (fill_file_fields(foself, NULL, name, mode,
1926                                              fclose, po) == NULL)
1927                                 goto Error;
1928                 } else {
1929                         /* Drop the argument parsing error as narrow
1930                            strings are also valid. */
1931                         PyErr_Clear();
1932                 }
1933         }
1934 #endif
1935
1936         if (!wideargument) {
1937                 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1938                                                  Py_FileSystemDefaultEncoding,
1939                                                  &name,
1940                                                  &mode, &bufsize))
1941                         return -1;
1942                 if (fill_file_fields(foself, NULL, name, mode,
1943                                      fclose, NULL) == NULL)
1944                         goto Error;
1945         }
1946         if (open_the_file(foself, name, mode) == NULL)
1947                 goto Error;
1948         foself->f_setbuf = NULL;
1949         PyFile_SetBufSize(self, bufsize);
1950         goto Done;
1951
1952 Error:
1953         ret = -1;
1954         /* fall through */
1955 Done:
1956         PyMem_Free(name); /* free the encoded string */
1957         return ret;
1958 }
1959
/* Docstring of the file type.  It is assembled from adjacent PyDoc_STR
   pieces so that the paragraph describing the 'U' mode is only compiled
   in when WITH_UNIVERSAL_NEWLINES is defined; the closing note about
   open() is always present. */
PyDoc_VAR(file_doc) =
PyDoc_STR(
"file(name[, mode[, buffering]]) -> file object\n"
"\n"
"Open a file.  The mode can be 'r', 'w' or 'a' for reading (default),\n"
"writing or appending.  The file will be created if it doesn't exist\n"
"when opened for writing or appending; it will be truncated when\n"
"opened for writing.  Add a 'b' to the mode for binary files.\n"
"Add a '+' to the mode to allow simultaneous reading and writing.\n"
"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
"buffered, and larger numbers specify the buffer size.\n"
)
#ifdef WITH_UNIVERSAL_NEWLINES
PyDoc_STR(
"Add a 'U' to mode to open the file for input with universal newline\n"
"support.  Any line ending in the input file will be seen as a '\\n'\n"
"in Python.  Also, a file so opened gains the attribute 'newlines';\n"
"the value for this attribute is one of None (no newline read yet),\n"
"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
"\n"
"'U' cannot be combined with 'w' or '+' mode.\n"
)
#endif /* WITH_UNIVERSAL_NEWLINES */
PyDoc_STR(
"\n"
"Note:  open() is an alias for file()."
);
1987
1988 PyTypeObject PyFile_Type = {
1989         PyObject_HEAD_INIT(&PyType_Type)
1990         0,
1991         "file",
1992         sizeof(PyFileObject),
1993         0,
1994         (destructor)file_dealloc,               /* tp_dealloc */
1995         0,                                      /* tp_print */
1996         0,                                      /* tp_getattr */
1997         0,                                      /* tp_setattr */
1998         0,                                      /* tp_compare */
1999         (reprfunc)file_repr,                    /* tp_repr */
2000         0,                                      /* tp_as_number */
2001         0,                                      /* tp_as_sequence */
2002         0,                                      /* tp_as_mapping */
2003         0,                                      /* tp_hash */
2004         0,                                      /* tp_call */
2005         0,                                      /* tp_str */
2006         PyObject_GenericGetAttr,                /* tp_getattro */
2007         /* softspace is writable:  we must supply tp_setattro */
2008         PyObject_GenericSetAttr,                /* tp_setattro */
2009         0,                                      /* tp_as_buffer */
2010         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2011         file_doc,                               /* tp_doc */
2012         0,                                      /* tp_traverse */
2013         0,                                      /* tp_clear */
2014         0,                                      /* tp_richcompare */
2015         0,                                      /* tp_weaklistoffset */
2016         (getiterfunc)file_getiter,              /* tp_iter */
2017         (iternextfunc)file_iternext,            /* tp_iternext */
2018         file_methods,                           /* tp_methods */
2019         file_memberlist,                        /* tp_members */
2020         file_getsetlist,                        /* tp_getset */
2021         0,                                      /* tp_base */
2022         0,                                      /* tp_dict */
2023         0,                                      /* tp_descr_get */
2024         0,                                      /* tp_descr_set */
2025         0,                                      /* tp_dictoffset */
2026         (initproc)file_init,                    /* tp_init */
2027         PyType_GenericAlloc,                    /* tp_alloc */
2028         file_new,                               /* tp_new */
2029         PyObject_Del,                           /* tp_free */
2030 };
2031
2032 /* Interface for the 'soft space' between print items. */
2033
2034 int
2035 PyFile_SoftSpace(PyObject *f, int newflag)
2036 {
2037         int oldflag = 0;
2038         if (f == NULL) {
2039                 /* Do nothing */
2040         }
2041         else if (PyFile_Check(f)) {
2042                 oldflag = ((PyFileObject *)f)->f_softspace;
2043                 ((PyFileObject *)f)->f_softspace = newflag;
2044         }
2045         else {
2046                 PyObject *v;
2047                 v = PyObject_GetAttrString(f, "softspace");
2048                 if (v == NULL)
2049                         PyErr_Clear();
2050                 else {
2051                         if (PyInt_Check(v))
2052                                 oldflag = PyInt_AsLong(v);
2053                         Py_DECREF(v);
2054                 }
2055                 v = PyInt_FromLong((long)newflag);
2056                 if (v == NULL)
2057                         PyErr_Clear();
2058                 else {
2059                         if (PyObject_SetAttrString(f, "softspace", v) != 0)
2060                                 PyErr_Clear();
2061                         Py_DECREF(v);
2062                 }
2063         }
2064         return oldflag;
2065 }
2066
2067 /* Interfaces to write objects/strings to file-like objects */
2068
2069 int
2070 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2071 {
2072         PyObject *writer, *value, *args, *result;
2073         if (f == NULL) {
2074                 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2075                 return -1;
2076         }
2077         else if (PyFile_Check(f)) {
2078                 FILE *fp = PyFile_AsFile(f);
2079 #ifdef Py_USING_UNICODE
2080                 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2081                 int result;
2082 #endif
2083                 if (fp == NULL) {
2084                         err_closed();
2085                         return -1;
2086                 }
2087 #ifdef Py_USING_UNICODE
2088                 if ((flags & Py_PRINT_RAW) &&
2089                     PyUnicode_Check(v) && enc != Py_None) {
2090                         char *cenc = PyString_AS_STRING(enc);
2091                         value = PyUnicode_AsEncodedString(v, cenc, "strict");
2092                         if (value == NULL)
2093                                 return -1;
2094                 } else {
2095                         value = v;
2096                         Py_INCREF(value);
2097                 }
2098                 result = PyObject_Print(value, fp, flags);
2099                 Py_DECREF(value);
2100                 return result;
2101 #else
2102                 return PyObject_Print(v, fp, flags);
2103 #endif
2104         }
2105         writer = PyObject_GetAttrString(f, "write");
2106         if (writer == NULL)
2107                 return -1;
2108         if (flags & Py_PRINT_RAW) {
2109                 if (PyUnicode_Check(v)) {
2110                         value = v;
2111                         Py_INCREF(value);
2112                 } else
2113                         value = PyObject_Str(v);
2114         }
2115         else
2116                 value = PyObject_Repr(v);
2117         if (value == NULL) {
2118                 Py_DECREF(writer);
2119                 return -1;
2120         }
2121         args = Py_BuildValue("(O)", value);
2122         if (args == NULL) {
2123                 Py_DECREF(value);
2124                 Py_DECREF(writer);
2125                 return -1;
2126         }
2127         result = PyEval_CallObject(writer, args);
2128         Py_DECREF(args);
2129         Py_DECREF(value);
2130         Py_DECREF(writer);
2131         if (result == NULL)
2132                 return -1;
2133         Py_DECREF(result);
2134         return 0;
2135 }
2136
2137 int
2138 PyFile_WriteString(const char *s, PyObject *f)
2139 {
2140         if (f == NULL) {
2141                 /* Should be caused by a pre-existing error */
2142                 if (!PyErr_Occurred())
2143                         PyErr_SetString(PyExc_SystemError,
2144                                         "null file for PyFile_WriteString");
2145                 return -1;
2146         }
2147         else if (PyFile_Check(f)) {
2148                 FILE *fp = PyFile_AsFile(f);
2149                 if (fp == NULL) {
2150                         err_closed();
2151                         return -1;
2152                 }
2153                 fputs(s, fp);
2154                 return 0;
2155         }
2156         else if (!PyErr_Occurred()) {
2157                 PyObject *v = PyString_FromString(s);
2158                 int err;
2159                 if (v == NULL)
2160                         return -1;
2161                 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2162                 Py_DECREF(v);
2163                 return err;
2164         }
2165         else
2166                 return -1;
2167 }
2168
2169 /* Try to get a file-descriptor from a Python object.  If the object
2170    is an integer or long integer, its value is returned.  If not, the
2171    object's fileno() method is called if it exists; the method must return
2172    an integer or long integer, which is returned as the file descriptor value.
2173    -1 is returned on failure.
2174 */
2175
2176 int PyObject_AsFileDescriptor(PyObject *o)
2177 {
2178         int fd;
2179         PyObject *meth;
2180
2181         if (PyInt_Check(o)) {
2182                 fd = PyInt_AsLong(o);
2183         }
2184         else if (PyLong_Check(o)) {
2185                 fd = PyLong_AsLong(o);
2186         }
2187         else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2188         {
2189                 PyObject *fno = PyEval_CallObject(meth, NULL);
2190                 Py_DECREF(meth);
2191                 if (fno == NULL)
2192                         return -1;
2193
2194                 if (PyInt_Check(fno)) {
2195                         fd = PyInt_AsLong(fno);
2196                         Py_DECREF(fno);
2197                 }
2198                 else if (PyLong_Check(fno)) {
2199                         fd = PyLong_AsLong(fno);
2200                         Py_DECREF(fno);
2201                 }
2202                 else {
2203                         PyErr_SetString(PyExc_TypeError,
2204                                         "fileno() returned a non-integer");
2205                         Py_DECREF(fno);
2206                         return -1;
2207                 }
2208         }
2209         else {
2210                 PyErr_SetString(PyExc_TypeError,
2211                                 "argument must be an int, or have a fileno() method.");
2212                 return -1;
2213         }
2214
2215         if (fd < 0) {
2216                 PyErr_Format(PyExc_ValueError,
2217                              "file descriptor cannot be a negative integer (%i)",
2218                              fd);
2219                 return -1;
2220         }
2221         return fd;
2222 }
2223
2224 #ifdef WITH_UNIVERSAL_NEWLINES
2225 /* From here on we need access to the real fgets and fread */
2226 #undef fgets
2227 #undef fread
2228
2229 /*
2230 ** Py_UniversalNewlineFgets is an fgets variation that understands
2231 ** all of \r, \n and \r\n conventions.
2232 ** The stream should be opened in binary mode.
2233 ** If fobj is NULL the routine always does newline conversion, and
2234 ** it may peek one char ahead to gobble the second char in \r\n.
2235 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2236 ** is no readahead but in stead a flag is used to skip a following
2237 ** \n on the next read. Also, if the file is open in binary mode
2238 ** the whole conversion is skipped. Finally, the routine keeps track of
2239 ** the different types of newlines seen.
2240 ** Note that we need no error handling: fgets() treats error and eof
2241 ** identically.
2242 */
2243 char *
2244 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2245 {
2246         char *p = buf;
2247         int c;
2248         int newlinetypes = 0;
2249         int skipnextlf = 0;
2250         int univ_newline = 1;
2251
2252         if (fobj) {
2253                 if (!PyFile_Check(fobj)) {
2254                         errno = ENXIO;  /* What can you do... */
2255                         return NULL;
2256                 }
2257                 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2258                 if ( !univ_newline )
2259                         return fgets(buf, n, stream);
2260                 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2261                 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2262         }
2263         FLOCKFILE(stream);
2264         c = 'x'; /* Shut up gcc warning */
2265         while (--n > 0 && (c = GETC(stream)) != EOF ) {
2266                 if (skipnextlf ) {
2267                         skipnextlf = 0;
2268                         if (c == '\n') {
2269                                 /* Seeing a \n here with skipnextlf true
2270                                 ** means we saw a \r before.
2271                                 */
2272                                 newlinetypes |= NEWLINE_CRLF;
2273                                 c = GETC(stream);
2274                                 if (c == EOF) break;
2275                         } else {
2276                                 /*
2277                                 ** Note that c == EOF also brings us here,
2278                                 ** so we're okay if the last char in the file
2279                                 ** is a CR.
2280                                 */
2281                                 newlinetypes |= NEWLINE_CR;
2282                         }
2283                 }
2284                 if (c == '\r') {
2285                         /* A \r is translated into a \n, and we skip
2286                         ** an adjacent \n, if any. We don't set the
2287                         ** newlinetypes flag until we've seen the next char.
2288                         */
2289                         skipnextlf = 1;
2290                         c = '\n';
2291                 } else if ( c == '\n') {
2292                         newlinetypes |= NEWLINE_LF;
2293                 }
2294                 *p++ = c;
2295                 if (c == '\n') break;
2296         }
2297         if ( c == EOF && skipnextlf )
2298                 newlinetypes |= NEWLINE_CR;
2299         FUNLOCKFILE(stream);
2300         *p = '\0';
2301         if (fobj) {
2302                 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2303                 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2304         } else if ( skipnextlf ) {
2305                 /* If we have no file object we cannot save the
2306                 ** skipnextlf flag. We have to readahead, which
2307                 ** will cause a pause if we're reading from an
2308                 ** interactive stream, but that is very unlikely
2309                 ** unless we're doing something silly like
2310                 ** execfile("/dev/tty").
2311                 */
2312                 c = GETC(stream);
2313                 if ( c != '\n' )
2314                         ungetc(c, stream);
2315         }
2316         if (p == buf)
2317                 return NULL;
2318         return buf;
2319 }
2320
2321 /*
2322 ** Py_UniversalNewlineFread is an fread variation that understands
2323 ** all of \r, \n and \r\n conventions.
2324 ** The stream should be opened in binary mode.
2325 ** fobj must be a PyFileObject. In this case there
2326 ** is no readahead but in stead a flag is used to skip a following
2327 ** \n on the next read. Also, if the file is open in binary mode
2328 ** the whole conversion is skipped. Finally, the routine keeps track of
2329 ** the different types of newlines seen.
2330 */
2331 size_t
2332 Py_UniversalNewlineFread(char *buf, size_t n,
2333                          FILE *stream, PyObject *fobj)
2334 {
2335         char *dst = buf;
2336         PyFileObject *f = (PyFileObject *)fobj;
2337         int newlinetypes, skipnextlf;
2338
2339         assert(buf != NULL);
2340         assert(stream != NULL);
2341
2342         if (!fobj || !PyFile_Check(fobj)) {
2343                 errno = ENXIO;  /* What can you do... */
2344                 return 0;
2345         }
2346         if (!f->f_univ_newline)
2347                 return fread(buf, 1, n, stream);
2348         newlinetypes = f->f_newlinetypes;
2349         skipnextlf = f->f_skipnextlf;
2350         /* Invariant:  n is the number of bytes remaining to be filled
2351          * in the buffer.
2352          */
2353         while (n) {
2354                 size_t nread;
2355                 int shortread;
2356                 char *src = dst;
2357
2358                 nread = fread(dst, 1, n, stream);
2359                 assert(nread <= n);
2360                 if (nread == 0)
2361                         break;
2362
2363                 n -= nread; /* assuming 1 byte out for each in; will adjust */
2364                 shortread = n != 0;     /* true iff EOF or error */
2365                 while (nread--) {
2366                         char c = *src++;
2367                         if (c == '\r') {
2368                                 /* Save as LF and set flag to skip next LF. */
2369                                 *dst++ = '\n';
2370                                 skipnextlf = 1;
2371                         }
2372                         else if (skipnextlf && c == '\n') {
2373                                 /* Skip LF, and remember we saw CR LF. */
2374                                 skipnextlf = 0;
2375                                 newlinetypes |= NEWLINE_CRLF;
2376                                 ++n;
2377                         }
2378                         else {
2379                                 /* Normal char to be stored in buffer.  Also
2380                                  * update the newlinetypes flag if either this
2381                                  * is an LF or the previous char was a CR.
2382                                  */
2383                                 if (c == '\n')
2384                                         newlinetypes |= NEWLINE_LF;
2385                                 else if (skipnextlf)
2386                                         newlinetypes |= NEWLINE_CR;
2387                                 *dst++ = c;
2388                                 skipnextlf = 0;
2389                         }
2390                 }
2391                 if (shortread) {
2392                         /* If this is EOF, update type flags. */
2393                         if (skipnextlf && feof(stream))
2394                                 newlinetypes |= NEWLINE_CR;
2395                         break;
2396                 }
2397         }
2398         f->f_newlinetypes = newlinetypes;
2399         f->f_skipnextlf = skipnextlf;
2400         return dst - buf;
2401 }
2402 #endif