Whitespace normalization.
[python/dscho.git] / Objects / fileobject.c
blob34f28e32ac550a4e937e7bb94667090568eda529
1 /* File object implementation */
3 #include "Python.h"
4 #include "structmember.h"
6 #ifndef DONT_HAVE_SYS_TYPES_H
7 #include <sys/types.h>
8 #endif /* DONT_HAVE_SYS_TYPES_H */
10 #ifdef MS_WINDOWS
11 #define fileno _fileno
12 /* can simulate truncate with Win32 API functions; see file_truncate */
13 #define HAVE_FTRUNCATE
14 #define WIN32_LEAN_AND_MEAN
15 #include <windows.h>
16 #endif
18 #ifdef _MSC_VER
19 /* Need GetVersion to see if on NT so safe to use _wfopen */
20 #define WIN32_LEAN_AND_MEAN
21 #include <windows.h>
22 #endif /* _MSC_VER */
24 #if defined(PYOS_OS2) && defined(PYCC_GCC)
25 #include <io.h>
26 #endif
28 #define BUF(v) PyString_AS_STRING((PyStringObject *)v)
30 #ifndef DONT_HAVE_ERRNO_H
31 #include <errno.h>
32 #endif
34 #ifdef HAVE_GETC_UNLOCKED
35 #define GETC(f) getc_unlocked(f)
36 #define FLOCKFILE(f) flockfile(f)
37 #define FUNLOCKFILE(f) funlockfile(f)
38 #else
39 #define GETC(f) getc(f)
40 #define FLOCKFILE(f)
41 #define FUNLOCKFILE(f)
42 #endif
44 /* Bits in f_newlinetypes */
45 #define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
46 #define NEWLINE_CR 1 /* \r newline seen */
47 #define NEWLINE_LF 2 /* \n newline seen */
48 #define NEWLINE_CRLF 4 /* \r\n newline seen */
50 FILE *
51 PyFile_AsFile(PyObject *f)
53 if (f == NULL || !PyFile_Check(f))
54 return NULL;
55 else
56 return ((PyFileObject *)f)->f_fp;
59 PyObject *
60 PyFile_Name(PyObject *f)
62 if (f == NULL || !PyFile_Check(f))
63 return NULL;
64 else
65 return ((PyFileObject *)f)->f_name;
68 /* On Unix, fopen will succeed for directories.
69 In Python, there should be no file objects referring to
70 directories, so we need a check. */
72 static PyFileObject*
73 dircheck(PyFileObject* f)
75 #if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
76 struct stat buf;
77 if (f->f_fp == NULL)
78 return f;
79 if (fstat(fileno(f->f_fp), &buf) == 0 &&
80 S_ISDIR(buf.st_mode)) {
81 #ifdef HAVE_STRERROR
82 char *msg = strerror(EISDIR);
83 #else
84 char *msg = "Is a directory";
85 #endif
86 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
87 EISDIR, msg);
88 PyErr_SetObject(PyExc_IOError, exc);
89 Py_XDECREF(exc);
90 return NULL;
92 #endif
93 return f;
97 static PyObject *
98 fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
99 int (*close)(FILE *))
101 assert(f != NULL);
102 assert(PyFile_Check(f));
103 assert(f->f_fp == NULL);
105 Py_DECREF(f->f_name);
106 Py_DECREF(f->f_mode);
107 Py_DECREF(f->f_encoding);
109 Py_INCREF (name);
110 f->f_name = name;
112 f->f_mode = PyString_FromString(mode);
114 f->f_close = close;
115 f->f_softspace = 0;
116 f->f_binary = strchr(mode,'b') != NULL;
117 f->f_buf = NULL;
118 f->f_univ_newline = (strchr(mode, 'U') != NULL);
119 f->f_newlinetypes = NEWLINE_UNKNOWN;
120 f->f_skipnextlf = 0;
121 Py_INCREF(Py_None);
122 f->f_encoding = Py_None;
124 if (f->f_name == NULL || f->f_mode == NULL)
125 return NULL;
126 f->f_fp = fp;
127 f = dircheck(f);
128 return (PyObject *) f;
131 static PyObject *
132 open_the_file(PyFileObject *f, char *name, char *mode)
134 assert(f != NULL);
135 assert(PyFile_Check(f));
136 #ifdef MS_WINDOWS
137 /* windows ignores the passed name in order to support Unicode */
138 assert(f->f_name != NULL);
139 #else
140 assert(name != NULL);
141 #endif
142 assert(mode != NULL);
143 assert(f->f_fp == NULL);
145 /* rexec.py can't stop a user from getting the file() constructor --
146 all they have to do is get *any* file object f, and then do
147 type(f). Here we prevent them from doing damage with it. */
148 if (PyEval_GetRestricted()) {
149 PyErr_SetString(PyExc_IOError,
150 "file() constructor not accessible in restricted mode");
151 return NULL;
153 errno = 0;
155 if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
156 mode = "rb";
157 #ifdef MS_WINDOWS
158 if (PyUnicode_Check(f->f_name)) {
159 PyObject *wmode;
160 wmode = PyUnicode_DecodeASCII(mode, strlen(mode), NULL);
161 if (f->f_name && wmode) {
162 Py_BEGIN_ALLOW_THREADS
163 /* PyUnicode_AS_UNICODE OK without thread
164 lock as it is a simple dereference. */
165 f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
166 PyUnicode_AS_UNICODE(wmode));
167 Py_END_ALLOW_THREADS
169 Py_XDECREF(wmode);
171 #endif
172 if (NULL == f->f_fp && NULL != name) {
173 Py_BEGIN_ALLOW_THREADS
174 f->f_fp = fopen(name, mode);
175 Py_END_ALLOW_THREADS
178 if (f->f_fp == NULL) {
179 #ifdef _MSC_VER
180 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
181 * across all Windows flavors. When it sets EINVAL varies
182 * across Windows flavors, the exact conditions aren't
183 * documented, and the answer lies in the OS's implementation
184 * of Win32's CreateFile function (whose source is secret).
185 * Seems the best we can do is map EINVAL to ENOENT.
187 if (errno == 0) /* bad mode string */
188 errno = EINVAL;
189 else if (errno == EINVAL) /* unknown, but not a mode string */
190 errno = ENOENT;
191 #endif
192 if (errno == EINVAL)
193 PyErr_Format(PyExc_IOError, "invalid mode: %s",
194 mode);
195 else
196 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
197 f = NULL;
199 if (f != NULL)
200 f = dircheck(f);
201 return (PyObject *)f;
204 PyObject *
205 PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
207 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
208 NULL, NULL);
209 if (f != NULL) {
210 PyObject *o_name = PyString_FromString(name);
211 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
212 Py_DECREF(f);
213 f = NULL;
215 Py_DECREF(o_name);
217 return (PyObject *) f;
220 PyObject *
221 PyFile_FromString(char *name, char *mode)
223 extern int fclose(FILE *);
224 PyFileObject *f;
226 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
227 if (f != NULL) {
228 if (open_the_file(f, name, mode) == NULL) {
229 Py_DECREF(f);
230 f = NULL;
233 return (PyObject *)f;
236 void
237 PyFile_SetBufSize(PyObject *f, int bufsize)
239 PyFileObject *file = (PyFileObject *)f;
240 if (bufsize >= 0) {
241 int type;
242 switch (bufsize) {
243 case 0:
244 type = _IONBF;
245 break;
246 #ifdef HAVE_SETVBUF
247 case 1:
248 type = _IOLBF;
249 bufsize = BUFSIZ;
250 break;
251 #endif
252 default:
253 type = _IOFBF;
254 #ifndef HAVE_SETVBUF
255 bufsize = BUFSIZ;
256 #endif
257 break;
259 fflush(file->f_fp);
260 if (type == _IONBF) {
261 PyMem_Free(file->f_setbuf);
262 file->f_setbuf = NULL;
263 } else {
264 file->f_setbuf = PyMem_Realloc(file->f_setbuf, bufsize);
266 #ifdef HAVE_SETVBUF
267 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
268 #else /* !HAVE_SETVBUF */
269 setbuf(file->f_fp, file->f_setbuf);
270 #endif /* !HAVE_SETVBUF */
274 /* Set the encoding used to output Unicode strings.
275 Returh 1 on success, 0 on failure. */
278 PyFile_SetEncoding(PyObject *f, const char *enc)
280 PyFileObject *file = (PyFileObject*)f;
281 PyObject *str = PyString_FromString(enc);
282 if (!str)
283 return 0;
284 Py_DECREF(file->f_encoding);
285 file->f_encoding = str;
286 return 1;
289 static PyObject *
290 err_closed(void)
292 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
293 return NULL;
296 static void drop_readahead(PyFileObject *);
298 /* Methods */
300 static void
301 file_dealloc(PyFileObject *f)
303 if (f->weakreflist != NULL)
304 PyObject_ClearWeakRefs((PyObject *) f);
305 if (f->f_fp != NULL && f->f_close != NULL) {
306 Py_BEGIN_ALLOW_THREADS
307 (*f->f_close)(f->f_fp);
308 Py_END_ALLOW_THREADS
310 PyMem_Free(f->f_setbuf);
311 Py_XDECREF(f->f_name);
312 Py_XDECREF(f->f_mode);
313 Py_XDECREF(f->f_encoding);
314 drop_readahead(f);
315 f->ob_type->tp_free((PyObject *)f);
318 static PyObject *
319 file_repr(PyFileObject *f)
321 if (PyUnicode_Check(f->f_name)) {
322 #ifdef Py_USING_UNICODE
323 PyObject *ret = NULL;
324 PyObject *name;
325 name = PyUnicode_AsUnicodeEscapeString(f->f_name);
326 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
327 f->f_fp == NULL ? "closed" : "open",
328 PyString_AsString(name),
329 PyString_AsString(f->f_mode),
331 Py_XDECREF(name);
332 return ret;
333 #endif
334 } else {
335 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
336 f->f_fp == NULL ? "closed" : "open",
337 PyString_AsString(f->f_name),
338 PyString_AsString(f->f_mode),
343 static PyObject *
344 file_close(PyFileObject *f)
346 int sts = 0;
347 if (f->f_fp != NULL) {
348 if (f->f_close != NULL) {
349 Py_BEGIN_ALLOW_THREADS
350 errno = 0;
351 sts = (*f->f_close)(f->f_fp);
352 Py_END_ALLOW_THREADS
354 f->f_fp = NULL;
356 PyMem_Free(f->f_setbuf);
357 f->f_setbuf = NULL;
358 if (sts == EOF)
359 return PyErr_SetFromErrno(PyExc_IOError);
360 if (sts != 0)
361 return PyInt_FromLong((long)sts);
362 Py_INCREF(Py_None);
363 return Py_None;
/* Our very own off_t-like type, 64-bit if possible.
   Without large file support plain off_t is used; with it, off_t is
   used when it is at least 8 bytes wide, else fpos_t when that is. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
379 /* a portable fseek() function
380 return 0 on success, non-zero on failure (with errno set) */
381 static int
382 _portable_fseek(FILE *fp, Py_off_t offset, int whence)
384 #if !defined(HAVE_LARGEFILE_SUPPORT)
385 return fseek(fp, offset, whence);
386 #elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
387 return fseeko(fp, offset, whence);
388 #elif defined(HAVE_FSEEK64)
389 return fseek64(fp, offset, whence);
390 #elif defined(__BEOS__)
391 return _fseek(fp, offset, whence);
392 #elif SIZEOF_FPOS_T >= 8
393 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
394 and fgetpos() to implement fseek()*/
395 fpos_t pos;
396 switch (whence) {
397 case SEEK_END:
398 #ifdef MS_WINDOWS
399 fflush(fp);
400 if (_lseeki64(fileno(fp), 0, 2) == -1)
401 return -1;
402 #else
403 if (fseek(fp, 0, SEEK_END) != 0)
404 return -1;
405 #endif
406 /* fall through */
407 case SEEK_CUR:
408 if (fgetpos(fp, &pos) != 0)
409 return -1;
410 offset += pos;
411 break;
412 /* case SEEK_SET: break; */
414 return fsetpos(fp, &offset);
415 #else
416 #error "Large file support, but no way to fseek."
417 #endif
421 /* a portable ftell() function
422 Return -1 on failure with errno set appropriately, current file
423 position on success */
424 static Py_off_t
425 _portable_ftell(FILE* fp)
427 #if !defined(HAVE_LARGEFILE_SUPPORT)
428 return ftell(fp);
429 #elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
430 return ftello(fp);
431 #elif defined(HAVE_FTELL64)
432 return ftell64(fp);
433 #elif SIZEOF_FPOS_T >= 8
434 fpos_t pos;
435 if (fgetpos(fp, &pos) != 0)
436 return -1;
437 return pos;
438 #else
439 #error "Large file support, but no way to ftell."
440 #endif
444 static PyObject *
445 file_seek(PyFileObject *f, PyObject *args)
447 int whence;
448 int ret;
449 Py_off_t offset;
450 PyObject *offobj;
452 if (f->f_fp == NULL)
453 return err_closed();
454 drop_readahead(f);
455 whence = 0;
456 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
457 return NULL;
458 #if !defined(HAVE_LARGEFILE_SUPPORT)
459 offset = PyInt_AsLong(offobj);
460 #else
461 offset = PyLong_Check(offobj) ?
462 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
463 #endif
464 if (PyErr_Occurred())
465 return NULL;
467 Py_BEGIN_ALLOW_THREADS
468 errno = 0;
469 ret = _portable_fseek(f->f_fp, offset, whence);
470 Py_END_ALLOW_THREADS
472 if (ret != 0) {
473 PyErr_SetFromErrno(PyExc_IOError);
474 clearerr(f->f_fp);
475 return NULL;
477 f->f_skipnextlf = 0;
478 Py_INCREF(Py_None);
479 return Py_None;
#ifdef HAVE_FTRUNCATE
/* file.truncate([size]): set the file's size to size, defaulting to
   the current position.  The current position is preserved.
   Returns None, or NULL with an exception set. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    Py_off_t target;
    PyObject *newsizeobj = NULL;
    Py_off_t saved_pos;
    int ret;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &newsizeobj))
        return NULL;

    /* Get current file position.  If the file happens to be open for
     * update and the last operation was an input operation, C doesn't
     * define what the later fflush() will do, but we promise truncate()
     * won't change the current position (and fflush() *does* change it
     * then at least on Windows).  The easiest thing is to capture
     * current pos now and seek back to it at the end.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    saved_pos = _portable_ftell(f->f_fp);
    Py_END_ALLOW_THREADS
    if (saved_pos == -1)
        goto onioerror;

    /* The new size is the current position unless the caller passed
     * an explicit value.
     */
    target = saved_pos;
    if (newsizeobj != NULL) {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        target = PyInt_AsLong(newsizeobj);
#else
        target = PyLong_Check(newsizeobj) ?
                PyLong_AsLongLong(newsizeobj) :
                PyInt_AsLong(newsizeobj);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the stream.  We're mixing stream-level I/O with lower-level
     * I/O, and a flush may be necessary to synch both platform views
     * of the current file state.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = fflush(f->f_fp);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        HANDLE hFile;

        /* Have to move current pos to desired endpoint on Windows. */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        ret = _portable_fseek(f->f_fp, target, SEEK_SET) != 0;
        Py_END_ALLOW_THREADS
        if (ret)
            goto onioerror;

        /* Truncate.  Note that this may grow the file! */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        ret = hFile == (HANDLE)-1;
        if (ret == 0) {
            ret = SetEndOfFile(hFile) == 0;
            if (ret)
                errno = EACCES;
        }
        Py_END_ALLOW_THREADS
        if (ret)
            goto onioerror;
    }
#else
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = ftruncate(fileno(f->f_fp), target);
    Py_END_ALLOW_THREADS
    if (ret != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    /* Restore original file position. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    ret = _portable_fseek(f->f_fp, saved_pos, SEEK_SET) != 0;
    Py_END_ALLOW_THREADS
    if (ret)
        goto onioerror;

    Py_INCREF(Py_None);
    return Py_None;

onioerror:
    /* Shared exit for every failed stdio / OS call above. */
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
594 static PyObject *
595 file_tell(PyFileObject *f)
597 Py_off_t pos;
599 if (f->f_fp == NULL)
600 return err_closed();
601 Py_BEGIN_ALLOW_THREADS
602 errno = 0;
603 pos = _portable_ftell(f->f_fp);
604 Py_END_ALLOW_THREADS
605 if (pos == -1) {
606 PyErr_SetFromErrno(PyExc_IOError);
607 clearerr(f->f_fp);
608 return NULL;
610 if (f->f_skipnextlf) {
611 int c;
612 c = GETC(f->f_fp);
613 if (c == '\n') {
614 pos++;
615 f->f_skipnextlf = 0;
616 } else if (c != EOF) ungetc(c, f->f_fp);
618 #if !defined(HAVE_LARGEFILE_SUPPORT)
619 return PyInt_FromLong(pos);
620 #else
621 return PyLong_FromLongLong(pos);
622 #endif
625 static PyObject *
626 file_fileno(PyFileObject *f)
628 if (f->f_fp == NULL)
629 return err_closed();
630 return PyInt_FromLong((long) fileno(f->f_fp));
633 static PyObject *
634 file_flush(PyFileObject *f)
636 int res;
638 if (f->f_fp == NULL)
639 return err_closed();
640 Py_BEGIN_ALLOW_THREADS
641 errno = 0;
642 res = fflush(f->f_fp);
643 Py_END_ALLOW_THREADS
644 if (res != 0) {
645 PyErr_SetFromErrno(PyExc_IOError);
646 clearerr(f->f_fp);
647 return NULL;
649 Py_INCREF(Py_None);
650 return Py_None;
653 static PyObject *
654 file_isatty(PyFileObject *f)
656 long res;
657 if (f->f_fp == NULL)
658 return err_closed();
659 Py_BEGIN_ALLOW_THREADS
660 res = isatty((int)fileno(f->f_fp));
661 Py_END_ALLOW_THREADS
662 return PyBool_FromLong(res);
/* Buffer growth steps used by new_buffersize().
   SMALLCHUNK is BUFSIZ, but never less than 8192.
   BIGCHUNK is 512 KiB, or 16 KiB when int is narrower than 4 bytes. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

#if SIZEOF_INT >= 4
#define BIGCHUNK  (512 * 1024)
#else
#define BIGCHUNK  (512 * 32)
#endif
678 static size_t
679 new_buffersize(PyFileObject *f, size_t currentsize)
681 #ifdef HAVE_FSTAT
682 off_t pos, end;
683 struct stat st;
684 if (fstat(fileno(f->f_fp), &st) == 0) {
685 end = st.st_size;
686 /* The following is not a bug: we really need to call lseek()
687 *and* ftell(). The reason is that some stdio libraries
688 mistakenly flush their buffer when ftell() is called and
689 the lseek() call it makes fails, thereby throwing away
690 data that cannot be recovered in any way. To avoid this,
691 we first test lseek(), and only call ftell() if lseek()
692 works. We can't use the lseek() value either, because we
693 need to take the amount of buffered data into account.
694 (Yet another reason why stdio stinks. :-) */
695 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
696 if (pos >= 0) {
697 pos = ftell(f->f_fp);
699 if (pos < 0)
700 clearerr(f->f_fp);
701 if (end > pos && pos >= 0)
702 return currentsize + end - pos + 1;
703 /* Add 1 so if the file were to grow we'd notice. */
705 #endif
706 if (currentsize > SMALLCHUNK) {
707 /* Keep doubling until we reach BIGCHUNK;
708 then keep adding BIGCHUNK. */
709 if (currentsize <= BIGCHUNK)
710 return currentsize + currentsize;
711 else
712 return currentsize + BIGCHUNK;
714 return currentsize + SMALLCHUNK;
/* BLOCKED_ERRNO(x): true when errno value x means "operation would
   block".  Tests both EWOULDBLOCK and EAGAIN when they exist and
   differ, whichever one exists otherwise, and is constant 0 when the
   platform defines neither. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
731 static PyObject *
732 file_read(PyFileObject *f, PyObject *args)
734 long bytesrequested = -1;
735 size_t bytesread, buffersize, chunksize;
736 PyObject *v;
738 if (f->f_fp == NULL)
739 return err_closed();
740 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
741 return NULL;
742 if (bytesrequested < 0)
743 buffersize = new_buffersize(f, (size_t)0);
744 else
745 buffersize = bytesrequested;
746 if (buffersize > INT_MAX) {
747 PyErr_SetString(PyExc_OverflowError,
748 "requested number of bytes is more than a Python string can hold");
749 return NULL;
751 v = PyString_FromStringAndSize((char *)NULL, buffersize);
752 if (v == NULL)
753 return NULL;
754 bytesread = 0;
755 for (;;) {
756 Py_BEGIN_ALLOW_THREADS
757 errno = 0;
758 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
759 buffersize - bytesread, f->f_fp, (PyObject *)f);
760 Py_END_ALLOW_THREADS
761 if (chunksize == 0) {
762 if (!ferror(f->f_fp))
763 break;
764 clearerr(f->f_fp);
765 /* When in non-blocking mode, data shouldn't
766 * be discarded if a blocking signal was
767 * received. That will also happen if
768 * chunksize != 0, but bytesread < buffersize. */
769 if (bytesread > 0 && BLOCKED_ERRNO(errno))
770 break;
771 PyErr_SetFromErrno(PyExc_IOError);
772 Py_DECREF(v);
773 return NULL;
775 bytesread += chunksize;
776 if (bytesread < buffersize) {
777 clearerr(f->f_fp);
778 break;
780 if (bytesrequested < 0) {
781 buffersize = new_buffersize(f, buffersize);
782 if (_PyString_Resize(&v, buffersize) < 0)
783 return NULL;
784 } else {
785 /* Got what was requested. */
786 break;
789 if (bytesread != buffersize)
790 _PyString_Resize(&v, bytesread);
791 return v;
794 static PyObject *
795 file_readinto(PyFileObject *f, PyObject *args)
797 char *ptr;
798 int ntodo;
799 size_t ndone, nnow;
801 if (f->f_fp == NULL)
802 return err_closed();
803 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
804 return NULL;
805 ndone = 0;
806 while (ntodo > 0) {
807 Py_BEGIN_ALLOW_THREADS
808 errno = 0;
809 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
810 (PyObject *)f);
811 Py_END_ALLOW_THREADS
812 if (nnow == 0) {
813 if (!ferror(f->f_fp))
814 break;
815 PyErr_SetFromErrno(PyExc_IOError);
816 clearerr(f->f_fp);
817 return NULL;
819 ndone += nnow;
820 ntodo -= nnow;
822 return PyInt_FromLong((long)ndone);
825 /**************************************************************************
826 Routine to get next line using platform fgets().
828 Under MSVC 6:
830 + MS threadsafe getc is very slow (multiple layers of function calls before+
831 after each character, to lock+unlock the stream).
832 + The stream-locking functions are MS-internal -- can't access them from user
833 code.
834 + There's nothing Tim could find in the MS C or platform SDK libraries that
835 can worm around this.
836 + MS fgets locks/unlocks only once per line; it's the only hook we have.
838 So we use fgets for speed(!), despite that it's painful.
840 MS realloc is also slow.
842 Reports from other platforms on this method vs getc_unlocked (which MS doesn't
843 have):
844 Linux a wash
845 Solaris a wash
846 Tru64 Unix getline_via_fgets significantly faster
848 CAUTION: The C std isn't clear about this: in those cases where fgets
849 writes something into the buffer, can it write into any position beyond the
850 required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
851 known on which it does; and it would be a strange way to code fgets. Still,
852 getline_via_fgets may not work correctly if it does. The std test
853 test_bufio.py should fail if platform fgets() routinely writes beyond the
854 trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
855 **************************************************************************/
/* Use this routine if told to, or by default on non-get_unlocked()
 * platforms unless told not to.  Yikes!  Let's spell that out:
 * On a platform with getc_unlocked():
 *     By default, use getc_unlocked().
 *     If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 * On a platform without getc_unlocked():
 *     By default, use fgets().
 *     If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 */
#ifndef USE_FGETS_IN_GETLINE
#ifndef HAVE_GETC_UNLOCKED
#define USE_FGETS_IN_GETLINE
#endif
#endif

/* DONT_USE_FGETS_IN_GETLINE overrides everything above. */
#ifdef DONT_USE_FGETS_IN_GETLINE
#undef USE_FGETS_IN_GETLINE
#endif
#ifdef USE_FGETS_IN_GETLINE
/* Read one line of arbitrary length from fp with fgets() and return it
   as a new string object (empty at EOF).  Returns NULL with an
   exception set when a signal handler raised, the line would exceed
   INT_MAX bytes, or memory runs out.

   Technique: the unused part of the buffer is filled with '\n' before
   each fgets() call; afterwards the position of the first '\n' and the
   byte after it reveal how much fgets() actually wrote. */
static PyObject*
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
 * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 * to fill this much of the buffer with a known value in order to figure out
 * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 * than "most" lines, we waste time filling unused buffer slots.  100 is
 * surely adequate for most peoples' email archives, chewing over source code,
 * etc -- "regular old text files".
 * MAXBUFSIZE is the maximum line length that lets us get away with the less
 * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 * cautions about boosting that.  300 was chosen because the worst real-life
 * text-crunching job reported on Python-Dev was a mail-log crawler where over
 * half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
    char *hit;              /* result of fgets(), then of memchr() */
    char stackbuf[MAXBUFSIZE];
    PyObject *line;         /* the string object result */
    char *dest;             /* address of next free slot */
    char *limit;            /* address one beyond last free slot */
    size_t room;            /* # of free buffer slots; limit-dest */
    size_t total_v_size;    /* total # of slots in buffer */
    size_t increment;       /* amount to increment the buffer */

    /* Optimize for normal case:  avoid _PyString_Resize if at all
     * possible via first reading into stack buffer "stackbuf".
     */
    total_v_size = INITBUFSIZE;     /* start small and pray */
    dest = stackbuf;
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        limit = stackbuf + total_v_size;
        room = limit - dest;
        memset(dest, '\n', room);
        hit = fgets(dest, room, fp);
        Py_END_ALLOW_THREADS

        if (hit == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals())
                return NULL;
            line = PyString_FromStringAndSize(stackbuf, dest - stackbuf);
            return line;
        }
        /* fgets read *something* */
        hit = memchr(dest, '\n', room);
        if (hit != NULL) {
            /* Did the \n come from fgets or from us?
             * Since fgets stops at the first \n, and then writes
             * \0, if it's from fgets a \0 must be next.  But if
             * that's so, it could not have come from us, since
             * the \n's we filled the buffer with have only more
             * \n's to the right.
             */
            if (hit+1 < limit && *(hit+1) == '\0') {
                /* It's from fgets:  we win!  In particular,
                 * we haven't done any mallocs yet, and can
                 * build the final result on the first try.
                 */
                ++hit;      /* include \n from fgets */
            }
            else {
                /* Must be from us:  fgets didn't fill the
                 * buffer and didn't find a newline, so it
                 * must be the last and newline-free line of
                 * the file.
                 */
                assert(hit > dest && *(hit-1) == '\0');
                --hit;      /* don't include \0 from fgets */
            }
            line = PyString_FromStringAndSize(stackbuf, hit - stackbuf);
            return line;
        }
        /* yuck:  fgets overwrote all the newlines, i.e. the entire
         * buffer.  So this line isn't over yet, or maybe it is but
         * we're exactly at EOF.  If we haven't already, try using the
         * rest of the stack buffer.
         */
        assert(*(limit-1) == '\0');
        if (dest == stackbuf) {
            dest = limit - 1;       /* overwrite trailing null */
            total_v_size = MAXBUFSIZE;
        }
        else
            break;
    }

    /* The stack buffer isn't big enough; malloc a string object and read
     * into its buffer.
     */
    total_v_size = MAXBUFSIZE << 1;
    line = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
    if (line == NULL)
        return line;
    /* copy over everything except the last null byte */
    memcpy(BUF(line), stackbuf, MAXBUFSIZE-1);
    dest = BUF(line) + MAXBUFSIZE - 1;

    /* Keep reading stuff into line; if it ever ends successfully, break
     * after setting hit one beyond the end of the line.  The code here is
     * very much like the code above, except reads into line's buffer; see
     * the code above for detailed comments about the logic.
     */
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        limit = BUF(line) + total_v_size;
        room = limit - dest;
        memset(dest, '\n', room);
        hit = fgets(dest, room, fp);
        Py_END_ALLOW_THREADS

        if (hit == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(line);
                return NULL;
            }
            hit = dest;
            break;
        }
        hit = memchr(dest, '\n', room);
        if (hit != NULL) {
            if (hit+1 < limit && *(hit+1) == '\0') {
                /* \n came from fgets */
                ++hit;
                break;
            }
            /* \n came from us; last line of file, no newline */
            assert(hit > dest && *(hit-1) == '\0');
            --hit;
            break;
        }
        /* expand buffer and try again */
        assert(*(limit-1) == '\0');
        increment = total_v_size >> 2;  /* mild exponential growth */
        total_v_size += increment;
        if (total_v_size > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(line);
            return NULL;
        }
        if (_PyString_Resize(&line, (int)total_v_size) < 0)
            return NULL;
        /* overwrite the trailing null byte */
        dest = BUF(line) + (total_v_size - increment - 1);
    }
    if (BUF(line) + total_v_size != hit)
        _PyString_Resize(&line, hit - BUF(line));
    return line;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
1033 /* Internal routine to get a line.
1034 Size argument interpretation:
1035 > 0: max length;
1036 <= 0: read arbitrary line
1039 static PyObject *
1040 get_line(PyFileObject *f, int n)
1042 FILE *fp = f->f_fp;
1043 int c;
1044 char *buf, *end;
1045 size_t total_v_size; /* total # of slots in buffer */
1046 size_t used_v_size; /* # used slots in buffer */
1047 size_t increment; /* amount to increment the buffer */
1048 PyObject *v;
1049 int newlinetypes = f->f_newlinetypes;
1050 int skipnextlf = f->f_skipnextlf;
1051 int univ_newline = f->f_univ_newline;
1053 #if defined(USE_FGETS_IN_GETLINE)
1054 if (n <= 0 && !univ_newline )
1055 return getline_via_fgets(fp);
1056 #endif
1057 total_v_size = n > 0 ? n : 100;
1058 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1059 if (v == NULL)
1060 return NULL;
1061 buf = BUF(v);
1062 end = buf + total_v_size;
1064 for (;;) {
1065 Py_BEGIN_ALLOW_THREADS
1066 FLOCKFILE(fp);
1067 if (univ_newline) {
1068 c = 'x'; /* Shut up gcc warning */
1069 while ( buf != end && (c = GETC(fp)) != EOF ) {
1070 if (skipnextlf ) {
1071 skipnextlf = 0;
1072 if (c == '\n') {
1073 /* Seeing a \n here with
1074 * skipnextlf true means we
1075 * saw a \r before.
1077 newlinetypes |= NEWLINE_CRLF;
1078 c = GETC(fp);
1079 if (c == EOF) break;
1080 } else {
1081 newlinetypes |= NEWLINE_CR;
1084 if (c == '\r') {
1085 skipnextlf = 1;
1086 c = '\n';
1087 } else if ( c == '\n')
1088 newlinetypes |= NEWLINE_LF;
1089 *buf++ = c;
1090 if (c == '\n') break;
1092 if ( c == EOF && skipnextlf )
1093 newlinetypes |= NEWLINE_CR;
1094 } else /* If not universal newlines use the normal loop */
1095 while ((c = GETC(fp)) != EOF &&
1096 (*buf++ = c) != '\n' &&
1097 buf != end)
1099 FUNLOCKFILE(fp);
1100 Py_END_ALLOW_THREADS
1101 f->f_newlinetypes = newlinetypes;
1102 f->f_skipnextlf = skipnextlf;
1103 if (c == '\n')
1104 break;
1105 if (c == EOF) {
1106 if (ferror(fp)) {
1107 PyErr_SetFromErrno(PyExc_IOError);
1108 clearerr(fp);
1109 Py_DECREF(v);
1110 return NULL;
1112 clearerr(fp);
1113 if (PyErr_CheckSignals()) {
1114 Py_DECREF(v);
1115 return NULL;
1117 break;
1119 /* Must be because buf == end */
1120 if (n > 0)
1121 break;
1122 used_v_size = total_v_size;
1123 increment = total_v_size >> 2; /* mild exponential growth */
1124 total_v_size += increment;
1125 if (total_v_size > INT_MAX) {
1126 PyErr_SetString(PyExc_OverflowError,
1127 "line is longer than a Python string can hold");
1128 Py_DECREF(v);
1129 return NULL;
1131 if (_PyString_Resize(&v, total_v_size) < 0)
1132 return NULL;
1133 buf = BUF(v) + used_v_size;
1134 end = BUF(v) + total_v_size;
1137 used_v_size = buf - BUF(v);
1138 if (used_v_size != total_v_size)
1139 _PyString_Resize(&v, used_v_size);
1140 return v;
1143 /* External C interface */
1145 PyObject *
1146 PyFile_GetLine(PyObject *f, int n)
1148 PyObject *result;
1150 if (f == NULL) {
1151 PyErr_BadInternalCall();
1152 return NULL;
1155 if (PyFile_Check(f)) {
1156 if (((PyFileObject*)f)->f_fp == NULL)
1157 return err_closed();
1158 result = get_line((PyFileObject *)f, n);
1160 else {
1161 PyObject *reader;
1162 PyObject *args;
1164 reader = PyObject_GetAttrString(f, "readline");
1165 if (reader == NULL)
1166 return NULL;
1167 if (n <= 0)
1168 args = PyTuple_New(0);
1169 else
1170 args = Py_BuildValue("(i)", n);
1171 if (args == NULL) {
1172 Py_DECREF(reader);
1173 return NULL;
1175 result = PyEval_CallObject(reader, args);
1176 Py_DECREF(reader);
1177 Py_DECREF(args);
1178 if (result != NULL && !PyString_Check(result) &&
1179 !PyUnicode_Check(result)) {
1180 Py_DECREF(result);
1181 result = NULL;
1182 PyErr_SetString(PyExc_TypeError,
1183 "object.readline() returned non-string");
1187 if (n < 0 && result != NULL && PyString_Check(result)) {
1188 char *s = PyString_AS_STRING(result);
1189 int len = PyString_GET_SIZE(result);
1190 if (len == 0) {
1191 Py_DECREF(result);
1192 result = NULL;
1193 PyErr_SetString(PyExc_EOFError,
1194 "EOF when reading a line");
1196 else if (s[len-1] == '\n') {
1197 if (result->ob_refcnt == 1)
1198 _PyString_Resize(&result, len-1);
1199 else {
1200 PyObject *v;
1201 v = PyString_FromStringAndSize(s, len-1);
1202 Py_DECREF(result);
1203 result = v;
1207 #ifdef Py_USING_UNICODE
1208 if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1209 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1210 int len = PyUnicode_GET_SIZE(result);
1211 if (len == 0) {
1212 Py_DECREF(result);
1213 result = NULL;
1214 PyErr_SetString(PyExc_EOFError,
1215 "EOF when reading a line");
1217 else if (s[len-1] == '\n') {
1218 if (result->ob_refcnt == 1)
1219 PyUnicode_Resize(&result, len-1);
1220 else {
1221 PyObject *v;
1222 v = PyUnicode_FromUnicode(s, len-1);
1223 Py_DECREF(result);
1224 result = v;
1228 #endif
1229 return result;
1232 /* Python method */
1234 static PyObject *
1235 file_readline(PyFileObject *f, PyObject *args)
1237 int n = -1;
1239 if (f->f_fp == NULL)
1240 return err_closed();
1241 if (!PyArg_ParseTuple(args, "|i:readline", &n))
1242 return NULL;
1243 if (n == 0)
1244 return PyString_FromString("");
1245 if (n < 0)
1246 n = 0;
1247 return get_line(f, n);
1250 static PyObject *
1251 file_readlines(PyFileObject *f, PyObject *args)
1253 long sizehint = 0;
1254 PyObject *list;
1255 PyObject *line;
1256 char small_buffer[SMALLCHUNK];
1257 char *buffer = small_buffer;
1258 size_t buffersize = SMALLCHUNK;
1259 PyObject *big_buffer = NULL;
1260 size_t nfilled = 0;
1261 size_t nread;
1262 size_t totalread = 0;
1263 char *p, *q, *end;
1264 int err;
1265 int shortread = 0;
1267 if (f->f_fp == NULL)
1268 return err_closed();
1269 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1270 return NULL;
1271 if ((list = PyList_New(0)) == NULL)
1272 return NULL;
1273 for (;;) {
1274 if (shortread)
1275 nread = 0;
1276 else {
1277 Py_BEGIN_ALLOW_THREADS
1278 errno = 0;
1279 nread = Py_UniversalNewlineFread(buffer+nfilled,
1280 buffersize-nfilled, f->f_fp, (PyObject *)f);
1281 Py_END_ALLOW_THREADS
1282 shortread = (nread < buffersize-nfilled);
1284 if (nread == 0) {
1285 sizehint = 0;
1286 if (!ferror(f->f_fp))
1287 break;
1288 PyErr_SetFromErrno(PyExc_IOError);
1289 clearerr(f->f_fp);
1290 error:
1291 Py_DECREF(list);
1292 list = NULL;
1293 goto cleanup;
1295 totalread += nread;
1296 p = memchr(buffer+nfilled, '\n', nread);
1297 if (p == NULL) {
1298 /* Need a larger buffer to fit this line */
1299 nfilled += nread;
1300 buffersize *= 2;
1301 if (buffersize > INT_MAX) {
1302 PyErr_SetString(PyExc_OverflowError,
1303 "line is longer than a Python string can hold");
1304 goto error;
1306 if (big_buffer == NULL) {
1307 /* Create the big buffer */
1308 big_buffer = PyString_FromStringAndSize(
1309 NULL, buffersize);
1310 if (big_buffer == NULL)
1311 goto error;
1312 buffer = PyString_AS_STRING(big_buffer);
1313 memcpy(buffer, small_buffer, nfilled);
1315 else {
1316 /* Grow the big buffer */
1317 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1318 goto error;
1319 buffer = PyString_AS_STRING(big_buffer);
1321 continue;
1323 end = buffer+nfilled+nread;
1324 q = buffer;
1325 do {
1326 /* Process complete lines */
1327 p++;
1328 line = PyString_FromStringAndSize(q, p-q);
1329 if (line == NULL)
1330 goto error;
1331 err = PyList_Append(list, line);
1332 Py_DECREF(line);
1333 if (err != 0)
1334 goto error;
1335 q = p;
1336 p = memchr(q, '\n', end-q);
1337 } while (p != NULL);
1338 /* Move the remaining incomplete line to the start */
1339 nfilled = end-q;
1340 memmove(buffer, q, nfilled);
1341 if (sizehint > 0)
1342 if (totalread >= (size_t)sizehint)
1343 break;
1345 if (nfilled != 0) {
1346 /* Partial last line */
1347 line = PyString_FromStringAndSize(buffer, nfilled);
1348 if (line == NULL)
1349 goto error;
1350 if (sizehint > 0) {
1351 /* Need to complete the last line */
1352 PyObject *rest = get_line(f, 0);
1353 if (rest == NULL) {
1354 Py_DECREF(line);
1355 goto error;
1357 PyString_Concat(&line, rest);
1358 Py_DECREF(rest);
1359 if (line == NULL)
1360 goto error;
1362 err = PyList_Append(list, line);
1363 Py_DECREF(line);
1364 if (err != 0)
1365 goto error;
1367 cleanup:
1368 Py_XDECREF(big_buffer);
1369 return list;
1372 static PyObject *
1373 file_write(PyFileObject *f, PyObject *args)
1375 char *s;
1376 int n, n2;
1377 if (f->f_fp == NULL)
1378 return err_closed();
1379 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1380 return NULL;
1381 f->f_softspace = 0;
1382 Py_BEGIN_ALLOW_THREADS
1383 errno = 0;
1384 n2 = fwrite(s, 1, n, f->f_fp);
1385 Py_END_ALLOW_THREADS
1386 if (n2 != n) {
1387 PyErr_SetFromErrno(PyExc_IOError);
1388 clearerr(f->f_fp);
1389 return NULL;
1391 Py_INCREF(Py_None);
1392 return Py_None;
1395 static PyObject *
1396 file_writelines(PyFileObject *f, PyObject *seq)
1398 #define CHUNKSIZE 1000
1399 PyObject *list, *line;
1400 PyObject *it; /* iter(seq) */
1401 PyObject *result;
1402 int i, j, index, len, nwritten, islist;
1404 assert(seq != NULL);
1405 if (f->f_fp == NULL)
1406 return err_closed();
1408 result = NULL;
1409 list = NULL;
1410 islist = PyList_Check(seq);
1411 if (islist)
1412 it = NULL;
1413 else {
1414 it = PyObject_GetIter(seq);
1415 if (it == NULL) {
1416 PyErr_SetString(PyExc_TypeError,
1417 "writelines() requires an iterable argument");
1418 return NULL;
1420 /* From here on, fail by going to error, to reclaim "it". */
1421 list = PyList_New(CHUNKSIZE);
1422 if (list == NULL)
1423 goto error;
1426 /* Strategy: slurp CHUNKSIZE lines into a private list,
1427 checking that they are all strings, then write that list
1428 without holding the interpreter lock, then come back for more. */
1429 for (index = 0; ; index += CHUNKSIZE) {
1430 if (islist) {
1431 Py_XDECREF(list);
1432 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1433 if (list == NULL)
1434 goto error;
1435 j = PyList_GET_SIZE(list);
1437 else {
1438 for (j = 0; j < CHUNKSIZE; j++) {
1439 line = PyIter_Next(it);
1440 if (line == NULL) {
1441 if (PyErr_Occurred())
1442 goto error;
1443 break;
1445 PyList_SetItem(list, j, line);
1448 if (j == 0)
1449 break;
1451 /* Check that all entries are indeed strings. If not,
1452 apply the same rules as for file.write() and
1453 convert the results to strings. This is slow, but
1454 seems to be the only way since all conversion APIs
1455 could potentially execute Python code. */
1456 for (i = 0; i < j; i++) {
1457 PyObject *v = PyList_GET_ITEM(list, i);
1458 if (!PyString_Check(v)) {
1459 const char *buffer;
1460 int len;
1461 if (((f->f_binary &&
1462 PyObject_AsReadBuffer(v,
1463 (const void**)&buffer,
1464 &len)) ||
1465 PyObject_AsCharBuffer(v,
1466 &buffer,
1467 &len))) {
1468 PyErr_SetString(PyExc_TypeError,
1469 "writelines() argument must be a sequence of strings");
1470 goto error;
1472 line = PyString_FromStringAndSize(buffer,
1473 len);
1474 if (line == NULL)
1475 goto error;
1476 Py_DECREF(v);
1477 PyList_SET_ITEM(list, i, line);
1481 /* Since we are releasing the global lock, the
1482 following code may *not* execute Python code. */
1483 Py_BEGIN_ALLOW_THREADS
1484 f->f_softspace = 0;
1485 errno = 0;
1486 for (i = 0; i < j; i++) {
1487 line = PyList_GET_ITEM(list, i);
1488 len = PyString_GET_SIZE(line);
1489 nwritten = fwrite(PyString_AS_STRING(line),
1490 1, len, f->f_fp);
1491 if (nwritten != len) {
1492 Py_BLOCK_THREADS
1493 PyErr_SetFromErrno(PyExc_IOError);
1494 clearerr(f->f_fp);
1495 goto error;
1498 Py_END_ALLOW_THREADS
1500 if (j < CHUNKSIZE)
1501 break;
1504 Py_INCREF(Py_None);
1505 result = Py_None;
1506 error:
1507 Py_XDECREF(list);
1508 Py_XDECREF(it);
1509 return result;
1510 #undef CHUNKSIZE
1513 static PyObject *
1514 file_getiter(PyFileObject *f)
1516 if (f->f_fp == NULL)
1517 return err_closed();
1518 Py_INCREF(f);
1519 return (PyObject *)f;
1522 PyDoc_STRVAR(readline_doc,
1523 "readline([size]) -> next line from the file, as a string.\n"
1524 "\n"
1525 "Retain newline. A non-negative size argument limits the maximum\n"
1526 "number of bytes to return (an incomplete line may be returned then).\n"
1527 "Return an empty string at EOF.");
1529 PyDoc_STRVAR(read_doc,
1530 "read([size]) -> read at most size bytes, returned as a string.\n"
1531 "\n"
1532 "If the size argument is negative or omitted, read until EOF is reached.\n"
1533 "Notice that when in non-blocking mode, less data than what was requested\n"
1534 "may be returned, even if no size parameter was given.");
1536 PyDoc_STRVAR(write_doc,
1537 "write(str) -> None. Write string str to file.\n"
1538 "\n"
1539 "Note that due to buffering, flush() or close() may be needed before\n"
1540 "the file on disk reflects the data written.");
1542 PyDoc_STRVAR(fileno_doc,
1543 "fileno() -> integer \"file descriptor\".\n"
1544 "\n"
1545 "This is needed for lower-level file interfaces, such os.read().");
1547 PyDoc_STRVAR(seek_doc,
1548 "seek(offset[, whence]) -> None. Move to new file position.\n"
1549 "\n"
1550 "Argument offset is a byte count. Optional argument whence defaults to\n"
1551 "0 (offset from start of file, offset should be >= 0); other values are 1\n"
1552 "(move relative to current position, positive or negative), and 2 (move\n"
1553 "relative to end of file, usually negative, although many platforms allow\n"
1554 "seeking beyond the end of a file). If the file is opened in text mode,\n"
1555 "only offsets returned by tell() are legal. Use of other offsets causes\n"
1556 "undefined behavior."
1557 "\n"
1558 "Note that not all file objects are seekable.");
1560 #ifdef HAVE_FTRUNCATE
1561 PyDoc_STRVAR(truncate_doc,
1562 "truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1563 "\n"
1564 "Size defaults to the current file position, as returned by tell().");
1565 #endif
1567 PyDoc_STRVAR(tell_doc,
1568 "tell() -> current file position, an integer (may be a long integer).");
1570 PyDoc_STRVAR(readinto_doc,
1571 "readinto() -> Undocumented. Don't use this; it may go away.");
1573 PyDoc_STRVAR(readlines_doc,
1574 "readlines([size]) -> list of strings, each a line from the file.\n"
1575 "\n"
1576 "Call readline() repeatedly and return a list of the lines so read.\n"
1577 "The optional size argument, if given, is an approximate bound on the\n"
1578 "total number of bytes in the lines returned.");
1580 PyDoc_STRVAR(xreadlines_doc,
1581 "xreadlines() -> returns self.\n"
1582 "\n"
1583 "For backward compatibility. File objects now include the performance\n"
1584 "optimizations previously implemented in the xreadlines module.");
1586 PyDoc_STRVAR(writelines_doc,
1587 "writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1588 "\n"
1589 "Note that newlines are not added. The sequence can be any iterable object\n"
1590 "producing strings. This is equivalent to calling write() for each string.");
1592 PyDoc_STRVAR(flush_doc,
1593 "flush() -> None. Flush the internal I/O buffer.");
1595 PyDoc_STRVAR(close_doc,
1596 "close() -> None or (perhaps) an integer. Close the file.\n"
1597 "\n"
1598 "Sets data attribute .closed to True. A closed file cannot be used for\n"
1599 "further I/O operations. close() may be called more than once without\n"
1600 "error. Some kinds of file objects (for example, opened by popen())\n"
1601 "may return an exit status upon closing.");
1603 PyDoc_STRVAR(isatty_doc,
1604 "isatty() -> true or false. True if the file is connected to a tty device.");
1606 static PyMethodDef file_methods[] = {
1607 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1608 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
1609 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
1610 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1611 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1612 #ifdef HAVE_FTRUNCATE
1613 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1614 #endif
1615 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
1616 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1617 {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
1618 {"xreadlines",(PyCFunction)file_getiter, METH_NOARGS, xreadlines_doc},
1619 {"writelines",(PyCFunction)file_writelines, METH_O, writelines_doc},
1620 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1621 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1622 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
1623 {NULL, NULL} /* sentinel */
1626 #define OFF(x) offsetof(PyFileObject, x)
1628 static PyMemberDef file_memberlist[] = {
1629 {"softspace", T_INT, OFF(f_softspace), 0,
1630 "flag indicating that a space needs to be printed; used by print"},
1631 {"mode", T_OBJECT, OFF(f_mode), RO,
1632 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1633 {"name", T_OBJECT, OFF(f_name), RO,
1634 "file name"},
1635 {"encoding", T_OBJECT, OFF(f_encoding), RO,
1636 "file encoding"},
1637 /* getattr(f, "closed") is implemented without this table */
1638 {NULL} /* Sentinel */
1641 static PyObject *
1642 get_closed(PyFileObject *f, void *closure)
1644 return PyBool_FromLong((long)(f->f_fp == 0));
1646 static PyObject *
1647 get_newlines(PyFileObject *f, void *closure)
1649 switch (f->f_newlinetypes) {
1650 case NEWLINE_UNKNOWN:
1651 Py_INCREF(Py_None);
1652 return Py_None;
1653 case NEWLINE_CR:
1654 return PyString_FromString("\r");
1655 case NEWLINE_LF:
1656 return PyString_FromString("\n");
1657 case NEWLINE_CR|NEWLINE_LF:
1658 return Py_BuildValue("(ss)", "\r", "\n");
1659 case NEWLINE_CRLF:
1660 return PyString_FromString("\r\n");
1661 case NEWLINE_CR|NEWLINE_CRLF:
1662 return Py_BuildValue("(ss)", "\r", "\r\n");
1663 case NEWLINE_LF|NEWLINE_CRLF:
1664 return Py_BuildValue("(ss)", "\n", "\r\n");
1665 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1666 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1667 default:
1668 PyErr_Format(PyExc_SystemError,
1669 "Unknown newlines value 0x%x\n",
1670 f->f_newlinetypes);
1671 return NULL;
1675 static PyGetSetDef file_getsetlist[] = {
1676 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
1677 {"newlines", (getter)get_newlines, NULL,
1678 "end-of-line convention used in this file"},
1679 {0},
1682 static void
1683 drop_readahead(PyFileObject *f)
1685 if (f->f_buf != NULL) {
1686 PyMem_Free(f->f_buf);
1687 f->f_buf = NULL;
1691 /* Make sure that file has a readahead buffer with at least one byte
1692 (unless at EOF) and no more than bufsize. Returns negative value on
1693 error */
1694 static int
1695 readahead(PyFileObject *f, int bufsize)
1697 int chunksize;
1699 if (f->f_buf != NULL) {
1700 if( (f->f_bufend - f->f_bufptr) >= 1)
1701 return 0;
1702 else
1703 drop_readahead(f);
1705 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
1706 return -1;
1708 Py_BEGIN_ALLOW_THREADS
1709 errno = 0;
1710 chunksize = Py_UniversalNewlineFread(
1711 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1712 Py_END_ALLOW_THREADS
1713 if (chunksize == 0) {
1714 if (ferror(f->f_fp)) {
1715 PyErr_SetFromErrno(PyExc_IOError);
1716 clearerr(f->f_fp);
1717 drop_readahead(f);
1718 return -1;
1721 f->f_bufptr = f->f_buf;
1722 f->f_bufend = f->f_buf + chunksize;
1723 return 0;
1726 /* Used by file_iternext. The returned string will start with 'skip'
1727 uninitialized bytes followed by the remainder of the line. Don't be
1728 horrified by the recursive call: maximum recursion depth is limited by
1729 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1731 static PyStringObject *
1732 readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1734 PyStringObject* s;
1735 char *bufptr;
1736 char *buf;
1737 int len;
1739 if (f->f_buf == NULL)
1740 if (readahead(f, bufsize) < 0)
1741 return NULL;
1743 len = f->f_bufend - f->f_bufptr;
1744 if (len == 0)
1745 return (PyStringObject *)
1746 PyString_FromStringAndSize(NULL, skip);
1747 bufptr = memchr(f->f_bufptr, '\n', len);
1748 if (bufptr != NULL) {
1749 bufptr++; /* Count the '\n' */
1750 len = bufptr - f->f_bufptr;
1751 s = (PyStringObject *)
1752 PyString_FromStringAndSize(NULL, skip+len);
1753 if (s == NULL)
1754 return NULL;
1755 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1756 f->f_bufptr = bufptr;
1757 if (bufptr == f->f_bufend)
1758 drop_readahead(f);
1759 } else {
1760 bufptr = f->f_bufptr;
1761 buf = f->f_buf;
1762 f->f_buf = NULL; /* Force new readahead buffer */
1763 s = readahead_get_line_skip(
1764 f, skip+len, bufsize + (bufsize>>2) );
1765 if (s == NULL) {
1766 PyMem_Free(buf);
1767 return NULL;
1769 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1770 PyMem_Free(buf);
1772 return s;
1775 /* A larger buffer size may actually decrease performance. */
1776 #define READAHEAD_BUFSIZE 8192
1778 static PyObject *
1779 file_iternext(PyFileObject *f)
1781 PyStringObject* l;
1783 if (f->f_fp == NULL)
1784 return err_closed();
1786 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1787 if (l == NULL || PyString_GET_SIZE(l) == 0) {
1788 Py_XDECREF(l);
1789 return NULL;
1791 return (PyObject *)l;
1795 static PyObject *
1796 file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1798 PyObject *self;
1799 static PyObject *not_yet_string;
1801 assert(type != NULL && type->tp_alloc != NULL);
1803 if (not_yet_string == NULL) {
1804 not_yet_string = PyString_FromString("<uninitialized file>");
1805 if (not_yet_string == NULL)
1806 return NULL;
1809 self = type->tp_alloc(type, 0);
1810 if (self != NULL) {
1811 /* Always fill in the name and mode, so that nobody else
1812 needs to special-case NULLs there. */
1813 Py_INCREF(not_yet_string);
1814 ((PyFileObject *)self)->f_name = not_yet_string;
1815 Py_INCREF(not_yet_string);
1816 ((PyFileObject *)self)->f_mode = not_yet_string;
1817 Py_INCREF(Py_None);
1818 ((PyFileObject *)self)->f_encoding = Py_None;
1819 ((PyFileObject *)self)->weakreflist = NULL;
1821 return self;
1824 static int
1825 file_init(PyObject *self, PyObject *args, PyObject *kwds)
1827 PyFileObject *foself = (PyFileObject *)self;
1828 int ret = 0;
1829 static char *kwlist[] = {"name", "mode", "buffering", 0};
1830 char *name = NULL;
1831 char *mode = "r";
1832 int bufsize = -1;
1833 int wideargument = 0;
1835 assert(PyFile_Check(self));
1836 if (foself->f_fp != NULL) {
1837 /* Have to close the existing file first. */
1838 PyObject *closeresult = file_close(foself);
1839 if (closeresult == NULL)
1840 return -1;
1841 Py_DECREF(closeresult);
1844 #ifdef Py_WIN_WIDE_FILENAMES
1845 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
1846 PyObject *po;
1847 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
1848 kwlist, &po, &mode, &bufsize)) {
1849 wideargument = 1;
1850 if (fill_file_fields(foself, NULL, po, mode,
1851 fclose) == NULL)
1852 goto Error;
1853 } else {
1854 /* Drop the argument parsing error as narrow
1855 strings are also valid. */
1856 PyErr_Clear();
1859 #endif
1861 if (!wideargument) {
1862 PyObject *o_name;
1864 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
1865 Py_FileSystemDefaultEncoding,
1866 &name,
1867 &mode, &bufsize))
1868 return -1;
1870 /* We parse again to get the name as a PyObject */
1871 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file", kwlist,
1872 &o_name, &mode, &bufsize))
1873 return -1;
1875 if (fill_file_fields(foself, NULL, o_name, mode,
1876 fclose) == NULL)
1877 goto Error;
1879 if (open_the_file(foself, name, mode) == NULL)
1880 goto Error;
1881 foself->f_setbuf = NULL;
1882 PyFile_SetBufSize(self, bufsize);
1883 goto Done;
1885 Error:
1886 ret = -1;
1887 /* fall through */
1888 Done:
1889 PyMem_Free(name); /* free the encoded string */
1890 return ret;
1893 PyDoc_VAR(file_doc) =
1894 PyDoc_STR(
1895 "file(name[, mode[, buffering]]) -> file object\n"
1896 "\n"
1897 "Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
1898 "writing or appending. The file will be created if it doesn't exist\n"
1899 "when opened for writing or appending; it will be truncated when\n"
1900 "opened for writing. Add a 'b' to the mode for binary files.\n"
1901 "Add a '+' to the mode to allow simultaneous reading and writing.\n"
1902 "If the buffering argument is given, 0 means unbuffered, 1 means line\n"
1903 "buffered, and larger numbers specify the buffer size.\n"
1905 PyDoc_STR(
1906 "Add a 'U' to mode to open the file for input with universal newline\n"
1907 "support. Any line ending in the input file will be seen as a '\\n'\n"
1908 "in Python. Also, a file so opened gains the attribute 'newlines';\n"
1909 "the value for this attribute is one of None (no newline read yet),\n"
1910 "'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
1911 "\n"
1912 "'U' cannot be combined with 'w' or '+' mode.\n"
1914 PyDoc_STR(
1915 "\n"
1916 "Note: open() is an alias for file()."
1919 PyTypeObject PyFile_Type = {
1920 PyObject_HEAD_INIT(&PyType_Type)
1922 "file",
1923 sizeof(PyFileObject),
1925 (destructor)file_dealloc, /* tp_dealloc */
1926 0, /* tp_print */
1927 0, /* tp_getattr */
1928 0, /* tp_setattr */
1929 0, /* tp_compare */
1930 (reprfunc)file_repr, /* tp_repr */
1931 0, /* tp_as_number */
1932 0, /* tp_as_sequence */
1933 0, /* tp_as_mapping */
1934 0, /* tp_hash */
1935 0, /* tp_call */
1936 0, /* tp_str */
1937 PyObject_GenericGetAttr, /* tp_getattro */
1938 /* softspace is writable: we must supply tp_setattro */
1939 PyObject_GenericSetAttr, /* tp_setattro */
1940 0, /* tp_as_buffer */
1941 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
1942 file_doc, /* tp_doc */
1943 0, /* tp_traverse */
1944 0, /* tp_clear */
1945 0, /* tp_richcompare */
1946 offsetof(PyFileObject, weakreflist), /* tp_weaklistoffset */
1947 (getiterfunc)file_getiter, /* tp_iter */
1948 (iternextfunc)file_iternext, /* tp_iternext */
1949 file_methods, /* tp_methods */
1950 file_memberlist, /* tp_members */
1951 file_getsetlist, /* tp_getset */
1952 0, /* tp_base */
1953 0, /* tp_dict */
1954 0, /* tp_descr_get */
1955 0, /* tp_descr_set */
1956 0, /* tp_dictoffset */
1957 (initproc)file_init, /* tp_init */
1958 PyType_GenericAlloc, /* tp_alloc */
1959 file_new, /* tp_new */
1960 PyObject_Del, /* tp_free */
1963 /* Interface for the 'soft space' between print items. */
1966 PyFile_SoftSpace(PyObject *f, int newflag)
1968 int oldflag = 0;
1969 if (f == NULL) {
1970 /* Do nothing */
1972 else if (PyFile_Check(f)) {
1973 oldflag = ((PyFileObject *)f)->f_softspace;
1974 ((PyFileObject *)f)->f_softspace = newflag;
1976 else {
1977 PyObject *v;
1978 v = PyObject_GetAttrString(f, "softspace");
1979 if (v == NULL)
1980 PyErr_Clear();
1981 else {
1982 if (PyInt_Check(v))
1983 oldflag = PyInt_AsLong(v);
1984 Py_DECREF(v);
1986 v = PyInt_FromLong((long)newflag);
1987 if (v == NULL)
1988 PyErr_Clear();
1989 else {
1990 if (PyObject_SetAttrString(f, "softspace", v) != 0)
1991 PyErr_Clear();
1992 Py_DECREF(v);
1995 return oldflag;
1998 /* Interfaces to write objects/strings to file-like objects */
2001 PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2003 PyObject *writer, *value, *args, *result;
2004 if (f == NULL) {
2005 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2006 return -1;
2008 else if (PyFile_Check(f)) {
2009 FILE *fp = PyFile_AsFile(f);
2010 #ifdef Py_USING_UNICODE
2011 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2012 int result;
2013 #endif
2014 if (fp == NULL) {
2015 err_closed();
2016 return -1;
2018 #ifdef Py_USING_UNICODE
2019 if ((flags & Py_PRINT_RAW) &&
2020 PyUnicode_Check(v) && enc != Py_None) {
2021 char *cenc = PyString_AS_STRING(enc);
2022 value = PyUnicode_AsEncodedString(v, cenc, "strict");
2023 if (value == NULL)
2024 return -1;
2025 } else {
2026 value = v;
2027 Py_INCREF(value);
2029 result = PyObject_Print(value, fp, flags);
2030 Py_DECREF(value);
2031 return result;
2032 #else
2033 return PyObject_Print(v, fp, flags);
2034 #endif
2036 writer = PyObject_GetAttrString(f, "write");
2037 if (writer == NULL)
2038 return -1;
2039 if (flags & Py_PRINT_RAW) {
2040 if (PyUnicode_Check(v)) {
2041 value = v;
2042 Py_INCREF(value);
2043 } else
2044 value = PyObject_Str(v);
2046 else
2047 value = PyObject_Repr(v);
2048 if (value == NULL) {
2049 Py_DECREF(writer);
2050 return -1;
2052 args = PyTuple_Pack(1, value);
2053 if (args == NULL) {
2054 Py_DECREF(value);
2055 Py_DECREF(writer);
2056 return -1;
2058 result = PyEval_CallObject(writer, args);
2059 Py_DECREF(args);
2060 Py_DECREF(value);
2061 Py_DECREF(writer);
2062 if (result == NULL)
2063 return -1;
2064 Py_DECREF(result);
2065 return 0;
2069 PyFile_WriteString(const char *s, PyObject *f)
2071 if (f == NULL) {
2072 /* Should be caused by a pre-existing error */
2073 if (!PyErr_Occurred())
2074 PyErr_SetString(PyExc_SystemError,
2075 "null file for PyFile_WriteString");
2076 return -1;
2078 else if (PyFile_Check(f)) {
2079 FILE *fp = PyFile_AsFile(f);
2080 if (fp == NULL) {
2081 err_closed();
2082 return -1;
2084 fputs(s, fp);
2085 return 0;
2087 else if (!PyErr_Occurred()) {
2088 PyObject *v = PyString_FromString(s);
2089 int err;
2090 if (v == NULL)
2091 return -1;
2092 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2093 Py_DECREF(v);
2094 return err;
2096 else
2097 return -1;
2100 /* Try to get a file-descriptor from a Python object. If the object
2101 is an integer or long integer, its value is returned. If not, the
2102 object's fileno() method is called if it exists; the method must return
2103 an integer or long integer, which is returned as the file descriptor value.
2104 -1 is returned on failure.
2107 int PyObject_AsFileDescriptor(PyObject *o)
2109 int fd;
2110 PyObject *meth;
2112 if (PyInt_Check(o)) {
2113 fd = PyInt_AsLong(o);
2115 else if (PyLong_Check(o)) {
2116 fd = PyLong_AsLong(o);
2118 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2120 PyObject *fno = PyEval_CallObject(meth, NULL);
2121 Py_DECREF(meth);
2122 if (fno == NULL)
2123 return -1;
2125 if (PyInt_Check(fno)) {
2126 fd = PyInt_AsLong(fno);
2127 Py_DECREF(fno);
2129 else if (PyLong_Check(fno)) {
2130 fd = PyLong_AsLong(fno);
2131 Py_DECREF(fno);
2133 else {
2134 PyErr_SetString(PyExc_TypeError,
2135 "fileno() returned a non-integer");
2136 Py_DECREF(fno);
2137 return -1;
2140 else {
2141 PyErr_SetString(PyExc_TypeError,
2142 "argument must be an int, or have a fileno() method.");
2143 return -1;
2146 if (fd < 0) {
2147 PyErr_Format(PyExc_ValueError,
2148 "file descriptor cannot be a negative integer (%i)",
2149 fd);
2150 return -1;
2152 return fd;
2155 /* From here on we need access to the real fgets and fread */
2156 #undef fgets
2157 #undef fread
2160 ** Py_UniversalNewlineFgets is an fgets variation that understands
2161 ** all of \r, \n and \r\n conventions.
2162 ** The stream should be opened in binary mode.
2163 ** If fobj is NULL the routine always does newline conversion, and
2164 ** it may peek one char ahead to gobble the second char in \r\n.
2165 ** If fobj is non-NULL it must be a PyFileObject. In this case there
2166 ** is no readahead but in stead a flag is used to skip a following
2167 ** \n on the next read. Also, if the file is open in binary mode
2168 ** the whole conversion is skipped. Finally, the routine keeps track of
2169 ** the different types of newlines seen.
2170 ** Note that we need no error handling: fgets() treats error and eof
2171 ** identically.
2173 char *
2174 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2176 char *p = buf;
2177 int c;
2178 int newlinetypes = 0;
2179 int skipnextlf = 0;
2180 int univ_newline = 1;
2182 if (fobj) {
2183 if (!PyFile_Check(fobj)) {
2184 errno = ENXIO; /* What can you do... */
2185 return NULL;
2187 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2188 if ( !univ_newline )
2189 return fgets(buf, n, stream);
2190 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2191 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2193 FLOCKFILE(stream);
2194 c = 'x'; /* Shut up gcc warning */
2195 while (--n > 0 && (c = GETC(stream)) != EOF ) {
2196 if (skipnextlf ) {
2197 skipnextlf = 0;
2198 if (c == '\n') {
2199 /* Seeing a \n here with skipnextlf true
2200 ** means we saw a \r before.
2202 newlinetypes |= NEWLINE_CRLF;
2203 c = GETC(stream);
2204 if (c == EOF) break;
2205 } else {
2207 ** Note that c == EOF also brings us here,
2208 ** so we're okay if the last char in the file
2209 ** is a CR.
2211 newlinetypes |= NEWLINE_CR;
2214 if (c == '\r') {
2215 /* A \r is translated into a \n, and we skip
2216 ** an adjacent \n, if any. We don't set the
2217 ** newlinetypes flag until we've seen the next char.
2219 skipnextlf = 1;
2220 c = '\n';
2221 } else if ( c == '\n') {
2222 newlinetypes |= NEWLINE_LF;
2224 *p++ = c;
2225 if (c == '\n') break;
2227 if ( c == EOF && skipnextlf )
2228 newlinetypes |= NEWLINE_CR;
2229 FUNLOCKFILE(stream);
2230 *p = '\0';
2231 if (fobj) {
2232 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2233 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2234 } else if ( skipnextlf ) {
2235 /* If we have no file object we cannot save the
2236 ** skipnextlf flag. We have to readahead, which
2237 ** will cause a pause if we're reading from an
2238 ** interactive stream, but that is very unlikely
2239 ** unless we're doing something silly like
2240 ** execfile("/dev/tty").
2242 c = GETC(stream);
2243 if ( c != '\n' )
2244 ungetc(c, stream);
2246 if (p == buf)
2247 return NULL;
2248 return buf;
2252 ** Py_UniversalNewlineFread is an fread variation that understands
2253 ** all of \r, \n and \r\n conventions.
2254 ** The stream should be opened in binary mode.
2255 ** fobj must be a PyFileObject. In this case there
2256 ** is no readahead but in stead a flag is used to skip a following
2257 ** \n on the next read. Also, if the file is open in binary mode
2258 ** the whole conversion is skipped. Finally, the routine keeps track of
2259 ** the different types of newlines seen.
2261 size_t
2262 Py_UniversalNewlineFread(char *buf, size_t n,
2263 FILE *stream, PyObject *fobj)
2265 char *dst = buf;
2266 PyFileObject *f = (PyFileObject *)fobj;
2267 int newlinetypes, skipnextlf;
2269 assert(buf != NULL);
2270 assert(stream != NULL);
2272 if (!fobj || !PyFile_Check(fobj)) {
2273 errno = ENXIO; /* What can you do... */
2274 return 0;
2276 if (!f->f_univ_newline)
2277 return fread(buf, 1, n, stream);
2278 newlinetypes = f->f_newlinetypes;
2279 skipnextlf = f->f_skipnextlf;
2280 /* Invariant: n is the number of bytes remaining to be filled
2281 * in the buffer.
2283 while (n) {
2284 size_t nread;
2285 int shortread;
2286 char *src = dst;
2288 nread = fread(dst, 1, n, stream);
2289 assert(nread <= n);
2290 if (nread == 0)
2291 break;
2293 n -= nread; /* assuming 1 byte out for each in; will adjust */
2294 shortread = n != 0; /* true iff EOF or error */
2295 while (nread--) {
2296 char c = *src++;
2297 if (c == '\r') {
2298 /* Save as LF and set flag to skip next LF. */
2299 *dst++ = '\n';
2300 skipnextlf = 1;
2302 else if (skipnextlf && c == '\n') {
2303 /* Skip LF, and remember we saw CR LF. */
2304 skipnextlf = 0;
2305 newlinetypes |= NEWLINE_CRLF;
2306 ++n;
2308 else {
2309 /* Normal char to be stored in buffer. Also
2310 * update the newlinetypes flag if either this
2311 * is an LF or the previous char was a CR.
2313 if (c == '\n')
2314 newlinetypes |= NEWLINE_LF;
2315 else if (skipnextlf)
2316 newlinetypes |= NEWLINE_CR;
2317 *dst++ = c;
2318 skipnextlf = 0;
2321 if (shortread) {
2322 /* If this is EOF, update type flags. */
2323 if (skipnextlf && feof(stream))
2324 newlinetypes |= NEWLINE_CR;
2325 break;
2328 f->f_newlinetypes = newlinetypes;
2329 f->f_skipnextlf = skipnextlf;
2330 return dst - buf;