1 """ path.py - An object representing a path to a file or directory.
6 d = path('/home/guido/bin')
7 for f in d.files('*.py'):
10 This module requires Python 2.2 or later.
13 URL: http://www.jorendorff.com/articles/python/path
14 Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!)
20 # - Tree-walking functions don't avoid symlink loops. Matt Harrison sent me a patch for this.
21 # - Tree-walking functions can't ignore errors. Matt Harrison asked for this.
23 # - Two people asked for path.chdir(). This just seems wrong to me,
24 # I dunno. chdir() is moderately evil anyway.
26 # - Bug in write_text(). It doesn't support Universal newline mode.
27 # - Better error message in listdir() when self isn't a
28 # directory. (On Windows, the error message really sucks.)
29 # - Make sure everything has a good docstring.
30 # - Add methods for regex find and replace.
31 # - guess_content_type() method?
32 # - Perhaps support arguments to touch().
33 # - Could add split() and join() methods that generate warnings.
35 from __future__
import generators
37 import sys
, warnings
, os
, fnmatch
, glob
, shutil
, codecs
, md5
42 # Platform-specific support for path.owner
54 # Pre-2.3 support. Are unicode filenames supported?
58 if os
.path
.supports_unicode_filenames
:
61 except AttributeError:
64 # Pre-2.3 workaround for booleans
70 # Pre-2.3 workaround for basestring.
74 basestring
= (str, unicode)
76 # Universal newline support
78 if hasattr(file, 'newlines'):
82 class TreeWalkWarning(Warning):
86 """ Represents a filesystem path.
88 For documentation on individual methods, consult their
89 counterparts in os.path.
92 # --- Special Python methods.
95 return 'path(%s)' % _base
.__repr
__(self
)
97 # Adding a path and a string yields a path.
98 def __add__(self
, more
):
100 resultStr
= _base
.__add
__(self
, more
)
101 except TypeError: #Python bug
102 resultStr
= NotImplemented
103 if resultStr
is NotImplemented:
105 return self
.__class
__(resultStr
)
107 def __radd__(self
, other
):
108 if isinstance(other
, basestring
):
109 return self
.__class
__(other
.__add
__(self
))
111 return NotImplemented
113 # The / operator joins paths.
def __div__(self, rel):
    """ fp.__div__(rel) == fp / rel == fp.joinpath(rel)

    Join this path with 'rel' using os.path.join(), which adds a
    separator character where one is needed, and return the result
    as a new object of this path's own class.
    """
    joined = os.path.join(self, rel)
    return self.__class__(joined)
122 # Make the / operator work even when true division is enabled.
123 __truediv__
= __div__
126 """ Return the current working directory as a path object. """
127 return cls(_getcwd())
128 getcwd
= classmethod(getcwd
)
131 # --- Operations on path strings.
133 isabs
= os
.path
.isabs
def abspath(self):
    """ Return os.path.abspath() of this path, wrapped in this path's class. """
    absolute = os.path.abspath(self)
    return self.__class__(absolute)
def normcase(self):
    """ Return os.path.normcase() of this path, wrapped in this path's class. """
    normalized = os.path.normcase(self)
    return self.__class__(normalized)
def normpath(self):
    """ Return os.path.normpath() of this path, wrapped in this path's class. """
    normalized = os.path.normpath(self)
    return self.__class__(normalized)
def realpath(self):
    """ Return os.path.realpath() of this path, wrapped in this path's class. """
    resolved = os.path.realpath(self)
    return self.__class__(resolved)
def expanduser(self):
    """ Return os.path.expanduser() of this path, wrapped in this path's class. """
    expanded = os.path.expanduser(self)
    return self.__class__(expanded)
def expandvars(self):
    """ Return os.path.expandvars() of this path, wrapped in this path's class. """
    expanded = os.path.expandvars(self)
    return self.__class__(expanded)
def dirname(self):
    """ Return os.path.dirname() of this path, wrapped in this path's class. """
    directory = os.path.dirname(self)
    return self.__class__(directory)
141 basename
= os
.path
.basename
144 """ Clean up a filename by calling expandvars(),
145 expanduser(), and normpath() on it.
147 This is commonly everything needed to clean up a filename
148 read from a configuration file, for example.
150 return self
.expandvars().expanduser().normpath()
152 def _get_namebase(self
):
153 base
, ext
= os
.path
.splitext(self
.name
)
157 f
, ext
= os
.path
.splitext(_base(self
))
def _get_drive(self):
    """ Return the drive part of os.path.splitdrive(self), wrapped in this path's class. """
    # splitdrive() returns (drive, rest); only the drive is wanted here.
    return self.__class__(os.path.splitdrive(self)[0])
166 """ This path's parent directory, as a new path object.
168 For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib')
172 basename
, None, None,
173 """ The name of this file or directory without the full path.
175 For example, path('/usr/local/lib/libpython.so').name == 'libpython.so'
179 _get_namebase
, None, None,
180 """ The same as path.name, but with one file extension stripped off.
182 For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz',
183 but path('/home/guido/python.tar.gz').namebase == 'python.tar'
187 _get_ext
, None, None,
188 """ The file extension, for example '.py'. """)
191 _get_drive
, None, None,
192 """ The drive specifier, for example 'C:'.
193 This is always empty on systems that don't use drive specifiers.
197 """ p.splitpath() -> Return (p.parent, p.name). """
198 parent
, child
= os
.path
.split(self
)
199 return self
.__class
__(parent
), child
def splitdrive(self):
    """ p.splitdrive() -> Return (p.drive, <the rest of p>).

    Split the drive specifier from this path using
    os.path.splitdrive().  The drive is returned as an object of
    this path's class; the remainder is returned as
    os.path.splitdrive() produced it.  When there is no drive
    specifier the drive part is empty.
    """
    parts = os.path.splitdrive(self)
    return self.__class__(parts[0]), parts[1]
212 """ p.splitext() -> Return (p.stripext(), p.ext).
214 Split the filename extension from this path and return
215 the two parts. Either part may be empty.
217 The extension is everything from '.' to the end of the
218 last path segment. This has the property that if
219 (a, b) == p.splitext(), then a + b == p.
221 filename
, ext
= os
.path
.splitext(self
)
222 return self
.__class
__(filename
), ext
225 """ p.stripext() -> Remove one file extension from the path.
227 For example, path('/home/guido/python.tar.gz').stripext()
228 returns path('/home/guido/python.tar').
230 return self
.splitext()[0]
232 if hasattr(os
.path
, 'splitunc'):
234 unc
, rest
= os
.path
.splitunc(self
)
235 return self
.__class
__(unc
), rest
def _get_uncshare(self):
    """ Return the UNC part of os.path.splitunc(self), wrapped in this path's class. """
    # splitunc() returns (unc, rest); only the UNC mount point is wanted here.
    return self.__class__(os.path.splitunc(self)[0])
242 _get_uncshare
, None, None,
243 """ The UNC mount point for this path.
244 This is empty for paths on local drives. """)
def joinpath(self, *args):
    """ Join this path with one or more further components.

    The components are combined with os.path.join(), which adds a
    separator character (os.sep) if needed.  Returns a new object
    of this path's class.
    """
    combined = os.path.join(self, *args)
    return self.__class__(combined)
254 r
""" Return a list of the path components in this path.
256 The first item in the list will be a path. Its value will be
257 either os.curdir, os.pardir, empty, or the root directory of
258 this path (for example, '/' or 'C:\\'). The other items in
259 the list will be strings.
261 path.path.joinpath(*result) will yield the original path.
265 while loc
!= os
.curdir
and loc
!= os
.pardir
:
267 loc
, child
= prev
.splitpath()
276 """ Return this path as a relative path,
277 based from the current working directory.
279 cwd
= self
.__class
__(os
.getcwd())
280 return cwd
.relpathto(self
)
282 def relpathto(self
, dest
):
283 """ Return a relative path from self to dest.
285 If there is no relative path from self to dest, for example if
286 they reside on different drives in Windows, then this returns
289 origin
= self
.abspath()
290 dest
= self
.__class
__(dest
).abspath()
292 orig_list
= origin
.normcase().splitall()
293 # Don't normcase dest! We want to preserve the case.
294 dest_list
= dest
.splitall()
296 if orig_list
[0] != os
.path
.normcase(dest_list
[0]):
297 # Can't get here from there.
300 # Find the location where the two paths start to differ.
302 for start_seg
, dest_seg
in zip(orig_list
, dest_list
):
303 if start_seg
!= os
.path
.normcase(dest_seg
):
307 # Now i is the point where the two paths diverge.
308 # Need a certain number of "os.pardir"s to work up
309 # from the origin to the point of divergence.
310 segments
= [os
.pardir
] * (len(orig_list
) - i
)
311 # Need to add the diverging part of dest_list.
312 segments
+= dest_list
[i
:]
313 if len(segments
) == 0:
314 # If they happen to be identical, use os.curdir.
317 relpath
= os
.path
.join(*segments
)
318 return self
.__class
__(relpath
)
320 # --- Listing, searching, walking, and matching
def listdir(self, pattern=None):
    """ D.listdir() -> List of items in this directory.

    Each element of the result is this path joined (with the /
    operator) to one name returned by os.listdir().

    With the optional 'pattern' argument, the names are first
    narrowed with fnmatch.filter(), so only items whose names match
    the pattern are listed.

    Use D.files() or D.dirs() instead if you want a listing of just
    files or just subdirectories.
    """
    if pattern is None:
        entries = os.listdir(self)
    else:
        entries = fnmatch.filter(os.listdir(self), pattern)

    children = []
    for entry in entries:
        children.append(self / entry)
    return children
def dirs(self, pattern=None):
    """ D.dirs() -> List of this directory's subdirectories.

    The result holds those items of self.listdir(pattern) for which
    isdir() is true.  This does not walk recursively into
    subdirectories (but see path.walkdirs).

    With the optional 'pattern' argument, only directories whose
    names match the given pattern are listed, for example
    d.dirs('build-*').
    """
    subdirs = []
    for item in self.listdir(pattern):
        if item.isdir():
            subdirs.append(item)
    return subdirs
def files(self, pattern=None):
    """ D.files() -> List of the files in this directory.

    The result holds those items of self.listdir(pattern) for which
    isfile() is true.  This does not walk into subdirectories (see
    path.walkfiles).

    With the optional 'pattern' argument, only files whose names
    match the given pattern are listed.
    """
    regular = []
    for item in self.listdir(pattern):
        if item.isfile():
            regular.append(item)
    return regular
364 def walk(self
, pattern
=None, errors
='strict'):
365 """ D.walk() -> iterator over files and subdirs, recursively.
367 The iterator yields path objects naming each child item of
368 this directory and its descendants. This requires that
371 This performs a depth-first traversal of the directory tree.
372 Each directory is returned just before all its children.
374 The errors= keyword argument controls behavior when an
375 error occurs. The default is 'strict', which causes an
376 exception. The other allowed values are 'warn', which
377 reports the error via warnings.warn(), and 'ignore'.
379 if errors
not in ('strict', 'warn', 'ignore'):
380 raise ValueError("invalid errors parameter")
383 childList
= self
.listdir()
385 if errors
== 'ignore':
387 elif errors
== 'warn':
389 "Unable to list directory '%s': %s"
390 % (self
, sys
.exc_info()[1]),
395 for child
in childList
:
396 if pattern
is None or child
.fnmatch(pattern
):
399 isdir
= child
.isdir()
401 if errors
== 'ignore':
403 elif errors
== 'warn':
405 "Unable to access '%s': %s"
406 % (child
, sys
.exc_info()[1]),
413 for item
in child
.walk(pattern
, errors
):
416 def walkdirs(self
, pattern
=None, errors
='strict'):
417 """ D.walkdirs() -> iterator over subdirs, recursively.
419 With the optional 'pattern' argument, this yields only
420 directories whose names match the given pattern. For
421 example, mydir.walkdirs('*test') yields only directories
422 with names ending in 'test'.
424 The errors= keyword argument controls behavior when an
425 error occurs. The default is 'strict', which causes an
426 exception. The other allowed values are 'warn', which
427 reports the error via warnings.warn(), and 'ignore'.
429 if errors
not in ('strict', 'warn', 'ignore'):
430 raise ValueError("invalid errors parameter")
435 if errors
== 'ignore':
437 elif errors
== 'warn':
439 "Unable to list directory '%s': %s"
440 % (self
, sys
.exc_info()[1]),
446 if pattern
is None or child
.fnmatch(pattern
):
448 for subsubdir
in child
.walkdirs(pattern
, errors
):
451 def walkfiles(self
, pattern
=None, errors
='strict'):
452 """ D.walkfiles() -> iterator over files in D, recursively.
454 The optional argument, pattern, limits the results to files
455 with names that match the pattern. For example,
456 mydir.walkfiles('*.tmp') yields only files with the .tmp
459 if errors
not in ('strict', 'warn', 'ignore'):
460 raise ValueError("invalid errors parameter")
463 childList
= self
.listdir()
465 if errors
== 'ignore':
467 elif errors
== 'warn':
469 "Unable to list directory '%s': %s"
470 % (self
, sys
.exc_info()[1]),
475 for child
in childList
:
477 isfile
= child
.isfile()
478 isdir
= not isfile
and child
.isdir()
480 if errors
== 'ignore':
482 elif errors
== 'warn':
484 "Unable to access '%s': %s"
485 % (self
, sys
.exc_info()[1]),
491 if pattern
is None or child
.fnmatch(pattern
):
494 for f
in child
.walkfiles(pattern
, errors
):
def fnmatch(self, pattern):
    """ Return True if self.name matches the given pattern.

    pattern - A filename pattern with wildcards, as understood by
        fnmatch.fnmatch().
    """
    # Only the 'name' attribute of this path is matched, not the whole path.
    leaf = self.name
    return fnmatch.fnmatch(leaf, pattern)
505 def glob(self
, pattern
):
506 """ Return a list of path objects that match the pattern.
508 pattern - a path relative to this directory, with wildcards.
510 For example, path('/users').glob('*/bin/*') returns a list
511 of all the files users have in their bin directories.
514 return [cls(s
) for s
in glob
.glob(_base(self
/ pattern
))]
517 # --- Reading or writing an entire file at once.
def open(self, mode='r'):
    """ Open this file with the given mode (default 'r') and return the file object. """
    handle = file(self, mode)
    return handle
524 """ Open this file, read all bytes, return them as a string. """
531 def write_bytes(self
, bytes
, append
=False):
532 """ Open this file and write the given bytes to it.
534 Default behavior is to overwrite any existing file.
535 Call p.write_bytes(bytes, append=True) to append instead.
547 def text(self
, encoding
=None, errors
='strict'):
548 r
""" Open this file, read it in, return the content as a string.
550 This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r'
551 are automatically translated to '\n'.
555 encoding - The Unicode encoding (or character set) of
556 the file. If present, the content of the file is
557 decoded and returned as a unicode object; otherwise
558 it is returned as an 8-bit str.
559 errors - How to handle Unicode errors; see help(str.decode)
560 for the options. Default is 'strict'.
564 f
= self
.open(_textmode
)
571 f
= codecs
.open(self
, 'r', encoding
, errors
)
572 # (Note - Can't use 'U' mode here, since codecs.open
573 # doesn't support 'U' mode, even in Python 2.3.)
578 return (t
.replace(u
'\r\n', u
'\n')
579 .replace(u
'\r\x85', u
'\n')
580 .replace(u
'\r', u
'\n')
581 .replace(u
'\x85', u
'\n')
582 .replace(u
'\u2028', u
'\n'))
584 def write_text(self
, text
, encoding
=None, errors
='strict', linesep
=os
.linesep
, append
=False):
585 r
""" Write the given text to this file.
587 The default behavior is to overwrite any existing file;
588 to append instead, use the 'append=True' keyword argument.
590 There are two differences between path.write_text() and
591 path.write_bytes(): newline handling and Unicode handling.
596 - text - str/unicode - The text to be written.
598 - encoding - str - The Unicode encoding that will be used.
599 This is ignored if 'text' isn't a Unicode string.
601 - errors - str - How to handle Unicode encoding errors.
602 Default is 'strict'. See help(unicode.encode) for the
603 options. This is ignored if 'text' isn't a Unicode
606 - linesep - keyword argument - str/unicode - The sequence of
607 characters to be used to mark end-of-line. The default is
608 os.linesep. You can also specify None; this means to
609 leave all newlines as they are in 'text'.
611 - append - keyword argument - bool - Specifies what to do if
612 the file already exists (True: append to the end of it;
613 False: overwrite it.) The default is False.
616 --- Newline handling.
618 write_text() converts all standard end-of-line sequences
619 ('\n', '\r', and '\r\n') to your platform's default end-of-line
620 sequence (see os.linesep; on Windows, for example, the
621 end-of-line marker is '\r\n').
623 If you don't like your platform's default, you can override it
624 using the 'linesep=' keyword argument. If you specifically want
625 write_text() to preserve the newlines as-is, use 'linesep=None'.
627 This applies to Unicode text the same as to 8-bit text, except
628 there are three additional standard Unicode end-of-line sequences:
629 u'\x85', u'\r\x85', and u'\u2028'.
631 (This is slightly different from when you open a file for
632 writing with fopen(filename, "w") in C or file(filename, 'w')
638 If 'text' isn't Unicode, then apart from newline handling, the
639 bytes are written verbatim to the file. The 'encoding' and
640 'errors' arguments are not used and must be omitted.
642 If 'text' is Unicode, it is first converted to bytes using the
643 specified 'encoding' (or the default encoding if 'encoding'
644 isn't specified). The 'errors' argument applies only to this
648 if isinstance(text
, unicode):
649 if linesep
is not None:
650 # Convert all standard end-of-line sequences to
651 # ordinary newline characters.
652 text
= (text
.replace(u
'\r\n', u
'\n')
653 .replace(u
'\r\x85', u
'\n')
654 .replace(u
'\r', u
'\n')
655 .replace(u
'\x85', u
'\n')
656 .replace(u
'\u2028', u
'\n'))
657 text
= text
.replace(u
'\n', linesep
)
659 encoding
= sys
.getdefaultencoding()
660 bytes
= text
.encode(encoding
, errors
)
662 # It is an error to specify an encoding if 'text' is
664 assert encoding
is None
666 if linesep
is not None:
667 text
= (text
.replace('\r\n', '\n')
668 .replace('\r', '\n'))
669 bytes
= text
.replace('\n', linesep
)
671 self
.write_bytes(bytes
, append
)
673 def lines(self
, encoding
=None, errors
='strict', retain
=True):
674 r
""" Open this file, read all lines, return them in a list.
677 encoding - The Unicode encoding (or character set) of
678 the file. The default is None, meaning the content
679 of the file is read as 8-bit characters and returned
680 as a list of (non-Unicode) str objects.
681 errors - How to handle Unicode errors; see help(str.decode)
682 for the options. Default is 'strict'
683 retain - If true, retain newline characters; but all newline
684 character combinations ('\r', '\n', '\r\n') are
685 translated to '\n'. If false, newline characters are
686 stripped off. Default is True.
688 This uses 'U' mode in Python 2.3 and later.
690 if encoding
is None and retain
:
691 f
= self
.open(_textmode
)
697 return self
.text(encoding
, errors
).splitlines(retain
)
699 def write_lines(self
, lines
, encoding
=None, errors
='strict',
700 linesep
=os
.linesep
, append
=False):
701 r
""" Write the given lines of text to this file.
703 By default this overwrites any existing file at this path.
705 This puts a platform-specific newline sequence on every line.
708 lines - A list of strings.
710 encoding - A Unicode encoding to use. This applies only if
711 'lines' contains any Unicode strings.
713 errors - How to handle errors in Unicode encoding. This
714 also applies only to Unicode strings.
716 linesep - The desired line-ending. This line-ending is
717 applied to every line. If a line already has any
718 standard line ending ('\r', '\n', '\r\n', u'\x85',
719 u'\r\x85', u'\u2028'), that will be stripped off and
720 this will be used instead. The default is os.linesep,
721 which is platform-dependent ('\r\n' on Windows, '\n' on
722 Unix, etc.) Specify None to write the lines as-is,
723 like file.writelines().
725 Use the keyword argument append=True to append lines to the
726 file. The default is to overwrite the file. Warning:
727 When you use this with Unicode data, if the encoding of the
728 existing data in the file is different from the encoding
729 you specify with the encoding= parameter, the result is
730 mixed-encoding data, which can really confuse someone trying
731 to read the file later.
740 isUnicode
= isinstance(line
, unicode)
741 if linesep
is not None:
742 # Strip off any existing line-end and add the
743 # specified linesep string.
745 if line
[-2:] in (u
'\r\n', u
'\x0d\x85'):
747 elif line
[-1:] in (u
'\r', u
'\n',
751 if line
[-2:] == '\r\n':
753 elif line
[-1:] in ('\r', '\n'):
758 encoding
= sys
.getdefaultencoding()
759 line
= line
.encode(encoding
, errors
)
765 """ Calculate the md5 hash for this file.
767 This reads through the entire file.
781 # --- Methods for querying the filesystem.
783 exists
= os
.path
.exists
784 isdir
= os
.path
.isdir
785 isfile
= os
.path
.isfile
786 islink
= os
.path
.islink
787 ismount
= os
.path
.ismount
789 if hasattr(os
.path
, 'samefile'):
790 samefile
= os
.path
.samefile
792 getatime
= os
.path
.getatime
794 getatime
, None, None,
795 """ Last access time of the file. """)
797 getmtime
= os
.path
.getmtime
799 getmtime
, None, None,
800 """ Last-modified time of the file. """)
802 if hasattr(os
.path
, 'getctime'):
803 getctime
= os
.path
.getctime
805 getctime
, None, None,
806 """ Creation time of the file. """)
808 getsize
= os
.path
.getsize
811 """ Size of the file, in bytes. """)
# Only defined on platforms whose os module provides access().
if hasattr(os, 'access'):
    def access(self, mode):
        """ Return the result of os.access() for this path.

        mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK
        """
        allowed = os.access(self, mode)
        return allowed
822 """ Perform a stat() system call on this path. """
826 """ Like path.stat(), but do not follow symbolic links. """
827 return os
.lstat(self
)
830 r
""" Return the name of the owner of this file or directory.
832 This follows symbolic links.
834 On Windows, this returns a name of the form ur'DOMAIN\User Name'.
835 On Windows, a group can own a file or directory.
838 if win32security
is None:
839 raise Exception("path.owner requires win32all to be installed")
840 desc
= win32security
.GetFileSecurity(
841 self
, win32security
.OWNER_SECURITY_INFORMATION
)
842 sid
= desc
.GetSecurityDescriptorOwner()
843 account
, domain
, typecode
= win32security
.LookupAccountSid(None, sid
)
844 return domain
+ u
'\\' + account
847 raise NotImplementedError("path.owner is not implemented on this platform.")
849 return pwd
.getpwuid(st
.st_uid
).pw_name
852 get_owner
, None, None,
853 """ Name of the owner of this file or directory. """)
855 if hasattr(os
, 'statvfs'):
857 """ Perform a statvfs() system call on this path. """
858 return os
.statvfs(self
)
# Only defined on platforms whose os module provides pathconf().
if hasattr(os, 'pathconf'):
    def pathconf(self, name):
        """ Return the result of os.pathconf(self, name) for this path. """
        value = os.pathconf(self, name)
        return value
865 # --- Modifying operations on files and directories
def utime(self, times):
    """ Set the access and modified times of this file.

    'times' is handed to os.utime() unchanged.  Nothing is returned.
    """
    os.utime(self, times)
    return None
871 def chmod(self
, mode
):
# Only defined on platforms whose os module provides chown().
if hasattr(os, 'chown'):
    def chown(self, uid, gid):
        """ Call os.chown(self, uid, gid) on this path.  Nothing is returned. """
        os.chown(self, uid, gid)
        return None
878 def rename(self
, new
):
def renames(self, new):
    """ Call os.renames(self, new) to rename this path to 'new'.  Nothing is returned. """
    os.renames(self, new)
    return None
885 # --- Create/delete operations on directories
887 def mkdir(self
, mode
=0777):
890 def makedirs(self
, mode
=0777):
891 os
.makedirs(self
, mode
)
896 def removedirs(self
):
900 # --- Modifying operations on files
903 """ Set the access/modified times of this file to the current time.
904 Create the file if it does not exist.
906 fd
= os
.open(self
, os
.O_WRONLY | os
.O_CREAT
, 0666)
# Only defined on platforms whose os module provides link().
if hasattr(os, 'link'):
    def link(self, newpath):
        """ Create a hard link at 'newpath', pointing to this file.

        This calls os.link(self, newpath).  Nothing is returned.
        """
        os.link(self, newpath)
        return None
# Only defined on platforms whose os module provides symlink().
if hasattr(os, 'symlink'):
    def symlink(self, newlink):
        """ Create a symbolic link at 'newlink', pointing here.

        This calls os.symlink(self, newlink).  Nothing is returned.
        """
        os.symlink(self, newlink)
        return None
929 if hasattr(os
, 'readlink'):
931 """ Return the path to which this symbolic link points.
933 The result may be an absolute or a relative path.
935 return self
.__class
__(os
.readlink(self
))
937 def readlinkabs(self
):
938 """ Return the path to which this symbolic link points.
940 The result is always an absolute path.
946 return (self
.parent
/ p
).abspath()
949 # --- High-level functions from shutil
951 copyfile
= shutil
.copyfile
952 copymode
= shutil
.copymode
953 copystat
= shutil
.copystat
956 copytree
= shutil
.copytree
957 if hasattr(shutil
, 'move'):
959 rmtree
= shutil
.rmtree
962 # --- Special stuff from os
964 if hasattr(os
, 'chroot'):
968 if hasattr(os
, 'startfile'):