1 """ path.py - An object representing a path to a file or directory.
6 d = path('/home/guido/bin')
7 for f in d.files('*.py'):
10 This module requires Python 2.2 or later.
13 URL: http://www.jorendorff.com/articles/python/path
14 Author: Jason Orendorff <jason.orendorff\x40gmail\x2ecom> (and others - see the url!)
20 # - Tree-walking functions don't avoid symlink loops. Matt Harrison
21 # sent me a patch for this.
22 # - Bug in write_text(). It doesn't support Universal newline mode.
23 # - Better error message in listdir() when self isn't a
24 # directory. (On Windows, the error message really sucks.)
25 # - Make sure everything has a good docstring.
26 # - Add methods for regex find and replace.
27 # - guess_content_type() method?
28 # - Perhaps support arguments to touch().
30 from __future__
import generators
32 import sys
, warnings
, os
, fnmatch
, glob
, shutil
, codecs
, md5
37 # Platform-specific support for path.owner
49 # Pre-2.3 support. Are unicode filenames supported?
53 if os
.path
.supports_unicode_filenames
:
56 except AttributeError:
59 # Pre-2.3 workaround for booleans
65 # Pre-2.3 workaround for basestring.
69 basestring
= (str, unicode)
71 # Universal newline support
73 if hasattr(file, 'newlines'):
77 class TreeWalkWarning(Warning):
81 """ Represents a filesystem path.
83 For documentation on individual methods, consult their
84 counterparts in os.path.
87 # --- Special Python methods.
90 return 'path(%s)' % _base
.__repr
__(self
)
92 # Adding a path and a string yields a path.
93 def __add__(self
, more
):
95 resultStr
= _base
.__add
__(self
, more
)
96 except TypeError: #Python bug
97 resultStr
= NotImplemented
98 if resultStr
is NotImplemented:
100 return self
.__class
__(resultStr
)
102 def __radd__(self
, other
):
103 if isinstance(other
, basestring
):
104 return self
.__class
__(other
.__add
__(self
))
106 return NotImplemented
108 # The / operator joins paths.
109 def __div__(self
, rel
):
110 """ fp.__div__(rel) == fp / rel == fp.joinpath(rel)
112 Join two path components, adding a separator character if
115 return self
.__class
__(os
.path
.join(self
, rel
))
117 # Make the / operator work even when true division is enabled.
118 __truediv__
= __div__
121 """ Return the current working directory as a path object. """
122 return cls(_getcwd())
123 getcwd
= classmethod(getcwd
)
126 # --- Operations on path strings.
128 isabs
= os
.path
.isabs
129 def abspath(self
): return self
.__class
__(os
.path
.abspath(self
))
130 def normcase(self
): return self
.__class
__(os
.path
.normcase(self
))
131 def normpath(self
): return self
.__class
__(os
.path
.normpath(self
))
132 def realpath(self
): return self
.__class
__(os
.path
.realpath(self
))
133 def expanduser(self
): return self
.__class
__(os
.path
.expanduser(self
))
134 def expandvars(self
): return self
.__class
__(os
.path
.expandvars(self
))
135 def dirname(self
): return self
.__class
__(os
.path
.dirname(self
))
136 basename
= os
.path
.basename
139 """ Clean up a filename by calling expandvars(),
140 expanduser(), and normpath() on it.
142 This is commonly everything needed to clean up a filename
143 read from a configuration file, for example.
145 return self
.expandvars().expanduser().normpath()
147 def _get_namebase(self
):
148 base
, ext
= os
.path
.splitext(self
.name
)
152 f
, ext
= os
.path
.splitext(_base(self
))
155 def _get_drive(self
):
156 drive
, r
= os
.path
.splitdrive(self
)
157 return self
.__class
__(drive
)
161 """ This path's parent directory, as a new path object.
163 For example, path('/usr/local/lib/libpython.so').parent == path('/usr/local/lib')
167 basename
, None, None,
168 """ The name of this file or directory without the full path.
170 For example, path('/usr/local/lib/libpython.so').name == 'libpython.so'
174 _get_namebase
, None, None,
175 """ The same as path.name, but with one file extension stripped off.
177 For example, path('/home/guido/python.tar.gz').name == 'python.tar.gz',
178 but path('/home/guido/python.tar.gz').namebase == 'python.tar'
182 _get_ext
, None, None,
183 """ The file extension, for example '.py'. """)
186 _get_drive
, None, None,
187 """ The drive specifier, for example 'C:'.
188 This is always empty on systems that don't use drive specifiers.
192 """ p.splitpath() -> Return (p.parent, p.name). """
193 parent
, child
= os
.path
.split(self
)
194 return self
.__class
__(parent
), child
196 def splitdrive(self
):
197 """ p.splitdrive() -> Return (p.drive, <the rest of p>).
199 Split the drive specifier from this path. If there is
200 no drive specifier, p.drive is empty, so the return value
201 is simply (path(''), p). This is always the case on Unix.
203 drive
, rel
= os
.path
.splitdrive(self
)
204 return self
.__class
__(drive
), rel
207 """ p.splitext() -> Return (p.stripext(), p.ext).
209 Split the filename extension from this path and return
210 the two parts. Either part may be empty.
212 The extension is everything from '.' to the end of the
213 last path segment. This has the property that if
214 (a, b) == p.splitext(), then a + b == p.
216 filename
, ext
= os
.path
.splitext(self
)
217 return self
.__class
__(filename
), ext
220 """ p.stripext() -> Remove one file extension from the path.
222 For example, path('/home/guido/python.tar.gz').stripext()
223 returns path('/home/guido/python.tar').
225 return self
.splitext()[0]
227 if hasattr(os
.path
, 'splitunc'):
229 unc
, rest
= os
.path
.splitunc(self
)
230 return self
.__class
__(unc
), rest
232 def _get_uncshare(self
):
233 unc
, r
= os
.path
.splitunc(self
)
234 return self
.__class
__(unc
)
237 _get_uncshare
, None, None,
238 """ The UNC mount point for this path.
239 This is empty for paths on local drives. """)
241 def joinpath(self
, *args
):
242 """ Join two or more path components, adding a separator
243 character (os.sep) if needed. Returns a new path
246 return self
.__class
__(os
.path
.join(self
, *args
))
249 r
""" Return a list of the path components in this path.
251 The first item in the list will be a path. Its value will be
252 either os.curdir, os.pardir, empty, or the root directory of
253 this path (for example, '/' or 'C:\\'). The other items in
254 the list will be strings.
256 path.path.joinpath(*result) will yield the original path.
260 while loc
!= os
.curdir
and loc
!= os
.pardir
:
262 loc
, child
= prev
.splitpath()
271 """ Return this path as a relative path,
272 based from the current working directory.
274 cwd
= self
.__class
__(os
.getcwd())
275 return cwd
.relpathto(self
)
277 def relpathto(self
, dest
):
278 """ Return a relative path from self to dest.
280 If there is no relative path from self to dest, for example if
281 they reside on different drives in Windows, then this returns
284 origin
= self
.abspath()
285 dest
= self
.__class
__(dest
).abspath()
287 orig_list
= origin
.normcase().splitall()
288 # Don't normcase dest! We want to preserve the case.
289 dest_list
= dest
.splitall()
291 if orig_list
[0] != os
.path
.normcase(dest_list
[0]):
292 # Can't get here from there.
295 # Find the location where the two paths start to differ.
297 for start_seg
, dest_seg
in zip(orig_list
, dest_list
):
298 if start_seg
!= os
.path
.normcase(dest_seg
):
302 # Now i is the point where the two paths diverge.
303 # Need a certain number of "os.pardir"s to work up
304 # from the origin to the point of divergence.
305 segments
= [os
.pardir
] * (len(orig_list
) - i
)
306 # Need to add the diverging part of dest_list.
307 segments
+= dest_list
[i
:]
308 if len(segments
) == 0:
309 # If they happen to be identical, use os.curdir.
312 relpath
= os
.path
.join(*segments
)
313 return self
.__class
__(relpath
)
315 # --- Listing, searching, walking, and matching
317 def listdir(self
, pattern
=None):
318 """ D.listdir() -> List of items in this directory.
320 Use D.files() or D.dirs() instead if you want a listing
321 of just files or just subdirectories.
323 The elements of the list are path objects.
325 With the optional 'pattern' argument, this only lists
326 items whose names match the given pattern.
328 names
= os
.listdir(self
)
329 if pattern
is not None:
330 names
= fnmatch
.filter(names
, pattern
)
331 return [self
/ child
for child
in names
]
333 def dirs(self
, pattern
=None):
334 """ D.dirs() -> List of this directory's subdirectories.
336 The elements of the list are path objects.
337 This does not walk recursively into subdirectories
338 (but see path.walkdirs).
340 With the optional 'pattern' argument, this only lists
341 directories whose names match the given pattern. For
342 example, d.dirs('build-*').
344 return [p
for p
in self
.listdir(pattern
) if p
.isdir()]
346 def files(self
, pattern
=None):
347 """ D.files() -> List of the files in this directory.
349 The elements of the list are path objects.
350 This does not walk into subdirectories (see path.walkfiles).
352 With the optional 'pattern' argument, this only lists files
353 whose names match the given pattern. For example,
357 return [p
for p
in self
.listdir(pattern
) if p
.isfile()]
359 def walk(self
, pattern
=None, errors
='strict'):
360 """ D.walk() -> iterator over files and subdirs, recursively.
362 The iterator yields path objects naming each child item of
363 this directory and its descendants. This requires that
366 This performs a depth-first traversal of the directory tree.
367 Each directory is returned just before all its children.
369 The errors= keyword argument controls behavior when an
370 error occurs. The default is 'strict', which causes an
371 exception. The other allowed values are 'warn', which
372 reports the error via warnings.warn(), and 'ignore'.
374 if errors
not in ('strict', 'warn', 'ignore'):
375 raise ValueError("invalid errors parameter")
378 childList
= self
.listdir()
380 if errors
== 'ignore':
382 elif errors
== 'warn':
384 "Unable to list directory '%s': %s"
385 % (self
, sys
.exc_info()[1]),
391 for child
in childList
:
392 if pattern
is None or child
.fnmatch(pattern
):
395 isdir
= child
.isdir()
397 if errors
== 'ignore':
399 elif errors
== 'warn':
401 "Unable to access '%s': %s"
402 % (child
, sys
.exc_info()[1]),
409 for item
in child
.walk(pattern
, errors
):
412 def walkdirs(self
, pattern
=None, errors
='strict'):
413 """ D.walkdirs() -> iterator over subdirs, recursively.
415 With the optional 'pattern' argument, this yields only
416 directories whose names match the given pattern. For
417 example, mydir.walkdirs('*test') yields only directories
418 with names ending in 'test'.
420 The errors= keyword argument controls behavior when an
421 error occurs. The default is 'strict', which causes an
422 exception. The other allowed values are 'warn', which
423 reports the error via warnings.warn(), and 'ignore'.
425 if errors
not in ('strict', 'warn', 'ignore'):
426 raise ValueError("invalid errors parameter")
431 if errors
== 'ignore':
433 elif errors
== 'warn':
435 "Unable to list directory '%s': %s"
436 % (self
, sys
.exc_info()[1]),
443 if pattern
is None or child
.fnmatch(pattern
):
445 for subsubdir
in child
.walkdirs(pattern
, errors
):
448 def walkfiles(self
, pattern
=None, errors
='strict'):
449 """ D.walkfiles() -> iterator over files in D, recursively.
451 The optional argument, pattern, limits the results to files
452 with names that match the pattern. For example,
453 mydir.walkfiles('*.tmp') yields only files with the .tmp
456 if errors
not in ('strict', 'warn', 'ignore'):
457 raise ValueError("invalid errors parameter")
460 childList
= self
.listdir()
462 if errors
== 'ignore':
464 elif errors
== 'warn':
466 "Unable to list directory '%s': %s"
467 % (self
, sys
.exc_info()[1]),
473 for child
in childList
:
475 isfile
= child
.isfile()
476 isdir
= not isfile
and child
.isdir()
478 if errors
== 'ignore':
480 elif errors
== 'warn':
482 "Unable to access '%s': %s"
483 % (self
, sys
.exc_info()[1]),
490 if pattern
is None or child
.fnmatch(pattern
):
493 for f
in child
.walkfiles(pattern
, errors
):
496 def fnmatch(self
, pattern
):
497 """ Return True if self.name matches the given pattern.
499 pattern - A filename pattern with wildcards,
502 return fnmatch
.fnmatch(self
.name
, pattern
)
504 def glob(self
, pattern
):
505 """ Return a list of path objects that match the pattern.
507 pattern - a path relative to this directory, with wildcards.
509 For example, path('/users').glob('*/bin/*') returns a list
510 of all the files users have in their bin directories.
513 return [cls(s
) for s
in glob
.glob(_base(self
/ pattern
))]
516 # --- Reading or writing an entire file at once.
518 def open(self
, mode
='r'):
519 """ Open this file. Return a file object. """
520 return file(self
, mode
)
523 """ Open this file, read all bytes, return them as a string. """
530 def write_bytes(self
, bytes
, append
=False):
531 """ Open this file and write the given bytes to it.
533 Default behavior is to overwrite any existing file.
534 Call p.write_bytes(bytes, append=True) to append instead.
546 def text(self
, encoding
=None, errors
='strict'):
547 r
""" Open this file, read it in, return the content as a string.
549 This uses 'U' mode in Python 2.3 and later, so '\r\n' and '\r'
550 are automatically translated to '\n'.
554 encoding - The Unicode encoding (or character set) of
555 the file. If present, the content of the file is
556 decoded and returned as a unicode object; otherwise
557 it is returned as an 8-bit str.
558 errors - How to handle Unicode errors; see help(str.decode)
559 for the options. Default is 'strict'.
563 f
= self
.open(_textmode
)
570 f
= codecs
.open(self
, 'r', encoding
, errors
)
571 # (Note - Can't use 'U' mode here, since codecs.open
572 # doesn't support 'U' mode, even in Python 2.3.)
577 return (t
.replace(u
'\r\n', u
'\n')
578 .replace(u
'\r\x85', u
'\n')
579 .replace(u
'\r', u
'\n')
580 .replace(u
'\x85', u
'\n')
581 .replace(u
'\u2028', u
'\n'))
583 def write_text(self
, text
, encoding
=None, errors
='strict', linesep
=os
.linesep
, append
=False):
584 r
""" Write the given text to this file.
586 The default behavior is to overwrite any existing file;
587 to append instead, use the 'append=True' keyword argument.
589 There are two differences between path.write_text() and
590 path.write_bytes(): newline handling and Unicode handling.
595 - text - str/unicode - The text to be written.
597 - encoding - str - The Unicode encoding that will be used.
598 This is ignored if 'text' isn't a Unicode string.
600 - errors - str - How to handle Unicode encoding errors.
601 Default is 'strict'. See help(unicode.encode) for the
602 options. This is ignored if 'text' isn't a Unicode
605 - linesep - keyword argument - str/unicode - The sequence of
606 characters to be used to mark end-of-line. The default is
607 os.linesep. You can also specify None; this means to
608 leave all newlines as they are in 'text'.
610 - append - keyword argument - bool - Specifies what to do if
611 the file already exists (True: append to the end of it;
612 False: overwrite it.) The default is False.
615 --- Newline handling.
617 write_text() converts all standard end-of-line sequences
618 ('\n', '\r', and '\r\n') to your platform's default end-of-line
619 sequence (see os.linesep; on Windows, for example, the
620 end-of-line marker is '\r\n').
622 If you don't like your platform's default, you can override it
623 using the 'linesep=' keyword argument. If you specifically want
624 write_text() to preserve the newlines as-is, use 'linesep=None'.
626 This applies to Unicode text the same as to 8-bit text, except
627 there are three additional standard Unicode end-of-line sequences:
628 u'\x85', u'\r\x85', and u'\u2028'.
630 (This is slightly different from when you open a file for
631 writing with fopen(filename, "w") in C or file(filename, 'w')
637 If 'text' isn't Unicode, then apart from newline handling, the
638 bytes are written verbatim to the file. The 'encoding' and
639 'errors' arguments are not used and must be omitted.
641 If 'text' is Unicode, it is first converted to bytes using the
642 specified 'encoding' (or the default encoding if 'encoding'
643 isn't specified). The 'errors' argument applies only to this
647 if isinstance(text
, unicode):
648 if linesep
is not None:
649 # Convert all standard end-of-line sequences to
650 # ordinary newline characters.
651 text
= (text
.replace(u
'\r\n', u
'\n')
652 .replace(u
'\r\x85', u
'\n')
653 .replace(u
'\r', u
'\n')
654 .replace(u
'\x85', u
'\n')
655 .replace(u
'\u2028', u
'\n'))
656 text
= text
.replace(u
'\n', linesep
)
658 encoding
= sys
.getdefaultencoding()
659 bytes
= text
.encode(encoding
, errors
)
661 # It is an error to specify an encoding if 'text' is
663 assert encoding
is None
665 if linesep
is not None:
666 text
= (text
.replace('\r\n', '\n')
667 .replace('\r', '\n'))
668 bytes
= text
.replace('\n', linesep
)
670 self
.write_bytes(bytes
, append
)
672 def lines(self
, encoding
=None, errors
='strict', retain
=True):
673 r
""" Open this file, read all lines, return them in a list.
676 encoding - The Unicode encoding (or character set) of
677 the file. The default is None, meaning the content
678 of the file is read as 8-bit characters and returned
679 as a list of (non-Unicode) str objects.
680 errors - How to handle Unicode errors; see help(str.decode)
681 for the options. Default is 'strict'
682 retain - If true, retain newline characters; but all newline
683 character combinations ('\r', '\n', '\r\n') are
684 translated to '\n'. If false, newline characters are
685 stripped off. Default is True.
687 This uses 'U' mode in Python 2.3 and later.
689 if encoding
is None and retain
:
690 f
= self
.open(_textmode
)
696 return self
.text(encoding
, errors
).splitlines(retain
)
698 def write_lines(self
, lines
, encoding
=None, errors
='strict',
699 linesep
=os
.linesep
, append
=False):
700 r
""" Write the given lines of text to this file.
702 By default this overwrites any existing file at this path.
704 This puts a platform-specific newline sequence on every line.
707 lines - A list of strings.
709 encoding - A Unicode encoding to use. This applies only if
710 'lines' contains any Unicode strings.
712 errors - How to handle errors in Unicode encoding. This
713 also applies only to Unicode strings.
715 linesep - The desired line-ending. This line-ending is
716 applied to every line. If a line already has any
717 standard line ending ('\r', '\n', '\r\n', u'\x85',
718 u'\r\x85', u'\u2028'), that will be stripped off and
719 this will be used instead. The default is os.linesep,
720 which is platform-dependent ('\r\n' on Windows, '\n' on
721 Unix, etc.) Specify None to write the lines as-is,
722 like file.writelines().
724 Use the keyword argument append=True to append lines to the
725 file. The default is to overwrite the file. Warning:
726 When you use this with Unicode data, if the encoding of the
727 existing data in the file is different from the encoding
728 you specify with the encoding= parameter, the result is
729 mixed-encoding data, which can really confuse someone trying
730 to read the file later.
739 isUnicode
= isinstance(line
, unicode)
740 if linesep
is not None:
741 # Strip off any existing line-end and add the
742 # specified linesep string.
744 if line
[-2:] in (u
'\r\n', u
'\x0d\x85'):
746 elif line
[-1:] in (u
'\r', u
'\n',
750 if line
[-2:] == '\r\n':
752 elif line
[-1:] in ('\r', '\n'):
757 encoding
= sys
.getdefaultencoding()
758 line
= line
.encode(encoding
, errors
)
764 """ Calculate the md5 hash for this file.
766 This reads through the entire file.
780 # --- Methods for querying the filesystem.
782 exists
= os
.path
.exists
783 isdir
= os
.path
.isdir
784 isfile
= os
.path
.isfile
785 islink
= os
.path
.islink
786 ismount
= os
.path
.ismount
788 if hasattr(os
.path
, 'samefile'):
789 samefile
= os
.path
.samefile
791 getatime
= os
.path
.getatime
793 getatime
, None, None,
794 """ Last access time of the file. """)
796 getmtime
= os
.path
.getmtime
798 getmtime
, None, None,
799 """ Last-modified time of the file. """)
801 if hasattr(os
.path
, 'getctime'):
802 getctime
= os
.path
.getctime
804 getctime
, None, None,
805 """ Creation time of the file. """)
807 getsize
= os
.path
.getsize
810 """ Size of the file, in bytes. """)
812 if hasattr(os
, 'access'):
813 def access(self
, mode
):
814 """ Return true if current user has access to this path.
816 mode - One of the constants os.F_OK, os.R_OK, os.W_OK, os.X_OK
818 return os
.access(self
, mode
)
821 """ Perform a stat() system call on this path. """
825 """ Like path.stat(), but do not follow symbolic links. """
826 return os
.lstat(self
)
829 r
""" Return the name of the owner of this file or directory.
831 This follows symbolic links.
833 On Windows, this returns a name of the form ur'DOMAIN\User Name'.
834 On Windows, a group can own a file or directory.
837 if win32security
is None:
838 raise Exception("path.owner requires win32all to be installed")
839 desc
= win32security
.GetFileSecurity(
840 self
, win32security
.OWNER_SECURITY_INFORMATION
)
841 sid
= desc
.GetSecurityDescriptorOwner()
842 account
, domain
, typecode
= win32security
.LookupAccountSid(None, sid
)
843 return domain
+ u
'\\' + account
846 raise NotImplementedError("path.owner is not implemented on this platform.")
848 return pwd
.getpwuid(st
.st_uid
).pw_name
851 get_owner
, None, None,
852 """ Name of the owner of this file or directory. """)
854 if hasattr(os
, 'statvfs'):
856 """ Perform a statvfs() system call on this path. """
857 return os
.statvfs(self
)
859 if hasattr(os
, 'pathconf'):
860 def pathconf(self
, name
):
861 return os
.pathconf(self
, name
)
864 # --- Modifying operations on files and directories
866 def utime(self
, times
):
867 """ Set the access and modified times of this file. """
868 os
.utime(self
, times
)
870 def chmod(self
, mode
):
873 if hasattr(os
, 'chown'):
874 def chown(self
, uid
, gid
):
875 os
.chown(self
, uid
, gid
)
877 def rename(self
, new
):
880 def renames(self
, new
):
881 os
.renames(self
, new
)
884 # --- Create/delete operations on directories
886 def mkdir(self
, mode
=0777):
889 def makedirs(self
, mode
=0777):
890 os
.makedirs(self
, mode
)
895 def removedirs(self
):
899 # --- Modifying operations on files
902 """ Set the access/modified times of this file to the current time.
903 Create the file if it does not exist.
905 fd
= os
.open(self
, os
.O_WRONLY | os
.O_CREAT
, 0666)
918 if hasattr(os
, 'link'):
919 def link(self
, newpath
):
920 """ Create a hard link at 'newpath', pointing to this file. """
921 os
.link(self
, newpath
)
923 if hasattr(os
, 'symlink'):
924 def symlink(self
, newlink
):
925 """ Create a symbolic link at 'newlink', pointing here. """
926 os
.symlink(self
, newlink
)
928 if hasattr(os
, 'readlink'):
930 """ Return the path to which this symbolic link points.
932 The result may be an absolute or a relative path.
934 return self
.__class
__(os
.readlink(self
))
936 def readlinkabs(self
):
937 """ Return the path to which this symbolic link points.
939 The result is always an absolute path.
945 return (self
.parent
/ p
).abspath()
948 # --- High-level functions from shutil
950 copyfile
= shutil
.copyfile
951 copymode
= shutil
.copymode
952 copystat
= shutil
.copystat
955 copytree
= shutil
.copytree
956 if hasattr(shutil
, 'move'):
958 rmtree
= shutil
.rmtree
961 # --- Special stuff from os
963 if hasattr(os
, 'chroot'):
967 if hasattr(os
, 'startfile'):