2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * This is a new directory-walking system that addresses a number
29 * of problems I've had with fts(3). In particular, it has no
30 * pathname-length limits (other than the size of 'int'), handles
31 * deep logical traversals, uses considerably less memory, and has
32 * an opaque interface (easier to modify in the future).
34 * Internally, it keeps a single list of "tree_entry" items that
35 * represent filesystem objects that require further attention.
36 * Non-directories are not kept in memory: they are pulled from
37 * readdir(), returned to the client, then freed as soon as possible.
38 * Any directory entry to be traversed gets pushed onto the stack.
40 * There is surprisingly little information that needs to be kept for
41 * each item on the stack. Just the name, depth (represented here as the
42 * string length of the parent directory's pathname), and some markers
43 * indicating how to get back to the parent (via chdir("..") for a
44 * regular dir or via fchdir(2) for a symlink).
46 #include "bsdtar_platform.h"
47 __FBSDID("$FreeBSD: src/usr.bin/tar/tree.c,v 1.9 2008/11/27 05:49:52 kientzle Exp $");
49 #ifdef HAVE_SYS_STAT_H
73 #if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
82 * 3) Arbitrary logical traversals by closing/reopening intermediate fds.
/*
 * Members of one traversal-stack entry.  The struct header and some
 * members (for example the name, depth and flags used elsewhere in this
 * file) are not present in this listing.
 */
/* Next entry on the stack; tree_dump() walks t->stack through this link. */
87 struct tree_entry
*next
;
/* Entry that was t->current when this one was created by tree_push(). */
88 struct tree_entry
*parent
;
/* Copy of t->dirname_length taken by tree_push(); tree_pop() restores it. */
90 size_t dirname_length
;
94 /* How to return back to the parent of a symlink. */
/* Descriptor from open(".", O_RDONLY); tree_ascend() fchdir()s to it, then closes it. */
96 int symlink_parent_fd
;
97 #elif defined(_WIN32) && !defined(__CYGWIN__)
/* Directory saved with _getcwd(); tree_ascend() passes it to SetCurrentDirectory() and frees it. */
98 char *symlink_parent_path
;
100 #error fchdir function required.
104 /* Definitions for tree_entry.flags bitmap. */
/*
 * tree_open() gives the first entry needsFirstVisit | isDirLink |
 * needsAscent; tree_push() gives later entries needsDescent | needsOpen |
 * needsAscent, and tree_descend() then adds isDir or isDirLink.
 * tree_next() tests needsFirstVisit, needsDescent, needsOpen and
 * needsAscent in that order, clearing the first three as it handles them.
 */
105 #define isDir 1 /* This entry is a regular directory. */
106 #define isDirLink 2 /* This entry is a symbolic link to a directory. */
107 #define needsFirstVisit 4 /* This is an initial entry. */
108 #define needsDescent 8 /* This entry needs to be previsited. */
109 #define needsOpen 16 /* This is a directory that needs to be opened. */
110 #define needsAscent 32 /* This entry needs to be postvisited. */
113 * On Windows, "first visit" is handled as a pattern to be handed to
114 * _findfirst(). This is consistent with Windows conventions that
115 * file patterns are handled within the application. On Posix,
116 * "first visit" is just returned to the client.
120 * Local data for this package.
/*
 * Members of the traversal state object.  The struct header and several
 * members referenced elsewhere in this file are not present in this listing.
 */
/* Top of the entry stack; tree_next() keeps running while this is non-NULL. */
123 struct tree_entry
*stack
;
/* Entry currently being visited; tree_next() sets it to t->stack. */
124 struct tree_entry
*current
;
125 #if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
/* Filled by GetFileInformationByHandle() in tree_current_file_information(). */
127 BY_HANDLE_FILE_INFORMATION fileInfo
;
/* Value of t->d meaning "no directory is open" on Windows. */
128 #define INVALID_DIR_HANDLE INVALID_HANDLE_VALUE
/* Storage handed to FindFirstFile()/FindNextFile(). */
129 WIN32_FIND_DATA _findData
;
/* Set to &_findData after a successful FindFirstFile(). */
130 WIN32_FIND_DATA
*findData
;
/* Value of t->d meaning "no directory is open" in the non-Windows build. */
133 #define INVALID_DIR_HANDLE NULL
138 int tree_errno
; /* Error code from last failed operation. */
140 /* Dynamically-sized buffer for holding path */
144 const char *basename
; /* Last path element */
145 size_t dirname_length
; /* Leading dir length */
146 size_t path_length
; /* Total path length */
156 /* Definitions for tree.flags bitmap. */
/*
 * hasStat and hasLstat are cleared whenever a new directory entry is
 * fetched and near the end of tree_next()'s loop, so the cached st/lst
 * data is only reused for the entry it was read for.
 * NOTE(review): no line in this listing clears hasFileInfo after
 * tree_current_file_information() sets it; confirm that fileInfo cannot
 * be reused for a later entry.
 */
157 #define hasStat 16 /* The st entry is valid. */
158 #define hasLstat 32 /* The lst entry is valid. */
159 #define hasFileInfo 64 /* The Windows fileInfo entry is valid. */
/*
 * Forward declarations of the platform-specific directory readers that
 * tree_next() calls.  The Windows variant takes a search pattern;
 * tree_next() passes NULL to continue an already open search.
 */
161 #if defined(_WIN32) && !defined(__CYGWIN__)
163 tree_dir_next_windows(struct tree
*t
, const char *pattern
);
166 tree_dir_next_posix(struct tree
*t
);
/*
 * D_NAMELEN(dp): length of the name held in directory entry dp.
 * tree_dir_next_posix() uses it to size the name passed to tree_append().
 * NOTE(review): the d_namlen expansion is not wrapped in outer
 * parentheses, unlike the strlen() variant.
 */
169 #ifdef HAVE_DIRENT_D_NAMLEN
170 /* BSD extension; avoids need for a strlen() call. */
171 #define D_NAMELEN(dp) (dp)->d_namlen
173 #define D_NAMELEN(dp) (strlen((dp)->d_name))
/*
 * Debugging aid: write a description of the traversal state to `out`.
 *
 * Prints the depth, the path buffer, the process working directory and
 * the current basename, then one line per stack entry.  Each entry line
 * starts with "*" when the entry is t->current, and ends with one letter
 * per flag that is set: V (needsFirstVisit), D (needsDescent),
 * O (needsOpen), A (needsAscent), L (isDirLink), followed by "+" when the
 * entry is current and a directory is open.
 *
 * The "+" test compares t->d with INVALID_DIR_HANDLE, as tree_next()
 * does.  A plain truth test is wrong on Windows, where the "closed"
 * value INVALID_HANDLE_VALUE is non-zero.
 *
 * NOTE(review): the getcwd() result is passed straight to "%s"; getcwd()
 * can return NULL on failure.  The declaration of the local `buff` is not
 * present in this listing.
 */
178 tree_dump(struct tree
*t
, FILE *out
)
181 struct tree_entry
*te
;
183 fprintf(out
, "\tdepth: %d\n", t
->depth
);
184 fprintf(out
, "\tbuff: %s\n", t
->buff
);
185 fprintf(out
, "\tpwd: %s\n", getcwd(buff
, sizeof(buff
)));
186 fprintf(out
, "\tbasename: %s\n", t
->basename
);
187 fprintf(out
, "\tstack:\n");
/* Walk the stack from the top entry down. */
188 for (te
= t
->stack
; te
!= NULL
; te
= te
->next
) {
189 fprintf(out
, "\t\t%s%d:\"%s\" %s%s%s%s%s%s\n",
190 t
->current
== te
? "*" : " ",
193 te
->flags
& needsFirstVisit
? "V" : "",
194 te
->flags
& needsDescent
? "D" : "",
195 te
->flags
& needsOpen
? "O" : "",
196 te
->flags
& needsAscent
? "A" : "",
197 te
->flags
& isDirLink
? "L" : "",
198 (t
->current
== te
&& t
->d
!= INVALID_DIR_HANDLE
) ? "+" : ""
204 * Add a directory path to the current stack.
/*
 * Create a zero-filled tree_entry for `path` and prepare it for traversal.
 *
 * The new entry records t->current as its parent, takes a depth one
 * greater than the parent's, stores a strdup() copy of `path` as its
 * name, starts with needsDescent | needsOpen | needsAscent, and saves the
 * tree's current dirname_length so tree_pop() can restore it.
 *
 * tree_descend() modifies t->stack->flags right after calling this, so
 * the new entry is expected to be the stack top on return; the statement
 * that links it is not present in this listing.
 *
 * NOTE(review): the malloc() result is handed to memset() without a NULL
 * check, and the strdup() result is stored unchecked.
 * NOTE(review): te->parent is dereferenced for the depth; any guard for a
 * NULL parent would be on a line missing from this listing -- confirm.
 */
207 tree_push(struct tree
*t
, const char *path
)
209 struct tree_entry
*te
;
211 te
= malloc(sizeof(*te
));
212 memset(te
, 0, sizeof(*te
));
214 te
->parent
= t
->current
;
216 te
->depth
= te
->parent
->depth
+ 1;
/* No parent descriptor yet; tree_next() opens one when descending a link. */
219 te
->symlink_parent_fd
= -1;
220 te
->name
= strdup(path
);
221 #elif defined(_WIN32) && !defined(__CYGWIN__)
222 te
->symlink_parent_path
= NULL
;
223 te
->name
= strdup(path
);
225 te
->flags
= needsDescent
| needsOpen
| needsAscent
;
226 te
->dirname_length
= t
->dirname_length
;
230 * Append a name to the current dir path.
/*
 * Append `name` (its first name_length bytes) to the directory part of
 * the path held in t->buff.
 *
 * The buffer is first cut back to dirname_length, trailing '/' characters
 * are dropped from the name (a lone "/" is kept), and the buffer is grown
 * to at least 1024 bytes and then until it is large enough.  A '/'
 * separator is added when the directory part is non-empty and does not
 * already end in one.  t->path_length is set to dirname_length +
 * name_length, and the result is NUL-terminated.
 *
 * Fix: the size calculation reserved one byte beyond the two lengths, but
 * the buffer may have to hold both a separator and the terminating NUL.
 * With a separator and a buffer that was exactly "large enough", the
 * final NUL was written one byte past the allocation.  Two bytes are now
 * reserved.
 *
 * NOTE(review): realloc() overwrites t->buff directly, so the old buffer
 * leaks if it fails; any failure check is on a line missing from this
 * listing.
 */
233 tree_append(struct tree
*t
, const char *name
, size_t name_length
)
239 t
->buff
[t
->dirname_length
] = '\0';
240 /* Strip trailing '/' from name, unless entire name is "/". */
241 while (name_length
> 1 && name
[name_length
- 1] == '/')
244 /* Resize pathname buffer as needed. */
/* Room for the name, an optional '/' separator and the terminating NUL. */
245 size_needed
= name_length
+ 2 + t
->dirname_length
;
246 if (t
->buff_length
< size_needed
) {
247 if (t
->buff_length
< 1024)
248 t
->buff_length
= 1024;
249 while (t
->buff_length
< size_needed
)
251 t
->buff
= realloc(t
->buff
, t
->buff_length
);
/* p marks where the new path element will be written. */
255 p
= t
->buff
+ t
->dirname_length
;
256 t
->path_length
= t
->dirname_length
+ name_length
;
257 /* Add a separating '/' if it's needed. */
258 if (t
->dirname_length
> 0 && p
[-1] != '/') {
263 strncpy_s(p
, t
->buff_length
- (p
- t
->buff
), name
, name_length
);
265 strncpy(p
, name
, name_length
);
/* strncpy() does not terminate when it copies exactly name_length bytes. */
267 p
[name_length
] = '\0';
272 * Open a directory tree for traversal.
/*
 * Allocate and initialise a traversal object for `path`.
 *
 * Both platform variants zero-fill a new struct tree, mark the first
 * stack entry with needsFirstVisit | isDirLink | needsAscent, record how
 * to get back to the starting directory in that entry, and set t->d to
 * INVALID_DIR_HANDLE (no directory open yet).
 *
 * First variant: the starting directory is remembered as a descriptor
 * from open(".", O_RDONLY).
 *
 * Windows variant: the starting directory is remembered as the string
 * from _getcwd(NULL, 0).  The path is copied with strdup(); when the part
 * held in `base` contains '*' or '?', the text before its last '/' is
 * appended to the path buffer and becomes the directory prefix.
 *
 * NOTE(review): both malloc() results go straight to memset() with no
 * NULL check, and the open(".") result is stored without a visible check.
 * NOTE(review): the statements that create the first stack entry, assign
 * `base`, and release `pathname` are not present in this listing.
 */
275 tree_open(const char *path
)
280 t
= malloc(sizeof(*t
));
281 memset(t
, 0, sizeof(*t
));
282 /* First item is set up a lot like a symlink traversal. */
284 t
->stack
->flags
= needsFirstVisit
| isDirLink
| needsAscent
;
285 t
->stack
->symlink_parent_fd
= open(".", O_RDONLY
);
287 t
->d
= INVALID_DIR_HANDLE
;
289 #elif defined(_WIN32) && !defined(__CYGWIN__)
291 char *cwd
= _getcwd(NULL
, 0);
292 char *pathname
= strdup(path
), *p
, *base
;
294 if (pathname
== NULL
)
296 for (p
= pathname
; *p
!= '\0'; ++p
) {
302 t
= malloc(sizeof(*t
));
303 memset(t
, 0, sizeof(*t
));
304 /* First item is set up a lot like a symlink traversal. */
305 /* printf("Looking for wildcard in %s\n", path); */
306 /* TODO: wildcard detection here screws up on \\?\c:\ UNC names */
307 if (strchr(base
, '*') || strchr(base
, '?')) {
308 // It has a wildcard in it...
309 // Separate the last element.
310 p
= strrchr(base
, '/');
314 tree_append(t
, base
, p
- base
);
315 t
->dirname_length
= t
->path_length
;
321 t
->stack
->flags
= needsFirstVisit
| isDirLink
| needsAscent
;
/* Ownership of cwd moves to the entry; tree_ascend() frees it. */
322 t
->stack
->symlink_parent_path
= cwd
;
323 t
->d
= INVALID_DIR_HANDLE
;
329 * We've finished a directory; ascend back to the parent.
/*
 * Change the working directory back to the parent of the entry `te`.
 *
 * For an isDirLink entry the saved parent location is used: fchdir() to
 * symlink_parent_fd followed by close(), or on Windows
 * SetCurrentDirectory() on symlink_parent_path followed by free().
 * Other entries use ".." (chdir() or SetCurrentDirectory()).
 *
 * On failure the error is stored in t->tree_errno and `r` becomes
 * TREE_ERROR_FATAL.  tree_dir_next_posix() and tree_dir_next_windows()
 * treat a non-zero result from this function as the error to report.
 *
 * NOTE(review): the Windows branches copy errno after
 * SetCurrentDirectory() fails; Win32 reports that failure through
 * GetLastError(), so errno may be stale -- confirm.
 * NOTE(review): the assignment of `te`, the declaration of `r` and the
 * return statement are not present in this listing.
 */
332 tree_ascend(struct tree
*t
)
334 struct tree_entry
*te
;
339 if (te
->flags
& isDirLink
) {
341 if (fchdir(te
->symlink_parent_fd
) != 0) {
342 t
->tree_errno
= errno
;
343 r
= TREE_ERROR_FATAL
;
345 close(te
->symlink_parent_fd
);
346 #elif defined(_WIN32) && !defined(__CYGWIN__)
347 if (SetCurrentDirectory(te
->symlink_parent_path
) == 0) {
348 t
->tree_errno
= errno
;
349 r
= TREE_ERROR_FATAL
;
351 free(te
->symlink_parent_path
);
352 te
->symlink_parent_path
= NULL
;
356 #if defined(_WIN32) && !defined(__CYGWIN__)
357 if (SetCurrentDirectory("..") == 0) {
359 if (chdir("..") != 0) {
361 t
->tree_errno
= errno
;
362 r
= TREE_ERROR_FATAL
;
369 * Pop the working stack.
/*
 * Discard an entry from the working stack and restore the path state it
 * saved.
 *
 * The path buffer is cut back to the current dirname_length.  If the
 * stack top is also t->current, t->current moves up to its parent.
 * t->dirname_length is then restored from the entry `te`, and t->basename
 * is pointed at the buffer just past that directory prefix, stepping over
 * any leading '/' characters.
 *
 * NOTE(review): the statements that unlink and free the entry are not
 * present in this listing.
 */
372 tree_pop(struct tree
*t
)
374 struct tree_entry
*te
;
377 t
->buff
[t
->dirname_length
] = '\0';
378 if (t
->stack
== t
->current
&& t
->current
!= NULL
)
379 t
->current
= t
->current
->parent
;
382 t
->dirname_length
= te
->dirname_length
;
384 t
->basename
= t
->buff
+ t
->dirname_length
;
385 while (t
->basename
[0] == '/')
393 * Get the next item in the tree traversal.
/*
 * Advance the traversal and return the kind of visit that was produced;
 * the same value is stored in t->visit_type.  Returns 0 once the stack is
 * empty.
 *
 * Each pass of the loop looks at the stack top:
 *  - if a directory is open (t->d != INVALID_DIR_HANDLE), the next entry
 *    is read from it with the platform reader;
 *  - needsFirstVisit: the entry's name is appended to the path and
 *    returned as TREE_REGULAR (on Windows a name containing '*' or '?'
 *    is first tried as a search pattern);
 *  - needsDescent: the name is appended, the directory prefix is extended
 *    to the whole path, and the process changes into the directory.  For
 *    an isDirLink entry the way back is saved first (open(".") or
 *    _getcwd()).  Failure yields TREE_ERROR_DIR with the error saved in
 *    t->tree_errno; success yields TREE_POSTDESCENT;
 *  - needsOpen: the directory is opened and its first entry read;
 *  - needsAscent: yields TREE_POSTASCENT, or `r` when `r` is non-zero.
 *
 * Calling this again after TREE_ERROR_FATAL is an API violation and is
 * reported on stderr.
 *
 * Changes: corrected the spelling of "hierarchy" in the fatal message and
 * removed the redundant "!= 0" after the negated SetCurrentDirectory()
 * call (same truth value, clearer intent).
 *
 * NOTE(review): 259 looks like MAX_PATH - 1; confirm and name it.
 * NOTE(review): the results of open(".") and _getcwd() are stored without
 * a visible check.
 */
396 tree_next(struct tree
*t
)
400 /* If we're called again after a fatal error, that's an API
401 * violation. Just crash now. */
402 if (t
->visit_type
== TREE_ERROR_FATAL
) {
403 fprintf(stderr
, "Unable to continue traversing"
404 " directory hierarchy after a fatal error.");
408 while (t
->stack
!= NULL
) {
409 /* If there's an open dir, get the next entry from there. */
410 if (t
->d
!= INVALID_DIR_HANDLE
) {
411 #if defined(_WIN32) && !defined(__CYGWIN__)
412 r
= tree_dir_next_windows(t
, NULL
);
414 r
= tree_dir_next_posix(t
);
421 if (t
->stack
->flags
& needsFirstVisit
) {
422 #if defined(_WIN32) && !defined(__CYGWIN__)
423 char *d
= t
->stack
->name
;
424 t
->stack
->flags
&= ~needsFirstVisit
;
425 if (strchr(d
, '*') || strchr(d
, '?')) {
426 r
= tree_dir_next_windows(t
, d
);
431 // Not a pattern, handle it as-is...
433 /* Top stack item needs a regular visit. */
434 t
->current
= t
->stack
;
435 tree_append(t
, t
->stack
->name
, strlen(t
->stack
->name
));
436 //t->dirname_length = t->path_length;
438 t
->stack
->flags
&= ~needsFirstVisit
;
439 return (t
->visit_type
= TREE_REGULAR
);
440 } else if (t
->stack
->flags
& needsDescent
) {
441 /* Top stack item is dir to descend into. */
442 t
->current
= t
->stack
;
443 tree_append(t
, t
->stack
->name
, strlen(t
->stack
->name
));
444 t
->stack
->flags
&= ~needsDescent
;
445 /* If it is a link, set up fd for the ascent. */
446 if (t
->stack
->flags
& isDirLink
) {
448 t
->stack
->symlink_parent_fd
= open(".", O_RDONLY
);
/* Track the largest number of descriptors held open at once. */
450 if (t
->openCount
> t
->maxOpenCount
)
451 t
->maxOpenCount
= t
->openCount
;
452 #elif defined(_WIN32) && !defined(__CYGWIN__)
453 t
->stack
->symlink_parent_path
= _getcwd(NULL
, 0);
/* Everything appended so far becomes the directory prefix for children. */
456 t
->dirname_length
= t
->path_length
;
457 #if defined(_WIN32) && !defined(__CYGWIN__)
458 if (t
->path_length
== 259 || !SetCurrentDirectory(t
->stack
->name
))
460 if (chdir(t
->stack
->name
) != 0)
463 /* chdir() failed; return error */
465 t
->tree_errno
= errno
;
466 return (t
->visit_type
= TREE_ERROR_DIR
);
469 return (t
->visit_type
= TREE_POSTDESCENT
);
470 } else if (t
->stack
->flags
& needsOpen
) {
471 t
->stack
->flags
&= ~needsOpen
;
472 #if defined(_WIN32) && !defined(__CYGWIN__)
473 r
= tree_dir_next_windows(t
, "*");
475 r
= tree_dir_next_posix(t
);
480 } else if (t
->stack
->flags
& needsAscent
) {
481 /* Top stack item is dir and we're done with it. */
484 t
->visit_type
= r
!= 0 ? r
: TREE_POSTASCENT
;
485 return (t
->visit_type
);
487 /* Top item on stack is dead. */
489 t
->flags
&= ~hasLstat
;
490 t
->flags
&= ~hasStat
;
493 return (t
->visit_type
= 0);
496 #if defined(_WIN32) && !defined(__CYGWIN__)
/*
 * Windows directory reader: fetch the next entry of the current search.
 *
 * With a non-NULL `pattern` a new search is started with FindFirstFile().
 * If that fails, tree_ascend() undoes the directory change and the
 * function returns tree_ascend()'s result when non-zero, otherwise
 * TREE_ERROR_DIR.  With a NULL pattern the open search is continued with
 * FindNextFile(); when that fails, t->d is reset to INVALID_DIR_HANDLE.
 *
 * For an entry that was read, the cached stat flags are cleared, the
 * names "." and ".." are tested for, and any other name is appended to
 * the path and returned as TREE_REGULAR.
 *
 * NOTE(review): errno is copied after FindFirstFile() fails and after
 * tree_ascend() has run; Win32 reports the failure through
 * GetLastError(), so this value may be unrelated -- confirm.
 * NOTE(review): the handling of "." and "..", the closing of the search
 * handle and the enclosing loop are not present in this listing.
 */
498 tree_dir_next_windows(struct tree
*t
, const char *pattern
)
505 if (pattern
!= NULL
) {
506 t
->d
= FindFirstFile(pattern
, &t
->_findData
);
507 if (t
->d
== INVALID_DIR_HANDLE
) {
508 r
= tree_ascend(t
); /* Undo "chdir" */
510 t
->tree_errno
= errno
;
511 t
->visit_type
= r
!= 0 ? r
: TREE_ERROR_DIR
;
512 return (t
->visit_type
);
514 t
->findData
= &t
->_findData
;
516 } else if (!FindNextFile(t
->d
, &t
->_findData
)) {
518 t
->d
= INVALID_DIR_HANDLE
;
522 name
= t
->findData
->cFileName
;
523 namelen
= strlen(name
);
524 t
->flags
&= ~hasLstat
;
525 t
->flags
&= ~hasStat
;
526 if (name
[0] == '.' && name
[1] == '\0')
528 if (name
[0] == '.' && name
[1] == '.' && name
[2] == '\0')
530 tree_append(t
, name
, namelen
);
531 return (t
->visit_type
= TREE_REGULAR
);
/*
 * Directory reader built on opendir()/readdir(): fetch the next entry of
 * the current directory.
 *
 * The directory "." is opened with opendir().  If that fails,
 * tree_ascend() undoes the directory change and the function returns
 * tree_ascend()'s result when non-zero, otherwise TREE_ERROR_DIR.
 * Entries are read with readdir(); t->d is reset to INVALID_DIR_HANDLE
 * once reading is finished.
 *
 * For an entry that was read, the cached stat flags are cleared, the
 * names "." and ".." are tested for, and any other name is appended to
 * the path and returned as TREE_REGULAR.
 *
 * NOTE(review): errno is copied into t->tree_errno only after
 * tree_ascend() has run; calls made there may have overwritten the value
 * set by opendir().  Consider saving errno first.
 * NOTE(review): the condition guarding opendir(), the end-of-directory
 * test, the handling of "." and ".." and the enclosing loop are not
 * present in this listing.
 */
536 tree_dir_next_posix(struct tree
*t
)
543 if ((t
->d
= opendir(".")) == NULL
) {
544 r
= tree_ascend(t
); /* Undo "chdir" */
546 t
->tree_errno
= errno
;
547 t
->visit_type
= r
!= 0 ? r
: TREE_ERROR_DIR
;
548 return (t
->visit_type
);
552 t
->de
= readdir(t
->d
);
555 t
->d
= INVALID_DIR_HANDLE
;
558 name
= t
->de
->d_name
;
559 namelen
= D_NAMELEN(t
->de
);
560 t
->flags
&= ~hasLstat
;
561 t
->flags
&= ~hasStat
;
562 if (name
[0] == '.' && name
[1] == '\0')
564 if (name
[0] == '.' && name
[1] == '.' && name
[2] == '\0')
566 tree_append(t
, name
, namelen
);
567 return (t
->visit_type
= TREE_REGULAR
);
/*
 * Return the error code stored in t->tree_errno, which the traversal
 * functions in this file set from errno when an operation fails.
 */
576 tree_errno(struct tree
*t
)
578 return (t
->tree_errno
);
582 * Called by the client to mark the directory just returned from
583 * tree_next() as needing to be visited.
/*
 * Schedule the entry most recently returned by tree_next() for traversal.
 *
 * Only acts on a TREE_REGULAR visit.  A physical directory is pushed by
 * its basename and flagged isDir; otherwise, an entry that
 * tree_current_is_dir() accepts (a link to a directory) is pushed and
 * flagged isDirLink.  Anything else is not pushed.
 *
 * The flag is applied through t->stack, so tree_push() is expected to
 * leave the new entry on top of the stack.
 */
586 tree_descend(struct tree
*t
)
588 if (t
->visit_type
!= TREE_REGULAR
)
591 if (tree_current_is_physical_dir(t
)) {
592 tree_push(t
, t
->basename
);
593 t
->stack
->flags
|= isDir
;
594 } else if (tree_current_is_dir(t
)) {
595 tree_push(t
, t
->basename
);
596 t
->stack
->flags
|= isDirLink
;
601 * Get the stat() data for the entry just returned from tree_next().
/*
 * Provide stat() data for the current entry, reading it at most once.
 *
 * When hasStat is not set, stat() is called on the access path and the
 * result is stored in t->st.  Callers in this file treat the return value
 * as a pointer to struct stat.
 *
 * NOTE(review): the failure return, the setting of hasStat and the final
 * return are not present in this listing.
 */
604 tree_current_stat(struct tree
*t
)
606 if (!(t
->flags
& hasStat
)) {
607 if (stat(tree_current_access_path(t
), &t
->st
) != 0)
614 #if defined(HAVE_WINDOWS_H) && !defined(__CYGWIN__)
/*
 * Windows only: return the BY_HANDLE_FILE_INFORMATION of the current
 * entry, reading it at most once.
 *
 * When hasFileInfo is not set, the access path is opened with
 * CreateFile() using FILE_FLAG_BACKUP_SEMANTICS |
 * FILE_FLAG_OPEN_REPARSE_POINT, the data is read into t->fileInfo with
 * GetFileInformationByHandle(), and hasFileInfo is set.  The result is a
 * pointer to t->fileInfo; tree_current_is_dir() tests it for non-NULL.
 *
 * NOTE(review): the failure returns and the release of the handle `h`
 * are not present in this listing -- confirm it is closed on every path.
 */
615 const BY_HANDLE_FILE_INFORMATION
*
616 tree_current_file_information(struct tree
*t
)
618 if (!(t
->flags
& hasFileInfo
)) {
619 HANDLE h
= CreateFile(tree_current_access_path(t
),
622 FILE_FLAG_BACKUP_SEMANTICS
| FILE_FLAG_OPEN_REPARSE_POINT
,
624 if (h
== INVALID_HANDLE_VALUE
)
626 if (!GetFileInformationByHandle(h
, &t
->fileInfo
)) {
631 t
->flags
|= hasFileInfo
;
633 return (&t
->fileInfo
);
637 * Get the lstat() data for the entry just returned from tree_next().
/*
 * Provide lstat() data for the current entry, reading it at most once.
 *
 * On Windows this simply returns tree_current_stat().  Otherwise, when
 * hasLstat is not set, lstat() is called on the access path, the result
 * is stored in t->lst, and hasLstat is set.
 *
 * NOTE(review): the failure return and the final return are not present
 * in this listing.
 */
640 tree_current_lstat(struct tree
*t
)
642 #if defined(_WIN32) && !defined(__CYGWIN__)
643 return (tree_current_stat(t
));
645 if (!(t
->flags
& hasLstat
)) {
646 if (lstat(tree_current_access_path(t
), &t
->lst
) != 0)
648 t
->flags
|= hasLstat
;
655 * Test whether current entry is a dir or link to a dir.
/*
 * Report whether the current entry is a directory or a link to one.
 *
 * Windows: the answer is the FILE_ATTRIBUTE_DIRECTORY bit, taken from the
 * find data or from tree_current_file_information().
 *
 * Otherwise: if lstat() data is already cached it is consulted first.  A
 * directory there settles the question, and so does an entry that is not
 * a symbolic link.  The remaining cases are decided by S_ISDIR() on the
 * stat() data.
 *
 * NOTE(review): the condition selecting between the two Windows returns,
 * the early return values and the check of `st` for NULL are not present
 * in this listing.
 */
658 tree_current_is_dir(struct tree
*t
)
660 #if defined(_WIN32) && !defined(__CYGWIN__)
662 return (t
->findData
->dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY
);
663 if (tree_current_file_information(t
))
664 return (t
->fileInfo
.dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY
);
667 const struct stat
*st
;
669 * If we already have lstat() info, then try some
670 * cheap tests to determine if this is a dir.
672 if (t
->flags
& hasLstat
) {
673 /* If lstat() says it's a dir, it must be a dir. */
674 if (S_ISDIR(tree_current_lstat(t
)->st_mode
))
676 /* Not a dir; might be a link to a dir. */
677 /* If it's not a link, then it's not a link to a dir. */
678 if (!S_ISLNK(tree_current_lstat(t
)->st_mode
))
681 * It's a link, but we don't know what it's a link to,
682 * so we'll have to use stat().
686 st
= tree_current_stat(t
);
687 /* If we can't stat it, it's not a dir. */
690 /* Use the definitive test. Hopefully this is cached. */
691 return (S_ISDIR(st
->st_mode
));
696 * Test whether current entry is a physical directory. Usually, we
697 * already have at least one of stat() or lstat() in memory, so we
698 * use tricks to try to avoid an extra trip to the disk.
/*
 * Report whether the current entry is itself a directory, as opposed to
 * a link to one.
 *
 * Windows: tree_current_is_physical_link() is consulted first, and the
 * result otherwise comes from tree_current_is_dir().
 *
 * Otherwise: if stat() data is cached and says "not a directory", that
 * settles it.  In all other cases the answer is S_ISDIR() on the lstat()
 * data.
 *
 * NOTE(review): the early return values and the check of `st` for NULL
 * are not present in this listing.
 */
701 tree_current_is_physical_dir(struct tree
*t
)
703 #if defined(_WIN32) && !defined(__CYGWIN__)
704 if (tree_current_is_physical_link(t
))
706 return (tree_current_is_dir(t
));
708 const struct stat
*st
;
711 * If stat() says it isn't a dir, then it's not a dir.
712 * If stat() data is cached, this check is free, so do it first.
714 if ((t
->flags
& hasStat
)
715 && (!S_ISDIR(tree_current_stat(t
)->st_mode
)))
719 * Either stat() said it was a dir (in which case, we have
720 * to determine whether it's really a link to a dir) or
721 * stat() info wasn't available. So we use lstat(), which
722 * hopefully is already cached.
725 st
= tree_current_lstat(t
);
726 /* If we can't stat it, it's not a dir. */
729 /* Use the definitive test. Hopefully this is cached. */
730 return (S_ISDIR(st
->st_mode
));
735 * Test whether current entry is a symbolic link.
/*
 * Report whether the current entry is a symbolic link.
 *
 * Windows: true when the find data has FILE_ATTRIBUTE_REPARSE_POINT set
 * and its dwReserved0 field equals IO_REPARSE_TAG_SYMLINK (defined here
 * when the SDK does not provide it).
 *
 * Otherwise: S_ISLNK() on the lstat() data.
 *
 * NOTE(review): the Windows branch dereferences t->findData, and the
 * other branch dereferences `st`; any NULL checks are on lines missing
 * from this listing.
 */
738 tree_current_is_physical_link(struct tree
*t
)
740 #if defined(_WIN32) && !defined(__CYGWIN__)
741 #ifndef IO_REPARSE_TAG_SYMLINK
742 /* Old SDKs do not provide IO_REPARSE_TAG_SYMLINK */
743 #define IO_REPARSE_TAG_SYMLINK 0xA000000CL
746 return ((t
->findData
->dwFileAttributes
& FILE_ATTRIBUTE_REPARSE_POINT
)
747 && (t
->findData
->dwReserved0
== IO_REPARSE_TAG_SYMLINK
));
750 const struct stat
*st
= tree_current_lstat(t
);
753 return (S_ISLNK(st
->st_mode
));
758 * Return the access path for the entry just returned from tree_next().
/*
 * Return t->basename, the last element of the current path.  The stat(),
 * lstat() and CreateFile() calls in this file open the entry by this
 * name, which works because the traversal changes into each directory it
 * descends.
 */
761 tree_current_access_path(struct tree
*t
)
763 return (t
->basename
);
767 * Return the full path for the entry just returned from tree_next().
/*
 * NOTE(review): only the signature is present in this listing;
 * presumably this returns the path buffer assembled by tree_append() --
 * confirm against the full file.
 */
770 tree_current_path(struct tree
*t
)
776 * Return the length of the path for the entry just returned from tree_next().
/*
 * Return t->path_length, which tree_append() sets to the directory
 * prefix length plus the length of the appended name.
 */
779 tree_current_pathlen(struct tree
*t
)
781 return (t
->path_length
);
785 * Return the nesting depth of the entry just returned from tree_next().
/*
 * NOTE(review): only the signature is present in this listing;
 * presumably this returns the depth value that tree_dump() prints --
 * confirm against the full file.
 */
788 tree_current_depth(struct tree
*t
)
794 * Terminate the traversal and release any resources.
/*
 * End the traversal: empty the stack while entries remain, then return
 * the process to the directory recorded in the tree, if any.
 *
 * With fchdir() support, a non-negative t->initialDirFd is used to change
 * back (the result is deliberately ignored), then closed and reset to -1.
 * On Windows a non-NULL t->initialDir is restored and cleared.
 *
 * Fix: the Windows branch called SetCurrentDir(), which is not the Win32
 * function used everywhere else in this file; it now calls
 * SetCurrentDirectory().
 *
 * NOTE(review): the loop body and the statements that release
 * t->initialDir and the tree itself are not present in this listing.
 */
797 tree_close(struct tree
*t
)
799 /* Release anything remaining in the stack. */
800 while (t
->stack
!= NULL
)
803 /* TODO: Ensure that premature close() resets cwd */
806 if (t
->initialDirFd
>= 0) {
807 int s
= fchdir(t
->initialDirFd
);
808 (void)s
; /* UNUSED */
809 close(t
->initialDirFd
);
810 t
->initialDirFd
= -1;
812 #elif defined(_WIN32) && !defined(__CYGWIN__)
813 if (t
->initialDir
!= NULL
) {
814 SetCurrentDirectory(t
->initialDir
);
816 t
->initialDir
= NULL
;