Patrick Welche <prlw1@cam.ac.uk>
[netbsd-mini2440.git] / external / bsd / libarchive / dist / tar / tree.c
blob23e64e3da0644e72891e97a1374b31a03aea00ca
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * This is a new directory-walking system that addresses a number
 * of problems I've had with fts(3).  In particular, it has no
 * pathname-length limits (other than the size of 'int'), handles
 * deep logical traversals, uses considerably less memory, and has
 * an opaque interface (easier to modify in the future).
 *
 * Internally, it keeps a single list of "tree_entry" items that
 * represent filesystem objects that require further attention.
 * Non-directories are not kept in memory: they are pulled from
 * readdir(), returned to the client, then freed as soon as possible.
 * Any directory entry to be traversed gets pushed onto the stack.
 *
 * There is surprisingly little information that needs to be kept for
 * each item on the stack.  Just the name, depth (represented here as the
 * string length of the parent directory's pathname), and some markers
 * indicating how to get back to the parent (via chdir("..") for a
 * regular dir or via fchdir(2) for a symlink).
 */
45 #include "bsdtar_platform.h"
46 __FBSDID("$FreeBSD: src/usr.bin/tar/tree.c,v 1.8 2007/03/11 10:36:42 kientzle Exp $");
48 #ifdef HAVE_SYS_STAT_H
49 #include <sys/stat.h>
50 #endif
51 #ifdef HAVE_DIRENT_H
52 #include <dirent.h>
53 #endif
54 #ifdef HAVE_ERRNO_H
55 #include <errno.h>
56 #endif
57 #ifdef HAVE_FCNTL_H
58 #include <fcntl.h>
59 #endif
60 #ifdef HAVE_STDLIB_H
61 #include <stdlib.h>
62 #endif
63 #ifdef HAVE_STRING_H
64 #include <string.h>
65 #endif
66 #ifdef HAVE_UNISTD_H
67 #include <unistd.h>
68 #endif
70 #include "tree.h"
/*
 * TODO:
 *    1) Loop checking.
 *    3) Arbitrary logical traversals by closing/reopening intermediate fds.
 */
/*
 * One filesystem object that still requires attention.  Entries are
 * chained through 'next' to form the traversal stack.
 */
struct tree_entry {
	struct tree_entry *next;	/* Entry that was on top before this one. */
	struct tree_entry *parent;	/* Becomes "current" when this entry is popped. */
	char *name;			/* Heap copy of the name given to tree_push(). */
	size_t dirname_length;		/* tree.dirname_length at the time of the push. */
	dev_t dev;			/* NOTE(review): not used in this file yet; */
	ino_t ino;			/* presumably reserved for loop checking. */
	int fd;				/* Descriptor for leaving a symlinked dir; -1 if unset. */
	int flags;			/* Bitmap; see the definitions below. */
};

/* Definitions for tree_entry.flags bitmap. */
#define isDir 1 /* This entry is a regular directory. */
#define isDirLink 2 /* This entry is a symbolic link to a directory. */
#define needsPreVisit 4 /* This entry needs to be previsited. */
#define needsPostVisit 8 /* This entry needs to be postvisited. */
/*
 * Local data for this package.
 */
struct tree {
	struct tree_entry *stack;	/* Top of the list of pending entries. */
	struct tree_entry *current;	/* Entry most recently set up for a previsit. */
	DIR *d;				/* Directory stream being read, or NULL. */
	int initialDirFd;		/* Descriptor for "." taken in tree_open(). */
	int flags;			/* Bitmap; see the definitions below. */
	int visit_type;			/* Last value returned by tree_next(). */
	int tree_errno; /* Error code from last failed operation. */

	char *buff;			/* Full pathname of the current entry. */
	const char *basename;		/* Access path of the current entry. */
	size_t buff_length;		/* Allocated size of buff. */
	size_t path_length;		/* Length of the pathname held in buff. */
	size_t dirname_length;		/* Length of the directory part of buff. */

	int depth;			/* Number of directories currently descended into. */
	int openCount;			/* Symlink-ascent descriptors currently open. */
	int maxOpenCount;		/* Largest value openCount has reached. */

	struct stat lst;		/* lstat() data, valid when hasLstat is set. */
	struct stat st;			/* stat() data, valid when hasStat is set. */
};

/* Definitions for tree.flags bitmap. */
#define needsReturn 8 /* Marks first entry as not having been returned yet. */
#define hasStat 16 /* The st entry is set. */
#define hasLstat 32 /* The lst entry is set. */
/*
 * Length of the name held in a struct dirent.
 */
#ifndef HAVE_DIRENT_D_NAMLEN
/* No length field available: measure the name. */
#define D_NAMELEN(dp)	(strlen((dp)->d_name))
#else
/* BSD extension; avoids need for a strlen() call. */
#define D_NAMELEN(dp)	(dp)->d_namlen
#endif
#if 0
#include <stdio.h>
/*
 * Debugging aid (compiled out): print the traversal state and every
 * entry on the stack to 'out'.  Entries that no longer need a previsit
 * are marked with " *"; the current entry is marked " (current)".
 */
void
tree_dump(struct tree *t, FILE *out)
{
	struct tree_entry *entry;

	fprintf(out, "\tdepth: %d\n", t->depth);
	fprintf(out, "\tbuff: %s\n", t->buff);
	fprintf(out, "\tpwd: ");
	fflush(stdout);
	system("pwd");
	fprintf(out, "\taccess: %s\n", t->basename);
	fprintf(out, "\tstack:\n");
	entry = t->stack;
	while (entry != NULL) {
		const char *visited = (entry->flags & needsPreVisit) ? "" : " *";
		const char *marker = (t->current == entry) ? " (current)" : "";

		fprintf(out, "\t\tte->name: %s%s%s\n", entry->name,
		    visited, marker);
		entry = entry->next;
	}
}
#endif
155 * Add a directory path to the current stack.
157 static void
158 tree_push(struct tree *t, const char *path)
160 struct tree_entry *te;
162 te = malloc(sizeof(*te));
163 memset(te, 0, sizeof(*te));
164 te->next = t->stack;
165 t->stack = te;
166 te->fd = -1;
167 te->name = strdup(path);
168 te->flags = needsPreVisit | needsPostVisit;
169 te->dirname_length = t->dirname_length;
173 * Append a name to the current path.
175 static void
176 tree_append(struct tree *t, const char *name, size_t name_length)
178 char *p;
180 if (t->buff != NULL)
181 t->buff[t->dirname_length] = '\0';
182 /* Strip trailing '/' from name, unless entire name is "/". */
183 while (name_length > 1 && name[name_length - 1] == '/')
184 name_length--;
186 /* Resize pathname buffer as needed. */
187 while (name_length + 1 + t->dirname_length >= t->buff_length) {
188 t->buff_length *= 2;
189 if (t->buff_length < 1024)
190 t->buff_length = 1024;
191 t->buff = realloc(t->buff, t->buff_length);
193 p = t->buff + t->dirname_length;
194 t->path_length = t->dirname_length + name_length;
195 /* Add a separating '/' if it's needed. */
196 if (t->dirname_length > 0 && p[-1] != '/') {
197 *p++ = '/';
198 t->path_length ++;
200 strncpy(p, name, name_length);
201 p[name_length] = '\0';
202 t->basename = p;
206 * Open a directory tree for traversal.
208 struct tree *
209 tree_open(const char *path)
211 struct tree *t;
213 t = malloc(sizeof(*t));
214 memset(t, 0, sizeof(*t));
215 tree_append(t, path, strlen(path));
216 t->initialDirFd = open(".", O_RDONLY);
218 * During most of the traversal, items are set up and then
219 * returned immediately from tree_next(). That doesn't work
220 * for the very first entry, so we set a flag for this special
221 * case.
223 t->flags = needsReturn;
224 return (t);
228 * We've finished a directory; ascend back to the parent.
230 static void
231 tree_ascend(struct tree *t)
233 struct tree_entry *te;
235 te = t->stack;
236 t->depth--;
237 if (te->flags & isDirLink) {
238 fchdir(te->fd);
239 close(te->fd);
240 t->openCount--;
241 } else {
242 chdir("..");
247 * Pop the working stack.
249 static void
250 tree_pop(struct tree *t)
252 struct tree_entry *te;
254 t->buff[t->dirname_length] = '\0';
255 if (t->stack == t->current && t->current != NULL)
256 t->current = t->current->parent;
257 te = t->stack;
258 t->stack = te->next;
259 t->dirname_length = te->dirname_length;
260 t->basename = t->buff + t->dirname_length;
261 /* Special case: starting dir doesn't skip leading '/'. */
262 if (t->dirname_length > 0)
263 t->basename++;
264 free(te->name);
265 free(te);
269 * Get the next item in the tree traversal.
272 tree_next(struct tree *t)
274 struct dirent *de = NULL;
276 /* Handle the startup case by returning the initial entry. */
277 if (t->flags & needsReturn) {
278 t->flags &= ~needsReturn;
279 return (t->visit_type = TREE_REGULAR);
282 while (t->stack != NULL) {
283 /* If there's an open dir, get the next entry from there. */
284 while (t->d != NULL) {
285 de = readdir(t->d);
286 if (de == NULL) {
287 closedir(t->d);
288 t->d = NULL;
289 } else if (de->d_name[0] == '.'
290 && de->d_name[1] == '\0') {
291 /* Skip '.' */
292 } else if (de->d_name[0] == '.'
293 && de->d_name[1] == '.'
294 && de->d_name[2] == '\0') {
295 /* Skip '..' */
296 } else {
298 * Append the path to the current path
299 * and return it.
301 tree_append(t, de->d_name, D_NAMELEN(de));
302 t->flags &= ~hasLstat;
303 t->flags &= ~hasStat;
304 return (t->visit_type = TREE_REGULAR);
308 /* If the current dir needs to be visited, set it up. */
309 if (t->stack->flags & needsPreVisit) {
310 t->current = t->stack;
311 tree_append(t, t->stack->name, strlen(t->stack->name));
312 t->stack->flags &= ~needsPreVisit;
313 /* If it is a link, set up fd for the ascent. */
314 if (t->stack->flags & isDirLink) {
315 t->stack->fd = open(".", O_RDONLY);
316 t->openCount++;
317 if (t->openCount > t->maxOpenCount)
318 t->maxOpenCount = t->openCount;
320 t->dirname_length = t->path_length;
321 if (chdir(t->stack->name) != 0) {
322 /* chdir() failed; return error */
323 tree_pop(t);
324 t->tree_errno = errno;
325 return (t->visit_type = TREE_ERROR_DIR);
327 t->depth++;
328 t->d = opendir(".");
329 if (t->d == NULL) {
330 tree_ascend(t); /* Undo "chdir" */
331 tree_pop(t);
332 t->tree_errno = errno;
333 return (t->visit_type = TREE_ERROR_DIR);
335 t->flags &= ~hasLstat;
336 t->flags &= ~hasStat;
337 t->basename = ".";
338 return (t->visit_type = TREE_POSTDESCENT);
341 /* We've done everything necessary for the top stack entry. */
342 if (t->stack->flags & needsPostVisit) {
343 tree_ascend(t);
344 tree_pop(t);
345 t->flags &= ~hasLstat;
346 t->flags &= ~hasStat;
347 return (t->visit_type = TREE_POSTASCENT);
350 return (t->visit_type = 0);
354 * Return error code.
357 tree_errno(struct tree *t)
359 return (t->tree_errno);
363 * Called by the client to mark the directory just returned from
364 * tree_next() as needing to be visited.
366 void
367 tree_descend(struct tree *t)
369 if (t->visit_type != TREE_REGULAR)
370 return;
372 if (tree_current_is_physical_dir(t)) {
373 tree_push(t, t->basename);
374 t->stack->flags |= isDir;
375 } else if (tree_current_is_dir(t)) {
376 tree_push(t, t->basename);
377 t->stack->flags |= isDirLink;
382 * Get the stat() data for the entry just returned from tree_next().
384 const struct stat *
385 tree_current_stat(struct tree *t)
387 if (!(t->flags & hasStat)) {
388 if (stat(t->basename, &t->st) != 0)
389 return NULL;
390 t->flags |= hasStat;
392 return (&t->st);
396 * Get the lstat() data for the entry just returned from tree_next().
398 const struct stat *
399 tree_current_lstat(struct tree *t)
401 if (!(t->flags & hasLstat)) {
402 if (lstat(t->basename, &t->lst) != 0)
403 return NULL;
404 t->flags |= hasLstat;
406 return (&t->lst);
410 * Test whether current entry is a dir or link to a dir.
413 tree_current_is_dir(struct tree *t)
415 const struct stat *st;
418 * If we already have lstat() info, then try some
419 * cheap tests to determine if this is a dir.
421 if (t->flags & hasLstat) {
422 /* If lstat() says it's a dir, it must be a dir. */
423 if (S_ISDIR(tree_current_lstat(t)->st_mode))
424 return 1;
425 /* Not a dir; might be a link to a dir. */
426 /* If it's not a link, then it's not a link to a dir. */
427 if (!S_ISLNK(tree_current_lstat(t)->st_mode))
428 return 0;
430 * It's a link, but we don't know what it's a link to,
431 * so we'll have to use stat().
435 st = tree_current_stat(t);
436 /* If we can't stat it, it's not a dir. */
437 if (st == NULL)
438 return 0;
439 /* Use the definitive test. Hopefully this is cached. */
440 return (S_ISDIR(st->st_mode));
444 * Test whether current entry is a physical directory. Usually, we
445 * already have at least one of stat() or lstat() in memory, so we
446 * use tricks to try to avoid an extra trip to the disk.
449 tree_current_is_physical_dir(struct tree *t)
451 const struct stat *st;
454 * If stat() says it isn't a dir, then it's not a dir.
455 * If stat() data is cached, this check is free, so do it first.
457 if ((t->flags & hasStat)
458 && (!S_ISDIR(tree_current_stat(t)->st_mode)))
459 return 0;
462 * Either stat() said it was a dir (in which case, we have
463 * to determine whether it's really a link to a dir) or
464 * stat() info wasn't available. So we use lstat(), which
465 * hopefully is already cached.
468 st = tree_current_lstat(t);
469 /* If we can't stat it, it's not a dir. */
470 if (st == NULL)
471 return 0;
472 /* Use the definitive test. Hopefully this is cached. */
473 return (S_ISDIR(st->st_mode));
/*
 * Test whether current entry is a symbolic link.
 * An entry that cannot be lstat()ed is reported as not being a link.
 */
int
tree_current_is_physical_link(struct tree *t)
{
	const struct stat *st;

	st = tree_current_lstat(t);
	return (st == NULL ? 0 : S_ISLNK(st->st_mode));
}
489 * Return the access path for the entry just returned from tree_next().
491 const char *
492 tree_current_access_path(struct tree *t)
494 return (t->basename);
498 * Return the full path for the entry just returned from tree_next().
500 const char *
501 tree_current_path(struct tree *t)
503 return (t->buff);
507 * Return the length of the path for the entry just returned from tree_next().
509 size_t
510 tree_current_pathlen(struct tree *t)
512 return (t->path_length);
516 * Return the nesting depth of the entry just returned from tree_next().
519 tree_current_depth(struct tree *t)
521 return (t->depth);
525 * Terminate the traversal and release any resources.
527 void
528 tree_close(struct tree *t)
530 /* Release anything remaining in the stack. */
531 while (t->stack != NULL)
532 tree_pop(t);
533 if (t->buff)
534 free(t->buff);
535 /* chdir() back to where we started. */
536 if (t->initialDirFd >= 0) {
537 fchdir(t->initialDirFd);
538 close(t->initialDirFd);
539 t->initialDirFd = -1;
541 free(t);