*** empty log message ***
[coreutils.git] / src / du.c
blobab47a620f94badb6e4688b3731e7137ebbef5999
1 /* du -- summarize disk usage
2 Copyright (C) 88, 89, 90, 91, 1995-2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
20 when -a is given.
21 * Additional options:
22 -l Count the size of all files, even if they have appeared
23 already in another hard link.
24 -x Do not cross file-system boundaries during the recursion.
25 -c Write a grand total of all of the arguments after all
26 arguments have been processed. This can be used to find
27 out the disk usage of a directory, with some files excluded.
28 -h Print sizes in human readable format (1k 234M 2G, etc).
29 -H Similar, but use powers of 1000 not 1024.
30 -k Print sizes in kilobytes.
31 -m Print sizes in megabytes.
32 -b Print sizes in bytes.
33 -S Count the size of each directory separately, not including
34 the sizes of subdirectories.
35 -D Dereference only symbolic links given on the command line.
36 -L Dereference all symbolic links.
37 --exclude=PAT Exclude files that match PAT.
38 -X FILE Exclude files that match patterns taken from FILE.
40 By tege@sics.se, Torbjorn Granlund,
41 and djm@ai.mit.edu, David MacKenzie.
42 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
45 #include <config.h>
46 #if HAVE_INTTYPES_H
47 # include <inttypes.h>
48 #endif
49 #include <stdio.h>
50 #include <getopt.h>
51 #include <sys/types.h>
52 #include <assert.h>
54 #include "system.h"
55 #include "error.h"
56 #include "exclude.h"
57 #include "human.h"
58 #include "quote.h"
59 #include "save-cwd.h"
60 #include "savedir.h"
61 #include "xstrtol.h"
63 /* The official name of this program (e.g., no `g' prefix). */
64 #define PROGRAM_NAME "du"
66 #define AUTHORS \
67 "Torbjorn Granlund, David MacKenzie, Larry McVoy, and Paul Eggert"
69 /* Initial number of entries in each hash table entry's table of inodes. */
70 #define INITIAL_HASH_MODULE 100
72 /* Initial number of entries in the inode hash table. */
73 #define INITIAL_ENTRY_TAB_SIZE 70
75 /* Initial size to allocate for `path'. */
76 #define INITIAL_PATH_SIZE 100
78 /* Hash structure for inode and device numbers. The separate entry
79 structure makes it easier to rehash "in place". */
81 struct entry
83 ino_t ino;
84 dev_t dev;
85 struct entry *coll_link;
88 /* Structure for a hash table for inode numbers. */
90 struct htab
92 unsigned modulus; /* Size of the `hash' pointer vector. */
93 struct entry *entry_tab; /* Pointer to dynamically growing vector. */
94 unsigned entry_tab_size; /* Size of current `entry_tab' allocation. */
95 unsigned first_free_entry; /* Index in `entry_tab'. */
96 struct entry *hash[1]; /* Vector of pointers in `entry_tab'. */
100 /* Structure for dynamically resizable strings. */
102 struct String
104 unsigned alloc; /* Size of allocation for the text. */
105 unsigned length; /* Length of the text currently. */
106 char *text; /* Pointer to the text. */
108 typedef struct String String;
110 int stat ();
111 int lstat ();
113 /* Name under which this program was invoked. */
114 char *program_name;
116 /* If nonzero, display counts for all files, not just directories. */
117 static int opt_all = 0;
119 /* If nonzero, count each hard link of files with multiple links. */
120 static int opt_count_all = 0;
122 /* If nonzero, do not cross file-system boundaries. */
123 static int opt_one_file_system = 0;
125 /* If nonzero, print a grand total at the end. */
126 static int print_totals = 0;
128 /* If nonzero, do not add sizes of subdirectories. */
129 static int opt_separate_dirs = 0;
131 /* If nonzero, dereference symlinks that are command line arguments. */
132 static int opt_dereference_arguments = 0;
134 /* Show the total for each directory (and file if --all) that is at
135 most MAX_DEPTH levels down from the root of the hierarchy. The root
136 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
137 static int max_depth = INT_MAX;
139 /* If positive, the units to use when printing sizes;
140 if negative, the human-readable base. */
141 static int output_block_size;
143 /* Accumulated path for file or directory being processed. */
144 static String *path;
146 /* Pointer to hash structure, used by the hash routines. */
147 static struct htab *htab;
149 /* A pointer to either lstat or stat, depending on whether
150 dereferencing of all symbolic links is to be done. */
151 static int (*xstat) ();
153 /* The exit status to use if we don't get any fatal errors. */
154 static int exit_status;
156 /* File name patterns to exclude. */
157 static struct exclude *exclude;
159 /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
160 static uintmax_t tot_size = 0;
162 /* For long options that have no equivalent short option, use a
163 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
164 enum
166 EXCLUDE_OPTION = CHAR_MAX + 1,
167 BLOCK_SIZE_OPTION,
168 MAX_DEPTH_OPTION
171 static struct option const long_options[] =
173 {"all", no_argument, NULL, 'a'},
174 {"block-size", required_argument, 0, BLOCK_SIZE_OPTION},
175 {"bytes", no_argument, NULL, 'b'},
176 {"count-links", no_argument, NULL, 'l'},
177 {"dereference", no_argument, NULL, 'L'},
178 {"dereference-args", no_argument, NULL, 'D'},
179 {"exclude", required_argument, 0, EXCLUDE_OPTION},
180 {"exclude-from", required_argument, 0, 'X'},
181 {"human-readable", no_argument, NULL, 'h'},
182 {"si", no_argument, 0, 'H'},
183 {"kilobytes", no_argument, NULL, 'k'},
184 {"max-depth", required_argument, NULL, MAX_DEPTH_OPTION},
185 {"megabytes", no_argument, NULL, 'm'},
186 {"one-file-system", no_argument, NULL, 'x'},
187 {"separate-dirs", no_argument, NULL, 'S'},
188 {"summarize", no_argument, NULL, 's'},
189 {"total", no_argument, NULL, 'c'},
190 {GETOPT_HELP_OPTION_DECL},
191 {GETOPT_VERSION_OPTION_DECL},
192 {NULL, 0, NULL, 0}
195 void
196 usage (int status)
198 if (status != 0)
199 fprintf (stderr, _("Try `%s --help' for more information.\n"),
200 program_name);
201 else
203 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name);
204 printf (_("\
205 Summarize disk usage of each FILE, recursively for directories.\n\
207 -a, --all write counts for all files, not just directories\n\
208 --block-size=SIZE use SIZE-byte blocks\n\
209 -b, --bytes print size in bytes\n\
210 -c, --total produce a grand total\n\
211 -D, --dereference-args dereference PATHs when symbolic link\n\
212 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
213 -H, --si likewise, but use powers of 1000 not 1024\n\
214 -k, --kilobytes like --block-size=1024\n\
215 -l, --count-links count sizes many times if hard linked\n\
216 -L, --dereference dereference all symbolic links\n\
217 -m, --megabytes like --block-size=1048576\n\
218 -S, --separate-dirs do not include size of subdirectories\n\
219 -s, --summarize display only a total for each argument\n\
220 -x, --one-file-system skip directories on different filesystems\n\
221 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
222 --exclude=PAT Exclude files that match PAT.\n\
223 --max-depth=N print the total for a directory (or file, with --all)\n\
224 only if it is N or fewer levels below the command\n\
225 line argument; --max-depth=0 is the same as\n\
226 --summarize\n\
227 --help display this help and exit\n\
228 --version output version information and exit\n\
229 "));
230 puts (_("\nReport bugs to <bug-fileutils@gnu.org>."));
232 exit (status);
235 /* Initialize string S1 to hold SIZE characters. */
237 static void
238 str_init (String **s1, unsigned int size)
240 String *s;
242 s = (String *) xmalloc (sizeof (struct String));
243 s->text = xmalloc (size + 1);
245 s->alloc = size;
246 *s1 = s;
249 static void
250 ensure_space (String *s, unsigned int size)
252 if (s->alloc < size)
254 s->text = xrealloc (s->text, size + 1);
255 s->alloc = size;
259 /* Assign the null-terminated C-string CSTR to S1. */
261 static void
262 str_copyc (String *s1, const char *cstr)
264 unsigned l = strlen (cstr);
265 ensure_space (s1, l);
266 strcpy (s1->text, cstr);
267 s1->length = l;
270 static void
271 str_concatc (String *s1, const char *cstr)
273 unsigned l1 = s1->length;
274 unsigned l2 = strlen (cstr);
275 unsigned l = l1 + l2;
277 ensure_space (s1, l);
278 strcpy (s1->text + l1, cstr);
279 s1->length = l;
282 /* Truncate the string S1 to have length LENGTH. */
284 static void
285 str_trunc (String *s1, unsigned int length)
287 if (s1->length > length)
289 s1->text[length] = 0;
290 s1->length = length;
294 /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of
295 ST_NBLOCKSIZE-byte blocks; convert it to OUTPUT_BLOCK_SIZE units before
296 printing. If OUTPUT_BLOCK_SIZE is negative, use a human readable
297 notation instead. */
299 static void
300 print_size (uintmax_t n_blocks, const char *string)
302 char buf[LONGEST_HUMAN_READABLE + 1];
303 printf ("%s\t%s\n",
304 human_readable_inexact (n_blocks, buf, ST_NBLOCKSIZE,
305 output_block_size, human_ceiling),
306 string);
307 fflush (stdout);
310 /* Reset the hash structure in the global variable `htab' to
311 contain no entries. */
313 static void
314 hash_reset (void)
316 int i;
317 struct entry **p;
319 htab->first_free_entry = 0;
321 p = htab->hash;
322 for (i = htab->modulus; i > 0; i--)
323 *p++ = NULL;
326 /* Allocate space for the hash structures, and set the global
327 variable `htab' to point to it. The initial hash module is specified in
328 MODULUS, and the number of entries are specified in ENTRY_TAB_SIZE. (The
329 hash structure will be rebuilt when ENTRY_TAB_SIZE entries have been
330 inserted, and MODULUS and ENTRY_TAB_SIZE in the global `htab' will be
331 doubled.) */
333 static void
334 hash_init (unsigned int modulus, unsigned int entry_tab_size)
336 struct htab *htab_r;
338 htab_r = (struct htab *)
339 xmalloc (sizeof (struct htab) + sizeof (struct entry *) * modulus);
341 htab_r->entry_tab = (struct entry *)
342 xmalloc (sizeof (struct entry) * entry_tab_size);
344 htab_r->modulus = modulus;
345 htab_r->entry_tab_size = entry_tab_size;
346 htab = htab_r;
348 hash_reset ();
351 /* Insert INO and DEV in the hash structure HTAB, if not
352 already present. Return zero if inserted and nonzero if it
353 already existed. */
355 static int
356 hash_insert2 (struct htab *ht, ino_t ino, dev_t dev)
358 struct entry **hp, *ep2, *ep;
359 hp = &ht->hash[ino % ht->modulus];
360 ep2 = *hp;
362 /* Collision? */
364 if (ep2 != NULL)
366 ep = ep2;
368 /* Search for an entry with the same data. */
372 if (ep->ino == ino && ep->dev == dev)
373 return 1; /* Found an entry with the same data. */
374 ep = ep->coll_link;
376 while (ep != NULL);
378 /* Did not find it. */
382 ep = *hp = &ht->entry_tab[ht->first_free_entry++];
383 ep->ino = ino;
384 ep->dev = dev;
385 ep->coll_link = ep2; /* `ep2' is NULL if no collision. */
387 return 0;
390 /* Insert an item (inode INO and device DEV) in the hash
391 structure in the global variable `htab', if an entry with the same data
392 was not found already. Return zero if the item was inserted and nonzero
393 if it wasn't. */
395 static int
396 hash_insert (ino_t ino, dev_t dev)
398 struct htab *htab_r = htab; /* Initially a copy of the global `htab'. */
400 if (htab_r->first_free_entry >= htab_r->entry_tab_size)
402 int i;
403 struct entry *ep;
404 unsigned modulus;
405 unsigned entry_tab_size;
407 /* Increase the number of hash entries, and re-hash the data.
408 The method of shrimping and increasing is made to compactify
409 the heap. If twice as much data would be allocated
410 straightforwardly, we would never re-use a byte of memory. */
412 /* Let `htab' shrimp. Keep only the header, not the pointer vector. */
414 htab_r = (struct htab *)
415 xrealloc ((char *) htab_r, sizeof (struct htab));
417 modulus = 2 * htab_r->modulus;
418 entry_tab_size = 2 * htab_r->entry_tab_size;
420 /* Increase the number of possible entries. */
422 htab_r->entry_tab = (struct entry *)
423 xrealloc ((char *) htab_r->entry_tab,
424 sizeof (struct entry) * entry_tab_size);
426 /* Increase the size of htab again. */
428 htab_r = (struct htab *)
429 xrealloc ((char *) htab_r,
430 sizeof (struct htab) + sizeof (struct entry *) * modulus);
432 htab_r->modulus = modulus;
433 htab_r->entry_tab_size = entry_tab_size;
434 htab = htab_r;
436 i = htab_r->first_free_entry;
438 /* Make the increased hash table empty. The entries are still
439 available in htab->entry_tab. */
441 hash_reset ();
443 /* Go through the entries and install them in the pointer vector
444 htab->hash. The items are actually inserted in htab->entry_tab at
445 the position where they already are. The htab->coll_link need
446 however be updated. Could be made a little more efficient. */
448 for (ep = htab_r->entry_tab; i > 0; i--)
450 hash_insert2 (htab_r, ep->ino, ep->dev);
451 ep++;
455 return hash_insert2 (htab_r, ino, dev);
458 /* Restore the previous working directory or exit.
459 If CWD is null, simply call `chdir ("..")'. Otherwise,
460 use CWD and free it. CURR_DIR_NAME is the name of the current directory
461 and is used solely in failure diagnostics. */
463 static void
464 pop_dir (struct saved_cwd *cwd, const char *curr_dir_name)
466 if (cwd)
468 if (restore_cwd (cwd, "..", curr_dir_name))
469 exit (1);
470 free_cwd (cwd);
472 else if (chdir ("..") < 0)
474 error (1, errno, _("cannot change to `..' from directory %s"),
475 quote (curr_dir_name));
479 /* Print (if appropriate) the size (in units determined by `output_block_size')
480 of file or directory ENT. Return the size of ENT in units of 512-byte
481 blocks. TOP is one for external calls, zero for recursive calls.
482 LAST_DEV is the device that the parent directory of ENT is on.
483 DEPTH is the number of levels (in hierarchy) down from a command
484 line argument. Don't print if DEPTH > max_depth.
485 An important invariant is that when this function returns, the current
486 working directory is the same as when it was called. */
488 static uintmax_t
489 count_entry (const char *ent, int top, dev_t last_dev, int depth)
491 uintmax_t size;
492 struct stat stat_buf;
494 if (((top && opt_dereference_arguments)
495 ? stat (ent, &stat_buf)
496 : (*xstat) (ent, &stat_buf)) < 0)
498 error (0, errno, "%s", quote (path->text));
499 exit_status = 1;
500 return 0;
503 if (!opt_count_all
504 && stat_buf.st_nlink > 1
505 && hash_insert (stat_buf.st_ino, stat_buf.st_dev))
506 return 0; /* Have counted this already. */
508 size = ST_NBLOCKS (stat_buf);
509 tot_size += size;
511 if (S_ISDIR (stat_buf.st_mode))
513 unsigned pathlen;
514 dev_t dir_dev;
515 char *name_space;
516 char *namep;
517 struct saved_cwd *cwd;
518 struct saved_cwd cwd_buf;
519 struct stat e_buf;
521 dir_dev = stat_buf.st_dev;
523 /* Return `0' here, not SIZE, since the SIZE bytes
524 would reside in the new filesystem. */
525 if (opt_one_file_system && !top && last_dev != dir_dev)
526 return 0; /* Don't enter a new file system. */
528 #ifndef S_ISLNK
529 # define S_ISLNK(s) 0
530 #endif
531 /* If we're traversing more than one level, or if we're
532 dereferencing symlinks and we're about to chdir through a
533 symlink, remember the current directory so we can return to
534 it later. In other cases, chdir ("..") works fine.
535 Treat `.' and `..' like multi-level paths, since `chdir ("..")'
536 wont't restore the current working directory after a `chdir'
537 to one of those. */
538 if (strchr (ent, '/')
539 || DOT_OR_DOTDOT (ent)
540 || (xstat == stat
541 && lstat (ent, &e_buf) == 0
542 && S_ISLNK (e_buf.st_mode)))
544 if (save_cwd (&cwd_buf))
545 exit (1);
546 cwd = &cwd_buf;
548 else
549 cwd = NULL;
551 if (chdir (ent) < 0)
553 error (0, errno, _("cannot change to directory %s"),
554 quote (path->text));
555 if (cwd)
556 free_cwd (cwd);
557 exit_status = 1;
558 /* Do return SIZE, here, since even though we can't chdir into ENT,
559 we *can* count the blocks used by its directory entry. */
560 return opt_separate_dirs ? 0 : size;
563 name_space = savedir (".", stat_buf.st_size);
564 if (name_space == NULL)
566 error (0, errno, "%s", quote (path->text));
567 pop_dir (cwd, path->text);
568 exit_status = 1;
569 /* Do count the SIZE bytes. */
570 return opt_separate_dirs ? 0 : size;
573 /* Remember the current path. */
575 str_concatc (path, "/");
576 pathlen = path->length;
578 for (namep = name_space; *namep; namep += strlen (namep) + 1)
580 if (!excluded_filename (exclude, namep, 0))
582 str_concatc (path, namep);
583 size += count_entry (namep, 0, dir_dev, depth + 1);
584 str_trunc (path, pathlen);
588 free (name_space);
589 pop_dir (cwd, path->text);
591 str_trunc (path, pathlen - 1); /* Remove the "/" we added. */
592 if (depth <= max_depth || top)
593 print_size (size, path->length > 0 ? path->text : "/");
594 return opt_separate_dirs ? 0 : size;
596 else if ((opt_all && depth <= max_depth) || top)
598 /* FIXME: make this an option. */
599 int print_only_dir_size = 0;
600 if (!print_only_dir_size)
601 print_size (size, path->length > 0 ? path->text : "/");
604 return size;
607 /* Recursively print the sizes of the directories (and, if selected, files)
608 named in FILES, the last entry of which is NULL. */
610 static void
611 du_files (char **files)
613 int i; /* Index in FILES. */
615 for (i = 0; files[i]; i++)
617 char *arg;
618 int s;
620 arg = files[i];
622 /* Delete final slash in the argument, unless the slash is alone. */
623 s = strlen (arg) - 1;
624 if (s != 0)
626 if (arg[s] == '/')
627 arg[s] = 0;
629 str_copyc (path, arg);
631 else if (arg[0] == '/')
632 str_trunc (path, 0); /* Null path for root directory. */
633 else
634 str_copyc (path, arg);
636 if (!print_totals)
637 hash_reset ();
639 count_entry (arg, 1, 0, 0);
642 if (print_totals)
643 print_size (tot_size, _("total"));
647 main (int argc, char **argv)
649 int c;
650 char *cwd_only[2];
651 int max_depth_specified = 0;
653 /* If nonzero, display only a total for each argument. */
654 int opt_summarize_only = 0;
656 cwd_only[0] = ".";
657 cwd_only[1] = NULL;
659 program_name = argv[0];
660 setlocale (LC_ALL, "");
661 bindtextdomain (PACKAGE, LOCALEDIR);
662 textdomain (PACKAGE);
664 atexit (close_stdout);
666 exclude = new_exclude ();
667 xstat = lstat;
669 human_block_size (getenv ("DU_BLOCK_SIZE"), 0, &output_block_size);
671 while ((c = getopt_long (argc, argv, "abchHklmsxDLSX:", long_options, NULL))
672 != -1)
674 long int tmp_long;
675 switch (c)
677 case 0: /* Long option. */
678 break;
680 case 'a':
681 opt_all = 1;
682 break;
684 case 'b':
685 output_block_size = 1;
686 break;
688 case 'c':
689 print_totals = 1;
690 break;
692 case 'h':
693 output_block_size = -1024;
694 break;
696 case 'H':
697 output_block_size = -1000;
698 break;
700 case 'k':
701 output_block_size = 1024;
702 break;
704 case MAX_DEPTH_OPTION: /* --max-depth=N */
705 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) != LONGINT_OK
706 || tmp_long < 0 || tmp_long > INT_MAX)
707 error (1, 0, _("invalid maximum depth %s"), quote (optarg));
709 max_depth_specified = 1;
710 max_depth = (int) tmp_long;
711 break;
713 case 'm':
714 output_block_size = 1024 * 1024;
715 break;
717 case 'l':
718 opt_count_all = 1;
719 break;
721 case 's':
722 opt_summarize_only = 1;
723 break;
725 case 'x':
726 opt_one_file_system = 1;
727 break;
729 case 'D':
730 opt_dereference_arguments = 1;
731 break;
733 case 'L':
734 xstat = stat;
735 break;
737 case 'S':
738 opt_separate_dirs = 1;
739 break;
741 case 'X':
742 if (add_exclude_file (add_exclude, exclude, optarg, '\n') != 0)
743 error (1, errno, "%s", quote (optarg));
744 break;
746 case EXCLUDE_OPTION:
747 add_exclude (exclude, optarg);
748 break;
750 case BLOCK_SIZE_OPTION:
751 human_block_size (optarg, 1, &output_block_size);
752 break;
754 case_GETOPT_HELP_CHAR;
756 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
758 default:
759 usage (1);
763 if (opt_all && opt_summarize_only)
765 error (0, 0, _("cannot both summarize and show all entries"));
766 usage (1);
769 if (opt_summarize_only && max_depth_specified && max_depth == 0)
771 error (0, 0,
772 _("warning: summarizing is the same as using --max-depth=0"));
775 if (opt_summarize_only && max_depth_specified && max_depth != 0)
777 error (0, 0,
778 _("warning: summarizing conflicts with --max-depth=%d"),
779 max_depth);
780 usage (1);
783 if (opt_summarize_only)
784 max_depth = 0;
786 /* Initialize the hash structure for inode numbers. */
787 hash_init (INITIAL_HASH_MODULE, INITIAL_ENTRY_TAB_SIZE);
789 str_init (&path, INITIAL_PATH_SIZE);
791 du_files (optind == argc ? cwd_only : argv + optind);
793 exit (exit_status);