.
[coreutils.git] / src / du.c
blob165d0dfd82ab3af344d8e5cbc35072345a1e02db
1 /* du -- summarize disk usage
2 Copyright (C) 88, 89, 90, 91, 95, 96, 97, 1998 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
20 when -a is given.
21 * Additional options:
22 -l Count the size of all files, even if they have appeared
23 already in another hard link.
24 -x Do not cross file-system boundaries during the recursion.
25 -c Write a grand total of all of the arguments after all
26 arguments have been processed. This can be used to find
27 out the disk usage of a directory, with some files excluded.
28 -h Print sizes in human readable format (1k 234M 2G, etc).
29 -k Print sizes in kilobytes instead of 512 byte blocks
30 (the default required by POSIX).
31 -m Print sizes in megabytes instead of 512 byte blocks
32 -b Print sizes in bytes.
33 -S Count the size of each directory separately, not including
34 the sizes of subdirectories.
35 -D Dereference only symbolic links given on the command line.
36 -L Dereference all symbolic links.
37 --exclude=PAT Exclude files that match PAT.
38 -X FILE Exclude files that match patterns taken from FILE.
40 By tege@sics.se, Torbjorn Granlund,
41 and djm@ai.mit.edu, David MacKenzie.
42 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
45 #include <config.h>
46 #if HAVE_INTTYPES_H
47 # include <inttypes.h>
48 #endif
49 #include <stdio.h>
50 #include <getopt.h>
51 #include <sys/types.h>
52 #include <assert.h>
54 #include "exclude.h"
55 #include "system.h"
56 #include "save-cwd.h"
57 #include "closeout.h"
58 #include "error.h"
59 #include "human.h"
60 #include "xstrtol.h"
61 #include "savedir.h"
63 /* Initial number of entries in each hash table entry's table of inodes. */
64 #define INITIAL_HASH_MODULE 100
66 /* Initial number of entries in the inode hash table. */
67 #define INITIAL_ENTRY_TAB_SIZE 70
69 /* Initial size to allocate for `path'. */
70 #define INITIAL_PATH_SIZE 100
72 /* Hash structure for inode and device numbers. The separate entry
73 structure makes it easier to rehash "in place". */
75 struct entry
77 ino_t ino;
78 dev_t dev;
79 struct entry *coll_link;
82 /* Structure for a hash table for inode numbers. */
84 struct htab
86 unsigned modulus; /* Size of the `hash' pointer vector. */
87 struct entry *entry_tab; /* Pointer to dynamically growing vector. */
88 unsigned entry_tab_size; /* Size of current `entry_tab' allocation. */
89 unsigned first_free_entry; /* Index in `entry_tab'. */
90 struct entry *hash[1]; /* Vector of pointers in `entry_tab'. */
94 /* Structure for dynamically resizable strings. */
96 struct String
98 unsigned alloc; /* Size of allocation for the text. */
99 unsigned length; /* Length of the text currently. */
100 char *text; /* Pointer to the text. */
102 typedef struct String String;
104 int stat ();
105 int lstat ();
107 static int hash_insert PARAMS ((ino_t ino, dev_t dev));
108 static int hash_insert2 PARAMS ((struct htab *_htab, ino_t ino, dev_t dev));
109 static uintmax_t count_entry PARAMS ((const char *ent, int top, dev_t last_dev,
110 int depth));
111 static void du_files PARAMS ((char **files));
112 static void hash_init PARAMS ((unsigned int modulus,
113 unsigned int entry_tab_size));
114 static void hash_reset PARAMS ((void));
115 static void str_concatc PARAMS ((String *s1, char *cstr));
116 static void str_copyc PARAMS ((String *s1, char *cstr));
117 static void str_init PARAMS ((String **s1, unsigned int size));
118 static void str_trunc PARAMS ((String *s1, unsigned int length));
120 /* Name under which this program was invoked. */
121 char *program_name;
123 /* If nonzero, display counts for all files, not just directories. */
124 static int opt_all = 0;
126 /* If nonzero, count each hard link of files with multiple links. */
127 static int opt_count_all = 0;
129 /* If nonzero, do not cross file-system boundaries. */
130 static int opt_one_file_system = 0;
132 /* If nonzero, print a grand total at the end. */
133 static int opt_combined_arguments = 0;
135 /* If nonzero, do not add sizes of subdirectories. */
136 static int opt_separate_dirs = 0;
138 /* If nonzero, dereference symlinks that are command line arguments. */
139 static int opt_dereference_arguments = 0;
141 /* Show the total for each directory (and file if --all) that is at
142 most MAX_DEPTH levels down from the root of the hierarchy. The root
143 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
144 static int max_depth = INT_MAX;
146 /* base used for human style output */
147 static int human_readable_base;
149 /* The units to count in. */
150 static int output_units;
152 /* Accumulated path for file or directory being processed. */
153 static String *path;
155 /* Pointer to hash structure, used by the hash routines. */
156 static struct htab *htab;
158 /* Globally used stat buffer. */
159 static struct stat stat_buf;
161 /* A pointer to either lstat or stat, depending on whether
162 dereferencing of all symbolic links is to be done. */
163 static int (*xstat) ();
165 /* The exit status to use if we don't get any fatal errors. */
166 static int exit_status;
168 /* If nonzero, display usage information and exit. */
169 static int show_help;
171 /* If nonzero, print the version on standard output and exit. */
172 static int show_version;
174 /* File name patterns to exclude. */
175 static struct exclude *exclude;
177 /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
178 static uintmax_t tot_size = 0;
180 static struct option const long_options[] =
182 {"all", no_argument, &opt_all, 1},
183 {"bytes", no_argument, NULL, 'b'},
184 {"count-links", no_argument, &opt_count_all, 1},
185 {"dereference", no_argument, NULL, 'L'},
186 {"dereference-args", no_argument, &opt_dereference_arguments, 1},
187 {"exclude", required_argument, 0, 128},
188 {"exclude-from", required_argument, 0, 'X'},
189 {"human-readable", no_argument, NULL, 'h'},
190 {"si", no_argument, 0, 'H'},
191 {"kilobytes", no_argument, NULL, 'k'},
192 {"max-depth", required_argument, NULL, 13},
193 {"megabytes", no_argument, NULL, 'm'},
194 {"one-file-system", no_argument, &opt_one_file_system, 1},
195 {"separate-dirs", no_argument, &opt_separate_dirs, 1},
196 {"summarize", no_argument, NULL, 's'},
197 {"total", no_argument, &opt_combined_arguments, 1},
198 {"help", no_argument, &show_help, 1},
199 {"version", no_argument, &show_version, 1},
200 {NULL, 0, NULL, 0}
203 static void
204 usage (int status, char *reason)
206 if (reason != NULL)
207 fprintf (status == 0 ? stdout : stderr, "%s: %s\n",
208 program_name, reason);
210 if (status != 0)
211 fprintf (stderr, _("Try `%s --help' for more information.\n"),
212 program_name);
213 else
215 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name);
216 printf (_("\
217 Summarize disk usage of each FILE, recursively for directories.\n\
219 -a, --all write counts for all files, not just directories\n\
220 -b, --bytes print size in bytes\n\
221 -c, --total produce a grand total\n\
222 -D, --dereference-args dereference PATHs when symbolic link\n\
223 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
224 -H, --si likewise, but use powers of 1000 not 1024\n\
225 -k, --kilobytes use 1024-byte blocks\n\
226 -l, --count-links count sizes many times if hard linked\n\
227 -L, --dereference dereference all symbolic links\n\
228 -m, --megabytes use 1048576-byte blocks\n\
229 -S, --separate-dirs do not include size of subdirectories\n\
230 -s, --summarize display only a total for each argument\n\
231 -x, --one-file-system skip directories on different filesystems\n\
232 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
233 --exclude=PAT Exclude files that match PAT.\n\
234 --max-depth=N print the total for a directory (or file, with --all)\n\
235 only if it is N or fewer levels below the command\n\
236 line argument; --max-depth=0 is the same as\n\
237 --summarize\n\
238 --help display this help and exit\n\
239 --version output version information and exit\n\
240 "));
241 puts (_("\nReport bugs to <fileutils-bugs@gnu.org>."));
242 close_stdout ();
244 exit (status);
248 main (int argc, char **argv)
250 int c;
251 char *cwd_only[2];
252 char *bs;
253 int max_depth_specified = 0;
255 /* If nonzero, display only a total for each argument. */
256 int opt_summarize_only = 0;
258 cwd_only[0] = ".";
259 cwd_only[1] = NULL;
261 program_name = argv[0];
262 setlocale (LC_ALL, "");
263 bindtextdomain (PACKAGE, LOCALEDIR);
264 textdomain (PACKAGE);
266 exclude = new_exclude ();
267 xstat = lstat;
269 if (getenv ("POSIXLY_CORRECT"))
270 output_units = 512;
271 else if ((bs = getenv ("BLOCKSIZE"))
272 && strncmp (bs, "HUMAN", sizeof ("HUMAN") - 1) == 0)
274 human_readable_base = 1024;
275 output_units = 1;
277 else if (bs && strcmp (bs, "SI") == 0)
279 human_readable_base = 1000;
280 output_units = 1;
282 else
283 output_units = 1024;
285 while ((c = getopt_long (argc, argv, "abchHklmsxDLSX:", long_options, NULL))
286 != -1)
288 long int tmp_long;
289 switch (c)
291 case 0: /* Long option. */
292 break;
294 case 'a':
295 opt_all = 1;
296 break;
298 case 'b':
299 human_readable_base = 0;
300 output_units = 1;
301 break;
303 case 'c':
304 opt_combined_arguments = 1;
305 break;
307 case 'h':
308 human_readable_base = 1024;
309 output_units = 1;
310 break;
312 case 'H':
313 human_readable_base = 1000;
314 output_units = 1;
315 break;
317 case 'k':
318 human_readable_base = 0;
319 output_units = 1024;
320 break;
322 case 13: /* --max-depth=N */
323 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) != LONGINT_OK
324 || tmp_long < 0 || tmp_long > INT_MAX)
325 error (1, 0, _("invalid maximum depth `%s'"), optarg);
327 max_depth_specified = 1;
328 max_depth = (int) tmp_long;
329 break;
331 case 'm':
332 human_readable_base = 0;
333 output_units = 1024 * 1024;
334 break;
336 case 'l':
337 opt_count_all = 1;
338 break;
340 case 's':
341 opt_summarize_only = 1;
342 break;
344 case 'x':
345 opt_one_file_system = 1;
346 break;
348 case 'D':
349 opt_dereference_arguments = 1;
350 break;
352 case 'L':
353 xstat = stat;
354 break;
356 case 'S':
357 opt_separate_dirs = 1;
358 break;
360 case 'X':
361 if (add_exclude_file (exclude, optarg, '\n') != 0)
362 error (1, errno, "%s", optarg);
363 break;
365 case 128:
366 add_exclude (exclude, optarg);
367 break;
369 default:
370 usage (1, (char *) 0);
374 if (show_version)
376 printf ("du (%s) %s\n", GNU_PACKAGE, VERSION);
377 close_stdout ();
378 exit (0);
381 if (show_help)
382 usage (0, NULL);
384 if (opt_all && opt_summarize_only)
385 usage (1, _("cannot both summarize and show all entries"));
387 if (opt_summarize_only && max_depth_specified && max_depth == 0)
389 error (0, 0,
390 _("warning: summarizing is the same as using --max-depth=0"));
393 if (opt_summarize_only && max_depth_specified && max_depth != 0)
395 error (0, 0,
396 _("warning: summarizing conflicts with --max-depth=%d"),
397 max_depth);
398 usage (1, NULL);
401 if (opt_summarize_only)
402 max_depth = 0;
404 /* Initialize the hash structure for inode numbers. */
405 hash_init (INITIAL_HASH_MODULE, INITIAL_ENTRY_TAB_SIZE);
407 str_init (&path, INITIAL_PATH_SIZE);
409 du_files (optind == argc ? cwd_only : argv + optind);
411 close_stdout ();
412 exit (exit_status);
415 /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of
416 ST_NBLOCKSIZE-byte blocks; convert it to OUTPUT_UNITS units before
417 printing. If HUMAN_READABLE_BASE is nonzero, use a human readable
418 notation instead. */
420 static void
421 print_size (uintmax_t n_blocks, const char *string)
423 char buf[LONGEST_HUMAN_READABLE + 1];
424 printf ("%s\t%s\n",
425 human_readable (n_blocks, buf, ST_NBLOCKSIZE, output_units,
426 human_readable_base),
427 string);
428 fflush (stdout);
431 /* Recursively print the sizes of the directories (and, if selected, files)
432 named in FILES, the last entry of which is NULL. */
434 static void
435 du_files (char **files)
437 struct saved_cwd cwd;
438 ino_t initial_ino; /* Initial directory's inode. */
439 dev_t initial_dev; /* Initial directory's device. */
440 int i; /* Index in FILES. */
442 if (save_cwd (&cwd))
443 exit (1);
445 /* Remember the inode and device number of the current directory. */
446 if (stat (".", &stat_buf))
447 error (1, errno, _("current directory"));
448 initial_ino = stat_buf.st_ino;
449 initial_dev = stat_buf.st_dev;
451 for (i = 0; files[i]; i++)
453 char *arg;
454 int s;
456 arg = files[i];
458 /* Delete final slash in the argument, unless the slash is alone. */
459 s = strlen (arg) - 1;
460 if (s != 0)
462 if (arg[s] == '/')
463 arg[s] = 0;
465 str_copyc (path, arg);
467 else if (arg[0] == '/')
468 str_trunc (path, 0); /* Null path for root directory. */
469 else
470 str_copyc (path, arg);
472 if (!opt_combined_arguments)
473 hash_reset ();
475 count_entry (arg, 1, 0, 0);
477 /* chdir if `count_entry' has changed the working directory. */
478 if (stat (".", &stat_buf))
479 error (1, errno, ".");
480 if (stat_buf.st_ino != initial_ino || stat_buf.st_dev != initial_dev)
482 if (restore_cwd (&cwd, _("starting directory"), NULL))
483 exit (1);
487 if (opt_combined_arguments)
488 print_size (tot_size, _("total"));
490 free_cwd (&cwd);
493 /* Print (if appropriate) the size (in units determined by `output_units')
494 of file or directory ENT. Return the size of ENT in units of 512-byte
495 blocks. TOP is one for external calls, zero for recursive calls.
496 LAST_DEV is the device that the parent directory of ENT is on.
497 DEPTH is the number of levels (in hierarchy) down from a command
498 line argument. Don't print if DEPTH > max_depth. */
500 static uintmax_t
501 count_entry (const char *ent, int top, dev_t last_dev, int depth)
503 uintmax_t size;
505 if (((top && opt_dereference_arguments)
506 ? stat (ent, &stat_buf)
507 : (*xstat) (ent, &stat_buf)) < 0)
509 error (0, errno, "%s", path->text);
510 exit_status = 1;
511 return 0;
514 if (!opt_count_all
515 && stat_buf.st_nlink > 1
516 && hash_insert (stat_buf.st_ino, stat_buf.st_dev))
517 return 0; /* Have counted this already. */
519 size = ST_NBLOCKS (stat_buf);
520 tot_size += size;
522 if (S_ISDIR (stat_buf.st_mode))
524 unsigned pathlen;
525 dev_t dir_dev;
526 char *name_space;
527 char *namep;
528 struct saved_cwd cwd;
529 int through_symlink;
530 struct stat e_buf;
532 dir_dev = stat_buf.st_dev;
534 if (opt_one_file_system && !top && last_dev != dir_dev)
535 return 0; /* Don't enter a new file system. */
537 #ifndef S_ISLNK
538 # define S_ISLNK(s) 0
539 #endif
540 /* If we're dereferencing symlinks and we're about to chdir through
541 a symlink, remember the current directory so we can return to it
542 later. In other cases, chdir ("..") works fine. */
543 through_symlink = (xstat == stat
544 && lstat (ent, &e_buf) == 0
545 && S_ISLNK (e_buf.st_mode));
546 if (through_symlink && save_cwd (&cwd))
547 exit (1);
549 if (chdir (ent) < 0)
551 error (0, errno, _("cannot change to directory %s"), path->text);
552 exit_status = 1;
553 return 0;
556 errno = 0;
557 name_space = savedir (".", (unsigned int) stat_buf.st_size);
558 if (name_space == NULL)
560 if (errno)
562 error (0, errno, "%s", path->text);
563 if (through_symlink)
565 if (restore_cwd (&cwd, "..", path->text))
566 exit (1);
567 free_cwd (&cwd);
569 else if (chdir ("..") < 0)
570 error (1, errno, _("cannot change to `..' from directory %s"),
571 path->text);
572 exit_status = 1;
573 return 0;
575 else
576 error (1, 0, _("virtual memory exhausted"));
579 /* Remember the current path. */
581 str_concatc (path, "/");
582 pathlen = path->length;
584 for (namep = name_space; *namep; namep += strlen (namep) + 1)
586 if (!excluded_filename (exclude, namep))
588 str_concatc (path, namep);
589 size += count_entry (namep, 0, dir_dev, depth + 1);
590 str_trunc (path, pathlen);
594 free (name_space);
595 if (through_symlink)
597 restore_cwd (&cwd, "..", path->text);
598 free_cwd (&cwd);
600 else if (chdir ("..") < 0)
602 error (1, errno,
603 _("cannot change to `..' from directory %s"), path->text);
606 str_trunc (path, pathlen - 1); /* Remove the "/" we added. */
607 if (depth <= max_depth || top)
608 print_size (size, path->length > 0 ? path->text : "/");
609 return opt_separate_dirs ? 0 : size;
611 else if ((opt_all && depth <= max_depth) || top)
613 /* FIXME: make this an option. */
614 int print_only_dir_size = 0;
615 if (!print_only_dir_size)
616 print_size (size, path->length > 0 ? path->text : "/");
619 return size;
622 /* Allocate space for the hash structures, and set the global
623 variable `htab' to point to it. The initial hash module is specified in
624 MODULUS, and the number of entries are specified in ENTRY_TAB_SIZE. (The
625 hash structure will be rebuilt when ENTRY_TAB_SIZE entries have been
626 inserted, and MODULUS and ENTRY_TAB_SIZE in the global `htab' will be
627 doubled.) */
629 static void
630 hash_init (unsigned int modulus, unsigned int entry_tab_size)
632 struct htab *htab_r;
634 htab_r = (struct htab *)
635 xmalloc (sizeof (struct htab) + sizeof (struct entry *) * modulus);
637 htab_r->entry_tab = (struct entry *)
638 xmalloc (sizeof (struct entry) * entry_tab_size);
640 htab_r->modulus = modulus;
641 htab_r->entry_tab_size = entry_tab_size;
642 htab = htab_r;
644 hash_reset ();
647 /* Reset the hash structure in the global variable `htab' to
648 contain no entries. */
650 static void
651 hash_reset (void)
653 int i;
654 struct entry **p;
656 htab->first_free_entry = 0;
658 p = htab->hash;
659 for (i = htab->modulus; i > 0; i--)
660 *p++ = NULL;
663 /* Insert an item (inode INO and device DEV) in the hash
664 structure in the global variable `htab', if an entry with the same data
665 was not found already. Return zero if the item was inserted and nonzero
666 if it wasn't. */
668 static int
669 hash_insert (ino_t ino, dev_t dev)
671 struct htab *htab_r = htab; /* Initially a copy of the global `htab'. */
673 if (htab_r->first_free_entry >= htab_r->entry_tab_size)
675 int i;
676 struct entry *ep;
677 unsigned modulus;
678 unsigned entry_tab_size;
680 /* Increase the number of hash entries, and re-hash the data.
681 The method of shrimping and increasing is made to compactify
682 the heap. If twice as much data would be allocated
683 straightforwardly, we would never re-use a byte of memory. */
685 /* Let `htab' shrimp. Keep only the header, not the pointer vector. */
687 htab_r = (struct htab *)
688 xrealloc ((char *) htab_r, sizeof (struct htab));
690 modulus = 2 * htab_r->modulus;
691 entry_tab_size = 2 * htab_r->entry_tab_size;
693 /* Increase the number of possible entries. */
695 htab_r->entry_tab = (struct entry *)
696 xrealloc ((char *) htab_r->entry_tab,
697 sizeof (struct entry) * entry_tab_size);
699 /* Increase the size of htab again. */
701 htab_r = (struct htab *)
702 xrealloc ((char *) htab_r,
703 sizeof (struct htab) + sizeof (struct entry *) * modulus);
705 htab_r->modulus = modulus;
706 htab_r->entry_tab_size = entry_tab_size;
707 htab = htab_r;
709 i = htab_r->first_free_entry;
711 /* Make the increased hash table empty. The entries are still
712 available in htab->entry_tab. */
714 hash_reset ();
716 /* Go through the entries and install them in the pointer vector
717 htab->hash. The items are actually inserted in htab->entry_tab at
718 the position where they already are. The htab->coll_link need
719 however be updated. Could be made a little more efficient. */
721 for (ep = htab_r->entry_tab; i > 0; i--)
723 hash_insert2 (htab_r, ep->ino, ep->dev);
724 ep++;
728 return hash_insert2 (htab_r, ino, dev);
731 /* Insert INO and DEV in the hash structure HTAB, if not
732 already present. Return zero if inserted and nonzero if it
733 already existed. */
735 static int
736 hash_insert2 (struct htab *ht, ino_t ino, dev_t dev)
738 struct entry **hp, *ep2, *ep;
739 hp = &ht->hash[ino % ht->modulus];
740 ep2 = *hp;
742 /* Collision? */
744 if (ep2 != NULL)
746 ep = ep2;
748 /* Search for an entry with the same data. */
752 if (ep->ino == ino && ep->dev == dev)
753 return 1; /* Found an entry with the same data. */
754 ep = ep->coll_link;
756 while (ep != NULL);
758 /* Did not find it. */
762 ep = *hp = &ht->entry_tab[ht->first_free_entry++];
763 ep->ino = ino;
764 ep->dev = dev;
765 ep->coll_link = ep2; /* `ep2' is NULL if no collision. */
767 return 0;
770 /* Initialize string S1 to hold SIZE characters. */
772 static void
773 str_init (String **s1, unsigned int size)
775 String *s;
777 s = (String *) xmalloc (sizeof (struct String));
778 s->text = xmalloc (size + 1);
780 s->alloc = size;
781 *s1 = s;
784 static void
785 ensure_space (String *s, unsigned int size)
787 if (s->alloc < size)
789 s->text = xrealloc (s->text, size + 1);
790 s->alloc = size;
794 /* Assign the null-terminated C-string CSTR to S1. */
796 static void
797 str_copyc (String *s1, char *cstr)
799 unsigned l = strlen (cstr);
800 ensure_space (s1, l);
801 strcpy (s1->text, cstr);
802 s1->length = l;
805 static void
806 str_concatc (String *s1, char *cstr)
808 unsigned l1 = s1->length;
809 unsigned l2 = strlen (cstr);
810 unsigned l = l1 + l2;
812 ensure_space (s1, l);
813 strcpy (s1->text + l1, cstr);
814 s1->length = l;
817 /* Truncate the string S1 to have length LENGTH. */
819 static void
820 str_trunc (String *s1, unsigned int length)
822 if (s1->length > length)
824 s1->text[length] = 0;
825 s1->length = length;