(main): Stat all non-`-' input file files (and fail if a
[coreutils.git] / src / du.c
blob54b321bba5e2745c3a086d9baafc52d5146d15a1
/* du -- summarize disk usage
   Copyright (C) 88, 89, 90, 91, 95, 96, 97, 1998 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
/* Differences from the Unix du:
   * Doesn't simply ignore the names of regular files given as arguments
     when -a is given.
   * Additional options:
   -l		Count the size of all files, even if they have appeared
		already in another hard link.
   -x		Do not cross file-system boundaries during the recursion.
   -c		Write a grand total of all of the arguments after all
		arguments have been processed.  This can be used to find
		out the disk usage of a directory, with some files excluded.
   -h		Print sizes in human readable format (1k 234M 2G, etc).
   -H		Similar, but use powers of 1000 not 1024.
   -k		Print sizes in kilobytes.
   -m		Print sizes in megabytes.
   -b		Print sizes in bytes.
   -S		Count the size of each directory separately, not including
		the sizes of subdirectories.
   -D		Dereference only symbolic links given on the command line.
   -L		Dereference all symbolic links.
   --exclude=PAT Exclude files that match PAT.
   -X FILE	Exclude files that match patterns taken from FILE.

   By tege@sics.se, Torbjorn Granlund,
   and djm@ai.mit.edu, David MacKenzie.
   Variable blocks added by lm@sgi.com and eggert@twinsun.com.
*/
45 #include <config.h>
46 #if HAVE_INTTYPES_H
47 # include <inttypes.h>
48 #endif
49 #include <stdio.h>
50 #include <getopt.h>
51 #include <sys/types.h>
52 #include <assert.h>
54 #include "exclude.h"
55 #include "system.h"
56 #include "save-cwd.h"
57 #include "closeout.h"
58 #include "error.h"
59 #include "human.h"
60 #include "xstrtol.h"
61 #include "savedir.h"
63 /* Initial number of entries in each hash table entry's table of inodes. */
64 #define INITIAL_HASH_MODULE 100
66 /* Initial number of entries in the inode hash table. */
67 #define INITIAL_ENTRY_TAB_SIZE 70
69 /* Initial size to allocate for `path'. */
70 #define INITIAL_PATH_SIZE 100
72 /* Hash structure for inode and device numbers. The separate entry
73 structure makes it easier to rehash "in place". */
75 struct entry
77 ino_t ino;
78 dev_t dev;
79 struct entry *coll_link;
82 /* Structure for a hash table for inode numbers. */
84 struct htab
86 unsigned modulus; /* Size of the `hash' pointer vector. */
87 struct entry *entry_tab; /* Pointer to dynamically growing vector. */
88 unsigned entry_tab_size; /* Size of current `entry_tab' allocation. */
89 unsigned first_free_entry; /* Index in `entry_tab'. */
90 struct entry *hash[1]; /* Vector of pointers in `entry_tab'. */
94 /* Structure for dynamically resizable strings. */
96 struct String
98 unsigned alloc; /* Size of allocation for the text. */
99 unsigned length; /* Length of the text currently. */
100 char *text; /* Pointer to the text. */
102 typedef struct String String;
104 int stat ();
105 int lstat ();
107 static int hash_insert PARAMS ((ino_t ino, dev_t dev));
108 static int hash_insert2 PARAMS ((struct htab *_htab, ino_t ino, dev_t dev));
109 static uintmax_t count_entry PARAMS ((const char *ent, int top, dev_t last_dev,
110 int depth));
111 static void du_files PARAMS ((char **files));
112 static void hash_init PARAMS ((unsigned int modulus,
113 unsigned int entry_tab_size));
114 static void hash_reset PARAMS ((void));
115 static void str_concatc PARAMS ((String *s1, char *cstr));
116 static void str_copyc PARAMS ((String *s1, char *cstr));
117 static void str_init PARAMS ((String **s1, unsigned int size));
118 static void str_trunc PARAMS ((String *s1, unsigned int length));
120 /* Name under which this program was invoked. */
121 char *program_name;
123 /* If nonzero, display counts for all files, not just directories. */
124 static int opt_all = 0;
126 /* If nonzero, count each hard link of files with multiple links. */
127 static int opt_count_all = 0;
129 /* If nonzero, do not cross file-system boundaries. */
130 static int opt_one_file_system = 0;
132 /* If nonzero, print a grand total at the end. */
133 static int opt_combined_arguments = 0;
135 /* If nonzero, do not add sizes of subdirectories. */
136 static int opt_separate_dirs = 0;
138 /* If nonzero, dereference symlinks that are command line arguments. */
139 static int opt_dereference_arguments = 0;
141 /* Show the total for each directory (and file if --all) that is at
142 most MAX_DEPTH levels down from the root of the hierarchy. The root
143 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
144 static int max_depth = INT_MAX;
146 /* If positive, the units to use when printing sizes;
147 if negative, the human-readable base. */
148 static int output_block_size;
150 /* Accumulated path for file or directory being processed. */
151 static String *path;
153 /* Pointer to hash structure, used by the hash routines. */
154 static struct htab *htab;
156 /* Globally used stat buffer. */
157 static struct stat stat_buf;
159 /* A pointer to either lstat or stat, depending on whether
160 dereferencing of all symbolic links is to be done. */
161 static int (*xstat) ();
163 /* The exit status to use if we don't get any fatal errors. */
164 static int exit_status;
166 /* If nonzero, display usage information and exit. */
167 static int show_help;
169 /* If nonzero, print the version on standard output and exit. */
170 static int show_version;
172 /* File name patterns to exclude. */
173 static struct exclude *exclude;
175 /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
176 static uintmax_t tot_size = 0;
178 static struct option const long_options[] =
180 {"all", no_argument, &opt_all, 1},
181 {"block-size", required_argument, 0, 129},
182 {"bytes", no_argument, NULL, 'b'},
183 {"count-links", no_argument, &opt_count_all, 1},
184 {"dereference", no_argument, NULL, 'L'},
185 {"dereference-args", no_argument, &opt_dereference_arguments, 1},
186 {"exclude", required_argument, 0, 128},
187 {"exclude-from", required_argument, 0, 'X'},
188 {"human-readable", no_argument, NULL, 'h'},
189 {"si", no_argument, 0, 'H'},
190 {"kilobytes", no_argument, NULL, 'k'},
191 {"max-depth", required_argument, NULL, 13},
192 {"megabytes", no_argument, NULL, 'm'},
193 {"one-file-system", no_argument, &opt_one_file_system, 1},
194 {"separate-dirs", no_argument, &opt_separate_dirs, 1},
195 {"summarize", no_argument, NULL, 's'},
196 {"total", no_argument, &opt_combined_arguments, 1},
197 {"help", no_argument, &show_help, 1},
198 {"version", no_argument, &show_version, 1},
199 {NULL, 0, NULL, 0}
202 static void
203 usage (int status, char *reason)
205 if (reason != NULL)
206 fprintf (status == 0 ? stdout : stderr, "%s: %s\n",
207 program_name, reason);
209 if (status != 0)
210 fprintf (stderr, _("Try `%s --help' for more information.\n"),
211 program_name);
212 else
214 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name);
215 printf (_("\
216 Summarize disk usage of each FILE, recursively for directories.\n\
218 -a, --all write counts for all files, not just directories\n\
219 --block-size=SIZE use SIZE-byte blocks\n\
220 -b, --bytes print size in bytes\n\
221 -c, --total produce a grand total\n\
222 -D, --dereference-args dereference PATHs when symbolic link\n\
223 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
224 -H, --si likewise, but use powers of 1000 not 1024\n\
225 -k, --kilobytes like --block-size=1024\n\
226 -l, --count-links count sizes many times if hard linked\n\
227 -L, --dereference dereference all symbolic links\n\
228 -m, --megabytes like --block-size=1048576\n\
229 -S, --separate-dirs do not include size of subdirectories\n\
230 -s, --summarize display only a total for each argument\n\
231 -x, --one-file-system skip directories on different filesystems\n\
232 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
233 --exclude=PAT Exclude files that match PAT.\n\
234 --max-depth=N print the total for a directory (or file, with --all)\n\
235 only if it is N or fewer levels below the command\n\
236 line argument; --max-depth=0 is the same as\n\
237 --summarize\n\
238 --help display this help and exit\n\
239 --version output version information and exit\n\
240 "));
241 puts (_("\nReport bugs to <fileutils-bugs@gnu.org>."));
242 close_stdout ();
244 exit (status);
248 main (int argc, char **argv)
250 int c;
251 char *cwd_only[2];
252 int max_depth_specified = 0;
254 /* If nonzero, display only a total for each argument. */
255 int opt_summarize_only = 0;
257 cwd_only[0] = ".";
258 cwd_only[1] = NULL;
260 program_name = argv[0];
261 setlocale (LC_ALL, "");
262 bindtextdomain (PACKAGE, LOCALEDIR);
263 textdomain (PACKAGE);
265 exclude = new_exclude ();
266 xstat = lstat;
268 human_block_size (getenv ("DU_BLOCK_SIZE"), 0, &output_block_size);
270 while ((c = getopt_long (argc, argv, "abchHklmsxDLSX:", long_options, NULL))
271 != -1)
273 long int tmp_long;
274 switch (c)
276 case 0: /* Long option. */
277 break;
279 case 'a':
280 opt_all = 1;
281 break;
283 case 'b':
284 output_block_size = 1;
285 break;
287 case 'c':
288 opt_combined_arguments = 1;
289 break;
291 case 'h':
292 output_block_size = -1024;
293 break;
295 case 'H':
296 output_block_size = -1000;
297 break;
299 case 'k':
300 output_block_size = 1024;
301 break;
303 case 13: /* --max-depth=N */
304 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) != LONGINT_OK
305 || tmp_long < 0 || tmp_long > INT_MAX)
306 error (1, 0, _("invalid maximum depth `%s'"), optarg);
308 max_depth_specified = 1;
309 max_depth = (int) tmp_long;
310 break;
312 case 'm':
313 output_block_size = 1024 * 1024;
314 break;
316 case 'l':
317 opt_count_all = 1;
318 break;
320 case 's':
321 opt_summarize_only = 1;
322 break;
324 case 'x':
325 opt_one_file_system = 1;
326 break;
328 case 'D':
329 opt_dereference_arguments = 1;
330 break;
332 case 'L':
333 xstat = stat;
334 break;
336 case 'S':
337 opt_separate_dirs = 1;
338 break;
340 case 'X':
341 if (add_exclude_file (exclude, optarg, '\n') != 0)
342 error (1, errno, "%s", optarg);
343 break;
345 case 128:
346 add_exclude (exclude, optarg);
347 break;
349 case 129:
350 human_block_size (optarg, 1, &output_block_size);
351 break;
353 default:
354 usage (1, (char *) 0);
358 if (show_version)
360 printf ("du (%s) %s\n", GNU_PACKAGE, VERSION);
361 close_stdout ();
362 exit (0);
365 if (show_help)
366 usage (0, NULL);
368 if (opt_all && opt_summarize_only)
369 usage (1, _("cannot both summarize and show all entries"));
371 if (opt_summarize_only && max_depth_specified && max_depth == 0)
373 error (0, 0,
374 _("warning: summarizing is the same as using --max-depth=0"));
377 if (opt_summarize_only && max_depth_specified && max_depth != 0)
379 error (0, 0,
380 _("warning: summarizing conflicts with --max-depth=%d"),
381 max_depth);
382 usage (1, NULL);
385 if (opt_summarize_only)
386 max_depth = 0;
388 /* Initialize the hash structure for inode numbers. */
389 hash_init (INITIAL_HASH_MODULE, INITIAL_ENTRY_TAB_SIZE);
391 str_init (&path, INITIAL_PATH_SIZE);
393 du_files (optind == argc ? cwd_only : argv + optind);
395 close_stdout ();
396 exit (exit_status);
399 /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of
400 ST_NBLOCKSIZE-byte blocks; convert it to OUTPUT_BLOCK_SIZE units before
401 printing. If OUTPUT_BLOCK_SIZE is negative, use a human readable
402 notation instead. */
404 static void
405 print_size (uintmax_t n_blocks, const char *string)
407 char buf[LONGEST_HUMAN_READABLE + 1];
408 printf ("%s\t%s\n",
409 human_readable (n_blocks, buf, ST_NBLOCKSIZE, output_block_size),
410 string);
411 fflush (stdout);
414 /* Recursively print the sizes of the directories (and, if selected, files)
415 named in FILES, the last entry of which is NULL. */
417 static void
418 du_files (char **files)
420 struct saved_cwd cwd;
421 ino_t initial_ino; /* Initial directory's inode. */
422 dev_t initial_dev; /* Initial directory's device. */
423 int i; /* Index in FILES. */
425 if (save_cwd (&cwd))
426 exit (1);
428 /* Remember the inode and device number of the current directory. */
429 if (stat (".", &stat_buf))
430 error (1, errno, _("current directory"));
431 initial_ino = stat_buf.st_ino;
432 initial_dev = stat_buf.st_dev;
434 for (i = 0; files[i]; i++)
436 char *arg;
437 int s;
439 arg = files[i];
441 /* Delete final slash in the argument, unless the slash is alone. */
442 s = strlen (arg) - 1;
443 if (s != 0)
445 if (arg[s] == '/')
446 arg[s] = 0;
448 str_copyc (path, arg);
450 else if (arg[0] == '/')
451 str_trunc (path, 0); /* Null path for root directory. */
452 else
453 str_copyc (path, arg);
455 if (!opt_combined_arguments)
456 hash_reset ();
458 count_entry (arg, 1, 0, 0);
460 /* chdir if `count_entry' has changed the working directory. */
461 if (stat (".", &stat_buf))
462 error (1, errno, ".");
463 if (stat_buf.st_ino != initial_ino || stat_buf.st_dev != initial_dev)
465 if (restore_cwd (&cwd, _("starting directory"), NULL))
466 exit (1);
470 if (opt_combined_arguments)
471 print_size (tot_size, _("total"));
473 free_cwd (&cwd);
476 /* Print (if appropriate) the size (in units determined by `output_block_size')
477 of file or directory ENT. Return the size of ENT in units of 512-byte
478 blocks. TOP is one for external calls, zero for recursive calls.
479 LAST_DEV is the device that the parent directory of ENT is on.
480 DEPTH is the number of levels (in hierarchy) down from a command
481 line argument. Don't print if DEPTH > max_depth. */
483 static uintmax_t
484 count_entry (const char *ent, int top, dev_t last_dev, int depth)
486 uintmax_t size;
488 if (((top && opt_dereference_arguments)
489 ? stat (ent, &stat_buf)
490 : (*xstat) (ent, &stat_buf)) < 0)
492 error (0, errno, "%s", path->text);
493 exit_status = 1;
494 return 0;
497 if (!opt_count_all
498 && stat_buf.st_nlink > 1
499 && hash_insert (stat_buf.st_ino, stat_buf.st_dev))
500 return 0; /* Have counted this already. */
502 size = ST_NBLOCKS (stat_buf);
503 tot_size += size;
505 if (S_ISDIR (stat_buf.st_mode))
507 unsigned pathlen;
508 dev_t dir_dev;
509 char *name_space;
510 char *namep;
511 struct saved_cwd cwd;
512 int through_symlink;
513 struct stat e_buf;
515 dir_dev = stat_buf.st_dev;
517 if (opt_one_file_system && !top && last_dev != dir_dev)
518 return 0; /* Don't enter a new file system. */
520 #ifndef S_ISLNK
521 # define S_ISLNK(s) 0
522 #endif
523 /* If we're dereferencing symlinks and we're about to chdir through
524 a symlink, remember the current directory so we can return to it
525 later. In other cases, chdir ("..") works fine. */
526 through_symlink = (xstat == stat
527 && lstat (ent, &e_buf) == 0
528 && S_ISLNK (e_buf.st_mode));
529 if (through_symlink && save_cwd (&cwd))
530 exit (1);
532 if (chdir (ent) < 0)
534 error (0, errno, _("cannot change to directory %s"), path->text);
535 exit_status = 1;
536 return 0;
539 errno = 0;
540 name_space = savedir (".", (unsigned int) stat_buf.st_size);
541 if (name_space == NULL)
543 if (errno)
545 error (0, errno, "%s", path->text);
546 if (through_symlink)
548 if (restore_cwd (&cwd, "..", path->text))
549 exit (1);
550 free_cwd (&cwd);
552 else if (chdir ("..") < 0)
553 error (1, errno, _("cannot change to `..' from directory %s"),
554 path->text);
555 exit_status = 1;
556 return 0;
558 else
559 error (1, 0, _("virtual memory exhausted"));
562 /* Remember the current path. */
564 str_concatc (path, "/");
565 pathlen = path->length;
567 for (namep = name_space; *namep; namep += strlen (namep) + 1)
569 if (!excluded_filename (exclude, namep))
571 str_concatc (path, namep);
572 size += count_entry (namep, 0, dir_dev, depth + 1);
573 str_trunc (path, pathlen);
577 free (name_space);
578 if (through_symlink)
580 restore_cwd (&cwd, "..", path->text);
581 free_cwd (&cwd);
583 else if (chdir ("..") < 0)
585 error (1, errno,
586 _("cannot change to `..' from directory %s"), path->text);
589 str_trunc (path, pathlen - 1); /* Remove the "/" we added. */
590 if (depth <= max_depth || top)
591 print_size (size, path->length > 0 ? path->text : "/");
592 return opt_separate_dirs ? 0 : size;
594 else if ((opt_all && depth <= max_depth) || top)
596 /* FIXME: make this an option. */
597 int print_only_dir_size = 0;
598 if (!print_only_dir_size)
599 print_size (size, path->length > 0 ? path->text : "/");
602 return size;
605 /* Allocate space for the hash structures, and set the global
606 variable `htab' to point to it. The initial hash module is specified in
607 MODULUS, and the number of entries are specified in ENTRY_TAB_SIZE. (The
608 hash structure will be rebuilt when ENTRY_TAB_SIZE entries have been
609 inserted, and MODULUS and ENTRY_TAB_SIZE in the global `htab' will be
610 doubled.) */
612 static void
613 hash_init (unsigned int modulus, unsigned int entry_tab_size)
615 struct htab *htab_r;
617 htab_r = (struct htab *)
618 xmalloc (sizeof (struct htab) + sizeof (struct entry *) * modulus);
620 htab_r->entry_tab = (struct entry *)
621 xmalloc (sizeof (struct entry) * entry_tab_size);
623 htab_r->modulus = modulus;
624 htab_r->entry_tab_size = entry_tab_size;
625 htab = htab_r;
627 hash_reset ();
630 /* Reset the hash structure in the global variable `htab' to
631 contain no entries. */
633 static void
634 hash_reset (void)
636 int i;
637 struct entry **p;
639 htab->first_free_entry = 0;
641 p = htab->hash;
642 for (i = htab->modulus; i > 0; i--)
643 *p++ = NULL;
646 /* Insert an item (inode INO and device DEV) in the hash
647 structure in the global variable `htab', if an entry with the same data
648 was not found already. Return zero if the item was inserted and nonzero
649 if it wasn't. */
651 static int
652 hash_insert (ino_t ino, dev_t dev)
654 struct htab *htab_r = htab; /* Initially a copy of the global `htab'. */
656 if (htab_r->first_free_entry >= htab_r->entry_tab_size)
658 int i;
659 struct entry *ep;
660 unsigned modulus;
661 unsigned entry_tab_size;
663 /* Increase the number of hash entries, and re-hash the data.
664 The method of shrimping and increasing is made to compactify
665 the heap. If twice as much data would be allocated
666 straightforwardly, we would never re-use a byte of memory. */
668 /* Let `htab' shrimp. Keep only the header, not the pointer vector. */
670 htab_r = (struct htab *)
671 xrealloc ((char *) htab_r, sizeof (struct htab));
673 modulus = 2 * htab_r->modulus;
674 entry_tab_size = 2 * htab_r->entry_tab_size;
676 /* Increase the number of possible entries. */
678 htab_r->entry_tab = (struct entry *)
679 xrealloc ((char *) htab_r->entry_tab,
680 sizeof (struct entry) * entry_tab_size);
682 /* Increase the size of htab again. */
684 htab_r = (struct htab *)
685 xrealloc ((char *) htab_r,
686 sizeof (struct htab) + sizeof (struct entry *) * modulus);
688 htab_r->modulus = modulus;
689 htab_r->entry_tab_size = entry_tab_size;
690 htab = htab_r;
692 i = htab_r->first_free_entry;
694 /* Make the increased hash table empty. The entries are still
695 available in htab->entry_tab. */
697 hash_reset ();
699 /* Go through the entries and install them in the pointer vector
700 htab->hash. The items are actually inserted in htab->entry_tab at
701 the position where they already are. The htab->coll_link need
702 however be updated. Could be made a little more efficient. */
704 for (ep = htab_r->entry_tab; i > 0; i--)
706 hash_insert2 (htab_r, ep->ino, ep->dev);
707 ep++;
711 return hash_insert2 (htab_r, ino, dev);
714 /* Insert INO and DEV in the hash structure HTAB, if not
715 already present. Return zero if inserted and nonzero if it
716 already existed. */
718 static int
719 hash_insert2 (struct htab *ht, ino_t ino, dev_t dev)
721 struct entry **hp, *ep2, *ep;
722 hp = &ht->hash[ino % ht->modulus];
723 ep2 = *hp;
725 /* Collision? */
727 if (ep2 != NULL)
729 ep = ep2;
731 /* Search for an entry with the same data. */
735 if (ep->ino == ino && ep->dev == dev)
736 return 1; /* Found an entry with the same data. */
737 ep = ep->coll_link;
739 while (ep != NULL);
741 /* Did not find it. */
745 ep = *hp = &ht->entry_tab[ht->first_free_entry++];
746 ep->ino = ino;
747 ep->dev = dev;
748 ep->coll_link = ep2; /* `ep2' is NULL if no collision. */
750 return 0;
753 /* Initialize string S1 to hold SIZE characters. */
755 static void
756 str_init (String **s1, unsigned int size)
758 String *s;
760 s = (String *) xmalloc (sizeof (struct String));
761 s->text = xmalloc (size + 1);
763 s->alloc = size;
764 *s1 = s;
767 static void
768 ensure_space (String *s, unsigned int size)
770 if (s->alloc < size)
772 s->text = xrealloc (s->text, size + 1);
773 s->alloc = size;
777 /* Assign the null-terminated C-string CSTR to S1. */
779 static void
780 str_copyc (String *s1, char *cstr)
782 unsigned l = strlen (cstr);
783 ensure_space (s1, l);
784 strcpy (s1->text, cstr);
785 s1->length = l;
788 static void
789 str_concatc (String *s1, char *cstr)
791 unsigned l1 = s1->length;
792 unsigned l2 = strlen (cstr);
793 unsigned l = l1 + l2;
795 ensure_space (s1, l);
796 strcpy (s1->text + l1, cstr);
797 s1->length = l;
800 /* Truncate the string S1 to have length LENGTH. */
802 static void
803 str_trunc (String *s1, unsigned int length)
805 if (s1->length > length)
807 s1->text[length] = 0;
808 s1->length = length;