*** empty log message ***
[coreutils.git] / src / du.c
blobdc265ba35272fc7f8ce18349a02b10ec4e9e08e7
1 /* du -- summarize disk usage
2 Copyright (C) 88, 89, 90, 91, 1995-1999 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
20 when -a is given.
21 * Additional options:
22 -l Count the size of all files, even if they have appeared
23 already in another hard link.
24 -x Do not cross file-system boundaries during the recursion.
25 -c Write a grand total of all of the arguments after all
26 arguments have been processed. This can be used to find
27 out the disk usage of a directory, with some files excluded.
28 -h Print sizes in human readable format (1k 234M 2G, etc).
29 -H Similar, but use powers of 1000 not 1024.
30 -k Print sizes in kilobytes.
31 -m Print sizes in megabytes.
32 -b Print sizes in bytes.
33 -S Count the size of each directory separately, not including
34 the sizes of subdirectories.
35 -D Dereference only symbolic links given on the command line.
36 -L Dereference all symbolic links.
37 --exclude=PAT Exclude files that match PAT.
38 -X FILE Exclude files that match patterns taken from FILE.
40 By tege@sics.se, Torbjorn Granlund,
41 and djm@ai.mit.edu, David MacKenzie.
42 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
45 #include <config.h>
46 #if HAVE_INTTYPES_H
47 # include <inttypes.h>
48 #endif
49 #include <stdio.h>
50 #include <getopt.h>
51 #include <sys/types.h>
52 #include <assert.h>
54 #include "system.h"
55 #include "error.h"
56 #include "exclude.h"
57 #include "human.h"
58 #include "save-cwd.h"
59 #include "savedir.h"
60 #include "xstrtol.h"
62 /* The official name of this program (e.g., no `g' prefix). */
63 #define PROGRAM_NAME "du"
65 #define AUTHORS \
66 "Torbjorn Granlund, David MacKenzie, Larry McVoy, and Paul Eggert"
68 /* Initial number of entries in each hash table entry's table of inodes. */
69 #define INITIAL_HASH_MODULE 100
71 /* Initial number of entries in the inode hash table. */
72 #define INITIAL_ENTRY_TAB_SIZE 70
74 /* Initial size to allocate for `path'. */
75 #define INITIAL_PATH_SIZE 100
77 /* Hash structure for inode and device numbers. The separate entry
78 structure makes it easier to rehash "in place". */
80 struct entry
82 ino_t ino;
83 dev_t dev;
84 struct entry *coll_link;
87 /* Structure for a hash table for inode numbers. */
89 struct htab
91 unsigned modulus; /* Size of the `hash' pointer vector. */
92 struct entry *entry_tab; /* Pointer to dynamically growing vector. */
93 unsigned entry_tab_size; /* Size of current `entry_tab' allocation. */
94 unsigned first_free_entry; /* Index in `entry_tab'. */
95 struct entry *hash[1]; /* Vector of pointers in `entry_tab'. */
99 /* Structure for dynamically resizable strings. */
101 struct String
103 unsigned alloc; /* Size of allocation for the text. */
104 unsigned length; /* Length of the text currently. */
105 char *text; /* Pointer to the text. */
107 typedef struct String String;
109 int stat ();
110 int lstat ();
112 /* Name under which this program was invoked. */
113 char *program_name;
115 /* If nonzero, display counts for all files, not just directories. */
116 static int opt_all = 0;
118 /* If nonzero, count each hard link of files with multiple links. */
119 static int opt_count_all = 0;
121 /* If nonzero, do not cross file-system boundaries. */
122 static int opt_one_file_system = 0;
124 /* If nonzero, print a grand total at the end. */
125 static int opt_combined_arguments = 0;
127 /* If nonzero, do not add sizes of subdirectories. */
128 static int opt_separate_dirs = 0;
130 /* If nonzero, dereference symlinks that are command line arguments. */
131 static int opt_dereference_arguments = 0;
133 /* Show the total for each directory (and file if --all) that is at
134 most MAX_DEPTH levels down from the root of the hierarchy. The root
135 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
136 static int max_depth = INT_MAX;
138 /* If positive, the units to use when printing sizes;
139 if negative, the human-readable base. */
140 static int output_block_size;
142 /* Accumulated path for file or directory being processed. */
143 static String *path;
145 /* Pointer to hash structure, used by the hash routines. */
146 static struct htab *htab;
148 /* Globally used stat buffer. */
149 static struct stat stat_buf;
151 /* A pointer to either lstat or stat, depending on whether
152 dereferencing of all symbolic links is to be done. */
153 static int (*xstat) ();
155 /* The exit status to use if we don't get any fatal errors. */
156 static int exit_status;
158 /* File name patterns to exclude. */
159 static struct exclude *exclude;
161 /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
162 static uintmax_t tot_size = 0;
164 static struct option const long_options[] =
166 {"all", no_argument, NULL, 'a'},
167 {"block-size", required_argument, 0, CHAR_MAX + 2},
168 {"bytes", no_argument, NULL, 'b'},
169 {"count-links", no_argument, NULL, 'l'},
170 {"dereference", no_argument, NULL, 'L'},
171 {"dereference-args", no_argument, NULL, 'D'},
172 {"exclude", required_argument, 0, CHAR_MAX + 1},
173 {"exclude-from", required_argument, 0, 'X'},
174 {"human-readable", no_argument, NULL, 'h'},
175 {"si", no_argument, 0, 'H'},
176 {"kilobytes", no_argument, NULL, 'k'},
177 {"max-depth", required_argument, NULL, CHAR_MAX + 3},
178 {"megabytes", no_argument, NULL, 'm'},
179 {"one-file-system", no_argument, NULL, 'x'},
180 {"separate-dirs", no_argument, NULL, 'S'},
181 {"summarize", no_argument, NULL, 's'},
182 {"total", no_argument, NULL, 'c'},
183 {GETOPT_HELP_OPTION_DECL},
184 {GETOPT_VERSION_OPTION_DECL},
185 {NULL, 0, NULL, 0}
188 void
189 usage (int status)
191 if (status != 0)
192 fprintf (stderr, _("Try `%s --help' for more information.\n"),
193 program_name);
194 else
196 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name);
197 printf (_("\
198 Summarize disk usage of each FILE, recursively for directories.\n\
200 -a, --all write counts for all files, not just directories\n\
201 --block-size=SIZE use SIZE-byte blocks\n\
202 -b, --bytes print size in bytes\n\
203 -c, --total produce a grand total\n\
204 -D, --dereference-args dereference PATHs when symbolic link\n\
205 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
206 -H, --si likewise, but use powers of 1000 not 1024\n\
207 -k, --kilobytes like --block-size=1024\n\
208 -l, --count-links count sizes many times if hard linked\n\
209 -L, --dereference dereference all symbolic links\n\
210 -m, --megabytes like --block-size=1048576\n\
211 -S, --separate-dirs do not include size of subdirectories\n\
212 -s, --summarize display only a total for each argument\n\
213 -x, --one-file-system skip directories on different filesystems\n\
214 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
215 --exclude=PAT Exclude files that match PAT.\n\
216 --max-depth=N print the total for a directory (or file, with --all)\n\
217 only if it is N or fewer levels below the command\n\
218 line argument; --max-depth=0 is the same as\n\
219 --summarize\n\
220 --help display this help and exit\n\
221 --version output version information and exit\n\
222 "));
223 puts (_("\nReport bugs to <bug-fileutils@gnu.org>."));
224 close_stdout ();
226 exit (status);
229 /* Initialize string S1 to hold SIZE characters. */
231 static void
232 str_init (String **s1, unsigned int size)
234 String *s;
236 s = (String *) xmalloc (sizeof (struct String));
237 s->text = xmalloc (size + 1);
239 s->alloc = size;
240 *s1 = s;
243 static void
244 ensure_space (String *s, unsigned int size)
246 if (s->alloc < size)
248 s->text = xrealloc (s->text, size + 1);
249 s->alloc = size;
253 /* Assign the null-terminated C-string CSTR to S1. */
255 static void
256 str_copyc (String *s1, const char *cstr)
258 unsigned l = strlen (cstr);
259 ensure_space (s1, l);
260 strcpy (s1->text, cstr);
261 s1->length = l;
264 static void
265 str_concatc (String *s1, const char *cstr)
267 unsigned l1 = s1->length;
268 unsigned l2 = strlen (cstr);
269 unsigned l = l1 + l2;
271 ensure_space (s1, l);
272 strcpy (s1->text + l1, cstr);
273 s1->length = l;
276 /* Truncate the string S1 to have length LENGTH. */
278 static void
279 str_trunc (String *s1, unsigned int length)
281 if (s1->length > length)
283 s1->text[length] = 0;
284 s1->length = length;
288 /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of
289 ST_NBLOCKSIZE-byte blocks; convert it to OUTPUT_BLOCK_SIZE units before
290 printing. If OUTPUT_BLOCK_SIZE is negative, use a human readable
291 notation instead. */
293 static void
294 print_size (uintmax_t n_blocks, const char *string)
296 char buf[LONGEST_HUMAN_READABLE + 1];
297 printf ("%s\t%s\n",
298 human_readable (n_blocks, buf, ST_NBLOCKSIZE, output_block_size),
299 string);
300 fflush (stdout);
303 /* Reset the hash structure in the global variable `htab' to
304 contain no entries. */
306 static void
307 hash_reset (void)
309 int i;
310 struct entry **p;
312 htab->first_free_entry = 0;
314 p = htab->hash;
315 for (i = htab->modulus; i > 0; i--)
316 *p++ = NULL;
319 /* Allocate space for the hash structures, and set the global
320 variable `htab' to point to it. The initial hash module is specified in
321 MODULUS, and the number of entries are specified in ENTRY_TAB_SIZE. (The
322 hash structure will be rebuilt when ENTRY_TAB_SIZE entries have been
323 inserted, and MODULUS and ENTRY_TAB_SIZE in the global `htab' will be
324 doubled.) */
326 static void
327 hash_init (unsigned int modulus, unsigned int entry_tab_size)
329 struct htab *htab_r;
331 htab_r = (struct htab *)
332 xmalloc (sizeof (struct htab) + sizeof (struct entry *) * modulus);
334 htab_r->entry_tab = (struct entry *)
335 xmalloc (sizeof (struct entry) * entry_tab_size);
337 htab_r->modulus = modulus;
338 htab_r->entry_tab_size = entry_tab_size;
339 htab = htab_r;
341 hash_reset ();
344 /* Insert INO and DEV in the hash structure HTAB, if not
345 already present. Return zero if inserted and nonzero if it
346 already existed. */
348 static int
349 hash_insert2 (struct htab *ht, ino_t ino, dev_t dev)
351 struct entry **hp, *ep2, *ep;
352 hp = &ht->hash[ino % ht->modulus];
353 ep2 = *hp;
355 /* Collision? */
357 if (ep2 != NULL)
359 ep = ep2;
361 /* Search for an entry with the same data. */
365 if (ep->ino == ino && ep->dev == dev)
366 return 1; /* Found an entry with the same data. */
367 ep = ep->coll_link;
369 while (ep != NULL);
371 /* Did not find it. */
375 ep = *hp = &ht->entry_tab[ht->first_free_entry++];
376 ep->ino = ino;
377 ep->dev = dev;
378 ep->coll_link = ep2; /* `ep2' is NULL if no collision. */
380 return 0;
383 /* Insert an item (inode INO and device DEV) in the hash
384 structure in the global variable `htab', if an entry with the same data
385 was not found already. Return zero if the item was inserted and nonzero
386 if it wasn't. */
388 static int
389 hash_insert (ino_t ino, dev_t dev)
391 struct htab *htab_r = htab; /* Initially a copy of the global `htab'. */
393 if (htab_r->first_free_entry >= htab_r->entry_tab_size)
395 int i;
396 struct entry *ep;
397 unsigned modulus;
398 unsigned entry_tab_size;
400 /* Increase the number of hash entries, and re-hash the data.
401 The method of shrimping and increasing is made to compactify
402 the heap. If twice as much data would be allocated
403 straightforwardly, we would never re-use a byte of memory. */
405 /* Let `htab' shrimp. Keep only the header, not the pointer vector. */
407 htab_r = (struct htab *)
408 xrealloc ((char *) htab_r, sizeof (struct htab));
410 modulus = 2 * htab_r->modulus;
411 entry_tab_size = 2 * htab_r->entry_tab_size;
413 /* Increase the number of possible entries. */
415 htab_r->entry_tab = (struct entry *)
416 xrealloc ((char *) htab_r->entry_tab,
417 sizeof (struct entry) * entry_tab_size);
419 /* Increase the size of htab again. */
421 htab_r = (struct htab *)
422 xrealloc ((char *) htab_r,
423 sizeof (struct htab) + sizeof (struct entry *) * modulus);
425 htab_r->modulus = modulus;
426 htab_r->entry_tab_size = entry_tab_size;
427 htab = htab_r;
429 i = htab_r->first_free_entry;
431 /* Make the increased hash table empty. The entries are still
432 available in htab->entry_tab. */
434 hash_reset ();
436 /* Go through the entries and install them in the pointer vector
437 htab->hash. The items are actually inserted in htab->entry_tab at
438 the position where they already are. The htab->coll_link need
439 however be updated. Could be made a little more efficient. */
441 for (ep = htab_r->entry_tab; i > 0; i--)
443 hash_insert2 (htab_r, ep->ino, ep->dev);
444 ep++;
448 return hash_insert2 (htab_r, ino, dev);
451 /* Print (if appropriate) the size (in units determined by `output_block_size')
452 of file or directory ENT. Return the size of ENT in units of 512-byte
453 blocks. TOP is one for external calls, zero for recursive calls.
454 LAST_DEV is the device that the parent directory of ENT is on.
455 DEPTH is the number of levels (in hierarchy) down from a command
456 line argument. Don't print if DEPTH > max_depth. */
458 static uintmax_t
459 count_entry (const char *ent, int top, dev_t last_dev, int depth)
461 uintmax_t size;
463 if (((top && opt_dereference_arguments)
464 ? stat (ent, &stat_buf)
465 : (*xstat) (ent, &stat_buf)) < 0)
467 error (0, errno, "%s", path->text);
468 exit_status = 1;
469 return 0;
472 if (!opt_count_all
473 && stat_buf.st_nlink > 1
474 && hash_insert (stat_buf.st_ino, stat_buf.st_dev))
475 return 0; /* Have counted this already. */
477 size = ST_NBLOCKS (stat_buf);
478 tot_size += size;
480 if (S_ISDIR (stat_buf.st_mode))
482 unsigned pathlen;
483 dev_t dir_dev;
484 char *name_space;
485 char *namep;
486 struct saved_cwd cwd;
487 int through_symlink;
488 struct stat e_buf;
490 dir_dev = stat_buf.st_dev;
492 if (opt_one_file_system && !top && last_dev != dir_dev)
493 return 0; /* Don't enter a new file system. */
495 #ifndef S_ISLNK
496 # define S_ISLNK(s) 0
497 #endif
498 /* If we're dereferencing symlinks and we're about to chdir through
499 a symlink, remember the current directory so we can return to it
500 later. In other cases, chdir ("..") works fine. */
501 through_symlink = (xstat == stat
502 && lstat (ent, &e_buf) == 0
503 && S_ISLNK (e_buf.st_mode));
504 if (through_symlink && save_cwd (&cwd))
505 exit (1);
507 if (chdir (ent) < 0)
509 error (0, errno, _("cannot change to directory %s"), path->text);
510 exit_status = 1;
511 return 0;
514 errno = 0;
515 name_space = savedir (".", (unsigned int) stat_buf.st_size);
516 if (name_space == NULL)
518 if (errno)
520 error (0, errno, "%s", path->text);
521 if (through_symlink)
523 if (restore_cwd (&cwd, "..", path->text))
524 exit (1);
525 free_cwd (&cwd);
527 else if (chdir ("..") < 0)
528 error (1, errno, _("cannot change to `..' from directory %s"),
529 path->text);
530 exit_status = 1;
531 return 0;
533 else
534 error (1, 0, _("virtual memory exhausted"));
537 /* Remember the current path. */
539 str_concatc (path, "/");
540 pathlen = path->length;
542 for (namep = name_space; *namep; namep += strlen (namep) + 1)
544 if (!excluded_filename (exclude, namep))
546 str_concatc (path, namep);
547 size += count_entry (namep, 0, dir_dev, depth + 1);
548 str_trunc (path, pathlen);
552 free (name_space);
553 if (through_symlink)
555 restore_cwd (&cwd, "..", path->text);
556 free_cwd (&cwd);
558 else if (chdir ("..") < 0)
560 error (1, errno,
561 _("cannot change to `..' from directory %s"), path->text);
564 str_trunc (path, pathlen - 1); /* Remove the "/" we added. */
565 if (depth <= max_depth || top)
566 print_size (size, path->length > 0 ? path->text : "/");
567 return opt_separate_dirs ? 0 : size;
569 else if ((opt_all && depth <= max_depth) || top)
571 /* FIXME: make this an option. */
572 int print_only_dir_size = 0;
573 if (!print_only_dir_size)
574 print_size (size, path->length > 0 ? path->text : "/");
577 return size;
580 /* Recursively print the sizes of the directories (and, if selected, files)
581 named in FILES, the last entry of which is NULL. */
583 static void
584 du_files (char **files)
586 struct saved_cwd cwd;
587 ino_t initial_ino; /* Initial directory's inode. */
588 dev_t initial_dev; /* Initial directory's device. */
589 int i; /* Index in FILES. */
591 if (save_cwd (&cwd))
592 exit (1);
594 /* Remember the inode and device number of the current directory. */
595 if (stat (".", &stat_buf))
596 error (1, errno, _("current directory"));
597 initial_ino = stat_buf.st_ino;
598 initial_dev = stat_buf.st_dev;
600 for (i = 0; files[i]; i++)
602 char *arg;
603 int s;
605 arg = files[i];
607 /* Delete final slash in the argument, unless the slash is alone. */
608 s = strlen (arg) - 1;
609 if (s != 0)
611 if (arg[s] == '/')
612 arg[s] = 0;
614 str_copyc (path, arg);
616 else if (arg[0] == '/')
617 str_trunc (path, 0); /* Null path for root directory. */
618 else
619 str_copyc (path, arg);
621 if (!opt_combined_arguments)
622 hash_reset ();
624 count_entry (arg, 1, 0, 0);
626 /* chdir if `count_entry' has changed the working directory. */
627 if (stat (".", &stat_buf))
628 error (1, errno, ".");
629 if (stat_buf.st_ino != initial_ino || stat_buf.st_dev != initial_dev)
631 if (restore_cwd (&cwd, _("starting directory"), NULL))
632 exit (1);
636 if (opt_combined_arguments)
637 print_size (tot_size, _("total"));
639 free_cwd (&cwd);
643 main (int argc, char **argv)
645 int c;
646 char *cwd_only[2];
647 int max_depth_specified = 0;
649 /* If nonzero, display only a total for each argument. */
650 int opt_summarize_only = 0;
652 cwd_only[0] = ".";
653 cwd_only[1] = NULL;
655 program_name = argv[0];
656 setlocale (LC_ALL, "");
657 bindtextdomain (PACKAGE, LOCALEDIR);
658 textdomain (PACKAGE);
660 exclude = new_exclude ();
661 xstat = lstat;
663 human_block_size (getenv ("DU_BLOCK_SIZE"), 0, &output_block_size);
665 while ((c = getopt_long (argc, argv, "abchHklmsxDLSX:", long_options, NULL))
666 != -1)
668 long int tmp_long;
669 switch (c)
671 case 0: /* Long option. */
672 break;
674 case 'a':
675 opt_all = 1;
676 break;
678 case 'b':
679 output_block_size = 1;
680 break;
682 case 'c':
683 opt_combined_arguments = 1;
684 break;
686 case 'h':
687 output_block_size = -1024;
688 break;
690 case 'H':
691 output_block_size = -1000;
692 break;
694 case 'k':
695 output_block_size = 1024;
696 break;
698 case CHAR_MAX + 3: /* --max-depth=N */
699 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) != LONGINT_OK
700 || tmp_long < 0 || tmp_long > INT_MAX)
701 error (1, 0, _("invalid maximum depth `%s'"), optarg);
703 max_depth_specified = 1;
704 max_depth = (int) tmp_long;
705 break;
707 case 'm':
708 output_block_size = 1024 * 1024;
709 break;
711 case 'l':
712 opt_count_all = 1;
713 break;
715 case 's':
716 opt_summarize_only = 1;
717 break;
719 case 'x':
720 opt_one_file_system = 1;
721 break;
723 case 'D':
724 opt_dereference_arguments = 1;
725 break;
727 case 'L':
728 xstat = stat;
729 break;
731 case 'S':
732 opt_separate_dirs = 1;
733 break;
735 case 'X':
736 if (add_exclude_file (exclude, optarg, '\n') != 0)
737 error (1, errno, "%s", optarg);
738 break;
740 case CHAR_MAX + 1:
741 add_exclude (exclude, optarg);
742 break;
744 case CHAR_MAX + 2:
745 human_block_size (optarg, 1, &output_block_size);
746 break;
748 case_GETOPT_HELP_CHAR;
750 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
752 default:
753 usage (1);
757 if (opt_all && opt_summarize_only)
759 error (0, 0, _("cannot both summarize and show all entries"));
760 usage (1);
763 if (opt_summarize_only && max_depth_specified && max_depth == 0)
765 error (0, 0,
766 _("warning: summarizing is the same as using --max-depth=0"));
769 if (opt_summarize_only && max_depth_specified && max_depth != 0)
771 error (0, 0,
772 _("warning: summarizing conflicts with --max-depth=%d"),
773 max_depth);
774 usage (1);
777 if (opt_summarize_only)
778 max_depth = 0;
780 /* Initialize the hash structure for inode numbers. */
781 hash_init (INITIAL_HASH_MODULE, INITIAL_ENTRY_TAB_SIZE);
783 str_init (&path, INITIAL_PATH_SIZE);
785 du_files (optind == argc ? cwd_only : argv + optind);
787 close_stdout ();
788 exit (exit_status);