.
[coreutils.git] / src / du.c
blobb6047f08429971ef573e2725176003aaaaeba3a1
1 /* du -- summarize disk usage
2 Copyright (C) 1988-1991, 1995-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
20 when -a is given.
22 By tege@sics.se, Torbjorn Granlund,
23 and djm@ai.mit.edu, David MacKenzie.
24 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
25 Rewritten to use nftw, then to use fts by Jim Meyering. */
27 #include <config.h>
28 #include <stdio.h>
29 #include <getopt.h>
30 #include <sys/types.h>
31 #include <assert.h>
33 #include "system.h"
34 #include "dirname.h" /* for strip_trailing_slashes */
35 #include "error.h"
36 #include "exclude.h"
37 #include "hash.h"
38 #include "human.h"
39 #include "quote.h"
40 #include "quotearg.h"
41 #include "same.h"
42 #include "xfts.h"
43 #include "xstrtol.h"
45 extern int fts_debug;
47 /* The official name of this program (e.g., no `g' prefix). */
48 #define PROGRAM_NAME "du"
50 #define AUTHORS \
51 "Torbjorn Granlund", "David MacKenzie, Paul Eggert", "Jim Meyering"
53 #if DU_DEBUG
54 # define FTS_CROSS_CHECK(Fts) fts_cross_check (Fts)
55 # define DEBUG_OPT "d"
56 #else
57 # define FTS_CROSS_CHECK(Fts)
58 # define DEBUG_OPT
59 #endif
61 /* Initial size of the hash table. */
62 #define INITIAL_TABLE_SIZE 103
64 /* Hash structure for inode and device numbers. The separate entry
65 structure makes it easier to rehash "in place". */
67 struct entry
69 ino_t st_ino;
70 dev_t st_dev;
73 /* A set of dev/ino pairs. */
74 static Hash_table *htab;
76 /* Name under which this program was invoked. */
77 char *program_name;
79 /* If nonzero, display counts for all files, not just directories. */
80 static int opt_all = 0;
82 /* If nonzero, rather than using the disk usage of each file,
83 use the apparent size (a la stat.st_size). */
84 static int apparent_size = 0;
86 /* If nonzero, count each hard link of files with multiple links. */
87 static int opt_count_all = 0;
89 /* If true, output the NUL byte instead of a newline at the end of each line. */
90 bool opt_nul_terminate_output = false;
92 /* If nonzero, print a grand total at the end. */
93 static int print_totals = 0;
95 /* If nonzero, do not add sizes of subdirectories. */
96 static int opt_separate_dirs = 0;
98 /* Show the total for each directory (and file if --all) that is at
99 most MAX_DEPTH levels down from the root of the hierarchy. The root
100 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
101 static int max_depth = INT_MAX;
103 /* Human-readable options for output. */
104 static int human_output_opts;
106 /* The units to use when printing sizes. */
107 static uintmax_t output_block_size;
109 /* File name patterns to exclude. */
110 static struct exclude *exclude;
112 /* Grand total size of all args, in bytes. */
113 static uintmax_t tot_size = 0;
115 /* Nonzero indicates that du should exit with EXIT_FAILURE upon completion. */
116 int G_fail;
118 #define IS_DIR_TYPE(Type) \
119 ((Type) == FTS_DP \
120 || (Type) == FTS_DNR)
122 /* For long options that have no equivalent short option, use a
123 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
124 enum
126 APPARENT_SIZE_OPTION = CHAR_MAX + 1,
127 EXCLUDE_OPTION,
128 HUMAN_SI_OPTION,
129 MAX_DEPTH_OPTION
132 static struct option const long_options[] =
134 {"all", no_argument, NULL, 'a'},
135 {"apparent-size", no_argument, NULL, APPARENT_SIZE_OPTION},
136 {"block-size", required_argument, 0, 'B'},
137 {"bytes", no_argument, NULL, 'b'},
138 {"count-links", no_argument, NULL, 'l'},
139 {"dereference", no_argument, NULL, 'L'},
140 {"dereference-args", no_argument, NULL, 'D'},
141 {"exclude", required_argument, 0, EXCLUDE_OPTION},
142 {"exclude-from", required_argument, 0, 'X'},
143 {"human-readable", no_argument, NULL, 'h'},
144 {"si", no_argument, 0, HUMAN_SI_OPTION},
145 {"kilobytes", no_argument, NULL, 'k'}, /* long form is obsolescent */
146 {"max-depth", required_argument, NULL, MAX_DEPTH_OPTION},
147 {"null", no_argument, NULL, '0'},
148 {"megabytes", no_argument, NULL, 'm'}, /* obsolescent */
149 {"no-dereference", no_argument, NULL, 'P'},
150 {"one-file-system", no_argument, NULL, 'x'},
151 {"separate-dirs", no_argument, NULL, 'S'},
152 {"summarize", no_argument, NULL, 's'},
153 {"total", no_argument, NULL, 'c'},
154 {GETOPT_HELP_OPTION_DECL},
155 {GETOPT_VERSION_OPTION_DECL},
156 {NULL, 0, NULL, 0}
159 void
160 usage (int status)
162 if (status != EXIT_SUCCESS)
163 fprintf (stderr, _("Try `%s --help' for more information.\n"),
164 program_name);
165 else
167 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name);
168 fputs (_("\
169 Summarize disk usage of each FILE, recursively for directories.\n\
171 "), stdout);
172 fputs (_("\
173 Mandatory arguments to long options are mandatory for short options too.\n\
174 "), stdout);
175 fputs (_("\
176 -a, --all write counts for all files, not just directories\n\
177 --apparent-size print apparent sizes, rather than disk usage; although\n\
178 the apparent size is usually smaller, it may be\n\
179 larger due to holes in (`sparse') files, internal\n\
180 fragmentation, indirect blocks, and the like\n\
181 -B, --block-size=SIZE use SIZE-byte blocks\n\
182 -b, --bytes equivalent to `--apparent-size --block-size=1'\n\
183 -c, --total produce a grand total\n\
184 -D, --dereference-args dereference FILEs that are symbolic links\n\
185 "), stdout);
186 fputs (_("\
187 -H like --si, but also evokes a warning; will soon\n\
188 change to be equivalent to --dereference-args (-D)\n\
189 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
190 --si like -h, but use powers of 1000 not 1024\n\
191 -k like --block-size=1K\n\
192 -l, --count-links count sizes many times if hard linked\n\
193 "), stdout);
194 fputs (_("\
195 -L, --dereference dereference all symbolic links\n\
196 -P, --no-dereference don't follow any symbolic links (this is the default)\n\
197 -0, --null end each output line with 0 byte rather than newline\n\
198 -S, --separate-dirs do not include size of subdirectories\n\
199 -s, --summarize display only a total for each argument\n\
200 "), stdout);
201 fputs (_("\
202 -x, --one-file-system skip directories on different filesystems\n\
203 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
204 --exclude=PATTERN Exclude files that match PATTERN.\n\
205 --max-depth=N print the total for a directory (or file, with --all)\n\
206 only if it is N or fewer levels below the command\n\
207 line argument; --max-depth=0 is the same as\n\
208 --summarize\n\
209 "), stdout);
210 fputs (HELP_OPTION_DESCRIPTION, stdout);
211 fputs (VERSION_OPTION_DESCRIPTION, stdout);
212 fputs (_("\n\
213 SIZE may be (or may be an integer optionally followed by) one of following:\n\
214 kB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\
215 "), stdout);
216 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
218 exit (status);
221 static size_t
222 entry_hash (void const *x, size_t table_size)
224 struct entry const *p = x;
226 /* Ignoring the device number here should be fine. */
227 /* The cast to uintmax_t prevents negative remainders
228 if st_ino is negative. */
229 return (uintmax_t) p->st_ino % table_size;
232 /* Compare two dev/ino pairs. Return true if they are the same. */
233 static bool
234 entry_compare (void const *x, void const *y)
236 struct entry const *a = x;
237 struct entry const *b = y;
238 return SAME_INODE (*a, *b) ? true : false;
241 /* Try to insert the INO/DEV pair into the global table, HTAB.
242 If the pair is successfully inserted, return zero.
243 Upon failed memory allocation exit nonzero.
244 If the pair is already in the table, return nonzero. */
245 static int
246 hash_ins (ino_t ino, dev_t dev)
248 struct entry *ent;
249 struct entry *ent_from_table;
251 ent = xmalloc (sizeof *ent);
252 ent->st_ino = ino;
253 ent->st_dev = dev;
255 ent_from_table = hash_insert (htab, ent);
256 if (ent_from_table == NULL)
258 /* Insertion failed due to lack of memory. */
259 xalloc_die ();
262 if (ent_from_table == ent)
264 /* Insertion succeeded. */
265 return 0;
268 /* That pair is already in the table, so ENT was not inserted. Free it. */
269 free (ent);
271 return 1;
274 /* Initialize the hash table. */
275 static void
276 hash_init (void)
278 htab = hash_initialize (INITIAL_TABLE_SIZE, NULL,
279 entry_hash, entry_compare, free);
280 if (htab == NULL)
281 xalloc_die ();
284 /* Print N_BYTES. Convert it to a readable value before printing. */
286 static void
287 print_only_size (uintmax_t n_bytes)
289 char buf[LONGEST_HUMAN_READABLE + 1];
290 fputs (human_readable (n_bytes, buf, human_output_opts,
291 1, output_block_size), stdout);
294 /* Print N_BYTES followed by STRING on a line.
295 Convert N_BYTES to a readable value before printing. */
297 static void
298 print_size (uintmax_t n_bytes, const char *string)
300 print_only_size (n_bytes);
301 printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n');
302 fflush (stdout);
305 /* This function is called once for every file system object that fts
306 encounters. fts does a depth-first traversal. This function knows
307 that and accumulates per-directory totals based on changes in
308 the depth of the current entry. */
310 static void
311 process_file (FTS *fts, FTSENT *ent)
313 uintmax_t size;
314 uintmax_t size_to_print;
315 static int first_call = 1;
316 static size_t prev_level;
317 static size_t n_alloc;
318 /* The sum of the st_size values of all entries in the single directory
319 at the corresponding level. Although this does include the st_size
320 corresponding to each subdirectory, it does not include the size of
321 any file in a subdirectory. */
322 static uintmax_t *sum_ent;
324 /* The sum of the sizes of all entries in the hierarchy at or below the
325 directory at the specified level. */
326 static uintmax_t *sum_subdir;
327 int print = 1;
329 const char *file = ent->fts_path;
330 const struct stat *sb = ent->fts_statp;
331 int skip;
333 /* If necessary, set FTS_SKIP before returning. */
334 skip = excluded_filename (exclude, ent->fts_name);
335 if (skip)
336 fts_set (fts, ent, FTS_SKIP);
338 switch (ent->fts_info)
340 case FTS_NS:
341 error (0, ent->fts_errno, _("cannot access %s"), quote (file));
342 G_fail = 1;
343 return;
345 case FTS_ERR:
346 /* if (S_ISDIR (ent->fts_statp->st_mode) && FIXME */
347 error (0, ent->fts_errno, _("%s"), quote (file));
348 G_fail = 1;
349 return;
351 case FTS_DNR:
352 /* Don't return just yet, since although the directory is not readable,
353 we were able to stat it, so we do have a size. */
354 error (0, ent->fts_errno, _("cannot read directory %s"), quote (file));
355 G_fail = 1;
356 break;
358 default:
359 break;
362 /* If this is the first (pre-order) encounter with a directory,
363 or if it's the second encounter for a skipped directory, then
364 return right away. */
365 if (ent->fts_info == FTS_D || skip)
366 return;
368 /* If the file is being excluded or if it has already been counted
369 via a hard link, then don't let it contribute to the sums. */
370 if (skip
371 || (!opt_count_all
372 && 1 < sb->st_nlink
373 && hash_ins (sb->st_ino, sb->st_dev)))
375 /* Note that we must not simply return here.
376 We still have to update prev_level and maybe propagate
377 some sums up the hierarchy. */
378 size = 0;
379 print = 0;
381 else
383 size = (apparent_size
384 ? sb->st_size
385 : ST_NBLOCKS (*sb) * ST_NBLOCKSIZE);
388 if (first_call)
390 n_alloc = ent->fts_level + 10;
391 sum_ent = XCALLOC (uintmax_t, n_alloc);
392 sum_subdir = XCALLOC (uintmax_t, n_alloc);
394 else
396 /* FIXME: it's a shame that we need these `size_t' casts to avoid
397 warnings from gcc about `comparison between signed and unsigned'.
398 Probably unavoidable, assuming that the struct members
399 are of type `int' (historical), since I want variables like
400 n_alloc and prev_level to have types that make sense. */
401 if (n_alloc <= (size_t) ent->fts_level)
403 n_alloc = ent->fts_level * 2;
404 sum_ent = XREALLOC (sum_ent, uintmax_t, n_alloc);
405 sum_subdir = XREALLOC (sum_subdir, uintmax_t, n_alloc);
409 size_to_print = size;
411 if (! first_call)
413 if ((size_t) ent->fts_level == prev_level)
415 /* This is usually the most common case. Do nothing. */
417 else if (ent->fts_level > prev_level)
419 /* Descending the hierarchy.
420 Clear the accumulators for *all* levels between prev_level
421 and the current one. The depth may change dramatically,
422 e.g., from 1 to 10. */
423 int i;
424 for (i = prev_level + 1; i <= ent->fts_level; i++)
426 sum_ent[i] = 0;
427 sum_subdir[i] = 0;
430 else /* ent->fts_level < prev_level */
432 /* Ascending the hierarchy.
433 Process a directory only after all entries in that
434 directory have been processed. When the depth decreases,
435 propagate sums from the children (prev_level) to the parent.
436 Here, the current level is always one smaller than the
437 previous one. */
438 assert ((size_t) ent->fts_level == prev_level - 1);
439 size_to_print += sum_ent[prev_level];
440 if (!opt_separate_dirs)
441 size_to_print += sum_subdir[prev_level];
442 sum_subdir[ent->fts_level] += (sum_ent[prev_level]
443 + sum_subdir[prev_level]);
447 prev_level = ent->fts_level;
448 first_call = 0;
450 /* Let the size of a directory entry contribute to the total for the
451 containing directory, unless --separate-dirs (-S) is specified. */
452 if ( ! (opt_separate_dirs && IS_DIR_TYPE (ent->fts_info)))
453 sum_ent[ent->fts_level] += size;
455 /* Even if this directory is unreadable or we can't chdir into it,
456 do let its size contribute to the total, ... */
457 tot_size += size;
459 /* ... but don't print out a total for it, since without the size(s)
460 of any potential entries, it could be very misleading. */
461 if (ent->fts_info == FTS_DNR)
462 return;
464 /* If we're not counting an entry, e.g., because it's a hard link
465 to a file we've already counted (and --count-links), then don't
466 print a line for it. */
467 if (!print)
468 return;
470 if ((IS_DIR_TYPE (ent->fts_info) && ent->fts_level <= max_depth)
471 || ((opt_all && ent->fts_level <= max_depth) || ent->fts_level == 0))
473 print_only_size (size_to_print);
474 fputc ('\t', stdout);
475 fputs (file, stdout);
476 fputc (opt_nul_terminate_output ? '\0' : '\n', stdout);
477 fflush (stdout);
481 /* Recursively print the sizes of the directories (and, if selected, files)
482 named in FILES, the last entry of which is NULL.
483 BIT_FLAGS controls how fts works.
484 If the fts_open call fails, exit nonzero.
485 Otherwise, return nonzero upon error. */
487 static int
488 du_files (char **files, int bit_flags)
490 int fail = 0;
492 FTS *fts = xfts_open (files, bit_flags, NULL);
494 while (1)
496 FTSENT *ent;
498 ent = fts_read (fts);
499 if (ent == NULL)
501 if (errno != 0)
503 /* FIXME: try to give a better message */
504 error (0, errno, _("fts_read failed"));
505 fail = 1;
507 break;
509 FTS_CROSS_CHECK (fts);
511 /* This is a space optimization. If we aren't printing totals,
512 then it's ok to clear the duplicate-detection tables after
513 each command line hierarchy has been processed. */
514 if (ent->fts_level == 0 && ent->fts_info == FTS_D && !print_totals)
515 hash_clear (htab);
517 process_file (fts, ent);
520 /* Ignore failure, since the only way it can do so is in failing to
521 return to the original directory, and since we're about to exit,
522 that doesn't matter. */
523 fts_close (fts);
525 if (print_totals)
526 print_size (tot_size, _("total"));
528 return fail;
532 main (int argc, char **argv)
534 int c;
535 char *cwd_only[2];
536 int max_depth_specified = 0;
537 char **files;
538 int fail;
540 /* Bit flags that control how fts works. */
541 int bit_flags = FTS_PHYSICAL | FTS_TIGHT_CYCLE_CHECK;
543 /* If nonzero, display only a total for each argument. */
544 int opt_summarize_only = 0;
546 cwd_only[0] = ".";
547 cwd_only[1] = NULL;
549 initialize_main (&argc, &argv);
550 program_name = argv[0];
551 setlocale (LC_ALL, "");
552 bindtextdomain (PACKAGE, LOCALEDIR);
553 textdomain (PACKAGE);
555 atexit (close_stdout);
557 exclude = new_exclude ();
559 human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false,
560 &output_block_size);
562 fail = 0;
563 while ((c = getopt_long (argc, argv, DEBUG_OPT "0abchHklmsxB:DLPSX:",
564 long_options, NULL)) != -1)
566 long int tmp_long;
567 switch (c)
569 case 0: /* Long option. */
570 break;
572 #if DU_DEBUG
573 case 'd':
574 fts_debug = 1;
575 break;
576 #endif
578 case '0':
579 opt_nul_terminate_output = true;
580 break;
582 case 'a':
583 opt_all = 1;
584 break;
586 case APPARENT_SIZE_OPTION:
587 apparent_size = 1;
588 break;
590 case 'b':
591 apparent_size = 1;
592 human_output_opts = 0;
593 output_block_size = 1;
594 break;
596 case 'c':
597 print_totals = 1;
598 break;
600 case 'h':
601 human_output_opts = human_autoscale | human_SI | human_base_1024;
602 output_block_size = 1;
603 break;
605 case 'H':
606 error (0, 0, _("WARNING: use --si, not -H; the meaning of the -H\
607 option will soon\nchange to be the same as that of --dereference-args (-D)"));
608 /* fall through */
609 case HUMAN_SI_OPTION:
610 human_output_opts = human_autoscale | human_SI;
611 output_block_size = 1;
612 break;
614 case 'k':
615 human_output_opts = 0;
616 output_block_size = 1024;
617 break;
619 case MAX_DEPTH_OPTION: /* --max-depth=N */
620 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) == LONGINT_OK
621 && 0 <= tmp_long && tmp_long <= INT_MAX)
623 max_depth_specified = 1;
624 max_depth = (int) tmp_long;
626 else
628 error (0, 0, _("invalid maximum depth %s"),
629 quote (optarg));
630 fail = 1;
632 break;
634 case 'm': /* obsolescent: FIXME: remove in 2005. */
635 human_output_opts = 0;
636 output_block_size = 1024 * 1024;
637 break;
639 case 'l':
640 opt_count_all = 1;
641 break;
643 case 's':
644 opt_summarize_only = 1;
645 break;
647 case 'x':
648 bit_flags |= FTS_XDEV;
649 break;
651 case 'B':
652 human_output_opts = human_options (optarg, true, &output_block_size);
653 break;
655 case 'D': /* This will eventually be 'H' (-H), too. */
656 bit_flags = FTS_COMFOLLOW;
657 break;
659 case 'L': /* --dereference */
660 bit_flags = FTS_LOGICAL;
661 break;
663 case 'P': /* --no-dereference */
664 bit_flags = FTS_PHYSICAL;
665 break;
667 case 'S':
668 opt_separate_dirs = 1;
669 break;
671 case 'X':
672 if (add_exclude_file (add_exclude, exclude, optarg,
673 EXCLUDE_WILDCARDS, '\n'))
675 error (0, errno, "%s", quotearg_colon (optarg));
676 fail = 1;
678 break;
680 case EXCLUDE_OPTION:
681 add_exclude (exclude, optarg, EXCLUDE_WILDCARDS);
682 break;
684 case_GETOPT_HELP_CHAR;
686 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
688 default:
689 fail = 1;
693 if (fail)
694 usage (EXIT_FAILURE);
696 if (opt_all && opt_summarize_only)
698 error (0, 0, _("cannot both summarize and show all entries"));
699 usage (EXIT_FAILURE);
702 if (opt_summarize_only && max_depth_specified && max_depth == 0)
704 error (0, 0,
705 _("warning: summarizing is the same as using --max-depth=0"));
708 if (opt_summarize_only && max_depth_specified && max_depth != 0)
710 error (0, 0,
711 _("warning: summarizing conflicts with --max-depth=%d"),
712 max_depth);
713 usage (EXIT_FAILURE);
716 if (opt_summarize_only)
717 max_depth = 0;
719 files = (optind < argc ? argv + optind : cwd_only);
721 /* Initialize the hash structure for inode numbers. */
722 hash_init ();
724 exit (du_files (files, bit_flags) || G_fail
725 ? EXIT_FAILURE : EXIT_SUCCESS);