(process_file): Always reset size_to_propagate_to_parent for
[coreutils.git] / src / du.c
blobc58559422314b2c0d8c750ea4ea6892c08cb54ff
1 /* du -- summarize disk usage
2 Copyright (C) 1988-1991, 1995-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
20 when -a is given.
22 By tege@sics.se, Torbjorn Granlund,
23 and djm@ai.mit.edu, David MacKenzie.
24 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
25 Rewritten to use nftw by Jim Meyering. */
27 #include <config.h>
28 #include <stdio.h>
29 #include <getopt.h>
30 #include <sys/types.h>
31 #include <assert.h>
33 #include "system.h"
34 #include "dirname.h" /* for strip_trailing_slashes */
35 #include "error.h"
36 #include "exclude.h"
37 #include "ftw.h"
38 #include "hash.h"
39 #include "human.h"
40 #include "mmap-stack.h"
41 #include "quote.h"
42 #include "quotearg.h"
43 #include "same.h"
44 #include "xstrtol.h"
/* Official program name; it deliberately carries no `g' prefix.  */
#define PROGRAM_NAME "du"

/* Author list, marked for translation.  */
#define AUTHORS \
  N_ ("Torbjorn Granlund, David MacKenzie, Larry McVoy, Paul Eggert, and Jim Meyering")

/* Size the dev/inode hash table starts out with.  */
#define INITIAL_TABLE_SIZE 103
/* Upper bound on the number of file handles ftw may keep open at the
   same time: hold ten back whenever at least twenty are available,
   otherwise allow all of them.  */
#define MAX_N_DESCRIPTORS \
  (UTILS_OPEN_MAX >= 20 ? UTILS_OPEN_MAX - 10 : UTILS_OPEN_MAX)
/* One inode/device pair, used as the element type of the hash table.
   Having a separate entry structure makes it easier to rehash
   "in place".  */
struct entry
{
  ino_t st_ino;
  dev_t st_dev;
};
71 /* A set of dev/ino pairs. */
72 static Hash_table *htab;
74 /* Name under which this program was invoked. */
75 char *program_name;
77 /* If nonzero, display counts for all files, not just directories. */
78 static int opt_all = 0;
80 /* If nonzero, count each hard link of files with multiple links. */
81 static int opt_count_all = 0;
83 /* If nonzero, print a grand total at the end. */
84 static int print_totals = 0;
86 /* If nonzero, do not add sizes of subdirectories. */
87 static int opt_separate_dirs = 0;
89 /* If nonzero, dereference symlinks that are command line arguments.
90 Implementing this while still using nftw is a little tricky.
91 For each command line argument that is a symlink-to-directory,
92 call nftw with "command_line_arg/." and remember to omit the
93 added `/.' when printing. */
94 static int opt_dereference_arguments = 0;
96 /* Show the total for each directory (and file if --all) that is at
97 most MAX_DEPTH levels down from the root of the hierarchy. The root
98 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
99 static int max_depth = INT_MAX;
101 /* Human-readable options for output. */
102 static int human_output_opts;
104 /* The units to use when printing sizes. */
105 static uintmax_t output_block_size;
107 /* File name patterns to exclude. */
108 static struct exclude *exclude;
110 /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
111 static uintmax_t tot_size = 0;
113 /* In some cases, we have to append `/.' or just `.' to an argument
114 (to dereference a symlink). When we do that, we don't want to
115 expose this artifact when printing file/directory names, so these
116 variables keep track of the length of the original command line
117 argument and the length of the suffix we've added, respectively.
118 ARG_LENGTH == 0 indicates that we haven't added a suffix.
119 This information is used to omit any such added characters when
120 printing names. */
121 size_t arg_length;
122 size_t suffix_length;
124 /* Nonzero indicates that du should exit with EXIT_FAILURE upon completion. */
125 int G_fail;
/* Nonzero if TYPE is one of the type codes FTW_D, FTW_DP or FTW_DNR.  */
#define IS_FTW_DIR_TYPE(Type) \
  ((Type) == FTW_D || (Type) == FTW_DP || (Type) == FTW_DNR)
/* Long options without a short counterpart are identified by pseudo
   short options that lie outside the range of char, numbered upward
   from CHAR_MAX + 1.  */
enum
{
  EXCLUDE_OPTION = CHAR_MAX + 1,
  MAX_DEPTH_OPTION = CHAR_MAX + 2
};
140 static struct option const long_options[] =
142 {"all", no_argument, NULL, 'a'},
143 {"block-size", required_argument, 0, 'B'},
144 {"bytes", no_argument, NULL, 'b'},
145 {"count-links", no_argument, NULL, 'l'},
146 {"dereference", no_argument, NULL, 'L'},
147 {"dereference-args", no_argument, NULL, 'D'},
148 {"exclude", required_argument, 0, EXCLUDE_OPTION},
149 {"exclude-from", required_argument, 0, 'X'},
150 {"human-readable", no_argument, NULL, 'h'},
151 {"si", no_argument, 0, 'H'},
152 {"kilobytes", no_argument, NULL, 'k'}, /* long form is obsolescent */
153 {"max-depth", required_argument, NULL, MAX_DEPTH_OPTION},
154 {"megabytes", no_argument, NULL, 'm'}, /* obsolescent */
155 {"one-file-system", no_argument, NULL, 'x'},
156 {"separate-dirs", no_argument, NULL, 'S'},
157 {"summarize", no_argument, NULL, 's'},
158 {"total", no_argument, NULL, 'c'},
159 {GETOPT_HELP_OPTION_DECL},
160 {GETOPT_VERSION_OPTION_DECL},
161 {NULL, 0, NULL, 0}
164 void
165 usage (int status)
167 if (status != 0)
168 fprintf (stderr, _("Try `%s --help' for more information.\n"),
169 program_name);
170 else
172 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name);
173 fputs (_("\
174 Summarize disk usage of each FILE, recursively for directories.\n\
176 "), stdout);
177 fputs (_("\
178 Mandatory arguments to long options are mandatory for short options too.\n\
179 "), stdout);
180 fputs (_("\
181 -a, --all write counts for all files, not just directories\n\
182 -B, --block-size=SIZE use SIZE-byte blocks\n\
183 -b, --bytes print size in bytes\n\
184 -c, --total produce a grand total\n\
185 -D, --dereference-args dereference FILEs that are symbolic links\n\
186 "), stdout);
187 fputs (_("\
188 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
189 -H, --si likewise, but use powers of 1000 not 1024\n\
190 -k like --block-size=1K\n\
191 -l, --count-links count sizes many times if hard linked\n\
192 "), stdout);
193 fputs (_("\
194 -L, --dereference dereference all symbolic links\n\
195 -S, --separate-dirs do not include size of subdirectories\n\
196 -s, --summarize display only a total for each argument\n\
197 "), stdout);
198 fputs (_("\
199 -x, --one-file-system skip directories on different filesystems\n\
200 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
201 --exclude=PATTERN Exclude files that match PATTERN.\n\
202 --max-depth=N print the total for a directory (or file, with --all)\n\
203 only if it is N or fewer levels below the command\n\
204 line argument; --max-depth=0 is the same as\n\
205 --summarize\n\
206 "), stdout);
207 fputs (HELP_OPTION_DESCRIPTION, stdout);
208 fputs (VERSION_OPTION_DESCRIPTION, stdout);
209 fputs (_("\n\
210 SIZE may be (or may be an integer optionally followed by) one of following:\n\
211 kB 1000, K 1024, MB 1,000,000, M 1,048,576, and so on for G, T, P, E, Z, Y.\n\
212 "), stdout);
213 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
215 exit (status);
218 static unsigned int
219 entry_hash (void const *x, unsigned int table_size)
221 struct entry const *p = x;
223 /* Ignoring the device number here should be fine. */
224 /* The cast to uintmax_t prevents negative remainders
225 if st_ino is negative. */
226 return (uintmax_t) p->st_ino % table_size;
229 /* Compare two dev/ino pairs. Return true if they are the same. */
230 static bool
231 entry_compare (void const *x, void const *y)
233 struct entry const *a = x;
234 struct entry const *b = y;
235 return SAME_INODE (*a, *b) ? true : false;
238 /* Try to insert the INO/DEV pair into the global table, HTAB.
239 If the pair is successfully inserted, return zero.
240 Upon failed memory allocation exit nonzero.
241 If the pair is already in the table, return nonzero. */
242 static int
243 hash_ins (ino_t ino, dev_t dev)
245 struct entry *ent;
246 struct entry *ent_from_table;
248 ent = (struct entry *) xmalloc (sizeof *ent);
249 ent->st_ino = ino;
250 ent->st_dev = dev;
252 ent_from_table = hash_insert (htab, ent);
253 if (ent_from_table == NULL)
255 /* Insertion failed due to lack of memory. */
256 xalloc_die ();
259 if (ent_from_table == ent)
261 /* Insertion succeeded. */
262 return 0;
265 /* That pair is already in the table, so ENT was not inserted. Free it. */
266 free (ent);
268 return 1;
271 /* Initialize the hash table. */
272 static void
273 hash_init (void)
275 htab = hash_initialize (INITIAL_TABLE_SIZE, NULL,
276 entry_hash, entry_compare, free);
277 if (htab == NULL)
278 xalloc_die ();
281 /* Print N_BLOCKS. NBLOCKS is the number of
282 ST_NBLOCKSIZE-byte blocks; convert it to a readable value before
283 printing. */
285 static void
286 print_only_size (uintmax_t n_blocks)
288 char buf[LONGEST_HUMAN_READABLE + 1];
289 fputs (human_readable (n_blocks, buf, human_output_opts,
290 ST_NBLOCKSIZE, output_block_size), stdout);
/* Write one line to stdout made of N_BLOCKS, a tab and STRING, then
   flush stdout.  N_BLOCKS counts blocks of ST_NBLOCKSIZE bytes and
   is converted to a readable value before it is printed.  */

static void
print_size (uintmax_t n_blocks, const char *string)
{
  /* The size column is produced exactly as print_only_size does it.  */
  print_only_size (n_blocks);
  printf ("\t%s\n", string);
  fflush (stdout);
}
308 /* This function is called once for every file system object that nftw
309 encounters. nftw does a depth-first traversal. This function knows
310 that and accumulates per-directory totals based on changes in
311 the depth of the current entry. */
313 static int
314 process_file (const char *file, const struct stat *sb, int file_type,
315 struct FTW *info)
317 size_t size;
318 size_t size_including_subdirs;
319 size_t size_to_propagate_to_parent;
320 static int first_call = 1;
321 static size_t prev_level;
322 static size_t n_alloc;
323 static uintmax_t *sum;
325 /* Always define info->skip before returning. */
326 info->skip = excluded_filename (exclude, file + info->base);
328 switch (file_type)
330 case FTW_NS:
331 error (0, errno, _("cannot access %s"), quote (file));
332 G_fail = 1;
333 return 0;
335 case FTW_DCHP:
336 error (0, errno, _("cannot change to parent of directory %s"),
337 quote (file));
338 G_fail = 1;
339 return 0;
341 case FTW_DCH:
342 /* Don't return just yet, since although nftw couldn't chdir into the
343 directory, it was able to stat it, so we do have a size. */
344 error (0, errno, _("cannot change to directory %s"), quote (file));
345 G_fail = 1;
346 break;
348 case FTW_DNR:
349 /* Don't return just yet, since although nftw couldn't read the
350 directory, it was able to stat it, so we do have a size. */
351 error (0, errno, _("cannot read directory %s"), quote (file));
352 G_fail = 1;
353 break;
355 default:
356 break;
359 /* If this is the first (pre-order) encounter with a directory,
360 return right away. */
361 if (file_type == FTW_DPRE)
362 return 0;
364 /* If the file is being excluded or if it has already been counted
365 via a hard link, then don't let it contribute to the sums. */
366 if (info->skip
367 || (!opt_count_all
368 && 1 < sb->st_nlink
369 && hash_ins (sb->st_ino, sb->st_dev)))
371 /* Note that we must not simply return here.
372 We still have to update prev_level and maybe propagate
373 some sums up the hierarchy. */
374 size = 0;
376 else
378 size = ST_NBLOCKS (*sb);
381 if (first_call)
383 n_alloc = info->level + 10;
384 sum = XCALLOC (uintmax_t, n_alloc);
386 else
388 /* FIXME: it's a shame that we need these `size_t' casts to avoid
389 warnings from gcc about `comparison between signed and unsigned'.
390 Probably unavoidable, assuming that the members of struct FTW
391 are of type `int' (historical), since I want variables like
392 n_alloc and prev_level to have types that make sense. */
393 if (n_alloc <= (size_t) info->level)
395 n_alloc = info->level * 2;
396 sum = XREALLOC (sum, uintmax_t, n_alloc);
400 size_to_propagate_to_parent = size_including_subdirs = size;
402 if (! first_call)
404 if ((size_t) info->level == prev_level)
406 /* This is usually the most common case. Do nothing. */
408 else if ((size_t) info->level > prev_level)
410 /* Descending the hierarchy.
411 Clear the accumulators for *all* levels between prev_level
412 and the current one. The depth may change dramatically,
413 e.g., from 1 to 10. */
414 int i;
415 for (i = prev_level + 1; i <= info->level; i++)
416 sum[i] = 0;
418 else /* info->level < prev_level */
420 /* Ascending the hierarchy.
421 nftw processes a directory only after all entries in that
422 directory have been processed. When the depth decreases,
423 propagate sums from the children (prev_level) to the parent.
424 Here, the current level is always one smaller than the
425 previous one. */
426 assert ((size_t) info->level == prev_level - 1);
427 size_to_propagate_to_parent = size_including_subdirs
428 = size + sum[prev_level];
432 if (opt_separate_dirs)
433 size_to_propagate_to_parent = 0;
435 prev_level = info->level;
436 first_call = 0;
438 sum[info->level] += size_to_propagate_to_parent;
440 /* Even if this directory was unreadable or we couldn't chdir into it,
441 do let its size contribute to the total, ... */
442 tot_size += size;
444 /* ... but don't print out a total for it, since without the size(s)
445 of any potential entries, it could be very misleading. */
446 if (file_type == FTW_DNR || file_type == FTW_DCH)
447 return 0;
449 /* FIXME: This looks suspiciously like it could be simplified. */
450 if ((IS_FTW_DIR_TYPE (file_type) &&
451 (info->level <= max_depth || info->level == 0))
452 || ((opt_all && info->level <= max_depth) || info->level == 0))
454 print_only_size (size_including_subdirs);
455 fputc ('\t', stdout);
456 if (arg_length)
458 /* Print the file name, but without the `.' or `/.'
459 directory suffix that we may have added in main. */
460 /* Print everything before the part we appended. */
461 fwrite (file, arg_length, 1, stdout);
462 /* Print everything after what we appended. */
463 fputs (file + arg_length + suffix_length
464 + (file[arg_length + suffix_length] == '/'), stdout);
466 else
468 fputs (file, stdout);
470 fputc ('\n', stdout);
471 fflush (stdout);
474 return 0;
/* Return 1 if FILE, with any trailing slashes removed, is a symbolic
   link (according to lstat) whose target is a directory (according
   to stat), and 0 otherwise, including when either call fails.  */
static int
is_symlink_to_dir (char const *file)
{
  char *name;
  struct stat st;

  ASSIGN_STRDUPA (name, file);
  strip_trailing_slashes (name);

  /* The name itself must be a symlink ...  */
  if (lstat (name, &st) != 0 || !S_ISLNK (st.st_mode))
    return 0;

  /* ... and what it resolves to must be a directory.  */
  if (stat (name, &st) != 0)
    return 0;

  return S_ISDIR (st.st_mode) != 0;
}
489 /* Recursively print the sizes of the directories (and, if selected, files)
490 named in FILES, the last entry of which is NULL.
491 FTW_FLAGS controls how nftw works.
492 Return nonzero upon error. */
494 static void du_files (char **files, int ftw_flags) ATTRIBUTE_NORETURN;
495 static void
496 du_files (char **files, int ftw_flags)
498 int fail = 0;
499 int i;
500 for (i = 0; files[i]; i++)
502 char *file = files[i];
503 char *orig = file;
504 int err;
505 arg_length = 0;
507 if (!print_totals)
508 hash_clear (htab);
510 /* When dereferencing only command line arguments, we're using
511 nftw's FTW_PHYS flag, so a symlink-to-directory specified on
512 the command line wouldn't normally be dereferenced. To work
513 around that, we incur the overhead of appending `/.' (or `.')
514 now, and later removing it each time we output the name of
515 a derived file or directory name. */
516 if (opt_dereference_arguments && is_symlink_to_dir (file))
518 size_t len = strlen (file);
519 /* Append `/.', but if there's already a trailing slash,
520 append only the `.'. */
521 char const *suffix = (file[len - 1] == '/' ? "." : "/.");
522 char *new_file;
523 suffix_length = strlen (suffix);
524 new_file = xmalloc (len + suffix_length + 1);
525 memcpy (mempcpy (new_file, file, len), suffix, suffix_length + 1);
526 arg_length = len;
527 file = new_file;
530 err = nftw (file, process_file, MAX_N_DESCRIPTORS, ftw_flags);
531 if (err)
532 error (0, errno, "%s", quote (orig));
533 fail |= err;
535 if (arg_length)
536 free (file);
539 if (print_totals)
540 print_size (tot_size, _("total"));
542 exit (fail || G_fail ? EXIT_FAILURE : EXIT_SUCCESS);
546 main (int argc, char **argv)
548 int c;
549 char *cwd_only[2];
550 int max_depth_specified = 0;
551 char **files;
552 int fail;
554 /* Bit flags that control how nftw works. */
555 int ftw_flags = FTW_DEPTH | FTW_PHYS | FTW_CHDIR;
557 /* If nonzero, display only a total for each argument. */
558 int opt_summarize_only = 0;
560 cwd_only[0] = ".";
561 cwd_only[1] = NULL;
563 program_name = argv[0];
564 setlocale (LC_ALL, "");
565 bindtextdomain (PACKAGE, LOCALEDIR);
566 textdomain (PACKAGE);
568 atexit (close_stdout);
570 exclude = new_exclude ();
572 human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false,
573 &output_block_size);
575 fail = 0;
576 while ((c = getopt_long (argc, argv, "abchHklmsxB:DLSX:", long_options, NULL))
577 != -1)
579 long int tmp_long;
580 switch (c)
582 case 0: /* Long option. */
583 break;
585 case 'a':
586 opt_all = 1;
587 break;
589 case 'b':
590 human_output_opts = 0;
591 output_block_size = 1;
592 break;
594 case 'c':
595 print_totals = 1;
596 break;
598 case 'h':
599 human_output_opts = human_autoscale | human_SI | human_base_1024;
600 output_block_size = 1;
601 break;
603 case 'H':
604 human_output_opts = human_autoscale | human_SI;
605 output_block_size = 1;
606 break;
608 case 'k':
609 human_output_opts = 0;
610 output_block_size = 1024;
611 break;
613 case MAX_DEPTH_OPTION: /* --max-depth=N */
614 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) == LONGINT_OK
615 && 0 <= tmp_long && tmp_long <= INT_MAX)
617 max_depth_specified = 1;
618 max_depth = (int) tmp_long;
620 else
622 error (0, 0, _("invalid maximum depth %s"),
623 quote (optarg));
624 fail = 1;
626 break;
628 case 'm': /* obsolescent */
629 human_output_opts = 0;
630 output_block_size = 1024 * 1024;
631 break;
633 case 'l':
634 opt_count_all = 1;
635 break;
637 case 's':
638 opt_summarize_only = 1;
639 break;
641 case 'x':
642 ftw_flags |= FTW_MOUNT;
643 break;
645 case 'B':
646 human_output_opts = human_options (optarg, true, &output_block_size);
647 break;
649 case 'D':
650 opt_dereference_arguments = 1;
651 break;
653 case 'L':
654 ftw_flags &= ~FTW_PHYS;
655 break;
657 case 'S':
658 opt_separate_dirs = 1;
659 break;
661 case 'X':
662 if (add_exclude_file (add_exclude, exclude, optarg,
663 EXCLUDE_WILDCARDS, '\n'))
665 error (0, errno, "%s", quotearg_colon (optarg));
666 fail = 1;
668 break;
670 case EXCLUDE_OPTION:
671 add_exclude (exclude, optarg, EXCLUDE_WILDCARDS);
672 break;
674 case_GETOPT_HELP_CHAR;
676 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
678 default:
679 fail = 1;
683 if (fail)
684 usage (EXIT_FAILURE);
686 if (opt_all && opt_summarize_only)
688 error (0, 0, _("cannot both summarize and show all entries"));
689 usage (EXIT_FAILURE);
692 if (opt_summarize_only && max_depth_specified && max_depth == 0)
694 error (0, 0,
695 _("warning: summarizing is the same as using --max-depth=0"));
698 if (opt_summarize_only && max_depth_specified && max_depth != 0)
700 error (0, 0,
701 _("warning: summarizing conflicts with --max-depth=%d"),
702 max_depth);
703 usage (EXIT_FAILURE);
706 if (opt_summarize_only)
707 max_depth = 0;
709 files = (optind == argc ? cwd_only : argv + optind);
711 /* Initialize the hash structure for inode numbers. */
712 hash_init ();
714 RUN_WITH_BIG_STACK_2 (du_files, files, ftw_flags);