(libfetish_a_SOURCES): Add mmap-stack.h.
[coreutils.git] / src / du.c
blobfbc07b534741a5f4161820a6ab2b813c59f58e10
1 /* du -- summarize disk usage
2 Copyright (C) 1988-1991, 1995-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
20 when -a is given.
22 By tege@sics.se, Torbjorn Granlund,
23 and djm@ai.mit.edu, David MacKenzie.
24 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
25 Rewritten to use nftw by Jim Meyering. */
27 #include <config.h>
28 #include <stdio.h>
29 #include <getopt.h>
30 #include <sys/types.h>
31 #include <assert.h>
33 #include "system.h"
34 #include "dirname.h" /* for strip_trailing_slashes */
35 #include "error.h"
36 #include "exclude.h"
37 #include "ftw.h"
38 #include "hash.h"
39 #include "human.h"
40 #include "quote.h"
41 #include "quotearg.h"
42 #include "same.h"
43 #include "xstrtol.h"
/* The official name of this program (e.g., no `g' prefix).  */
#define PROGRAM_NAME "du"

/* Author list handed to the --version handler in main.  */
#define AUTHORS \
  N_ ("Torbjorn Granlund, David MacKenzie, Larry McVoy, Paul Eggert, and Jim Meyering")

/* Size requested for the dev/ino hash table when it is created.  */
#define INITIAL_TABLE_SIZE 103

/* Upper bound on the number of file handles ftw may keep open at the
   same time: all of UTILS_OPEN_MAX when that limit is below 20,
   otherwise ten fewer than the limit.  */
#define MAX_N_DESCRIPTORS \
  (UTILS_OPEN_MAX - (UTILS_OPEN_MAX < 20 ? 0 : 10))
/* One inode/device pair as stored in the hash table.  Keeping the
   pair in a structure of its own makes it easier to rehash
   "in place".  */
struct entry
{
  ino_t st_ino;   /* Inode number.  */
  dev_t st_dev;   /* Device number.  */
};
70 /* A set of dev/ino pairs. */
71 static Hash_table *htab;
73 /* Name under which this program was invoked. */
74 char *program_name;
76 /* If nonzero, display counts for all files, not just directories. */
77 static int opt_all = 0;
79 /* If nonzero, count each hard link of files with multiple links. */
80 static int opt_count_all = 0;
82 /* If nonzero, print a grand total at the end. */
83 static int print_totals = 0;
85 /* If nonzero, do not add sizes of subdirectories. */
86 static int opt_separate_dirs = 0;
88 /* If nonzero, dereference symlinks that are command line arguments.
89 Implementing this while still using nftw is a little tricky.
90 For each command line argument that is a symlink-to-directory,
91 call nftw with "command_line_arg/." and remember to omit the
92 added `/.' when printing. */
93 static int opt_dereference_arguments = 0;
95 /* Show the total for each directory (and file if --all) that is at
96 most MAX_DEPTH levels down from the root of the hierarchy. The root
97 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
98 static int max_depth = INT_MAX;
100 /* Human-readable options for output. */
101 static int human_output_opts;
103 /* The units to use when printing sizes. */
104 static uintmax_t output_block_size;
106 /* File name patterns to exclude. */
107 static struct exclude *exclude;
109 /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
110 static uintmax_t tot_size = 0;
112 /* In some cases, we have to append `/.' or just `.' to an argument
113 (to dereference a symlink). When we do that, we don't want to
114 expose this artifact when printing file/directory names, so these
115 variables keep track of the length of the original command line
116 argument and the length of the suffix we've added, respectively.
117 ARG_LENGTH == 0 indicates that we haven't added a suffix.
118 This information is used to omit any such added characters when
119 printing names. */
120 size_t arg_length;
121 size_t suffix_length;
123 /* Nonzero indicates that du should exit with EXIT_FAILURE upon completion. */
124 int G_fail;
/* Nonzero if TYPE is one of the three type codes FTW_D, FTW_DP or
   FTW_DNR.  TYPE may be evaluated more than once, so it must not have
   side effects.  */
#define IS_FTW_DIR_TYPE(Type) \
  ((Type) == FTW_D || (Type) == FTW_DP || (Type) == FTW_DNR)
/* Long options without a short equivalent are identified by values
   that cannot be characters: numbering starts at CHAR_MAX + 1.  */
enum
{
  EXCLUDE_OPTION = CHAR_MAX + 1,
  MAX_DEPTH_OPTION = EXCLUDE_OPTION + 1
};
139 static struct option const long_options[] =
141 {"all", no_argument, NULL, 'a'},
142 {"block-size", required_argument, 0, 'B'},
143 {"bytes", no_argument, NULL, 'b'},
144 {"count-links", no_argument, NULL, 'l'},
145 {"dereference", no_argument, NULL, 'L'},
146 {"dereference-args", no_argument, NULL, 'D'},
147 {"exclude", required_argument, 0, EXCLUDE_OPTION},
148 {"exclude-from", required_argument, 0, 'X'},
149 {"human-readable", no_argument, NULL, 'h'},
150 {"si", no_argument, 0, 'H'},
151 {"kilobytes", no_argument, NULL, 'k'}, /* long form is obsolescent */
152 {"max-depth", required_argument, NULL, MAX_DEPTH_OPTION},
153 {"megabytes", no_argument, NULL, 'm'}, /* obsolescent */
154 {"one-file-system", no_argument, NULL, 'x'},
155 {"separate-dirs", no_argument, NULL, 'S'},
156 {"summarize", no_argument, NULL, 's'},
157 {"total", no_argument, NULL, 'c'},
158 {GETOPT_HELP_OPTION_DECL},
159 {GETOPT_VERSION_OPTION_DECL},
160 {NULL, 0, NULL, 0}
/* Print usage information and exit with STATUS.
   When STATUS is nonzero, write only a one-line hint to stderr that
   points at --help.  When STATUS is zero, write the full help text
   (synopsis, option descriptions, SIZE suffix explanation and the
   bug-report address) to stdout.
   NOTE(review): the help strings in this copy look whitespace-collapsed
   and some continuation lines appear to be missing -- compare against
   the pristine file before touching them.  */
163 void
164 usage (int status)
/* Failure path: short hint on stderr naming the invoked program.  */
166 if (status != 0)
167 fprintf (stderr, _("Try `%s --help' for more information.\n"),
168 program_name);
169 else
/* Success path: full help on stdout.  The text is emitted in several
   separate translatable pieces rather than as one large string.  */
171 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name);
172 fputs (_("\
173 Summarize disk usage of each FILE, recursively for directories.\n\
175 "), stdout);
176 fputs (_("\
177 Mandatory arguments to long options are mandatory for short options too.\n\
178 "), stdout);
179 fputs (_("\
180 -a, --all write counts for all files, not just directories\n\
181 -B, --block-size=SIZE use SIZE-byte blocks\n\
182 -b, --bytes print size in bytes\n\
183 -c, --total produce a grand total\n\
184 -D, --dereference-args dereference FILEs that are symbolic links\n\
185 "), stdout);
186 fputs (_("\
187 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
188 -H, --si likewise, but use powers of 1000 not 1024\n\
189 -k like --block-size=1K\n\
190 -l, --count-links count sizes many times if hard linked\n\
191 "), stdout);
192 fputs (_("\
193 -L, --dereference dereference all symbolic links\n\
194 -S, --separate-dirs do not include size of subdirectories\n\
195 -s, --summarize display only a total for each argument\n\
196 "), stdout);
197 fputs (_("\
198 -x, --one-file-system skip directories on different filesystems\n\
199 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
200 --exclude=PATTERN Exclude files that match PATTERN.\n\
201 --max-depth=N print the total for a directory (or file, with --all)\n\
202 only if it is N or fewer levels below the command\n\
203 line argument; --max-depth=0 is the same as\n\
204 --summarize\n\
205 "), stdout);
/* Descriptions of the --help and --version options come from macros
   defined outside this file.  */
206 fputs (HELP_OPTION_DESCRIPTION, stdout);
207 fputs (VERSION_OPTION_DESCRIPTION, stdout);
208 fputs (_("\n\
209 SIZE may be (or may be an integer optionally followed by) one of following:\n\
210 kB 1000, K 1024, MB 1,000,000, M 1,048,576, and so on for G, T, P, E, Z, Y.\n\
211 "), stdout);
212 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
/* Never returns to the caller.  */
214 exit (status);
217 static unsigned int
218 entry_hash (void const *x, unsigned int table_size)
220 struct entry const *p = x;
222 /* Ignoring the device number here should be fine. */
223 /* The cast to uintmax_t prevents negative remainders
224 if st_ino is negative. */
225 return (uintmax_t) p->st_ino % table_size;
228 /* Compare two dev/ino pairs. Return true if they are the same. */
229 static bool
230 entry_compare (void const *x, void const *y)
232 struct entry const *a = x;
233 struct entry const *b = y;
234 return SAME_INODE (*a, *b) ? true : false;
237 /* Try to insert the INO/DEV pair into the global table, HTAB.
238 If the pair is successfully inserted, return zero.
239 Upon failed memory allocation exit nonzero.
240 If the pair is already in the table, return nonzero. */
241 static int
242 hash_ins (ino_t ino, dev_t dev)
244 struct entry *ent;
245 struct entry *ent_from_table;
247 ent = (struct entry *) xmalloc (sizeof *ent);
248 ent->st_ino = ino;
249 ent->st_dev = dev;
251 ent_from_table = hash_insert (htab, ent);
252 if (ent_from_table == NULL)
254 /* Insertion failed due to lack of memory. */
255 xalloc_die ();
258 if (ent_from_table == ent)
260 /* Insertion succeeded. */
261 return 0;
264 /* That pair is already in the table, so ENT was not inserted. Free it. */
265 free (ent);
267 return 1;
270 /* Initialize the hash table. */
271 static void
272 hash_init (void)
274 htab = hash_initialize (INITIAL_TABLE_SIZE, NULL,
275 entry_hash, entry_compare, free);
276 if (htab == NULL)
277 xalloc_die ();
280 /* Print N_BLOCKS. NBLOCKS is the number of
281 ST_NBLOCKSIZE-byte blocks; convert it to a readable value before
282 printing. */
284 static void
285 print_only_size (uintmax_t n_blocks)
287 char buf[LONGEST_HUMAN_READABLE + 1];
288 fputs (human_readable (n_blocks, buf, human_output_opts,
289 ST_NBLOCKSIZE, output_block_size), stdout);
292 /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of
293 ST_NBLOCKSIZE-byte blocks; convert it to a readable value before
294 printing. */
296 static void
297 print_size (uintmax_t n_blocks, const char *string)
299 char buf[LONGEST_HUMAN_READABLE + 1];
300 printf ("%s\t%s\n",
301 human_readable (n_blocks, buf, human_output_opts,
302 ST_NBLOCKSIZE, output_block_size),
303 string);
304 fflush (stdout);
307 /* This function is called once for every file system object that nftw
308 encounters. nftw does a depth-first traversal. This function knows
309 that and accumulates per-directory totals based on changes in
310 the depth of the current entry. */
312 static int
313 process_file (const char *file, const struct stat *sb, int file_type,
314 struct FTW *info)
316 size_t size;
317 size_t s;
318 static int first_call = 1;
319 static size_t prev_level;
320 static size_t n_alloc;
321 /* The sum of the st_size values of all entries in the single directory
322 at the corresponding level. Although this does include the st_size
323 corresponding to each subdirectory, it does not include the size of
324 any file in a subdirectory. */
325 static uintmax_t *sum_ent;
327 /* The sum of the sizes of all entries in the hierarchy at or below the
328 directory at the specified level. */
329 static uintmax_t *sum_subdir;
331 switch (file_type)
333 case FTW_NS:
334 error (0, errno, _("cannot access %s"), quote (file));
335 G_fail = 1;
336 return 0;
338 case FTW_DCHP:
339 error (0, errno, _("cannot change to parent of directory %s"),
340 quote (file));
341 G_fail = 1;
342 return 0;
344 case FTW_DCH:
345 /* Don't return just yet, since although nftw couldn't chdir into the
346 directory, it was able to stat it, so we do have a size. */
347 error (0, errno, _("cannot change to directory %s"), quote (file));
348 G_fail = 1;
349 break;
351 case FTW_DNR:
352 /* Don't return just yet, since although nftw couldn't read the
353 directory, it was able to stat it, so we do have a size. */
354 error (0, errno, _("cannot read directory %s"), quote (file));
355 G_fail = 1;
356 break;
358 default:
359 break;
362 /* If the file is being excluded or if it has already been counted
363 via a hard link, then don't let it contribute to the sums. */
364 if ((info->skip = excluded_filename (exclude, file + info->base))
365 || (!opt_count_all
366 && 1 < sb->st_nlink
367 && hash_ins (sb->st_ino, sb->st_dev)))
369 /* Note that we must not simply return here.
370 We still have to update prev_level and maybe propagate
371 some sums up the hierarchy. */
372 s = size = 0;
374 else
376 s = size = ST_NBLOCKS (*sb);
379 /* If this is the first (pre-order) encounter with a directory,
380 return right away. */
381 if (file_type == FTW_DPRE)
382 return 0;
384 if (first_call)
386 n_alloc = info->level + 10;
387 sum_ent = XCALLOC (uintmax_t, n_alloc);
388 sum_subdir = XCALLOC (uintmax_t, n_alloc);
390 else
392 /* FIXME: it's a shame that we need these `size_t' casts to avoid
393 warnings from gcc about `comparison between signed and unsigned'.
394 Probably unavoidable, assuming that the members of struct FTW
395 are of type `int' (historical), since I want variables like
396 n_alloc and prev_level to have types that make sense. */
397 if (n_alloc <= (size_t) info->level)
399 n_alloc = info->level * 2;
400 sum_ent = XREALLOC (sum_ent, uintmax_t, n_alloc);
401 sum_subdir = XREALLOC (sum_subdir, uintmax_t, n_alloc);
405 if (! first_call)
407 if ((size_t) info->level == prev_level)
409 /* This is usually the most common case. Do nothing. */
411 else if ((size_t) info->level < prev_level)
413 /* Ascending the hierarchy.
414 nftw processes a directory only after all entries in that
415 directory have been processed. When the depth decreases,
416 propagate sums from the children (prev_level) to the parent.
417 Here, the current level is always one smaller than the
418 previous one. */
419 assert ((size_t) info->level == prev_level - 1);
420 s += sum_ent[prev_level];
421 if (!opt_separate_dirs)
422 s += sum_subdir[prev_level];
423 sum_subdir[info->level] += (sum_ent[prev_level]
424 + sum_subdir[prev_level]);
426 else /* info->level > prev_level */
428 /* Descending the hierarchy.
429 Clear the accumulators for *all* levels between prev_level
430 and the current one. The depth may change dramatically,
431 e.g., from 1 to 10. */
432 int i;
433 for (i = prev_level + 1; i <= info->level; i++)
435 sum_ent[i] = 0;
436 sum_subdir[i] = 0;
441 prev_level = info->level;
442 first_call = 0;
444 sum_ent[info->level] += size;
446 /* Even if this directory was unreadable or we couldn't chdir into it,
447 do let its size contribute to the total, ... */
448 tot_size += size;
450 /* ... but don't print out a total for it, since without the size(s)
451 of any potential entries, it could be very misleading. */
452 if (file_type == FTW_DNR || file_type == FTW_DCH)
453 return 0;
455 /* FIXME: This looks suspiciously like it could be simplified. */
456 if ((IS_FTW_DIR_TYPE (file_type) &&
457 (info->level <= max_depth || info->level == 0))
458 || ((opt_all && info->level <= max_depth) || info->level == 0))
460 print_only_size (s);
461 fputc ('\t', stdout);
462 if (arg_length)
464 /* Print the file name, but without the `.' or `/.'
465 directory suffix that we may have added in main. */
466 /* Print everything before the part we appended. */
467 fwrite (file, arg_length, 1, stdout);
468 /* Print everything after what we appended. */
469 fputs (file + arg_length + suffix_length
470 + (file[arg_length + suffix_length] == '/'), stdout);
472 else
474 fputs (file, stdout);
476 fputc ('\n', stdout);
477 fflush (stdout);
480 return 0;
/* Return 1 if FILE, with any trailing slashes removed, is a symbolic
   link (according to lstat) whose target is a directory (according to
   stat).  Return 0 otherwise, including when either call fails.  */
static int
is_symlink_to_dir (char const *file)
{
  char *name;
  struct stat st;

  /* Work on a private copy, since the slashes are stripped in place.  */
  ASSIGN_STRDUPA (name, file);
  strip_trailing_slashes (name);

  if (lstat (name, &st) != 0 || !S_ISLNK (st.st_mode))
    return 0;
  if (stat (name, &st) != 0)
    return 0;
  return S_ISDIR (st.st_mode) ? 1 : 0;
}
495 /* Recursively print the sizes of the directories (and, if selected, files)
496 named in FILES, the last entry of which is NULL.
497 FTW_FLAGS controls how nftw works.
498 Return nonzero upon error. */
500 static int
501 du_files (char **files, int ftw_flags)
503 int fail = 0;
504 int i;
505 for (i = 0; files[i]; i++)
507 char *file = files[i];
508 char *orig = file;
509 int err;
510 arg_length = 0;
512 if (!print_totals)
513 hash_clear (htab);
515 /* When dereferencing only command line arguments, we're using
516 nftw's FTW_PHYS flag, so a symlink-to-directory specified on
517 the command line wouldn't normally be dereferenced. To work
518 around that, we incur the overhead of appending `/.' (or `.')
519 now, and later removing it each time we output the name of
520 a derived file or directory name. */
521 if (opt_dereference_arguments && is_symlink_to_dir (file))
523 size_t len = strlen (file);
524 /* Append `/.', but if there's already a trailing slash,
525 append only the `.'. */
526 char const *suffix = (file[len - 1] == '/' ? "." : "/.");
527 char *new_file;
528 suffix_length = strlen (suffix);
529 new_file = xmalloc (len + suffix_length + 1);
530 memcpy (mempcpy (new_file, file, len), suffix, suffix_length + 1);
531 arg_length = len;
532 file = new_file;
535 err = nftw (file, process_file, MAX_N_DESCRIPTORS, ftw_flags);
536 if (err)
537 error (0, errno, "%s", quote (orig));
538 fail |= err;
540 if (arg_length)
541 free (file);
544 if (print_totals)
545 print_size (tot_size, _("total"));
547 return fail;
551 main (int argc, char **argv)
553 int c;
554 char *cwd_only[2];
555 int max_depth_specified = 0;
556 char **files;
557 int fail;
559 /* Bit flags that control how nftw works. */
560 int ftw_flags = FTW_DEPTH | FTW_PHYS | FTW_CHDIR;
562 /* If nonzero, display only a total for each argument. */
563 int opt_summarize_only = 0;
565 cwd_only[0] = ".";
566 cwd_only[1] = NULL;
568 program_name = argv[0];
569 setlocale (LC_ALL, "");
570 bindtextdomain (PACKAGE, LOCALEDIR);
571 textdomain (PACKAGE);
573 atexit (close_stdout);
575 exclude = new_exclude ();
577 human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false,
578 &output_block_size);
580 fail = 0;
581 while ((c = getopt_long (argc, argv, "abchHklmsxB:DLSX:", long_options, NULL))
582 != -1)
584 long int tmp_long;
585 switch (c)
587 case 0: /* Long option. */
588 break;
590 case 'a':
591 opt_all = 1;
592 break;
594 case 'b':
595 human_output_opts = 0;
596 output_block_size = 1;
597 break;
599 case 'c':
600 print_totals = 1;
601 break;
603 case 'h':
604 human_output_opts = human_autoscale | human_SI | human_base_1024;
605 output_block_size = 1;
606 break;
608 case 'H':
609 human_output_opts = human_autoscale | human_SI;
610 output_block_size = 1;
611 break;
613 case 'k':
614 human_output_opts = 0;
615 output_block_size = 1024;
616 break;
618 case MAX_DEPTH_OPTION: /* --max-depth=N */
619 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) == LONGINT_OK
620 && 0 <= tmp_long && tmp_long <= INT_MAX)
622 max_depth_specified = 1;
623 max_depth = (int) tmp_long;
625 else
627 error (0, 0, _("invalid maximum depth %s"),
628 quote (optarg));
629 fail = 1;
631 break;
633 case 'm': /* obsolescent */
634 human_output_opts = 0;
635 output_block_size = 1024 * 1024;
636 break;
638 case 'l':
639 opt_count_all = 1;
640 break;
642 case 's':
643 opt_summarize_only = 1;
644 break;
646 case 'x':
647 ftw_flags |= FTW_MOUNT;
648 break;
650 case 'B':
651 human_output_opts = human_options (optarg, true, &output_block_size);
652 break;
654 case 'D':
655 opt_dereference_arguments = 1;
656 break;
658 case 'L':
659 ftw_flags &= ~FTW_PHYS;
660 break;
662 case 'S':
663 opt_separate_dirs = 1;
664 break;
666 case 'X':
667 if (add_exclude_file (add_exclude, exclude, optarg,
668 EXCLUDE_WILDCARDS, '\n'))
670 error (0, errno, "%s", quotearg_colon (optarg));
671 fail = 1;
673 break;
675 case EXCLUDE_OPTION:
676 add_exclude (exclude, optarg, EXCLUDE_WILDCARDS);
677 break;
679 case_GETOPT_HELP_CHAR;
681 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
683 default:
684 fail = 1;
688 if (fail)
689 usage (EXIT_FAILURE);
691 if (opt_all && opt_summarize_only)
693 error (0, 0, _("cannot both summarize and show all entries"));
694 usage (EXIT_FAILURE);
697 if (opt_summarize_only && max_depth_specified && max_depth == 0)
699 error (0, 0,
700 _("warning: summarizing is the same as using --max-depth=0"));
703 if (opt_summarize_only && max_depth_specified && max_depth != 0)
705 error (0, 0,
706 _("warning: summarizing conflicts with --max-depth=%d"),
707 max_depth);
708 usage (EXIT_FAILURE);
711 if (opt_summarize_only)
712 max_depth = 0;
714 files = (optind == argc ? cwd_only : argv + optind);
716 /* Initialize the hash structure for inode numbers. */
717 hash_init ();
719 exit (du_files (files, ftw_flags) || G_fail
720 ? EXIT_FAILURE : EXIT_SUCCESS);