1 /* du -- summarize disk usage
2 Copyright (C) 1988-1991, 1995-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
22 By tege@sics.se, Torbjorn Granlund,
23 and djm@ai.mit.edu, David MacKenzie.
24 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
25 Rewritten to use nftw by Jim Meyering. */
30 #include <sys/types.h>
34 #include "dirname.h" /* for strip_trailing_slashes */
40 #include "mmap-stack.h"
46 /* The official name of this program (e.g., no `g' prefix). */
47 #define PROGRAM_NAME "du"
50 N_ ("Torbjorn Granlund, David MacKenzie, Larry McVoy, Paul Eggert, and Jim Meyering")
52 /* Initial size of the hash table. */
53 #define INITIAL_TABLE_SIZE 103
55 /* The maximum number of simultaneously open file handles that
56 may be used by nftw. */
57 #define MAX_N_DESCRIPTORS \
58 (UTILS_OPEN_MAX < 20 \
60 : UTILS_OPEN_MAX - 10)
62 /* Hash structure for inode and device numbers. The separate entry
63 structure makes it easier to rehash "in place". */
71 /* A set of dev/ino pairs. */
72 static Hash_table
*htab
;
74 /* Name under which this program was invoked. */
/* Nonzero means report a count for every file, not only for
   directories.  */
static int opt_all;

/* Nonzero means every hard link of a multiply-linked file is
   counted.  */
static int opt_count_all;

/* Nonzero means finish with a grand total.  */
static int print_totals;

/* Nonzero means the sizes of subdirectories are not added in.  */
static int opt_separate_dirs;

/* Nonzero means symlinks named on the command line are dereferenced.
   Doing so while still using nftw is a little tricky: each command
   line argument that is a symlink-to-directory is handed to nftw as
   "command_line_arg/.", and the added `/.' must then be left out
   when printing.  */
static int opt_dereference_arguments;

/* Totals are shown for each directory (and for each file, with --all)
   lying at most MAX_DEPTH levels below the root of the hierarchy.
   The root is level 0, so `du --max-depth=0' is equivalent to
   `du -s'.  */
static int max_depth = INT_MAX;

/* Human-readable options for output.  */
static int human_output_opts;

/* The units to use when printing sizes.  */
static uintmax_t output_block_size;

/* File name patterns to exclude.  */
static struct exclude *exclude;

/* Grand total size of all args, in units of ST_NBLOCKSIZE-byte
   blocks.  */
static uintmax_t tot_size;
113 /* In some cases, we have to append `/.' or just `.' to an argument
114 (to dereference a symlink). When we do that, we don't want to
115 expose this artifact when printing file/directory names, so these
116 variables keep track of the length of the original command line
117 argument and the length of the suffix we've added, respectively.
118 ARG_LENGTH == 0 indicates that we haven't added a suffix.
119 This information is used to omit any such added characters when
122 size_t suffix_length
;
124 /* Nonzero indicates that du should exit with EXIT_FAILURE upon completion. */
127 #define IS_FTW_DIR_TYPE(Type) \
129 || (Type) == FTW_DP \
130 || (Type) == FTW_DNR)
132 /* For long options that have no equivalent short option, use a
133 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
136 EXCLUDE_OPTION
= CHAR_MAX
+ 1,
140 static struct option
const long_options
[] =
142 {"all", no_argument
, NULL
, 'a'},
143 {"block-size", required_argument
, 0, 'B'},
144 {"bytes", no_argument
, NULL
, 'b'},
145 {"count-links", no_argument
, NULL
, 'l'},
146 {"dereference", no_argument
, NULL
, 'L'},
147 {"dereference-args", no_argument
, NULL
, 'D'},
148 {"exclude", required_argument
, 0, EXCLUDE_OPTION
},
149 {"exclude-from", required_argument
, 0, 'X'},
150 {"human-readable", no_argument
, NULL
, 'h'},
151 {"si", no_argument
, 0, 'H'},
152 {"kilobytes", no_argument
, NULL
, 'k'}, /* long form is obsolescent */
153 {"max-depth", required_argument
, NULL
, MAX_DEPTH_OPTION
},
154 {"megabytes", no_argument
, NULL
, 'm'}, /* obsolescent */
155 {"one-file-system", no_argument
, NULL
, 'x'},
156 {"separate-dirs", no_argument
, NULL
, 'S'},
157 {"summarize", no_argument
, NULL
, 's'},
158 {"total", no_argument
, NULL
, 'c'},
159 {GETOPT_HELP_OPTION_DECL
},
160 {GETOPT_VERSION_OPTION_DECL
},
168 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
172 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name
);
174 Summarize disk usage of each FILE, recursively for directories.\n\
178 Mandatory arguments to long options are mandatory for short options too.\n\
181 -a, --all write counts for all files, not just directories\n\
182 -B, --block-size=SIZE use SIZE-byte blocks\n\
183 -b, --bytes print size in bytes\n\
184 -c, --total produce a grand total\n\
185 -D, --dereference-args dereference FILEs that are symbolic links\n\
188 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
189 -H, --si likewise, but use powers of 1000 not 1024\n\
190 -k like --block-size=1K\n\
191 -l, --count-links count sizes many times if hard linked\n\
194 -L, --dereference dereference all symbolic links\n\
195 -S, --separate-dirs do not include size of subdirectories\n\
196 -s, --summarize display only a total for each argument\n\
199 -x, --one-file-system skip directories on different filesystems\n\
200 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
201 --exclude=PATTERN Exclude files that match PATTERN.\n\
202 --max-depth=N print the total for a directory (or file, with --all)\n\
203 only if it is N or fewer levels below the command\n\
204 line argument; --max-depth=0 is the same as\n\
207 fputs (HELP_OPTION_DESCRIPTION
, stdout
);
208 fputs (VERSION_OPTION_DESCRIPTION
, stdout
);
210 SIZE may be (or may be an integer optionally followed by) one of following:\n\
211 kB 1000, K 1024, MB 1,000,000, M 1,048,576, and so on for G, T, P, E, Z, Y.\n\
213 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT
);
219 entry_hash (void const *x
, unsigned int table_size
)
221 struct entry
const *p
= x
;
223 /* Ignoring the device number here should be fine. */
224 /* The cast to uintmax_t prevents negative remainders
225 if st_ino is negative. */
226 return (uintmax_t) p
->st_ino
% table_size
;
229 /* Compare two dev/ino pairs. Return true if they are the same. */
231 entry_compare (void const *x
, void const *y
)
233 struct entry
const *a
= x
;
234 struct entry
const *b
= y
;
235 return SAME_INODE (*a
, *b
) ? true : false;
238 /* Try to insert the INO/DEV pair into the global table, HTAB.
239 If the pair is successfully inserted, return zero.
240 Upon failed memory allocation exit nonzero.
241 If the pair is already in the table, return nonzero. */
243 hash_ins (ino_t ino
, dev_t dev
)
246 struct entry
*ent_from_table
;
248 ent
= (struct entry
*) xmalloc (sizeof *ent
);
252 ent_from_table
= hash_insert (htab
, ent
);
253 if (ent_from_table
== NULL
)
255 /* Insertion failed due to lack of memory. */
259 if (ent_from_table
== ent
)
261 /* Insertion succeeded. */
265 /* That pair is already in the table, so ENT was not inserted. Free it. */
271 /* Initialize the hash table. */
275 htab
= hash_initialize (INITIAL_TABLE_SIZE
, NULL
,
276 entry_hash
, entry_compare
, free
);
281 /* Print N_BLOCKS. NBLOCKS is the number of
282 ST_NBLOCKSIZE-byte blocks; convert it to a readable value before
286 print_only_size (uintmax_t n_blocks
)
288 char buf
[LONGEST_HUMAN_READABLE
+ 1];
289 fputs (human_readable (n_blocks
, buf
, human_output_opts
,
290 ST_NBLOCKSIZE
, output_block_size
), stdout
);
293 /* Print N_BLOCKS followed by STRING on a line. N_BLOCKS is the number of
294 ST_NBLOCKSIZE-byte blocks; convert it to a readable value before
298 print_size (uintmax_t n_blocks
, const char *string
)
300 char buf
[LONGEST_HUMAN_READABLE
+ 1];
302 human_readable (n_blocks
, buf
, human_output_opts
,
303 ST_NBLOCKSIZE
, output_block_size
),
308 /* This function is called once for every file system object that nftw
309 encounters. nftw does a depth-first traversal. This function knows
310 that and accumulates per-directory totals based on changes in
311 the depth of the current entry. */
314 process_file (const char *file
, const struct stat
*sb
, int file_type
,
318 size_t size_including_subdirs
;
319 size_t size_to_propagate_to_parent
;
320 static int first_call
= 1;
321 static size_t prev_level
;
322 static size_t n_alloc
;
323 static uintmax_t *sum
;
325 /* Always define info->skip before returning. */
326 info
->skip
= excluded_filename (exclude
, file
+ info
->base
);
331 error (0, errno
, _("cannot access %s"), quote (file
));
336 error (0, errno
, _("cannot change to parent of directory %s"),
342 /* Don't return just yet, since although nftw couldn't chdir into the
343 directory, it was able to stat it, so we do have a size. */
344 error (0, errno
, _("cannot change to directory %s"), quote (file
));
349 /* Don't return just yet, since although nftw couldn't read the
350 directory, it was able to stat it, so we do have a size. */
351 error (0, errno
, _("cannot read directory %s"), quote (file
));
359 /* If this is the first (pre-order) encounter with a directory,
360 return right away. */
361 if (file_type
== FTW_DPRE
)
364 /* If the file is being excluded or if it has already been counted
365 via a hard link, then don't let it contribute to the sums. */
369 && hash_ins (sb
->st_ino
, sb
->st_dev
)))
371 /* Note that we must not simply return here.
372 We still have to update prev_level and maybe propagate
373 some sums up the hierarchy. */
378 size
= ST_NBLOCKS (*sb
);
383 n_alloc
= info
->level
+ 10;
384 sum
= XCALLOC (uintmax_t, n_alloc
);
388 /* FIXME: it's a shame that we need these `size_t' casts to avoid
389 warnings from gcc about `comparison between signed and unsigned'.
390 Probably unavoidable, assuming that the members of struct FTW
391 are of type `int' (historical), since I want variables like
392 n_alloc and prev_level to have types that make sense. */
393 if (n_alloc
<= (size_t) info
->level
)
395 n_alloc
= info
->level
* 2;
396 sum
= XREALLOC (sum
, uintmax_t, n_alloc
);
400 size_to_propagate_to_parent
= size_including_subdirs
= size
;
404 if ((size_t) info
->level
== prev_level
)
406 /* This is usually the most common case. Do nothing. */
408 else if ((size_t) info
->level
> prev_level
)
410 /* Descending the hierarchy.
411 Clear the accumulators for *all* levels between prev_level
412 and the current one. The depth may change dramatically,
413 e.g., from 1 to 10. */
415 for (i
= prev_level
+ 1; i
<= info
->level
; i
++)
418 else /* info->level < prev_level */
420 /* Ascending the hierarchy.
421 nftw processes a directory only after all entries in that
422 directory have been processed. When the depth decreases,
423 propagate sums from the children (prev_level) to the parent.
424 Here, the current level is always one smaller than the
426 assert ((size_t) info
->level
== prev_level
- 1);
427 size_to_propagate_to_parent
= size_including_subdirs
428 = size
+ sum
[prev_level
];
432 if (opt_separate_dirs
)
433 size_to_propagate_to_parent
= 0;
435 prev_level
= info
->level
;
438 sum
[info
->level
] += size_to_propagate_to_parent
;
440 /* Even if this directory was unreadable or we couldn't chdir into it,
441 do let its size contribute to the total, ... */
444 /* ... but don't print out a total for it, since without the size(s)
445 of any potential entries, it could be very misleading. */
446 if (file_type
== FTW_DNR
|| file_type
== FTW_DCH
)
449 /* FIXME: This looks suspiciously like it could be simplified. */
450 if ((IS_FTW_DIR_TYPE (file_type
) &&
451 (info
->level
<= max_depth
|| info
->level
== 0))
452 || ((opt_all
&& info
->level
<= max_depth
) || info
->level
== 0))
454 print_only_size (size_including_subdirs
);
455 fputc ('\t', stdout
);
458 /* Print the file name, but without the `.' or `/.'
459 directory suffix that we may have added in main. */
460 /* Print everything before the part we appended. */
461 fwrite (file
, arg_length
, 1, stdout
);
462 /* Print everything after what we appended. */
463 fputs (file
+ arg_length
+ suffix_length
464 + (file
[arg_length
+ suffix_length
] == '/'), stdout
);
468 fputs (file
, stdout
);
470 fputc ('\n', stdout
);
/* Return 1 if FILE, with any trailing slashes removed, is a symbolic
   link (per lstat) whose target is a directory (per stat); return 0
   otherwise, including when either call fails.
   NOTE(review): the return type and the two local declarations are not
   visible in this view; they are reconstructed from how the names are
   used -- confirm.  */
static int
is_symlink_to_dir (char const *file)
{
  char *name;
  struct stat st;

  /* Strip the slashes from a duplicate obtained through ASSIGN_STRDUPA,
     since FILE itself is const.  */
  ASSIGN_STRDUPA (name, file);
  strip_trailing_slashes (name);

  /* First look at the link itself...  */
  if (lstat (name, &st) != 0 || !S_ISLNK (st.st_mode))
    return 0;

  /* ... then at whatever it points to.  */
  if (stat (name, &st) != 0)
    return 0;

  return S_ISDIR (st.st_mode) ? 1 : 0;
}
489 /* Recursively print the sizes of the directories (and, if selected, files)
490 named in FILES, the last entry of which is NULL.
491 FTW_FLAGS controls how nftw works.
492 Never return: exit with EXIT_FAILURE if any error occurred, and with EXIT_SUCCESS otherwise. */
494 static void du_files (char **files
, int ftw_flags
) ATTRIBUTE_NORETURN
;
496 du_files (char **files
, int ftw_flags
)
500 for (i
= 0; files
[i
]; i
++)
502 char *file
= files
[i
];
510 /* When dereferencing only command line arguments, we're using
511 nftw's FTW_PHYS flag, so a symlink-to-directory specified on
512 the command line wouldn't normally be dereferenced. To work
513 around that, we incur the overhead of appending `/.' (or `.')
514 now, and later removing it each time we output the name of
515 a derived file or directory. */
516 if (opt_dereference_arguments
&& is_symlink_to_dir (file
))
518 size_t len
= strlen (file
);
519 /* Append `/.', but if there's already a trailing slash,
520 append only the `.'. */
521 char const *suffix
= (file
[len
- 1] == '/' ? "." : "/.");
523 suffix_length
= strlen (suffix
);
524 new_file
= xmalloc (len
+ suffix_length
+ 1);
525 memcpy (mempcpy (new_file
, file
, len
), suffix
, suffix_length
+ 1);
530 err
= nftw (file
, process_file
, MAX_N_DESCRIPTORS
, ftw_flags
);
532 error (0, errno
, "%s", quote (orig
));
540 print_size (tot_size
, _("total"));
542 exit (fail
|| G_fail
? EXIT_FAILURE
: EXIT_SUCCESS
);
546 main (int argc
, char **argv
)
550 int max_depth_specified
= 0;
554 /* Bit flags that control how nftw works. */
555 int ftw_flags
= FTW_DEPTH
| FTW_PHYS
| FTW_CHDIR
;
557 /* If nonzero, display only a total for each argument. */
558 int opt_summarize_only
= 0;
563 program_name
= argv
[0];
564 setlocale (LC_ALL
, "");
565 bindtextdomain (PACKAGE
, LOCALEDIR
);
566 textdomain (PACKAGE
);
568 atexit (close_stdout
);
570 exclude
= new_exclude ();
572 human_output_opts
= human_options (getenv ("DU_BLOCK_SIZE"), false,
576 while ((c
= getopt_long (argc
, argv
, "abchHklmsxB:DLSX:", long_options
, NULL
))
582 case 0: /* Long option. */
590 human_output_opts
= 0;
591 output_block_size
= 1;
599 human_output_opts
= human_autoscale
| human_SI
| human_base_1024
;
600 output_block_size
= 1;
604 human_output_opts
= human_autoscale
| human_SI
;
605 output_block_size
= 1;
609 human_output_opts
= 0;
610 output_block_size
= 1024;
613 case MAX_DEPTH_OPTION
: /* --max-depth=N */
614 if (xstrtol (optarg
, NULL
, 0, &tmp_long
, NULL
) == LONGINT_OK
615 && 0 <= tmp_long
&& tmp_long
<= INT_MAX
)
617 max_depth_specified
= 1;
618 max_depth
= (int) tmp_long
;
622 error (0, 0, _("invalid maximum depth %s"),
628 case 'm': /* obsolescent */
629 human_output_opts
= 0;
630 output_block_size
= 1024 * 1024;
638 opt_summarize_only
= 1;
642 ftw_flags
|= FTW_MOUNT
;
646 human_output_opts
= human_options (optarg
, true, &output_block_size
);
650 opt_dereference_arguments
= 1;
654 ftw_flags
&= ~FTW_PHYS
;
658 opt_separate_dirs
= 1;
662 if (add_exclude_file (add_exclude
, exclude
, optarg
,
663 EXCLUDE_WILDCARDS
, '\n'))
665 error (0, errno
, "%s", quotearg_colon (optarg
));
671 add_exclude (exclude
, optarg
, EXCLUDE_WILDCARDS
);
674 case_GETOPT_HELP_CHAR
;
676 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
684 usage (EXIT_FAILURE
);
686 if (opt_all
&& opt_summarize_only
)
688 error (0, 0, _("cannot both summarize and show all entries"));
689 usage (EXIT_FAILURE
);
692 if (opt_summarize_only
&& max_depth_specified
&& max_depth
== 0)
695 _("warning: summarizing is the same as using --max-depth=0"));
698 if (opt_summarize_only
&& max_depth_specified
&& max_depth
!= 0)
701 _("warning: summarizing conflicts with --max-depth=%d"),
703 usage (EXIT_FAILURE
);
706 if (opt_summarize_only
)
709 files
= (optind
== argc
? cwd_only
: argv
+ optind
);
711 /* Initialize the hash structure for inode numbers. */
714 RUN_WITH_BIG_STACK_2 (du_files
, files
, ftw_flags
);