1 /* du -- summarize disk usage
2 Copyright (C) 88, 89, 90, 91, 1995-2000 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
22 -l Count the size of all files, even if they have appeared
23 already in another hard link.
24 -x Do not cross file-system boundaries during the recursion.
25 -c Write a grand total of all of the arguments after all
26 arguments have been processed. This can be used to find
27 out the disk usage of a directory, with some files excluded.
28 -h Print sizes in human readable format (1k 234M 2G, etc).
29 -H Similar, but use powers of 1000 not 1024.
30 -k Print sizes in kilobytes.
31 -m Print sizes in megabytes.
32 -b Print sizes in bytes.
33 -S Count the size of each directory separately, not including
34 the sizes of subdirectories.
35 -D Dereference only symbolic links given on the command line.
36 -L Dereference all symbolic links.
37 --exclude=PAT Exclude files that match PAT.
38 -X FILE Exclude files that match patterns taken from FILE.
40 By tege@sics.se, Torbjorn Granlund,
41 and djm@ai.mit.edu, David MacKenzie.
42 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
47 # include <inttypes.h>
51 #include <sys/types.h>
63 /* The official name of this program (e.g., no `g' prefix). */
64 #define PROGRAM_NAME "du"
67 "Torbjorn Granlund, David MacKenzie, Larry McVoy, and Paul Eggert"
69 /* Initial number of entries in each hash table entry's table of inodes. */
70 #define INITIAL_HASH_MODULE 100
72 /* Initial number of entries in the inode hash table. */
73 #define INITIAL_ENTRY_TAB_SIZE 70
75 /* Initial size to allocate for `path'. */
76 #define INITIAL_PATH_SIZE 100
78 /* Hash structure for inode and device numbers. The separate entry
79 structure makes it easier to rehash "in place". */
85 struct entry
*coll_link
;
88 /* Structure for a hash table for inode numbers. */
92 unsigned modulus
; /* Size of the `hash' pointer vector. */
93 struct entry
*entry_tab
; /* Pointer to dynamically growing vector. */
94 unsigned entry_tab_size
; /* Size of current `entry_tab' allocation. */
95 unsigned first_free_entry
; /* Index in `entry_tab'. */
96 struct entry
*hash
[1]; /* Vector of pointers in `entry_tab'. */
100 /* Structure for dynamically resizable strings. */
104 unsigned alloc
; /* Size of allocation for the text. */
105 unsigned length
; /* Length of the text currently. */
106 char *text
; /* Pointer to the text. */
108 typedef struct String String
;
113 /* Name under which this program was invoked. */
116 /* If nonzero, display counts for all files, not just directories. */
117 static int opt_all
= 0;
119 /* If nonzero, count each hard link of files with multiple links. */
120 static int opt_count_all
= 0;
122 /* If nonzero, do not cross file-system boundaries. */
123 static int opt_one_file_system
= 0;
125 /* If nonzero, print a grand total at the end. */
126 static int print_totals
= 0;
128 /* If nonzero, do not add sizes of subdirectories. */
129 static int opt_separate_dirs
= 0;
131 /* If nonzero, dereference symlinks that are command line arguments. */
132 static int opt_dereference_arguments
= 0;
134 /* Show the total for each directory (and file if --all) that is at
135 most MAX_DEPTH levels down from the root of the hierarchy. The root
136 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
137 static int max_depth
= INT_MAX
;
139 /* If positive, the units to use when printing sizes;
140 if negative, the human-readable base. */
141 static int output_block_size
;
143 /* Accumulated path for file or directory being processed. */
146 /* Pointer to hash structure, used by the hash routines. */
147 static struct htab
*htab
;
149 /* A pointer to either lstat or stat, depending on whether
150 dereferencing of all symbolic links is to be done. */
151 static int (*xstat
) ();
153 /* The exit status to use if we don't get any fatal errors. */
154 static int exit_status
;
156 /* File name patterns to exclude. */
157 static struct exclude
*exclude
;
159 /* Grand total size of all args, in units of ST_NBLOCKSIZE-byte blocks. */
160 static uintmax_t tot_size
= 0;
162 /* For long options that have no equivalent short option, use a
163 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
166 EXCLUDE_OPTION
= CHAR_MAX
+ 1,
171 static struct option
const long_options
[] =
173 {"all", no_argument
, NULL
, 'a'},
174 {"block-size", required_argument
, 0, BLOCK_SIZE_OPTION
},
175 {"bytes", no_argument
, NULL
, 'b'},
176 {"count-links", no_argument
, NULL
, 'l'},
177 {"dereference", no_argument
, NULL
, 'L'},
178 {"dereference-args", no_argument
, NULL
, 'D'},
179 {"exclude", required_argument
, 0, EXCLUDE_OPTION
},
180 {"exclude-from", required_argument
, 0, 'X'},
181 {"human-readable", no_argument
, NULL
, 'h'},
182 {"si", no_argument
, 0, 'H'},
183 {"kilobytes", no_argument
, NULL
, 'k'},
184 {"max-depth", required_argument
, NULL
, MAX_DEPTH_OPTION
},
185 {"megabytes", no_argument
, NULL
, 'm'},
186 {"one-file-system", no_argument
, NULL
, 'x'},
187 {"separate-dirs", no_argument
, NULL
, 'S'},
188 {"summarize", no_argument
, NULL
, 's'},
189 {"total", no_argument
, NULL
, 'c'},
190 {GETOPT_HELP_OPTION_DECL
},
191 {GETOPT_VERSION_OPTION_DECL
},
199 fprintf (stderr
, _("Try `%s --help' for more information.\n"),
203 printf (_("Usage: %s [OPTION]... [FILE]...\n"), program_name
);
205 Summarize disk usage of each FILE, recursively for directories.\n\
207 -a, --all write counts for all files, not just directories\n\
208 --block-size=SIZE use SIZE-byte blocks\n\
209 -b, --bytes print size in bytes\n\
210 -c, --total produce a grand total\n\
211 -D, --dereference-args dereference PATHs when symbolic link\n\
212 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
213 -H, --si likewise, but use powers of 1000 not 1024\n\
214 -k, --kilobytes like --block-size=1024\n\
215 -l, --count-links count sizes many times if hard linked\n\
216 -L, --dereference dereference all symbolic links\n\
217 -m, --megabytes like --block-size=1048576\n\
218 -S, --separate-dirs do not include size of subdirectories\n\
219 -s, --summarize display only a total for each argument\n\
220 -x, --one-file-system skip directories on different filesystems\n\
221 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
222 --exclude=PAT Exclude files that match PAT.\n\
223 --max-depth=N print the total for a directory (or file, with --all)\n\
224 only if it is N or fewer levels below the command\n\
225 line argument; --max-depth=0 is the same as\n\
227 --help display this help and exit\n\
228 --version output version information and exit\n\
230 puts (_("\nReport bugs to <bug-fileutils@gnu.org>."));
235 /* Initialize string S1 to hold SIZE characters. */
238 str_init (String
**s1
, unsigned int size
)
242 s
= (String
*) xmalloc (sizeof (struct String
));
243 s
->text
= xmalloc (size
+ 1);
250 ensure_space (String
*s
, unsigned int size
)
254 s
->text
= xrealloc (s
->text
, size
+ 1);
259 /* Assign the null-terminated C-string CSTR to S1. */
262 str_copyc (String
*s1
, const char *cstr
)
264 unsigned l
= strlen (cstr
);
265 ensure_space (s1
, l
);
266 strcpy (s1
->text
, cstr
);
271 str_concatc (String
*s1
, const char *cstr
)
273 unsigned l1
= s1
->length
;
274 unsigned l2
= strlen (cstr
);
275 unsigned l
= l1
+ l2
;
277 ensure_space (s1
, l
);
278 strcpy (s1
->text
+ l1
, cstr
);
282 /* Truncate the string S1 to have length LENGTH. */
285 str_trunc (String
*s1
, unsigned int length
)
287 if (s1
->length
> length
)
289 s1
->text
[length
] = 0;
294 /* Print N_BLOCKS followed by STRING on a line. NBLOCKS is the number of
295 ST_NBLOCKSIZE-byte blocks; convert it to OUTPUT_BLOCK_SIZE units before
296 printing. If OUTPUT_BLOCK_SIZE is negative, use a human readable
300 print_size (uintmax_t n_blocks
, const char *string
)
302 char buf
[LONGEST_HUMAN_READABLE
+ 1];
304 human_readable_inexact (n_blocks
, buf
, ST_NBLOCKSIZE
,
305 output_block_size
, human_ceiling
),
310 /* Reset the hash structure in the global variable `htab' to
311 contain no entries. */
319 htab
->first_free_entry
= 0;
322 for (i
= htab
->modulus
; i
> 0; i
--)
326 /* Allocate space for the hash structures, and set the global
327 variable `htab' to point to it. The initial hash module is specified in
328 MODULUS, and the number of entries are specified in ENTRY_TAB_SIZE. (The
329 hash structure will be rebuilt when ENTRY_TAB_SIZE entries have been
330 inserted, and MODULUS and ENTRY_TAB_SIZE in the global `htab' will be
334 hash_init (unsigned int modulus
, unsigned int entry_tab_size
)
338 htab_r
= (struct htab
*)
339 xmalloc (sizeof (struct htab
) + sizeof (struct entry
*) * modulus
);
341 htab_r
->entry_tab
= (struct entry
*)
342 xmalloc (sizeof (struct entry
) * entry_tab_size
);
344 htab_r
->modulus
= modulus
;
345 htab_r
->entry_tab_size
= entry_tab_size
;
351 /* Insert INO and DEV in the hash structure HTAB, if not
352 already present. Return zero if inserted and nonzero if it
356 hash_insert2 (struct htab
*ht
, ino_t ino
, dev_t dev
)
358 struct entry
**hp
, *ep2
, *ep
;
359 hp
= &ht
->hash
[ino
% ht
->modulus
];
368 /* Search for an entry with the same data. */
372 if (ep
->ino
== ino
&& ep
->dev
== dev
)
373 return 1; /* Found an entry with the same data. */
378 /* Did not find it. */
382 ep
= *hp
= &ht
->entry_tab
[ht
->first_free_entry
++];
385 ep
->coll_link
= ep2
; /* `ep2' is NULL if no collision. */
390 /* Insert an item (inode INO and device DEV) in the hash
391 structure in the global variable `htab', if an entry with the same data
392 was not found already. Return zero if the item was inserted and nonzero
396 hash_insert (ino_t ino
, dev_t dev
)
398 struct htab
*htab_r
= htab
; /* Initially a copy of the global `htab'. */
400 if (htab_r
->first_free_entry
>= htab_r
->entry_tab_size
)
405 unsigned entry_tab_size
;
407 /* Increase the number of hash entries, and re-hash the data.
408 The method of shrimping and increasing is made to compactify
409 the heap. If twice as much data would be allocated
410 straightforwardly, we would never re-use a byte of memory. */
412 /* Let `htab' shrimp. Keep only the header, not the pointer vector. */
414 htab_r
= (struct htab
*)
415 xrealloc ((char *) htab_r
, sizeof (struct htab
));
417 modulus
= 2 * htab_r
->modulus
;
418 entry_tab_size
= 2 * htab_r
->entry_tab_size
;
420 /* Increase the number of possible entries. */
422 htab_r
->entry_tab
= (struct entry
*)
423 xrealloc ((char *) htab_r
->entry_tab
,
424 sizeof (struct entry
) * entry_tab_size
);
426 /* Increase the size of htab again. */
428 htab_r
= (struct htab
*)
429 xrealloc ((char *) htab_r
,
430 sizeof (struct htab
) + sizeof (struct entry
*) * modulus
);
432 htab_r
->modulus
= modulus
;
433 htab_r
->entry_tab_size
= entry_tab_size
;
436 i
= htab_r
->first_free_entry
;
438 /* Make the increased hash table empty. The entries are still
439 available in htab->entry_tab. */
443 /* Go through the entries and install them in the pointer vector
444 htab->hash. The items are actually inserted in htab->entry_tab at
445 the position where they already are. The htab->coll_link need
446 however be updated. Could be made a little more efficient. */
448 for (ep
= htab_r
->entry_tab
; i
> 0; i
--)
450 hash_insert2 (htab_r
, ep
->ino
, ep
->dev
);
455 return hash_insert2 (htab_r
, ino
, dev
);
458 /* Restore the previous working directory or exit.
459 If CWD is null, simply call `chdir ("..")'. Otherwise,
460 use CWD and free it. CURR_DIR_NAME is the name of the current directory
461 and is used solely in failure diagnostics. */
464 pop_dir (struct saved_cwd
*cwd
, const char *curr_dir_name
)
468 if (restore_cwd (cwd
, "..", curr_dir_name
))
472 else if (chdir ("..") < 0)
474 error (1, errno
, _("cannot change to `..' from directory %s"),
475 quote (curr_dir_name
));
479 /* Print (if appropriate) the size (in units determined by `output_block_size')
480 of file or directory ENT. Return the size of ENT in units of 512-byte
481 blocks. TOP is one for external calls, zero for recursive calls.
482 LAST_DEV is the device that the parent directory of ENT is on.
483 DEPTH is the number of levels (in hierarchy) down from a command
484 line argument. Don't print if DEPTH > max_depth.
485 An important invariant is that when this function returns, the current
486 working directory is the same as when it was called. */
489 count_entry (const char *ent
, int top
, dev_t last_dev
, int depth
)
492 struct stat stat_buf
;
494 if (((top
&& opt_dereference_arguments
)
495 ? stat (ent
, &stat_buf
)
496 : (*xstat
) (ent
, &stat_buf
)) < 0)
498 error (0, errno
, "%s", quote (path
->text
));
504 && stat_buf
.st_nlink
> 1
505 && hash_insert (stat_buf
.st_ino
, stat_buf
.st_dev
))
506 return 0; /* Have counted this already. */
508 size
= ST_NBLOCKS (stat_buf
);
511 if (S_ISDIR (stat_buf
.st_mode
))
517 struct saved_cwd
*cwd
;
518 struct saved_cwd cwd_buf
;
521 dir_dev
= stat_buf
.st_dev
;
523 /* Return `0' here, not SIZE, since the SIZE bytes
524 would reside in the new filesystem. */
525 if (opt_one_file_system
&& !top
&& last_dev
!= dir_dev
)
526 return 0; /* Don't enter a new file system. */
529 # define S_ISLNK(s) 0
531 /* If we're traversing more than one level, or if we're
532 dereferencing symlinks and we're about to chdir through a
533 symlink, remember the current directory so we can return to
534 it later. In other cases, chdir ("..") works fine.
535 Treat `.' and `..' like multi-level paths, since `chdir ("..")'
536 wont't restore the current working directory after a `chdir'
538 if (strchr (ent
, '/')
539 || DOT_OR_DOTDOT (ent
)
541 && lstat (ent
, &e_buf
) == 0
542 && S_ISLNK (e_buf
.st_mode
)))
544 if (save_cwd (&cwd_buf
))
553 error (0, errno
, _("cannot change to directory %s"),
558 /* Do return SIZE, here, since even though we can't chdir into ENT,
559 we *can* count the blocks used by its directory entry. */
560 return opt_separate_dirs
? 0 : size
;
563 name_space
= savedir (".", stat_buf
.st_size
);
564 if (name_space
== NULL
)
566 error (0, errno
, "%s", quote (path
->text
));
567 pop_dir (cwd
, path
->text
);
569 /* Do count the SIZE bytes. */
570 return opt_separate_dirs
? 0 : size
;
573 /* Remember the current path. */
575 str_concatc (path
, "/");
576 pathlen
= path
->length
;
578 for (namep
= name_space
; *namep
; namep
+= strlen (namep
) + 1)
580 if (!excluded_filename (exclude
, namep
, 0))
582 str_concatc (path
, namep
);
583 size
+= count_entry (namep
, 0, dir_dev
, depth
+ 1);
584 str_trunc (path
, pathlen
);
589 pop_dir (cwd
, path
->text
);
591 str_trunc (path
, pathlen
- 1); /* Remove the "/" we added. */
592 if (depth
<= max_depth
|| top
)
593 print_size (size
, path
->length
> 0 ? path
->text
: "/");
594 return opt_separate_dirs
? 0 : size
;
596 else if ((opt_all
&& depth
<= max_depth
) || top
)
598 /* FIXME: make this an option. */
599 int print_only_dir_size
= 0;
600 if (!print_only_dir_size
)
601 print_size (size
, path
->length
> 0 ? path
->text
: "/");
607 /* Recursively print the sizes of the directories (and, if selected, files)
608 named in FILES, the last entry of which is NULL. */
611 du_files (char **files
)
613 int i
; /* Index in FILES. */
615 for (i
= 0; files
[i
]; i
++)
622 /* Delete final slash in the argument, unless the slash is alone. */
623 s
= strlen (arg
) - 1;
629 str_copyc (path
, arg
);
631 else if (arg
[0] == '/')
632 str_trunc (path
, 0); /* Null path for root directory. */
634 str_copyc (path
, arg
);
639 count_entry (arg
, 1, 0, 0);
643 print_size (tot_size
, _("total"));
647 main (int argc
, char **argv
)
651 int max_depth_specified
= 0;
653 /* If nonzero, display only a total for each argument. */
654 int opt_summarize_only
= 0;
659 program_name
= argv
[0];
660 setlocale (LC_ALL
, "");
661 bindtextdomain (PACKAGE
, LOCALEDIR
);
662 textdomain (PACKAGE
);
664 atexit (close_stdout
);
666 exclude
= new_exclude ();
669 human_block_size (getenv ("DU_BLOCK_SIZE"), 0, &output_block_size
);
671 while ((c
= getopt_long (argc
, argv
, "abchHklmsxDLSX:", long_options
, NULL
))
677 case 0: /* Long option. */
685 output_block_size
= 1;
693 output_block_size
= -1024;
697 output_block_size
= -1000;
701 output_block_size
= 1024;
704 case MAX_DEPTH_OPTION
: /* --max-depth=N */
705 if (xstrtol (optarg
, NULL
, 0, &tmp_long
, NULL
) != LONGINT_OK
706 || tmp_long
< 0 || tmp_long
> INT_MAX
)
707 error (1, 0, _("invalid maximum depth %s"), quote (optarg
));
709 max_depth_specified
= 1;
710 max_depth
= (int) tmp_long
;
714 output_block_size
= 1024 * 1024;
722 opt_summarize_only
= 1;
726 opt_one_file_system
= 1;
730 opt_dereference_arguments
= 1;
738 opt_separate_dirs
= 1;
742 if (add_exclude_file (add_exclude
, exclude
, optarg
, '\n') != 0)
743 error (1, errno
, "%s", quote (optarg
));
747 add_exclude (exclude
, optarg
);
750 case BLOCK_SIZE_OPTION
:
751 human_block_size (optarg
, 1, &output_block_size
);
754 case_GETOPT_HELP_CHAR
;
756 case_GETOPT_VERSION_CHAR (PROGRAM_NAME
, AUTHORS
);
763 if (opt_all
&& opt_summarize_only
)
765 error (0, 0, _("cannot both summarize and show all entries"));
769 if (opt_summarize_only
&& max_depth_specified
&& max_depth
== 0)
772 _("warning: summarizing is the same as using --max-depth=0"));
775 if (opt_summarize_only
&& max_depth_specified
&& max_depth
!= 0)
778 _("warning: summarizing conflicts with --max-depth=%d"),
783 if (opt_summarize_only
)
786 /* Initialize the hash structure for inode numbers. */
787 hash_init (INITIAL_HASH_MODULE
, INITIAL_ENTRY_TAB_SIZE
);
789 str_init (&path
, INITIAL_PATH_SIZE
);
791 du_files (optind
== argc
? cwd_only
: argv
+ optind
);