*** empty log message ***
[coreutils.git] / src / shred.c
blob8ca8df388f14608f6a9f34d1d604f5a32f9b910f
1 /* TODO:
2 x use getopt_long
3 x use error, not pferror
4 - don't use pfstatus (at least vprintf isn't portable) -- or maybe
5 leave it in and see who complains
6 x bracket strings with _(...) for gettext
7 - use consistent non-capitalization in error messages
8 - add standard GNU copyleft comment
9 */
12 * shred.c - by Colin Plumb.
14 * Do a secure overwrite of given files or devices, so that not even
15 * very expensive hardware probing can recover the data.
17 * Although this process is also known as "wiping", I prefer the longer
18 * name both because I think it is more evocative of what is happening and
19 * because a longer name conveys a more appropriate sense of deliberateness.
21 * For the theory behind this, see "Secure Deletion of Data from Magnetic
22 * and Solid-State Memory", on line at
23 * http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
25 * Just for the record, reversing one or two passes of disk overwrite
26 * is not terribly difficult with hardware help. Hook up a good-quality
27 * digitizing oscilloscope to the output of the head preamplifier and copy
28 * the high-res digitized data to a computer for some off-line analysis.
29 * Read the "current" data and average all the pulses together to get an
30 * "average" pulse on the disk. Subtract this average pulse from all of
31 * the actual pulses and you can clearly see the "echo" of the previous
32 * data on the disk.
34 * Real hard drives have to balance the cost of the media, the head,
35 * and the read circuitry. They use better-quality media than absolutely
36 * necessary to limit the cost of the read circuitry. By throwing that
37 * assumption out, and the assumption that you want the data processed
38 * as fast as the hard drive can spin, you can do better.
40 * If asked to wipe a file, this also deletes it, renaming it to in a
41 * clever way to try to leave no trace of the original filename.
43 * Copyright 1997, 1998 Colin Plumb <colin@nyx.net>. This program may
44 * be freely distributed under the terms of the GNU GPL, the BSD license,
45 * or Larry Wall's "Artistic License" Even if you use the BSD license,
46 * which does not require it, I'd really like to get improvements back.
48 * The ISAAC code still bears some resemblance to the code written
49 * by Bob Jenkins, but he permits pretty unlimited use.
51 * This was inspired by a desire to improve on some code titled:
52 * Wipe V1.0-- Overwrite and delete files. S. 2/3/96
53 * but I've rewritten everything here so completely that no trace of
54 * the original remains.
56 * Things to think about:
57 * - Security: Is there any risk to the race
58 * between overwriting and unlinking a file? Will it do anything
59 * drastically bad if told to attack a named pipes or a sockets?
63 #include <config.h>
64 #include <getopt.h>
65 #include <stdio.h>
66 #include <stdarg.h> /* Used by pferror */
68 #include "system.h"
69 #include "xstrtoul.h"
70 #include "closeout.h"
71 #include "error.h"
73 #define DEFAULT_PASSES 25 /* Default */
75 /* How often to update wiping display */
76 #define VERBOSE_UPDATE 100*1024
78 /* FIXME: add comments */
79 struct Options
81 int allow_devices;
82 int force;
83 unsigned int n_iterations;
84 int remove_file;
85 int verbose;
86 int exact;
87 int zero_fill;
90 /* If nonzero, display usage information and exit. */
91 static int show_help;
93 /* If nonzero, print the version on standard output and exit. */
94 static int show_version;
96 static struct option const long_opts[] =
98 {"device", no_argument, NULL, 'd'},
99 {"exact", required_argument, NULL, 'x'},
100 {"force", no_argument, NULL, 'f'},
101 {"iterations", required_argument, NULL, 'n'},
102 {"preserve", no_argument, NULL, 'p'},
103 {"verbose", no_argument, NULL, 'v'},
104 {"zero", required_argument, NULL, 'z'},
105 {"help", no_argument, &show_help, 1},
106 {"version", no_argument, &show_version, 1},
107 {NULL, 0, NULL, 0}
110 /* Global variable for error printing purposes */
111 char const *program_name;
113 void
114 usage (int status)
116 if (status != 0)
117 fprintf (stderr, _("Try `%s --help' for more information.\n"),
118 program_name);
119 else
121 printf (_("Usage: %s [OPTIONS] FILE [...]\n"), program_name);
122 printf (_("\
123 Delete a file securely, first overwriting it to hide its contents.\n\
125 -d, --device allow operation on devices (devices are never deleted)\n\
126 -f, --force change permissions to allow writing if necessary\n\
127 -n, --iterations=N Overwrite N times instead of the default (25)\n\
128 -p, --preserve do not delete file after overwriting\n\
129 -v, --verbose indicate progress (-vv to leave progress on screen)\n\
130 -x, --exact do not round file sizes up to the next full block\n\
131 -z, --zero add a final overwrite with zeros to hide shredding\n\
132 - shred standard input (but don't delete it);\n\
133 this will fail unless you use <>file, a safety feature\n\
134 --help display this help and exit\n\
135 --version print version information and exit\n\
137 FIXME maybe add more discussion here?\n\
138 "));
139 puts (_("\nReport bugs to <bug-fileutils@gnu.org>."));
140 close_stdout ();
142 exit (status);
145 #if ! HAVE_FDATASYNC
146 # define fdatasync(Fd) fsync (Fd)
147 #endif
150 * --------------------------------------------------------------------
151 * Bob Jenkins' cryptographic random number generator, ISAAC.
152 * Hacked by Colin Plumb.
154 * We need a source of random numbers for some of the overwrite data.
155 * Cryptographically secure is desirable, but it's not life-or-death
156 * so I can be a little bit experimental in the choice of RNGs here.
158 * This generator is based somewhat on RC4, but has analysis
159 * (http://ourworld.compuserve.com/homepages/bob_jenkins/randomnu.htm)
160 * pointing to it actually being better. I like it because it's nice
161 * and fast, and because the author did good work analyzing it.
162 * --------------------------------------------------------------------
165 #if ULONG_MAX == 0xffffffff
166 typedef unsigned long word32;
167 #else
168 # if UINT_MAX == 0xffffffff
169 typedef unsigned word32;
170 # else
171 # if USHRT_MAX == 0xffffffff
172 typedef unsigned short word32;
173 # else
174 # if UCHAR_MAX == 0xffffffff
175 typedef unsigned char word32;
176 # else
177 # error No 32-bit type available!
178 # endif
179 # endif
180 # endif
181 #endif
183 /* Size of the state tables to use. (You may change ISAAC_LOG) */
184 #define ISAAC_LOG 8
185 #define ISAAC_WORDS (1 << ISAAC_LOG)
186 #define ISAAC_BYTES (ISAAC_WORDS * sizeof (word32))
188 /* RNG state variables */
189 struct isaac_state
191 word32 mm[ISAAC_WORDS]; /* Main state array */
192 word32 iv[8]; /* Seeding initial vector */
193 word32 a, b, c; /* Extra index variables */
196 /* This index operation is more efficient on many processors */
197 #define ind(mm, x) *(unsigned *)((char *)(mm) + ( (x) & (ISAAC_WORDS-1)<<2 ))
200 * The central step. This uses two temporaries, x and y. mm is the
201 * whole state array, while m is a pointer to the current word. off is
202 * the offset from m to the word ISAAC_WORDS/2 words away in the mm array,
203 * i.e. +/- ISAAC_WORDS/2.
205 #define isaac_step(mix, a, b, mm, m, off, r) \
207 a = ((a) ^ (mix)) + (m)[off], \
208 x = *(m), \
209 *(m) = y = ind ((mm), x) + (a) + (b), \
210 *(r) = b = ind ((mm), (y) >> ISAAC_LOG) + x \
214 * Refill the entire R array, and update S.
216 static void
217 isaac_refill (struct isaac_state *s, word32 r[ISAAC_WORDS])
219 register word32 a, b; /* Caches of a and b */
220 register word32 x, y; /* Temps needed by isaac_step() macro */
221 register word32 *m = s->mm; /* Pointer into state array */
223 a = s->a;
224 b = s->b + (++s->c);
228 isaac_step (a << 13, a, b, s->mm, m, ISAAC_WORDS / 2, r);
229 isaac_step (a >> 6, a, b, s->mm, m + 1, ISAAC_WORDS / 2, r + 1);
230 isaac_step (a << 2, a, b, s->mm, m + 2, ISAAC_WORDS / 2, r + 2);
231 isaac_step (a >> 16, a, b, s->mm, m + 3, ISAAC_WORDS / 2, r + 3);
232 r += 4;
234 while ((m += 4) < s->mm + ISAAC_WORDS / 2);
237 isaac_step (a << 13, a, b, s->mm, m, -ISAAC_WORDS / 2, r);
238 isaac_step (a >> 6, a, b, s->mm, m + 1, -ISAAC_WORDS / 2, r + 1);
239 isaac_step (a << 2, a, b, s->mm, m + 2, -ISAAC_WORDS / 2, r + 2);
240 isaac_step (a >> 16, a, b, s->mm, m + 3, -ISAAC_WORDS / 2, r + 3);
241 r += 4;
243 while ((m += 4) < s->mm + ISAAC_WORDS);
244 s->a = a;
245 s->b = b;
249 * The basic seed-scrambling step for initialization, based on Bob
250 * Jenkins' 256-bit hash.
252 #define mix(a,b,c,d,e,f,g,h) \
253 ( a ^= b << 11, d += a, \
254 b += c, b ^= c >> 2, e += b, \
255 c += d, c ^= d << 8, f += c, \
256 d += e, d ^= e >> 16, g += d, \
257 e += f, e ^= f << 10, h += e, \
258 f += g, f ^= g >> 4, a += f, \
259 g += h, g ^= h << 8, b += g, \
260 h += a, h ^= a >> 9, c += h, \
261 a += b )
263 /* The basic ISAAC initialization pass. */
264 static void
265 isaac_mix (struct isaac_state *s, word32 const seed[ISAAC_WORDS])
267 int i;
268 word32 a = s->iv[0];
269 word32 b = s->iv[1];
270 word32 c = s->iv[2];
271 word32 d = s->iv[3];
272 word32 e = s->iv[4];
273 word32 f = s->iv[5];
274 word32 g = s->iv[6];
275 word32 h = s->iv[7];
277 for (i = 0; i < ISAAC_WORDS; i += 8)
279 a += seed[i];
280 b += seed[i + 1];
281 c += seed[i + 2];
282 d += seed[i + 3];
283 e += seed[i + 4];
284 f += seed[i + 5];
285 g += seed[i + 6];
286 h += seed[i + 7];
288 mix (a, b, c, d, e, f, g, h);
290 s->mm[i] = a;
291 s->mm[i + 1] = b;
292 s->mm[i + 2] = c;
293 s->mm[i + 3] = d;
294 s->mm[i + 4] = e;
295 s->mm[i + 5] = f;
296 s->mm[i + 6] = g;
297 s->mm[i + 7] = h;
300 s->iv[0] = a;
301 s->iv[1] = b;
302 s->iv[2] = c;
303 s->iv[3] = d;
304 s->iv[4] = e;
305 s->iv[5] = f;
306 s->iv[6] = g;
307 s->iv[7] = h;
311 * Initialize the ISAAC RNG with the given seed material.
312 * Its size MUST be a multiple of ISAAC_BYTES, and may be
313 * stored in the s->mm array.
315 * This is a generalization of the original ISAAC initialization code
316 * to support larger seed sizes. For seed sizes of 0 and ISAAC_BYTES,
317 * it is identical.
319 static void
320 isaac_init (struct isaac_state *s, word32 const *seed, size_t seedsize)
322 static word32 const iv[8] =
324 0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8,
325 0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119};
326 int i;
328 #if 0
329 /* The initialization of iv is a precomputed form of: */
330 for (i = 0; i < 7; i++)
331 iv[i] = 0x9e3779b9; /* the golden ratio */
332 for (i = 0; i < 4; ++i) /* scramble it */
333 mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]);
334 #endif
335 s->a = s->b = s->c = 0;
337 for (i = 0; i < 8; i++)
338 s->iv[i] = iv[i];
340 if (seedsize)
342 /* First pass (as in reference ISAAC code) */
343 isaac_mix (s, seed);
344 /* Second and subsequent passes (extension to ISAAC) */
345 while (seedsize -= ISAAC_BYTES)
347 seed += ISAAC_WORDS;
348 for (i = 0; i < ISAAC_WORDS; i++)
349 s->mm[i] += seed[i];
350 isaac_mix (s, s->mm);
353 else
355 /* The no seed case (as in reference ISAAC code) */
356 for (i = 0; i < ISAAC_WORDS; i++)
357 s->mm[i] = 0;
360 /* Final pass */
361 isaac_mix (s, s->mm);
365 * Get seed material. 16 bytes (128 bits) is plenty, but if we have
366 * /dev/urandom, we get 32 bytes = 256 bits for complete overkill.
368 static void
369 isaac_seed (struct isaac_state *s)
371 s->mm[0] = getpid ();
372 s->mm[1] = getppid ();
375 #ifdef HAVE_CLOCK_GETTIME /* POSIX ns-resolution */
376 struct timespec ts;
377 clock_gettime (CLOCK_REALTIME, &ts);
378 s->mm[2] = ts.tv_sec;
379 s->mm[3] = ts.tv_nsec;
380 #else
381 struct timeval tv;
382 gettimeofday (&tv, (struct timezone *) 0);
383 s->mm[2] = tv.tv_sec;
384 s->mm[3] = tv.tv_usec;
385 #endif
389 int fd = open ("/dev/urandom", O_RDONLY);
390 if (fd >= 0)
392 read (fd, (char *) (s->mm + 4), 32);
393 close (fd);
395 else
397 fd = open ("/dev/random", O_RDONLY | O_NONBLOCK);
398 if (fd >= 0)
400 /* /dev/random is more precious, so use less */
401 read (fd, (char *) (s->mm + 4), 16);
402 close (fd);
407 isaac_init (s, s->mm, sizeof (s->mm));
411 * Read up to "size" bytes from the given fd and use them as additional
412 * ISAAC seed material. Returns the number of bytes actually read.
414 static off_t
415 isaac_seedfd (struct isaac_state *s, int fd, off_t size)
417 off_t sizeleft = size;
418 size_t lim, soff;
419 ssize_t ssize;
420 int i;
421 word32 seed[ISAAC_WORDS];
423 while (sizeleft)
425 lim = sizeof (seed);
426 if ((off_t) lim > sizeleft)
427 lim = (size_t) sizeleft;
428 soff = 0;
431 ssize = read (fd, (char *) seed + soff, lim - soff);
433 while (ssize > 0 && (soff += (size_t) ssize) < lim);
434 /* Mix in what was read */
435 if (soff)
437 /* Garbage after the sofff position is harmless */
438 for (i = 0; i < ISAAC_WORDS; i++)
439 s->mm[i] += seed[i];
440 isaac_mix (s, s->mm);
441 sizeleft -= soff;
443 if (ssize <= 0)
444 break;
446 /* Wipe the copy of the file in "seed" */
447 memset (seed, 0, sizeof (seed));
449 /* Final mix, as in isaac_init */
450 isaac_mix (s, s->mm);
451 return size - sizeleft;
454 /* Single-word RNG built on top of ISAAC */
455 struct irand_state
457 word32 r[ISAAC_WORDS];
458 unsigned numleft;
459 struct isaac_state *s;
462 static void
463 irand_init (struct irand_state *r, struct isaac_state *s)
465 r->numleft = 0;
466 r->s = s;
470 * We take from the end of the block deliberately, so if we need
471 * only a small number of values, we choose the final ones which are
472 * marginally better mixed than the initial ones.
474 static word32
475 irand32 (struct irand_state *r)
477 if (!r->numleft)
479 isaac_refill (r->s, r->r);
480 r->numleft = ISAAC_WORDS;
482 return r->r[--r->numleft];
486 * Return a uniformly distributed random number between 0 and n,
487 * inclusive. Thus, the result is modulo n+1.
489 * Theory of operation: as x steps through every possible 32-bit number,
490 * x % n takes each value at least 2^32 / n times (rounded down), but
491 * the values less than 2^32 % n are taken one additional time. Thus,
492 * x % n is not perfectly uniform. To fix this, the values of x less
493 * than 2^32 % n are disallowed, and if the RNG produces one, we ask
494 * for a new value.
496 static word32
497 irand_mod (struct irand_state *r, word32 n)
499 word32 x;
500 word32 lim;
502 if (!++n)
503 return irand32 (r);
505 lim = -n % n; /* == (2**32-n) % n == 2**32 % n */
508 x = irand32 (r);
510 while (x < lim);
511 return x % n;
515 * Like perror() but fancier. (And fmt is not allowed to be NULL)
516 * This apparent use of features specific to GNU C is actually portable;
517 * see the definitions in error.h.
519 static void pfstatus (char const *,...)
520 __attribute__ ((__format__ (__printf__, 1, 2)));
523 * Maintain a status line on stdout. This is done by using CR and
524 * overprinting a new line when it changes, padding with trailing blanks
525 * as needed to hide all of the previous line. (Assuming that the return
526 * value of printf is an accurate width.)
527 FIXME: we can't assume that
529 static int status_visible = 0; /* Number of visible characters */
530 static int status_pos = 0; /* Current position, including padding */
532 /* Print a new status line, overwriting the previous one. */
533 static void
534 pfstatus (char const *fmt,...)
536 int new; /* New status_visible value */
537 va_list ap;
539 /* If we weren't at beginning, go there. */
540 if (status_pos)
541 putchar ('\r');
542 va_start (ap, fmt);
543 new = vprintf (fmt, ap);
544 va_end (ap);
545 if (new >=0)
547 status_pos = new;
548 while (status_pos < status_visible)
550 putchar (' ');
551 status_pos++;
553 status_visible = new;
555 fflush (stdout);
558 /* Leave current status (if any) visible and go to the next free line. */
559 static void
560 flushstatus (void)
562 if (status_visible)
564 putchar ('\n'); /* Leave line visible */
565 fflush (stdout);
566 status_visible = status_pos = 0;
568 else if (status_pos)
570 putchar ('\r'); /* Go back to beginning of line */
571 fflush (stdout);
572 status_pos = 0;
577 * Get the size of a file that doesn't want to cooperate (such as a
578 * device) by doing a binary search for the last readable byte. The size
579 * of the file is the least offset at which it is not possible to read
580 * a byte.
582 * This is also a nice example of using loop invariants to correctly
583 * implement an algorithm that is potentially full of fencepost errors.
584 * We assume that if it is possible to read a byte at offset x, it is
585 * also possible at all offsets <= x.
587 static off_t
588 sizefd (int fd)
590 off_t hi, lo, mid;
591 char c; /* One-byte buffer for dummy reads */
593 /* Binary doubling upwards to find the right range */
594 lo = 0;
595 hi = 0; /* Any number, preferably 2^x-1, is okay here. */
598 * Loop invariant: we have verified that it is possible to read a
599 * byte at all offsets < lo. Probe at offset hi >= lo until it
600 * is not possible to read a byte at that offset, establishing
601 * the loop invariant for the following loop.
603 for (;;)
605 if (lseek (fd, hi, SEEK_SET) == (off_t) -1
606 || read (fd, &c, 1) < 1)
607 break;
608 lo = hi + 1; /* This preserves the loop invariant. */
609 hi += lo; /* Exponential doubling. */
612 * Binary search to find the exact endpoint.
613 * Loop invariant: it is not possible to read a byte at hi,
614 * but it is possible at all offsets < lo. Thus, the
615 * offset we seek is between lo and hi inclusive.
617 while (hi > lo)
619 mid = (hi + lo) / 2; /* Rounded down, so lo <= mid < hi */
620 if (lseek (fd, mid, SEEK_SET) == (off_t) -1
621 || read (fd, &c, 1) < 1)
622 hi = mid; /* mid < hi, so this makes progress */
623 else
624 lo = mid + 1; /* Because mid < hi, lo <= hi */
626 /* lo == hi, so we have an exact answer */
627 return hi;
631 * Fill a buffer with a fixed pattern.
633 * The buffer must be at least 3 bytes long, even if
634 * size is less. Larger sizes are filled exactly.
636 static void
637 fillpattern (int type, unsigned char *r, size_t size)
639 size_t i;
640 unsigned bits = type & 0xfff;
642 bits |= bits << 12;
643 ((unsigned char *) r)[0] = (bits >> 4) & 255;
644 ((unsigned char *) r)[1] = (bits >> 8) & 255;
645 ((unsigned char *) r)[2] = bits & 255;
646 for (i = 3; i < size / 2; i *= 2)
647 memcpy ((char *) r + i, (char *) r, i);
648 if (i < size)
649 memcpy ((char *) r + i, (char *) r, size - i);
651 /* Invert the first bit of every 512-byte sector. */
652 if (type & 0x1000)
653 for (i = 0; i < size; i += 512)
654 r[i] ^= 0x80;
658 * Fill a buffer with random data.
659 * size is rounded UP to a multiple of ISAAC_BYTES.
661 static void
662 fillrand (struct isaac_state *s, word32 *r, size_t size)
664 size = (size + ISAAC_BYTES - 1) / ISAAC_BYTES;
666 while (size--)
668 isaac_refill (s, r);
669 r += ISAAC_WORDS;
673 /* Generate a 6-character (+ nul) pass name string */
674 #define PASS_NAME_SIZE 7
675 static void
676 passname (unsigned char const *data, char name[PASS_NAME_SIZE])
678 if (data)
679 sprintf (name, "%02x%02x%02x", data[0], data[1], data[2]);
680 else
681 memcpy (name, "random", PASS_NAME_SIZE);
685 * Do pass number k of n, writing "size" bytes of the given pattern "type"
686 * to the file descriptor fd. Name, k and n are passed in only for verbose
687 * progress message purposes. If n == 0, no progress messages are printed.
689 static int
690 dopass (int fd, char const *name, off_t size, int type,
691 struct isaac_state *s, unsigned long k, unsigned long n)
693 off_t cursize; /* Amount of file remaining to wipe (counts down) */
694 off_t thresh; /* cursize at which next status update is printed */
695 size_t lim; /* Amount of data to try writing */
696 size_t soff; /* Offset into buffer for next write */
697 ssize_t ssize; /* Return value from write() */
698 #if ISAAC_WORDS > 1024
699 word32 r[ISAAC_WORDS * 3]; /* Multiple of 4K and of pattern size */
700 #else
701 word32 r[1024 * 3]; /* Multiple of 4K and of pattern size */
702 #endif
703 char pass_string[PASS_NAME_SIZE]; /* Name of current pass */
705 if (lseek (fd, 0, SEEK_SET) < 0)
707 error (0, 0, _("Error seeking `%s'"), name);
708 return -1;
711 /* Constant fill patterns need only be set up once. */
712 if (type >= 0)
714 lim = sizeof (r);
715 if ((off_t) lim > size)
717 lim = (size_t) size;
719 fillpattern (type, (unsigned char *) r, lim);
720 passname ((unsigned char *) r, pass_string);
722 else
724 passname (0, pass_string);
727 /* Set position if first status update */
728 thresh = 0;
729 if (n)
731 pfstatus (_("%s: pass %lu/%lu (%s)...)"), name, k, n, pass_string);
732 if (size > VERBOSE_UPDATE)
733 thresh = size - VERBOSE_UPDATE;
736 for (cursize = size; cursize;)
738 /* How much to write this time? */
739 lim = sizeof (r);
740 if ((off_t) lim > cursize)
741 lim = (size_t) cursize;
742 if (type < 0)
743 fillrand (s, r, lim);
744 /* Loop to retry partial writes. */
745 for (soff = 0; soff < lim; soff += ssize)
747 ssize = write (fd, (char *) r + soff, lim - soff);
748 if (ssize < 0)
750 int e = errno;
751 error (0, 0, _("Error writing `%s' at %lu"),
752 name, size - cursize + soff);
753 /* FIXME: this is slightly fragile in that some systems
754 may fail with a different errno. */
755 /* This error confuses people. */
756 if (e == EBADF && fd == 0)
757 fputs (_("(Did you remember to open stdin read/write with \"<>file\"?)\n"),
758 stderr);
759 return -1;
763 /* Okay, we have written "lim" bytes. */
764 cursize -= lim;
766 /* Time to print progress? */
767 if (cursize <= thresh && n)
769 pfstatus (_("%s: pass %lu/%lu (%s)...%lu/%lu K"),
770 name, k, n, pass_string,
771 (size - cursize + 1023) / 1024, (size + 1023) / 1024);
772 if (thresh > VERBOSE_UPDATE)
773 thresh -= VERBOSE_UPDATE;
774 else
775 thresh = 0;
778 /* Force what we just wrote to hit the media. */
779 if (fdatasync (fd) < 0)
781 error (0, 0, _("Error syncing `%s'"), name);
782 return -1;
784 return 0;
788 * The passes start and end with a random pass, and the passes in between
789 * are done in random order. The idea is to deprive someone trying to
790 * reverse the process of knowledge of the overwrite patterns, so they
791 * have the additional step of figuring out what was done to the disk
792 * before they can try to reverse or cancel it.
794 * First, all possible 1-bit patterns. There are two of them.
795 * Then, all possible 2-bit patterns. There are four, but the two
796 * which are also 1-bit patterns can be omitted.
797 * Then, all possible 3-bit patterns. Again, 8-2 = 6.
798 * Then, all possible 4-bit patterns. 16-4 = 12.
800 * The basic passes are:
801 * 1-bit: 0x000, 0xFFF
802 * 2-bit: 0x555, 0xAAA
803 * 3-bit: 0x249, 0x492, 0x924, 0x6DB, 0xB6D, 0xDB6 (+ 1-bit)
804 * 100100100100 110110110110
805 * 9 2 4 D B 6
806 * 4-bit: 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
807 * 0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE (+ 1-bit, 2-bit)
808 * Adding three random passes at the beginning, middle and end
809 * produces the default 25-pass structure.
811 * The next extension would be to 5-bit and 6-bit patterns.
812 * There are 30 uncovered 5-bit patterns and 64-8-2 = 46 uncovered
813 * 6-bit patterns, so they would increase the time required
814 * significantly. 4-bit patterns are enough for most purposes.
816 * The main gotcha is that this would require a trickier encoding,
817 * since lcm(2,3,4) = 12 bits is easy to fit into an int, but
818 * lcm(2,3,4,5) = 60 bits is not.
820 * One extension that is included is to complement the first bit in each
821 * 512-byte block, to alter the phase of the encoded data in the more
822 * complex encodings. This doesn't apply to MFM, so the 1-bit patterns
823 * are considered part of the 3-bit ones and the 2-bit patterns are
824 * considered part of the 4-bit patterns.
827 * How does the generalization to variable numbers of passes work?
829 * Here's how...
830 * Have an ordered list of groups of passes. Each group is a set.
831 * Take as many groups as will fit, plus a random subset of the
832 * last partial group, and place them into the passes list.
833 * Then shuffle the passes list into random order and use that.
835 * One extra detail: if we can't include a large enough fraction of the
836 * last group to be interesting, then just substitute random passes.
838 * If you want more passes than the entire list of groups can
839 * provide, just start repeating from the beginning of the list.
841 static int const
842 patterns[] =
844 -2, /* 2 random passes */
845 2, 0x000, 0xFFF, /* 1-bit */
846 2, 0x555, 0xAAA, /* 2-bit */
847 -1, /* 1 random pass */
848 6, 0x249, 0x492, 0x6DB, 0x924, 0xB6D, 0xDB6, /* 3-bit */
849 12, 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
850 0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE, /* 4-bit */
851 -1, /* 1 random pass */
852 /* The following patterns have the frst bit per block flipped */
853 8, 0x1000, 0x1249, 0x1492, 0x16DB, 0x1924, 0x1B6D, 0x1DB6, 0x1FFF,
854 14, 0x1111, 0x1222, 0x1333, 0x1444, 0x1555, 0x1666, 0x1777,
855 0x1888, 0x1999, 0x1AAA, 0x1BBB, 0x1CCC, 0x1DDD, 0x1EEE,
856 -1, /* 1 random pass */
857 0 /* End */
861 * Generate a random wiping pass pattern with num passes.
862 * This is a two-stage process. First, the passes to include
863 * are chosen, and then they are shuffled into the desired
864 * order.
866 static void
867 genpattern (int *dest, size_t num, struct isaac_state *s)
869 struct irand_state r;
870 size_t randpasses;
871 int const *p;
872 int *d;
873 size_t n;
874 size_t accum, top, swap;
875 int k;
877 if (!num)
878 return;
880 irand_init (&r, s);
882 /* Stage 1: choose the passes to use */
883 p = patterns;
884 randpasses = 0;
885 d = dest; /* Destination for generated pass list */
886 n = num; /* Passes remaining to fill */
888 for (;;)
890 k = *p++; /* Block descriptor word */
891 if (!k)
892 { /* Loop back to the beginning */
893 p = patterns;
895 else if (k < 0)
896 { /* -k random passes */
897 k = -k;
898 if ((size_t) k >= n)
900 randpasses += n;
901 n = 0;
902 break;
904 randpasses += k;
905 n -= k;
907 else if ((size_t) k <= n)
908 { /* Full block of patterns */
909 memcpy (d, p, k * sizeof (int));
910 p += k;
911 d += k;
912 n -= k;
914 else if (n < 2 || 3 * n < (size_t) k)
915 { /* Finish with random */
916 randpasses += n;
917 break;
919 else
920 { /* Pad out with k of the n available */
923 if (n == (size_t) k-- || irand_mod (&r, k) < n)
925 *d++ = *p;
926 n--;
928 p++;
930 while (n);
931 break;
934 top = num - randpasses; /* Top of initialized data */
936 /* assert(d == dest+top); */
939 * We now have fixed patterns in the dest buffer up to
940 * "top", and we need to scramble them, with "randpasses"
941 * random passes evenly spaced among them.
943 * We want one at the beginning, one at the end, and
944 * evenly spaced in between. To do this, we basically
945 * use Bresenham's line draw (a.k.a DDA) algorithm
946 * to draw a line with slope (randpasses-1)/(num-1).
947 * (We use a positive accumulator and count down to
948 * do this.)
950 * So for each desired output value, we do the following:
951 * - If it should be a random pass, copy the pass type
952 * to top++, out of the way of the other passes, and
953 * set the current pass to -1 (random).
954 * - If it should be a normal pattern pass, choose an
955 * entry at random between here and top-1 (inclusive)
956 * and swap the current entry with that one.
959 randpasses--; /* To speed up later math */
960 accum = randpasses; /* Bresenham DDA accumulator */
961 for (n = 0; n < num; n++)
963 if (accum <= randpasses)
965 accum += num - 1;
966 dest[top++] = dest[n];
967 dest[n] = -1;
969 else
971 swap = n + irand_mod (&r, top - n - 1);
972 k = dest[n];
973 dest[n] = dest[swap];
974 dest[swap] = k;
976 accum -= randpasses;
978 /* assert(top == num); */
980 memset (&r, 0, sizeof (r)); /* Wipe this on general principles */
984 * The core routine to actually do the work. This overwrites the first
985 * size bytes of the given fd. Returns -1 on error, 0 on success with
986 * regular files, and 1 on success with non-regular files.
988 static int
989 wipefd (int fd, char const *name, struct isaac_state *s,
990 size_t passes, struct Options const *flags)
992 size_t i;
993 struct stat st;
994 off_t size, seedsize; /* Size to write, size to read */
995 unsigned long n; /* Number of passes for printing purposes */
996 int *passarray;
998 if (!passes)
999 passes = DEFAULT_PASSES;
1001 n = 0; /* dopass takes n -- 0 to mean "don't print progress" */
1002 if (flags->verbose)
1003 n = passes + ((flags->zero_fill) != 0);
1005 if (fstat (fd, &st))
1007 error (0, 0, _("Can't fstat file `%s'"), name);
1008 return -1;
1011 /* Check for devices */
1012 if (!S_ISREG (st.st_mode) && !(flags->allow_devices))
1014 error (0, 0,
1015 _("`%s' is not a regular file: use -d to enable operations on devices"),
1016 name);
1017 return -1;
1020 /* Allocate pass array */
1021 passarray = malloc (passes * sizeof (int));
1022 if (!passarray)
1024 error (0, 0, _("unable to allocate storage for %lu passes"),
1025 (unsigned long) passes);
1026 return -1;
1029 seedsize = size = st.st_size;
1030 if (!size)
1032 /* Reluctant to talk? Apply thumbscrews. */
1033 seedsize = size = sizefd (fd);
1035 else if (st.st_blksize && !(flags->exact))
1037 /* Round up to the next st_blksize to include "slack" */
1038 size += st.st_blksize - 1 - (size - 1) % st.st_blksize;
1042 * Use the file itself as seed material. Avoid wasting "lots"
1043 * of time (>10% of the write time) reading "large" (>16K)
1044 * files for seed material if there aren't many passes.
1046 * Note that "seedsize*passes/10" risks overflow, while
1047 * "seedsize/10*passes is slightly inaccurate. The hack
1048 * here manages perfection with no overflow.
1050 if (passes < 10 && seedsize > 16384)
1052 seedsize -= 16384;
1053 seedsize = seedsize / 10 * passes + seedsize % 10 * passes / 10;
1054 seedsize += 16384;
1056 (void) isaac_seedfd (s, fd, seedsize);
1058 /* Schedule the passes in random order. */
1059 genpattern (passarray, passes, s);
1061 /* Do the work */
1062 for (i = 0; i < passes; i++)
1064 if (dopass (fd, name, size, passarray[i], s, i + 1, n) < 0)
1066 memset (passarray, 0, passes * sizeof (int));
1067 free (passarray);
1068 return -1;
1070 if (flags->verbose > 1)
1071 flushstatus ();
1074 memset (passarray, 0, passes * sizeof (int));
1075 free (passarray);
1077 if (flags->zero_fill)
1078 if (dopass (fd, name, size, 0, s, passes + 1, n) < 0)
1079 return -1;
1081 return !S_ISREG (st.st_mode);
1084 /* Characters allowed in a file name - a safe universal set. */
1085 static char const nameset[] =
1086 "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_+=%@#.";
1089 * This increments the name, considering it as a big-endian base-N number
1090 * with the digits taken from nameset. Characters not in the nameset
1091 * are considered to come before nameset[0].
1093 * It's not obvious, but this will explode if name[0..len-1] contains
1094 * any 0 bytes.
1096 * This returns the carry (1 on overflow).
1098 static int
1099 incname (char *name, unsigned len)
1101 char const *p;
1103 if (!len)
1104 return -1;
1106 p = strchr (nameset, name[--len]);
1107 /* If the character is not found, replace it with a 0 digit */
1108 if (!p)
1110 name[len] = nameset[0];
1111 return 0;
1113 /* If this character has a successor, use it */
1114 if (p[1])
1116 name[len] = p[1];
1117 return 0;
1119 /* Otherwise, set this digit to 0 and increment the prefix */
1120 name[len] = nameset[0];
1121 return incname (name, len);
1125 * Repeatedly rename a file with shorter and shorter names,
1126 * to obliterate all traces of the file name on any system that
1127 * adds a trailing delimiter to on-disk file names and reuses
1128 * the same directory slot. Finally, delete it.
1129 * The passed-in filename is modified in place to the new filename.
1130 * (Which is deleted if this function succeeds, but is still present if
1131 * it fails for some reason.)
1133 * The main loop is written carefully to not get stuck if all possible
1134 * names of a given length are occupied. It counts down the length from
1135 * the original to 0. While the length is non-zero, it tries to find an
1136 * unused file name of the given length. It continues until either the
1137 * name is available and the rename succeeds, or it runs out of names
1138 * to try (incname() wraps and returns 1). Finally, it deletes the file.
1140 * Note that rename() and remove() are both in the ANSI C standard,
1141 * so that part, at least, is NOT Unix-specific.
1143 * FIXME: update this comment.
1144 * To force the directory data out, we try to open() the directory and
1145 * invoke fdatasync() on it. This is rather non-standard, so we don't
1146 * insist that it works, just fall back to a global sync() in that case.
1147 * Unfortunately, this code is Unix-specific.
1150 wipename (char *oldname, struct Options const *flags)
1152 char *newname, *origname = 0;
1153 char *base; /* Pointer to filename component, after directories. */
1154 unsigned len;
1155 int err;
1156 int dir_fd; /* Try to open directory to sync *it* */
1158 if (flags->verbose)
1159 pfstatus (_("%s: deleting"), oldname);
1161 newname = strdup (oldname); /* This is a malloc */
1162 if (!newname)
1164 error (0, 0, _("malloc failed"));
1165 return -1;
1167 if (flags->verbose)
1169 origname = strdup (oldname);
1170 if (!origname)
1172 error (0, 0, _("malloc failed"));
1173 free (newname);
1174 return -1;
1178 /* Find the file name portion */
1179 base = strrchr (newname, '/');
1180 /* Temporary hackery to get a directory fd */
1181 if (base)
1183 *base = '\0';
1184 dir_fd = open (newname, O_RDONLY);
1185 *base = '/';
1187 else
1189 dir_fd = open (".", O_RDONLY);
1191 base = base ? base + 1 : newname;
1192 len = strlen (base);
1194 while (len)
1196 memset (base, nameset[0], len);
1197 base[len] = 0;
1200 if (access (newname, F_OK) < 0
1201 && !rename (oldname, newname))
1203 if (dir_fd < 0 || fdatasync (dir_fd) < 0)
1204 sync (); /* Force directory out */
1205 if (origname)
1207 /* The use of origname (rather than oldname) here is
1208 deliberate. It makes the -v output more intelligible
1209 at the expense of making the `renamed to ...' messages
1210 use the logical (original) file name. */
1211 pfstatus (_("%s: renamed to `%s'"), origname, newname);
1212 if (flags->verbose > 1)
1213 flushstatus ();
1215 memcpy (oldname + (base - newname), newname, len + 1);
1216 break;
1219 while (!incname (base, len));
1220 len--;
1222 free (newname);
1223 err = remove (oldname);
1224 if (dir_fd < 0 || fdatasync (dir_fd) < 0)
1225 sync ();
1226 close (dir_fd);
1227 if (origname)
1229 if (!err && flags->verbose)
1230 pfstatus (_("%s: deleted"), origname);
1231 free (origname);
1233 return err;
1237 * Finally, the function that actually takes a filename and grinds
1238 * it into hamburger. Returns 1 if it was not a regular file.
1240 * FIXME
1241 * Detail to note: since we do not restore errno to EACCES after
1242 * a failed chmod, we end up printing the error code from the chmod.
1243 * This is probably either EACCES again or EPERM, which both give
1244 * reasonable error messages. But it might be better to change that.
1246 static int
1247 wipefile (char *name, struct isaac_state *s, size_t passes,
1248 struct Options const *flags)
1250 int err, fd;
1252 fd = open (name, O_RDWR);
1253 if (fd < 0 && errno == EACCES && flags->force)
1255 if (chmod (name, 0600) >= 0)
1256 fd = open (name, O_RDWR);
1258 if (fd < 0)
1260 error (0, 0, _("Unable to open `%s'"), name);
1261 return -1;
1264 err = wipefd (fd, name, s, passes, flags);
1265 close (fd);
1267 * Wipe the name and unlink - regular files only, no devices!
1268 * (wipefd returns 1 for non-regular files.)
1270 if (err == 0 && flags->remove_file)
1272 err = wipename (name, flags);
1273 if (err < 0)
1274 error (0, 0, _("Unable to delete file `%s'"), name);
1276 return err;
1280 main (int argc, char **argv)
1282 struct isaac_state s;
1283 int err = 0;
1284 struct Options flags;
1285 unsigned long n_passes = 0;
1286 char **file;
1287 int n_files;
1288 int c;
1289 int i;
1291 program_name = argv[0];
1292 setlocale (LC_ALL, "");
1293 bindtextdomain (PACKAGE, LOCALEDIR);
1294 textdomain (PACKAGE);
1296 isaac_seed (&s);
1298 memset (&flags, 0, sizeof flags);
1300 /* By default, remove each file after sanitization. */
1301 flags.remove_file = 1;
1303 while ((c = getopt_long (argc, argv, "dfn:pvxz", long_opts, NULL)) != -1)
1305 switch (c)
1307 case 0:
1308 break;
1310 case 'd':
1311 flags.allow_devices = 1;
1312 break;
1314 case 'f':
1315 flags.force = 1;
1316 break;
1318 case 'n':
1320 unsigned long int tmp_ulong;
1321 if (xstrtoul (optarg, NULL, 10, &tmp_ulong, NULL) != LONGINT_OK
1322 || (word32) tmp_ulong != tmp_ulong
1323 || ((size_t) (tmp_ulong * sizeof (int)) / sizeof (int)
1324 != tmp_ulong))
1326 error (1, 0, _("invalid number of passes: %s"), optarg);
1328 n_passes = tmp_ulong;
1330 break;
1332 case 'p':
1333 flags.remove_file = 0;
1334 break;
1336 case 'v':
1337 flags.verbose++;
1338 break;
1340 case 'x':
1341 flags.exact = 1;
1342 break;
1344 case 'z':
1345 flags.zero_fill = 1;
1346 break;
1348 default:
1349 usage (1);
1353 if (show_version)
1355 printf ("shred (%s) %s\n", GNU_PACKAGE, VERSION);
1356 close_stdout ();
1357 exit (0);
1360 if (show_help)
1361 usage (0);
1363 file = argv + optind;
1364 n_files = argc - optind;
1366 if (n_files == 0)
1368 error (0, 0, _("missing file argument"));
1369 usage (1);
1372 for (i = 0; i < n_files; i++)
1374 if (STREQ (file[i], "-"))
1376 if (wipefd (0, file[i], &s, (size_t) n_passes, &flags) < 0)
1377 err = 1;
1379 else
1381 /* Plain filename - Note that this overwrites *argv! */
1382 if (wipefile (file[i], &s, (size_t) n_passes, &flags) < 0)
1383 err = 1;
1385 flushstatus ();
1388 /* Just on general principles, wipe s. */
1389 memset (&s, 0, sizeof (s));
1391 close_stdout ();
1393 exit (err);