src/shred.c

   1 /* TODO:
   2    x use getopt_long
   3    x use error, not pferror
   4    - don't use pfstatus (at least vprintf isn't portable) -- or maybe
   5        leave it in and see who complains
   6    x bracket strings with _(...) for gettext
   7    - use consistent non-capitalization in error messages
   8    - add standard GNU copyleft comment
   9  */
  10
  11 /*
  12  * shred.c - by Colin Plumb.
  13  *
  14  * Do a secure overwrite of given files or devices, so that not even
  15  * very expensive hardware probing can recover the data.
  16  *
  17  * Although this process is also known as "wiping", I prefer the longer
  18  * name both because I think it is more evocative of what is happening and
  19  * because a longer name conveys a more appropriate sense of deliberateness.
  20  *
  21  * For the theory behind this, see "Secure Deletion of Data from Magnetic
  22  * and Solid-State Memory", on line at
  23  * http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
  24  *
  25  * Just for the record, reversing one or two passes of disk overwrite
  26  * is not terribly difficult with hardware help.  Hook up a good-quality
  27  * digitizing oscilloscope to the output of the head preamplifier and copy
  28  * the high-res digitized data to a computer for some off-line analysis.
  29  * Read the "current" data and average all the pulses together to get an
  30  * "average" pulse on the disk.  Subtract this average pulse from all of
  31  * the actual pulses and you can clearly see the "echo" of the previous
  32  * data on the disk.
  33  *
  34  * Real hard drives have to balance the cost of the media, the head,
  35  * and the read circuitry.  They use better-quality media than absolutely
  36  * necessary to limit the cost of the read circuitry.  By throwing that
  37  * assumption out, and the assumption that you want the data processed
  38  * as fast as the hard drive can spin, you can do better.
  39  *
  40  * If asked to wipe a file, this also deletes it, renaming it to in a
  41  * clever way to try to leave no trace of the original filename.
  42  *
  43  * Copyright 1997, 1998 Colin Plumb <colin@nyx.net>.  This program may
  44  * be freely distributed under the terms of the GNU GPL, the BSD license,
  45  * or Larry Wall's "Artistic License"   Even if you use the BSD license,
  46  * which does not require it, I'd really like to get improvements back.
  47  *
  48  * The ISAAC code still bears some resemblance to the code written
  49  * by Bob Jenkins, but he permits pretty unlimited use.
  50  *
  51  * This was inspired by a desire to improve on some code titled:
  52  * Wipe V1.0-- Overwrite and delete files.  S. 2/3/96
  53  * but I've rewritten everything here so completely that no trace of
  54  * the original remains.
  55  *
  56  * Things to think about:
  57  * - Security: Is there any risk to the race
  58  *   between overwriting and unlinking a file?  Will it do anything
  59  *   drastically bad if told to attack a named pipes or a sockets?
  60  */
  61
  62
  63 #include <config.h>
  64 #include <getopt.h>
  65 #include <stdio.h>
  66 #include <stdarg.h>             /* Used by pferror */
  67
  68 #include "system.h"
  69 #include "xstrtoul.h"
  70 #include "closeout.h"
  71 #include "error.h"
  72
  73 #define DEFAULT_PASSES 25       /* Default */
  74
  75 /* How often to update wiping display */
  76 #define VERBOSE_UPDATE  100*1024
  77
  78 /* FIXME: add comments */
  79 struct Options
  80 {
  81   int allow_devices;
  82   int force;
  83   unsigned int n_iterations;
  84   int remove_file;
  85   int verbose;
  86   int exact;
  87   int zero_fill;
  88 };
  89
  90 /* If nonzero, display usage information and exit.  */
  91 static int show_help;
  92
  93 /* If nonzero, print the version on standard output and exit.  */
  94 static int show_version;
  95
  96 static struct option const long_opts[] =
  97 {
  98   {"device", no_argument, NULL, 'd'},
  99   {"exact", required_argument, NULL, 'x'},
 100   {"force", no_argument, NULL, 'f'},
 101   {"iterations", required_argument, NULL, 'n'},
 102   {"preserve", no_argument, NULL, 'p'},
 103   {"verbose", no_argument, NULL, 'v'},
 104   {"zero", required_argument, NULL, 'z'},
 105   {"help", no_argument, &show_help, 1},
 106   {"version", no_argument, &show_version, 1},
 107   {NULL, 0, NULL, 0}
 108 };
 109
 110 /* Global variable for error printing purposes */
 111 char const *program_name;
 112
 113 void
 114 usage (int status)
 115 {
 116   if (status != 0)
 117     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 118              program_name);
 119   else
 120     {
 121       printf (_("Usage: %s [OPTIONS] FILE [...]\n"), program_name);
 122       printf (_("\
 123 Delete a file securely, first overwriting it to hide its contents.\n\
 124 \n\
 125   -d, --device   allow operation on devices (devices are never deleted)\n\
 126   -f, --force    change permissions to allow writing if necessary\n\
 127   -n, --iterations=N  Overwrite N times instead of the default (25)\n\
 128   -p, --preserve do not delete file after overwriting\n\
 129   -v, --verbose  indicate progress (-vv to leave progress on screen)\n\
 130   -x, --exact    do not round file sizes up to the next full block\n\
 131   -z, --zero     add a final overwrite with zeros to hide shredding\n\
 132   -              shred standard input (but don't delete it);\n\
 133                    this will fail unless you use <>file, a safety feature\n\
 134       --help     display this help and exit\n\
 135       --version  print version information and exit\n\
 136 \n\
 137 FIXME maybe add more discussion here?\n\
 138 "));
 139       puts (_("\nReport bugs to <bug-fileutils@gnu.org>."));
 140       close_stdout ();
 141     }
 142   exit (status);
 143 }
 144
 145 #if ! HAVE_FDATASYNC
 146 # define fdatasync(Fd) fsync (Fd)
 147 #endif
 148
 149 /*
 150  * --------------------------------------------------------------------
 151  *     Bob Jenkins' cryptographic random number generator, ISAAC.
 152  *     Hacked by Colin Plumb.
 153  *
 154  * We need a source of random numbers for some of the overwrite data.
 155  * Cryptographically secure is desirable, but it's not life-or-death
 156  * so I can be a little bit experimental in the choice of RNGs here.
 157  *
 158  * This generator is based somewhat on RC4, but has analysis
 159  * (http://ourworld.compuserve.com/homepages/bob_jenkins/randomnu.htm)
 160  * pointing to it actually being better.  I like it because it's nice
 161  * and fast, and because the author did good work analyzing it.
 162  * --------------------------------------------------------------------
 163  */
 164
 165 #if ULONG_MAX == 0xffffffff
 166 typedef unsigned long word32;
 167 #else
 168 # if UINT_MAX == 0xffffffff
 169 typedef unsigned word32;
 170 # else
 171 #  if USHRT_MAX == 0xffffffff
 172 typedef unsigned short word32;
 173 #  else
 174 #   if UCHAR_MAX == 0xffffffff
 175 typedef unsigned char word32;
 176 #   else
 177 #    error No 32-bit type available!
 178 #   endif
 179 #  endif
 180 # endif
 181 #endif
 182
 183 /* Size of the state tables to use.  (You may change ISAAC_LOG) */
 184 #define ISAAC_LOG 8
 185 #define ISAAC_WORDS (1 << ISAAC_LOG)
 186 #define ISAAC_BYTES (ISAAC_WORDS * sizeof (word32))
 187
 188 /* RNG state variables */
 189 struct isaac_state
 190   {
 191     word32 mm[ISAAC_WORDS];     /* Main state array */
 192     word32 iv[8];               /* Seeding initial vector */
 193     word32 a, b, c;             /* Extra index variables */
 194   };
 195
 196 /* This index operation is more efficient on many processors */
 197 #define ind(mm, x)  *(unsigned *)((char *)(mm) + ( (x) & (ISAAC_WORDS-1)<<2 ))
 198
 199 /*
 200  * The central step.  This uses two temporaries, x and y.  mm is the
 201  * whole state array, while m is a pointer to the current word.  off is
 202  * the offset from m to the word ISAAC_WORDS/2 words away in the mm array,
 203  * i.e. +/- ISAAC_WORDS/2.
 204  */
 205 #define isaac_step(mix, a, b, mm, m, off, r) \
 206 ( \
 207   a = ((a) ^ (mix)) + (m)[off], \
 208   x = *(m), \
 209   *(m) = y = ind ((mm), x) + (a) + (b), \
 210   *(r) = b = ind ((mm), (y) >> ISAAC_LOG) + x \
 211 )
 212
 213 /*
 214  * Refill the entire R array, and update S.
 215  */
 216 static void
 217 isaac_refill (struct isaac_state *s, word32 r[ISAAC_WORDS])
 218 {
 219   register word32 a, b;         /* Caches of a and b */
 220   register word32 x, y;         /* Temps needed by isaac_step() macro */
 221   register word32 *m = s->mm;   /* Pointer into state array */
 222
 223   a = s->a;
 224   b = s->b + (++s->c);
 225
 226   do
 227     {
 228       isaac_step (a << 13, a, b, s->mm, m, ISAAC_WORDS / 2, r);
 229       isaac_step (a >> 6, a, b, s->mm, m + 1, ISAAC_WORDS / 2, r + 1);
 230       isaac_step (a << 2, a, b, s->mm, m + 2, ISAAC_WORDS / 2, r + 2);
 231       isaac_step (a >> 16, a, b, s->mm, m + 3, ISAAC_WORDS / 2, r + 3);
 232       r += 4;
 233     }
 234   while ((m += 4) < s->mm + ISAAC_WORDS / 2);
 235   do
 236     {
 237       isaac_step (a << 13, a, b, s->mm, m, -ISAAC_WORDS / 2, r);
 238       isaac_step (a >> 6, a, b, s->mm, m + 1, -ISAAC_WORDS / 2, r + 1);
 239       isaac_step (a << 2, a, b, s->mm, m + 2, -ISAAC_WORDS / 2, r + 2);
 240       isaac_step (a >> 16, a, b, s->mm, m + 3, -ISAAC_WORDS / 2, r + 3);
 241       r += 4;
 242     }
 243   while ((m += 4) < s->mm + ISAAC_WORDS);
 244   s->a = a;
 245   s->b = b;
 246 }
 247
 248 /*
 249  * The basic seed-scrambling step for initialization, based on Bob
 250  * Jenkins' 256-bit hash.
 251  */
 252 #define mix(a,b,c,d,e,f,g,h) \
 253    (       a ^= b << 11, d += a, \
 254    b += c, b ^= c >>  2, e += b, \
 255    c += d, c ^= d <<  8, f += c, \
 256    d += e, d ^= e >> 16, g += d, \
 257    e += f, e ^= f << 10, h += e, \
 258    f += g, f ^= g >>  4, a += f, \
 259    g += h, g ^= h <<  8, b += g, \
 260    h += a, h ^= a >>  9, c += h, \
 261    a += b                        )
 262
 263 /* The basic ISAAC initialization pass.  */
 264 static void
 265 isaac_mix (struct isaac_state *s, word32 const seed[ISAAC_WORDS])
 266 {
 267   int i;
 268   word32 a = s->iv[0];
 269   word32 b = s->iv[1];
 270   word32 c = s->iv[2];
 271   word32 d = s->iv[3];
 272   word32 e = s->iv[4];
 273   word32 f = s->iv[5];
 274   word32 g = s->iv[6];
 275   word32 h = s->iv[7];
 276
 277   for (i = 0; i < ISAAC_WORDS; i += 8)
 278     {
 279       a += seed[i];
 280       b += seed[i + 1];
 281       c += seed[i + 2];
 282       d += seed[i + 3];
 283       e += seed[i + 4];
 284       f += seed[i + 5];
 285       g += seed[i + 6];
 286       h += seed[i + 7];
 287
 288       mix (a, b, c, d, e, f, g, h);
 289
 290       s->mm[i] = a;
 291       s->mm[i + 1] = b;
 292       s->mm[i + 2] = c;
 293       s->mm[i + 3] = d;
 294       s->mm[i + 4] = e;
 295       s->mm[i + 5] = f;
 296       s->mm[i + 6] = g;
 297       s->mm[i + 7] = h;
 298     }
 299
 300   s->iv[0] = a;
 301   s->iv[1] = b;
 302   s->iv[2] = c;
 303   s->iv[3] = d;
 304   s->iv[4] = e;
 305   s->iv[5] = f;
 306   s->iv[6] = g;
 307   s->iv[7] = h;
 308 }
 309
 310 /*
 311  * Initialize the ISAAC RNG with the given seed material.
 312  * Its size MUST be a multiple of ISAAC_BYTES, and may be
 313  * stored in the s->mm array.
 314  *
 315  * This is a generalization of the original ISAAC initialization code
 316  * to support larger seed sizes.  For seed sizes of 0 and ISAAC_BYTES,
 317  * it is identical.
 318  */
 319 static void
 320 isaac_init (struct isaac_state *s, word32 const *seed, size_t seedsize)
 321 {
 322   static word32 const iv[8] =
 323   {
 324     0x1367df5a, 0x95d90059, 0xc3163e4b, 0x0f421ad8,
 325     0xd92a4a78, 0xa51a3c49, 0xc4efea1b, 0x30609119};
 326   int i;
 327
 328 #if 0
 329   /* The initialization of iv is a precomputed form of: */
 330   for (i = 0; i < 7; i++)
 331     iv[i] = 0x9e3779b9;         /* the golden ratio */
 332   for (i = 0; i < 4; ++i)       /* scramble it */
 333     mix (iv[0], iv[1], iv[2], iv[3], iv[4], iv[5], iv[6], iv[7]);
 334 #endif
 335   s->a = s->b = s->c = 0;
 336
 337   for (i = 0; i < 8; i++)
 338     s->iv[i] = iv[i];
 339
 340   if (seedsize)
 341     {
 342       /* First pass (as in reference ISAAC code) */
 343       isaac_mix (s, seed);
 344       /* Second and subsequent passes (extension to ISAAC) */
 345       while (seedsize -= ISAAC_BYTES)
 346         {
 347           seed += ISAAC_WORDS;
 348           for (i = 0; i < ISAAC_WORDS; i++)
 349             s->mm[i] += seed[i];
 350           isaac_mix (s, s->mm);
 351         }
 352     }
 353   else
 354     {
 355       /* The no seed case (as in reference ISAAC code) */
 356       for (i = 0; i < ISAAC_WORDS; i++)
 357         s->mm[i] = 0;
 358     }
 359
 360   /* Final pass */
 361   isaac_mix (s, s->mm);
 362 }
 363
 364 /*
 365  * Get seed material.  16 bytes (128 bits) is plenty, but if we have
 366  * /dev/urandom, we get 32 bytes = 256 bits for complete overkill.
 367  */
 368 static void
 369 isaac_seed (struct isaac_state *s)
 370 {
 371   s->mm[0] = getpid ();
 372   s->mm[1] = getppid ();
 373
 374   {
 375 #ifdef HAVE_CLOCK_GETTIME               /* POSIX ns-resolution */
 376     struct timespec ts;
 377     clock_gettime (CLOCK_REALTIME, &ts);
 378     s->mm[2] = ts.tv_sec;
 379     s->mm[3] = ts.tv_nsec;
 380 #else
 381     struct timeval tv;
 382     gettimeofday (&tv, (struct timezone *) 0);
 383     s->mm[2] = tv.tv_sec;
 384     s->mm[3] = tv.tv_usec;
 385 #endif
 386   }
 387
 388   {
 389     int fd = open ("/dev/urandom", O_RDONLY);
 390     if (fd >= 0)
 391       {
 392         read (fd, (char *) (s->mm + 4), 32);
 393         close (fd);
 394       }
 395     else
 396       {
 397         fd = open ("/dev/random", O_RDONLY | O_NONBLOCK);
 398         if (fd >= 0)
 399           {
 400             /* /dev/random is more precious, so use less */
 401             read (fd, (char *) (s->mm + 4), 16);
 402             close (fd);
 403           }
 404       }
 405   }
 406
 407   isaac_init (s, s->mm, sizeof (s->mm));
 408 }
 409
 410 /*
 411  * Read up to "size" bytes from the given fd and use them as additional
 412  * ISAAC seed material.  Returns the number of bytes actually read.
 413  */
 414 static off_t
 415 isaac_seedfd (struct isaac_state *s, int fd, off_t size)
 416 {
 417   off_t sizeleft = size;
 418   size_t lim, soff;
 419   ssize_t ssize;
 420   int i;
 421   word32 seed[ISAAC_WORDS];
 422
 423   while (sizeleft)
 424     {
 425       lim = sizeof (seed);
 426       if ((off_t) lim > sizeleft)
 427         lim = (size_t) sizeleft;
 428       soff = 0;
 429       do
 430         {
 431           ssize = read (fd, (char *) seed + soff, lim - soff);
 432         }
 433       while (ssize > 0 && (soff += (size_t) ssize) < lim);
 434       /* Mix in what was read */
 435       if (soff)
 436         {
 437           /* Garbage after the sofff position is harmless */
 438           for (i = 0; i < ISAAC_WORDS; i++)
 439             s->mm[i] += seed[i];
 440           isaac_mix (s, s->mm);
 441           sizeleft -= soff;
 442         }
 443       if (ssize <= 0)
 444         break;
 445     }
 446   /* Wipe the copy of the file in "seed" */
 447   memset (seed, 0, sizeof (seed));
 448
 449   /* Final mix, as in isaac_init */
 450   isaac_mix (s, s->mm);
 451   return size - sizeleft;
 452 }
 453
 454 /* Single-word RNG built on top of ISAAC */
 455 struct irand_state
 456 {
 457   word32 r[ISAAC_WORDS];
 458   unsigned numleft;
 459   struct isaac_state *s;
 460 };
 461
 462 static void
 463 irand_init (struct irand_state *r, struct isaac_state *s)
 464 {
 465   r->numleft = 0;
 466   r->s = s;
 467 }
 468
 469 /*
 470  * We take from the end of the block deliberately, so if we need
 471  * only a small number of values, we choose the final ones which are
 472  * marginally better mixed than the initial ones.
 473  */
 474 static word32
 475 irand32 (struct irand_state *r)
 476 {
 477   if (!r->numleft)
 478     {
 479       isaac_refill (r->s, r->r);
 480       r->numleft = ISAAC_WORDS;
 481     }
 482   return r->r[--r->numleft];
 483 }
 484
 485 /*
 486  * Return a uniformly distributed random number between 0 and n,
 487  * inclusive.  Thus, the result is modulo n+1.
 488  *
 489  * Theory of operation: as x steps through every possible 32-bit number,
 490  * x % n takes each value at least 2^32 / n times (rounded down), but
 491  * the values less than 2^32 % n are taken one additional time.  Thus,
 492  * x % n is not perfectly uniform.  To fix this, the values of x less
 493  * than 2^32 % n are disallowed, and if the RNG produces one, we ask
 494  * for a new value.
 495  */
 496 static word32
 497 irand_mod (struct irand_state *r, word32 n)
 498 {
 499   word32 x;
 500   word32 lim;
 501
 502   if (!++n)
 503     return irand32 (r);
 504
 505   lim = -n % n;                 /* == (2**32-n) % n == 2**32 % n */
 506   do
 507     {
 508       x = irand32 (r);
 509     }
 510   while (x < lim);
 511   return x % n;
 512 }
 513
 514 /*
 515  * Like perror() but fancier.  (And fmt is not allowed to be NULL)
 516  * This apparent use of features specific to GNU C is actually portable;
 517  * see the definitions in error.h.
 518  */
 519 static void pfstatus (char const *,...)
 520      __attribute__ ((__format__ (__printf__, 1, 2)));
 521
 522 /*
 523  * Maintain a status line on stdout.  This is done by using CR and
 524  * overprinting a new line when it changes, padding with trailing blanks
 525  * as needed to hide all of the previous line.  (Assuming that the return
 526  * value of printf is an accurate width.)
 527  FIXME: we can't assume that
 528  */
 529 static int status_visible = 0;  /* Number of visible characters */
 530 static int status_pos = 0;      /* Current position, including padding */
 531
 532 /* Print a new status line, overwriting the previous one. */
 533 static void
 534 pfstatus (char const *fmt,...)
 535 {
 536   int new;                      /* New status_visible value */
 537   va_list ap;
 538
 539   /* If we weren't at beginning, go there. */
 540   if (status_pos)
 541     putchar ('\r');
 542   va_start (ap, fmt);
 543   new = vprintf (fmt, ap);
 544   va_end (ap);
 545   if (new >=0)
 546     {
 547       status_pos = new;
 548       while (status_pos < status_visible)
 549         {
 550           putchar (' ');
 551           status_pos++;
 552         }
 553       status_visible = new;
 554     }
 555   fflush (stdout);
 556 }
 557
 558 /* Leave current status (if any) visible and go to the next free line. */
 559 static void
 560 flushstatus (void)
 561 {
 562   if (status_visible)
 563     {
 564       putchar ('\n');           /* Leave line visible */
 565       fflush (stdout);
 566       status_visible = status_pos = 0;
 567     }
 568   else if (status_pos)
 569     {
 570       putchar ('\r');           /* Go back to beginning of line */
 571       fflush (stdout);
 572       status_pos = 0;
 573     }
 574 }
 575
 576 /*
 577  * Get the size of a file that doesn't want to cooperate (such as a
 578  * device) by doing a binary search for the last readable byte.  The size
 579  * of the file is the least offset at which it is not possible to read
 580  * a byte.
 581  *
 582  * This is also a nice example of using loop invariants to correctly
 583  * implement an algorithm that is potentially full of fencepost errors.
 584  * We assume that if it is possible to read a byte at offset x, it is
 585  * also possible at all offsets <= x.
 586  */
 587 static off_t
 588 sizefd (int fd)
 589 {
 590   off_t hi, lo, mid;
 591   char c;                       /* One-byte buffer for dummy reads */
 592
 593   /* Binary doubling upwards to find the right range */
 594   lo = 0;
 595   hi = 0;               /* Any number, preferably 2^x-1, is okay here. */
 596
 597   /*
 598    * Loop invariant: we have verified that it is possible to read a
 599    * byte at all offsets < lo.  Probe at offset hi >= lo until it
 600    * is not possible to read a byte at that offset, establishing
 601    * the loop invariant for the following loop.
 602    */
 603   for (;;)
 604     {
 605       if (lseek (fd, hi, SEEK_SET) == (off_t) -1
 606           || read (fd, &c, 1) < 1)
 607         break;
 608       lo = hi + 1;              /* This preserves the loop invariant. */
 609       hi += lo;                 /* Exponential doubling. */
 610     }
 611   /*
 612    * Binary search to find the exact endpoint.
 613    * Loop invariant: it is not possible to read a byte at hi,
 614    * but it is possible at all offsets < lo.  Thus, the
 615    * offset we seek is between lo and hi inclusive.
 616    */
 617   while (hi > lo)
 618     {
 619       mid = (hi + lo) / 2;      /* Rounded down, so lo <= mid < hi */
 620       if (lseek (fd, mid, SEEK_SET) == (off_t) -1
 621           || read (fd, &c, 1) < 1)
 622         hi = mid;               /* mid < hi, so this makes progress */
 623       else
 624         lo = mid + 1;           /* Because mid < hi, lo <= hi */
 625     }
 626   /* lo == hi, so we have an exact answer */
 627   return hi;
 628 }
 629
 630 /*
 631  * Fill a buffer with a fixed pattern.
 632  *
 633  * The buffer must be at least 3 bytes long, even if
 634  * size is less.  Larger sizes are filled exactly.
 635  */
 636 static void
 637 fillpattern (int type, unsigned char *r, size_t size)
 638 {
 639   size_t i;
 640   unsigned bits = type & 0xfff;
 641
 642   bits |= bits << 12;
 643   ((unsigned char *) r)[0] = (bits >> 4) & 255;
 644   ((unsigned char *) r)[1] = (bits >> 8) & 255;
 645   ((unsigned char *) r)[2] = bits & 255;
 646   for (i = 3; i < size / 2; i *= 2)
 647     memcpy ((char *) r + i, (char *) r, i);
 648   if (i < size)
 649     memcpy ((char *) r + i, (char *) r, size - i);
 650
 651   /* Invert the first bit of every 512-byte sector. */
 652   if (type & 0x1000)
 653     for (i = 0; i < size; i += 512)
 654       r[i] ^= 0x80;
 655 }
 656
 657 /*
 658  * Fill a buffer with random data.
 659  * size is rounded UP to a multiple of ISAAC_BYTES.
 660  */
 661 static void
 662 fillrand (struct isaac_state *s, word32 *r, size_t size)
 663 {
 664   size = (size + ISAAC_BYTES - 1) / ISAAC_BYTES;
 665
 666   while (size--)
 667     {
 668       isaac_refill (s, r);
 669       r += ISAAC_WORDS;
 670     }
 671 }
 672
 673 /* Generate a 6-character (+ nul) pass name string */
 674 #define PASS_NAME_SIZE 7
 675 static void
 676 passname (unsigned char const *data, char name[PASS_NAME_SIZE])
 677 {
 678   if (data)
 679     sprintf (name, "%02x%02x%02x", data[0], data[1], data[2]);
 680   else
 681     memcpy (name, "random", PASS_NAME_SIZE);
 682 }
 683
 684 /*
 685  * Do pass number k of n, writing "size" bytes of the given pattern "type"
 686  * to the file descriptor fd.   Name, k and n are passed in only for verbose
 687  * progress message purposes.  If n == 0, no progress messages are printed.
 688  */
 689 static int
 690 dopass (int fd, char const *name, off_t size, int type,
 691         struct isaac_state *s, unsigned long k, unsigned long n)
 692 {
 693   off_t cursize;                /* Amount of file remaining to wipe (counts down) */
 694   off_t thresh;                 /* cursize at which next status update is printed */
 695   size_t lim;                   /* Amount of data to try writing */
 696   size_t soff;                  /* Offset into buffer for next write */
 697   ssize_t ssize;                /* Return value from write() */
 698 #if ISAAC_WORDS > 1024
 699   word32 r[ISAAC_WORDS * 3];    /* Multiple of 4K and of pattern size */
 700 #else
 701   word32 r[1024 * 3];           /* Multiple of 4K and of pattern size */
 702 #endif
 703   char pass_string[PASS_NAME_SIZE];     /* Name of current pass */
 704
 705   if (lseek (fd, 0, SEEK_SET) < 0)
 706     {
 707       error (0, 0, _("Error seeking `%s'"), name);
 708       return -1;
 709     }
 710
 711   /* Constant fill patterns need only be set up once. */
 712   if (type >= 0)
 713     {
 714       lim = sizeof (r);
 715       if ((off_t) lim > size)
 716         {
 717           lim = (size_t) size;
 718         }
 719       fillpattern (type, (unsigned char *) r, lim);
 720       passname ((unsigned char *) r, pass_string);
 721     }
 722   else
 723     {
 724       passname (0, pass_string);
 725     }
 726
 727   /* Set position if first status update */
 728   thresh = 0;
 729   if (n)
 730     {
 731       pfstatus (_("%s: pass %lu/%lu (%s)...)"), name, k, n, pass_string);
 732       if (size > VERBOSE_UPDATE)
 733         thresh = size - VERBOSE_UPDATE;
 734     }
 735
 736   for (cursize = size; cursize;)
 737     {
 738       /* How much to write this time? */
 739       lim = sizeof (r);
 740       if ((off_t) lim > cursize)
 741         lim = (size_t) cursize;
 742       if (type < 0)
 743         fillrand (s, r, lim);
 744       /* Loop to retry partial writes. */
 745       for (soff = 0; soff < lim; soff += ssize)
 746         {
 747           ssize = write (fd, (char *) r + soff, lim - soff);
 748           if (ssize < 0)
 749             {
 750               int e = errno;
 751               error (0, 0, _("Error writing `%s' at %lu"),
 752                      name, size - cursize + soff);
 753               /* FIXME: this is slightly fragile in that some systems
 754                  may fail with a different errno.  */
 755               /* This error confuses people. */
 756               if (e == EBADF && fd == 0)
 757                 fputs (_("(Did you remember to open stdin read/write with \"<>file\"?)\n"),
 758                         stderr);
 759               return -1;
 760             }
 761         }
 762
 763       /* Okay, we have written "lim" bytes. */
 764       cursize -= lim;
 765
 766       /* Time to print progress? */
 767       if (cursize <= thresh && n)
 768         {
 769           pfstatus (_("%s: pass %lu/%lu (%s)...%lu/%lu K"),
 770                     name, k, n, pass_string,
 771                     (size - cursize + 1023) / 1024, (size + 1023) / 1024);
 772           if (thresh > VERBOSE_UPDATE)
 773             thresh -= VERBOSE_UPDATE;
 774           else
 775             thresh = 0;
 776         }
 777     }
 778   /* Force what we just wrote to hit the media. */
 779   if (fdatasync (fd) < 0)
 780     {
 781       error (0, 0, _("Error syncing `%s'"), name);
 782       return -1;
 783     }
 784   return 0;
 785 }
 786
 787 /*
 788  * The passes start and end with a random pass, and the passes in between
 789  * are done in random order.  The idea is to deprive someone trying to
 790  * reverse the process of knowledge of the overwrite patterns, so they
 791  * have the additional step of figuring out what was done to the disk
 792  * before they can try to reverse or cancel it.
 793  *
 794  * First, all possible 1-bit patterns.  There are two of them.
 795  * Then, all possible 2-bit patterns.  There are four, but the two
 796  * which are also 1-bit patterns can be omitted.
 797  * Then, all possible 3-bit patterns.  Again, 8-2 = 6.
 798  * Then, all possible 4-bit patterns.  16-4 = 12.
 799  *
 800  * The basic passes are:
 801  * 1-bit: 0x000, 0xFFF
 802  * 2-bit: 0x555, 0xAAA
 803  * 3-bit: 0x249, 0x492, 0x924, 0x6DB, 0xB6D, 0xDB6 (+ 1-bit)
 804  *        100100100100         110110110110
 805  *           9   2   4            D   B   6
 806  * 4-bit: 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
 807  *        0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE (+ 1-bit, 2-bit)
 808  * Adding three random passes at the beginning, middle and end
 809  * produces the default 25-pass structure.
 810  *
 811  * The next extension would be to 5-bit and 6-bit patterns.
 812  * There are 30 uncovered 5-bit patterns and 64-8-2 = 46 uncovered
 813  * 6-bit patterns, so they would increase the time required
 814  * significantly.  4-bit patterns are enough for most purposes.
 815  *
 816  * The main gotcha is that this would require a trickier encoding,
 817  * since lcm(2,3,4) = 12 bits is easy to fit into an int, but
 818  * lcm(2,3,4,5) = 60 bits is not.
 819  *
 820  * One extension that is included is to complement the first bit in each
 821  * 512-byte block, to alter the phase of the encoded data in the more
 822  * complex encodings.  This doesn't apply to MFM, so the 1-bit patterns
 823  * are considered part of the 3-bit ones and the 2-bit patterns are
 824  * considered part of the 4-bit patterns.
 825  *
 826  *
 827  * How does the generalization to variable numbers of passes work?
 828  *
 829  * Here's how...
 830  * Have an ordered list of groups of passes.  Each group is a set.
 831  * Take as many groups as will fit, plus a random subset of the
 832  * last partial group, and place them into the passes list.
 833  * Then shuffle the passes list into random order and use that.
 834  *
 835  * One extra detail: if we can't include a large enough fraction of the
 836  * last group to be interesting, then just substitute random passes.
 837  *
 838  * If you want more passes than the entire list of groups can
 839  * provide, just start repeating from the beginning of the list.
 840  */
 841 static int const
 842   patterns[] =
 843 {
 844   -2,                           /* 2 random passes */
 845   2, 0x000, 0xFFF,              /* 1-bit */
 846   2, 0x555, 0xAAA,              /* 2-bit */
 847   -1,                           /* 1 random pass */
 848   6, 0x249, 0x492, 0x6DB, 0x924, 0xB6D, 0xDB6,  /* 3-bit */
 849   12, 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
 850   0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE,     /* 4-bit */
 851   -1,                           /* 1 random pass */
 852         /* The following patterns have the frst bit per block flipped */
 853   8, 0x1000, 0x1249, 0x1492, 0x16DB, 0x1924, 0x1B6D, 0x1DB6, 0x1FFF,
 854   14, 0x1111, 0x1222, 0x1333, 0x1444, 0x1555, 0x1666, 0x1777,
 855   0x1888, 0x1999, 0x1AAA, 0x1BBB, 0x1CCC, 0x1DDD, 0x1EEE,
 856   -1,                           /* 1 random pass */
 857   0                             /* End */
 858 };
 859
 860 /*
 861  * Generate a random wiping pass pattern with num passes.
 862  * This is a two-stage process.  First, the passes to include
 863  * are chosen, and then they are shuffled into the desired
 864  * order.
 865  */
 866 static void
 867 genpattern (int *dest, size_t num, struct isaac_state *s)
 868 {
 869   struct irand_state r;
 870   size_t randpasses;
 871   int const *p;
 872   int *d;
 873   size_t n;
 874   size_t accum, top, swap;
 875   int k;
 876
 877   if (!num)
 878     return;
 879
 880   irand_init (&r, s);
 881
 882   /* Stage 1: choose the passes to use */
 883   p = patterns;
 884   randpasses = 0;
 885   d = dest;                     /* Destination for generated pass list */
 886   n = num;                      /* Passes remaining to fill */
 887
 888   for (;;)
 889     {
 890       k = *p++;                 /* Block descriptor word */
 891       if (!k)
 892         {                       /* Loop back to the beginning */
 893           p = patterns;
 894         }
 895       else if (k < 0)
 896         {                       /* -k random passes */
 897           k = -k;
 898           if ((size_t) k >= n)
 899             {
 900               randpasses += n;
 901               n = 0;
 902               break;
 903             }
 904           randpasses += k;
 905           n -= k;
 906         }
 907       else if ((size_t) k <= n)
 908         {                       /* Full block of patterns */
 909           memcpy (d, p, k * sizeof (int));
 910           p += k;
 911           d += k;
 912           n -= k;
 913         }
 914       else if (n < 2 || 3 * n < (size_t) k)
 915         {                       /* Finish with random */
 916           randpasses += n;
 917           break;
 918         }
 919       else
 920         {                       /* Pad out with k of the n available */
 921           do
 922             {
 923               if (n == (size_t) k-- || irand_mod (&r, k) < n)
 924                 {
 925                   *d++ = *p;
 926                   n--;
 927                 }
 928               p++;
 929             }
 930           while (n);
 931           break;
 932         }
 933     }
 934   top = num - randpasses;       /* Top of initialized data */
 935
 936   /* assert(d == dest+top); */
 937
 938   /*
 939    * We now have fixed patterns in the dest buffer up to
 940    * "top", and we need to scramble them, with "randpasses"
 941    * random passes evenly spaced among them.
 942    *
 943    * We want one at the beginning, one at the end, and
 944    * evenly spaced in between.  To do this, we basically
 945    * use Bresenham's line draw (a.k.a DDA) algorithm
 946    * to draw a line with slope (randpasses-1)/(num-1).
 947    * (We use a positive accumulator and count down to
 948    * do this.)
 949    *
 950    * So for each desired output value, we do the following:
 951    * - If it should be a random pass, copy the pass type
 952    *   to top++, out of the way of the other passes, and
 953    *   set the current pass to -1 (random).
 954    * - If it should be a normal pattern pass, choose an
 955    *   entry at random between here and top-1 (inclusive)
 956    *   and swap the current entry with that one.
 957    */
 958
 959   randpasses--;                 /* To speed up later math */
 960   accum = randpasses;           /* Bresenham DDA accumulator */
 961   for (n = 0; n < num; n++)
 962     {
 963       if (accum <= randpasses)
 964         {
 965           accum += num - 1;
 966           dest[top++] = dest[n];
 967           dest[n] = -1;
 968         }
 969       else
 970         {
 971           swap = n + irand_mod (&r, top - n - 1);
 972           k = dest[n];
 973           dest[n] = dest[swap];
 974           dest[swap] = k;
 975         }
 976       accum -= randpasses;
 977     }
 978   /* assert(top == num); */
 979
 980   memset (&r, 0, sizeof (r));   /* Wipe this on general principles */
 981 }
 982
 983 /*
 984  * The core routine to actually do the work.  This overwrites the first
 985  * size bytes of the given fd.  Returns -1 on error, 0 on success with
 986  * regular files, and 1 on success with non-regular files.
 987  */
 988 static int
 989 wipefd (int fd, char const *name, struct isaac_state *s,
 990         size_t passes, struct Options const *flags)
 991 {
 992   size_t i;
 993   struct stat st;
 994   off_t size, seedsize;         /* Size to write, size to read */
 995   unsigned long n;              /* Number of passes for printing purposes */
 996   int *passarray;
 997
 998   if (!passes)
 999     passes = DEFAULT_PASSES;
1000
1001   n = 0;                /* dopass takes n -- 0 to mean "don't print progress" */
1002   if (flags->verbose)
1003     n = passes + ((flags->zero_fill) != 0);
1004
1005   if (fstat (fd, &st))
1006     {
1007       error (0, 0, _("Can't fstat file `%s'"), name);
1008       return -1;
1009     }
1010
1011   /* Check for devices */
1012   if (!S_ISREG (st.st_mode) && !(flags->allow_devices))
1013     {
1014       error (0, 0,
1015              _("`%s' is not a regular file: use -d to enable operations on devices"),
1016              name);
1017       return -1;
1018     }
1019
1020   /* Allocate pass array */
1021   passarray = malloc (passes * sizeof (int));
1022   if (!passarray)
1023     {
1024       error (0, 0, _("unable to allocate storage for %lu passes"),
1025              (unsigned long) passes);
1026       return -1;
1027     }
1028
1029   seedsize = size = st.st_size;
1030   if (!size)
1031     {
1032       /* Reluctant to talk?  Apply thumbscrews. */
1033       seedsize = size = sizefd (fd);
1034     }
1035   else if (st.st_blksize && !(flags->exact))
1036     {
1037       /* Round up to the next st_blksize to include "slack" */
1038       size += st.st_blksize - 1 - (size - 1) % st.st_blksize;
1039     }
1040
1041   /*
1042    * Use the file itself as seed material.  Avoid wasting "lots"
1043    * of time (>10% of the write time) reading "large" (>16K)
1044    * files for seed material if there aren't many passes.
1045    *
1046    * Note that "seedsize*passes/10" risks overflow, while
1047    * "seedsize/10*passes is slightly inaccurate.  The hack
1048    * here manages perfection with no overflow.
1049    */
1050   if (passes < 10 && seedsize > 16384)
1051     {
1052       seedsize -= 16384;
1053       seedsize = seedsize / 10 * passes + seedsize % 10 * passes / 10;
1054       seedsize += 16384;
1055     }
1056   (void) isaac_seedfd (s, fd, seedsize);
1057
1058   /* Schedule the passes in random order. */
1059   genpattern (passarray, passes, s);
1060
1061   /* Do the work */
1062   for (i = 0; i < passes; i++)
1063     {
1064       if (dopass (fd, name, size, passarray[i], s, i + 1, n) < 0)
1065         {
1066           memset (passarray, 0, passes * sizeof (int));
1067           free (passarray);
1068           return -1;
1069         }
1070       if (flags->verbose > 1)
1071         flushstatus ();
1072     }
1073
1074   memset (passarray, 0, passes * sizeof (int));
1075   free (passarray);
1076
1077   if (flags->zero_fill)
1078     if (dopass (fd, name, size, 0, s, passes + 1, n) < 0)
1079       return -1;
1080
1081   return !S_ISREG (st.st_mode);
1082 }
1083
1084 /* Characters allowed in a file name - a safe universal set. */
1085 static char const nameset[] =
1086 "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_+=%@#.";
1087
1088 /*
1089  * This increments the name, considering it as a big-endian base-N number
1090  * with the digits taken from nameset.  Characters not in the nameset
1091  * are considered to come before nameset[0].
1092  *
1093  * It's not obvious, but this will explode if name[0..len-1] contains
1094  * any 0 bytes.
1095  *
1096  * This returns the carry (1 on overflow).
1097  */
1098 static int
1099 incname (char *name, unsigned len)
1100 {
1101   char const *p;
1102
1103   if (!len)
1104     return -1;
1105
1106   p = strchr (nameset, name[--len]);
1107   /* If the character is not found, replace it with a 0 digit */
1108   if (!p)
1109     {
1110       name[len] = nameset[0];
1111       return 0;
1112     }
1113   /* If this character has a successor, use it */
1114   if (p[1])
1115     {
1116       name[len] = p[1];
1117       return 0;
1118     }
1119   /* Otherwise, set this digit to 0 and increment the prefix */
1120   name[len] = nameset[0];
1121   return incname (name, len);
1122 }
1123
1124 /*
1125  * Repeatedly rename a file with shorter and shorter names,
1126  * to obliterate all traces of the file name on any system that
1127  * adds a trailing delimiter to on-disk file names and reuses
1128  * the same directory slot.  Finally, delete it.
1129  * The passed-in filename is modified in place to the new filename.
1130  * (Which is deleted if this function succeeds, but is still present if
1131  * it fails for some reason.)
1132  *
1133  * The main loop is written carefully to not get stuck if all possible
1134  * names of a given length are occupied.  It counts down the length from
1135  * the original to 0.  While the length is non-zero, it tries to find an
1136  * unused file name of the given length.  It continues until either the
1137  * name is available and the rename succeeds, or it runs out of names
1138  * to try (incname() wraps and returns 1).  Finally, it deletes the file.
1139  *
1140  * Note that rename() and remove() are both in the ANSI C standard,
1141  * so that part, at least, is NOT Unix-specific.
1142  *
1143  * FIXME: update this comment.
1144  * To force the directory data out, we try to open() the directory and
1145  * invoke fdatasync() on it.  This is rather non-standard, so we don't
1146  * insist that it works, just fall back to a global sync() in that case.
1147  * Unfortunately, this code is Unix-specific.
1148  */
1149 int
1150 wipename (char *oldname, struct Options const *flags)
1151 {
1152   char *newname, *origname = 0;
1153   char *base;                   /* Pointer to filename component, after directories. */
1154   unsigned len;
1155   int err;
1156   int dir_fd;                   /* Try to open directory to sync *it* */
1157
1158   if (flags->verbose)
1159     pfstatus (_("%s: deleting"), oldname);
1160
1161   newname = strdup (oldname);   /* This is a malloc */
1162   if (!newname)
1163     {
1164       error (0, 0, _("malloc failed"));
1165       return -1;
1166     }
1167   if (flags->verbose)
1168     {
1169       origname = strdup (oldname);
1170       if (!origname)
1171         {
1172           error (0, 0, _("malloc failed"));
1173           free (newname);
1174           return -1;
1175         }
1176     }
1177
1178   /* Find the file name portion */
1179   base = strrchr (newname, '/');
1180   /* Temporary hackery to get a directory fd */
1181   if (base)
1182     {
1183       *base = '\0';
1184       dir_fd = open (newname, O_RDONLY);
1185       *base = '/';
1186     }
1187   else
1188     {
1189       dir_fd = open (".", O_RDONLY);
1190     }
1191   base = base ? base + 1 : newname;
1192   len = strlen (base);
1193
1194   while (len)
1195     {
1196       memset (base, nameset[0], len);
1197       base[len] = 0;
1198       do
1199         {
1200           if (access (newname, F_OK) < 0
1201               && !rename (oldname, newname))
1202             {
1203               if (dir_fd < 0 || fdatasync (dir_fd) < 0)
1204                 sync ();        /* Force directory out */
1205               if (origname)
1206                 {
1207                   /* The use of origname (rather than oldname) here is
1208                      deliberate.  It makes the -v output more intelligible
1209                      at the expense of making the `renamed to ...' messages
1210                      use the logical (original) file name.  */
1211                   pfstatus (_("%s: renamed to `%s'"), origname, newname);
1212                   if (flags->verbose > 1)
1213                     flushstatus ();
1214                 }
1215               memcpy (oldname + (base - newname), newname, len + 1);
1216               break;
1217             }
1218         }
1219       while (!incname (base, len));
1220       len--;
1221     }
1222   free (newname);
1223   err = remove (oldname);
1224   if (dir_fd < 0 || fdatasync (dir_fd) < 0)
1225     sync ();
1226   close (dir_fd);
1227   if (origname)
1228     {
1229       if (!err && flags->verbose)
1230         pfstatus (_("%s: deleted"), origname);
1231       free (origname);
1232     }
1233   return err;
1234 }
1235
1236 /*
1237  * Finally, the function that actually takes a filename and grinds
1238  * it into hamburger.  Returns 1 if it was not a regular file.
1239  *
1240  * FIXME
1241  * Detail to note: since we do not restore errno to EACCES after
1242  * a failed chmod, we end up printing the error code from the chmod.
1243  * This is probably either EACCES again or EPERM, which both give
1244  * reasonable error messages.  But it might be better to change that.
1245  */
1246 static int
1247 wipefile (char *name, struct isaac_state *s, size_t passes,
1248           struct Options const *flags)
1249 {
1250   int err, fd;
1251
1252   fd = open (name, O_RDWR);
1253   if (fd < 0 && errno == EACCES && flags->force)
1254     {
1255       if (chmod (name, 0600) >= 0)
1256         fd = open (name, O_RDWR);
1257     }
1258   if (fd < 0)
1259     {
1260       error (0, 0, _("Unable to open `%s'"), name);
1261       return -1;
1262     }
1263
1264   err = wipefd (fd, name, s, passes, flags);
1265   close (fd);
1266   /*
1267    * Wipe the name and unlink - regular files only, no devices!
1268    * (wipefd returns 1 for non-regular files.)
1269    */
1270   if (err == 0 && flags->remove_file)
1271     {
1272       err = wipename (name, flags);
1273       if (err < 0)
1274         error (0, 0, _("Unable to delete file `%s'"), name);
1275     }
1276   return err;
1277 }
1278
1279 int
1280 main (int argc, char **argv)
1281 {
1282   struct isaac_state s;
1283   int err = 0;
1284   struct Options flags;
1285   unsigned long n_passes = 0;
1286   char **file;
1287   int n_files;
1288   int c;
1289   int i;
1290
1291   program_name = argv[0];
1292   setlocale (LC_ALL, "");
1293   bindtextdomain (PACKAGE, LOCALEDIR);
1294   textdomain (PACKAGE);
1295
1296   isaac_seed (&s);
1297
1298   memset (&flags, 0, sizeof flags);
1299
1300   /* By default, remove each file after sanitization.  */
1301   flags.remove_file = 1;
1302
1303   while ((c = getopt_long (argc, argv, "dfn:pvxz", long_opts, NULL)) != -1)
1304     {
1305       switch (c)
1306         {
1307         case 0:
1308           break;
1309
1310         case 'd':
1311           flags.allow_devices = 1;
1312           break;
1313
1314         case 'f':
1315           flags.force = 1;
1316           break;
1317
1318         case 'n':
1319           {
1320             unsigned long int tmp_ulong;
1321             if (xstrtoul (optarg, NULL, 10, &tmp_ulong, NULL) != LONGINT_OK
1322                 || (word32) tmp_ulong != tmp_ulong
1323                 || ((size_t) (tmp_ulong * sizeof (int)) / sizeof (int)
1324                     != tmp_ulong))
1325               {
1326                 error (1, 0, _("invalid number of passes: %s"), optarg);
1327               }
1328             n_passes = tmp_ulong;
1329           }
1330           break;
1331
1332         case 'p':
1333           flags.remove_file = 0;
1334           break;
1335
1336         case 'v':
1337           flags.verbose++;
1338           break;
1339
1340         case 'x':
1341           flags.exact = 1;
1342           break;
1343
1344         case 'z':
1345           flags.zero_fill = 1;
1346           break;
1347
1348         default:
1349           usage (1);
1350         }
1351     }
1352
1353   if (show_version)
1354     {
1355       printf ("shred (%s) %s\n", GNU_PACKAGE, VERSION);
1356       close_stdout ();
1357       exit (0);
1358     }
1359
1360   if (show_help)
1361     usage (0);
1362
1363   file = argv + optind;
1364   n_files = argc - optind;
1365
1366   if (n_files == 0)
1367     {
1368       error (0, 0, _("missing file argument"));
1369       usage (1);
1370     }
1371
1372   for (i = 0; i < n_files; i++)
1373     {
1374       if (STREQ (file[i], "-"))
1375         {
1376           if (wipefd (0, file[i], &s, (size_t) n_passes, &flags) < 0)
1377             err = 1;
1378         }
1379       else
1380         {
1381           /* Plain filename - Note that this overwrites *argv! */
1382           if (wipefile (file[i], &s, (size_t) n_passes, &flags) < 0)
1383             err = 1;
1384         }
1385       flushstatus ();
1386     }
1387
1388   /* Just on general principles, wipe s. */
1389   memset (&s, 0, sizeof (s));
1390
1391   close_stdout ();
1392
1393   exit (err);
1394 }