.
[coreutils.git] / src / dd.c
blobaba75fcdb75e17fbc41ef25282d3bad466a220e0
1 /* dd -- convert a file while copying it.
2 Copyright (C) 85, 90, 91, 1995-2002 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Written by Paul Rubin, David MacKenzie, and Stuart Kemp. */
20 #include <config.h>
21 #include <stdio.h>
23 #define SWAB_ALIGN_OFFSET 2
25 #include <sys/types.h>
26 #include <signal.h>
27 #include <getopt.h>
29 #include "system.h"
30 #include "closeout.h"
31 #include "error.h"
32 #include "full-write.h"
33 #include "getpagesize.h"
34 #include "inttostr.h"
35 #include "long-options.h"
36 #include "quote.h"
37 #include "safe-read.h"
38 #include "xstrtol.h"
40 /* The official name of this program (e.g., no `g' prefix). */
41 #define PROGRAM_NAME "dd"
43 #define AUTHORS N_ ("Paul Rubin, David MacKenzie, and Stuart Kemp")
45 #ifndef SIGINFO
46 # define SIGINFO SIGUSR1
47 #endif
49 #ifndef S_TYPEISSHM
50 # define S_TYPEISSHM(Stat_ptr) 0
51 #endif
53 #define ROUND_UP_OFFSET(X, M) ((M) - 1 - (((X) + (M) - 1) % (M)))
54 #define PTR_ALIGN(Ptr, M) ((Ptr) \
55 + ROUND_UP_OFFSET ((char *)(Ptr) - (char *)0, (M)))
57 #define max(a, b) ((a) > (b) ? (a) : (b))
58 #define output_char(c) \
59 do \
60 { \
61 obuf[oc++] = (c); \
62 if (oc >= output_blocksize) \
63 write_output (); \
64 } \
65 while (0)
67 /* Default input and output blocksize. */
68 #define DEFAULT_BLOCKSIZE 512
70 /* Conversions bit masks. */
71 #define C_ASCII 01
72 #define C_EBCDIC 02
73 #define C_IBM 04
74 #define C_BLOCK 010
75 #define C_UNBLOCK 020
76 #define C_LCASE 040
77 #define C_UCASE 0100
78 #define C_SWAB 0200
79 #define C_NOERROR 0400
80 #define C_NOTRUNC 01000
81 #define C_SYNC 02000
82 /* Use separate input and output buffers, and combine partial input blocks. */
83 #define C_TWOBUFS 04000
85 /* The name this program was run with. */
86 char *program_name;
88 /* The name of the input file, or NULL for the standard input. */
89 static char const *input_file = NULL;
91 /* The name of the output file, or NULL for the standard output. */
92 static char const *output_file = NULL;
94 /* The number of bytes in which atomic reads are done. */
95 static size_t input_blocksize = 0;
97 /* The number of bytes in which atomic writes are done. */
98 static size_t output_blocksize = 0;
100 /* Conversion buffer size, in bytes. 0 prevents conversions. */
101 static size_t conversion_blocksize = 0;
103 /* Skip this many records of `input_blocksize' bytes before input. */
104 static uintmax_t skip_records = 0;
106 /* Skip this many records of `output_blocksize' bytes before output. */
107 static uintmax_t seek_records = 0;
109 /* Copy only this many records. The default is effectively infinity. */
110 static uintmax_t max_records = (uintmax_t) -1;
112 /* Bit vector of conversions to apply. */
113 static int conversions_mask = 0;
115 /* If nonzero, filter characters through the translation table. */
116 static int translation_needed = 0;
118 /* Number of partial blocks written. */
119 static uintmax_t w_partial = 0;
121 /* Number of full blocks written. */
122 static uintmax_t w_full = 0;
124 /* Number of partial blocks read. */
125 static uintmax_t r_partial = 0;
127 /* Number of full blocks read. */
128 static uintmax_t r_full = 0;
130 /* Records truncated by conv=block. */
131 static uintmax_t r_truncate = 0;
133 /* Output representation of newline and space characters.
134 They change if we're converting to EBCDIC. */
135 static char newline_character = '\n';
136 static char space_character = ' ';
138 /* Output buffer. */
139 static char *obuf;
141 /* Current index into `obuf'. */
142 static size_t oc = 0;
144 /* Index into current line, for `conv=block' and `conv=unblock'. */
145 static size_t col = 0;
147 struct conversion
149 char *convname;
150 int conversion;
153 static struct conversion conversions[] =
155 {"ascii", C_ASCII | C_TWOBUFS}, /* EBCDIC to ASCII. */
156 {"ebcdic", C_EBCDIC | C_TWOBUFS}, /* ASCII to EBCDIC. */
157 {"ibm", C_IBM | C_TWOBUFS}, /* Slightly different ASCII to EBCDIC. */
158 {"block", C_BLOCK | C_TWOBUFS}, /* Variable to fixed length records. */
159 {"unblock", C_UNBLOCK | C_TWOBUFS}, /* Fixed to variable length records. */
160 {"lcase", C_LCASE | C_TWOBUFS}, /* Translate upper to lower case. */
161 {"ucase", C_UCASE | C_TWOBUFS}, /* Translate lower to upper case. */
162 {"swab", C_SWAB | C_TWOBUFS}, /* Swap bytes of input. */
163 {"noerror", C_NOERROR}, /* Ignore i/o errors. */
164 {"notrunc", C_NOTRUNC}, /* Do not truncate output file. */
165 {"sync", C_SYNC}, /* Pad input records to ibs with NULs. */
166 {NULL, 0}
169 /* Translation table formed by applying successive transformations. */
170 static unsigned char trans_table[256];
172 static char const ascii_to_ebcdic[] =
174 '\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057',
175 '\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017',
176 '\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046',
177 '\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037',
178 '\100', '\117', '\177', '\173', '\133', '\154', '\120', '\175',
179 '\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141',
180 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
181 '\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157',
182 '\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
183 '\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326',
184 '\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346',
185 '\347', '\350', '\351', '\112', '\340', '\132', '\137', '\155',
186 '\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
187 '\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226',
188 '\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246',
189 '\247', '\250', '\251', '\300', '\152', '\320', '\241', '\007',
190 '\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027',
191 '\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033',
192 '\060', '\061', '\032', '\063', '\064', '\065', '\066', '\010',
193 '\070', '\071', '\072', '\073', '\004', '\024', '\076', '\341',
194 '\101', '\102', '\103', '\104', '\105', '\106', '\107', '\110',
195 '\111', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
196 '\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147',
197 '\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165',
198 '\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215',
199 '\216', '\217', '\220', '\232', '\233', '\234', '\235', '\236',
200 '\237', '\240', '\252', '\253', '\254', '\255', '\256', '\257',
201 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
202 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
203 '\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333',
204 '\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355',
205 '\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377'
208 static char const ascii_to_ibm[] =
210 '\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057',
211 '\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017',
212 '\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046',
213 '\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037',
214 '\100', '\132', '\177', '\173', '\133', '\154', '\120', '\175',
215 '\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141',
216 '\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
217 '\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157',
218 '\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
219 '\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326',
220 '\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346',
221 '\347', '\350', '\351', '\255', '\340', '\275', '\137', '\155',
222 '\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
223 '\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226',
224 '\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246',
225 '\247', '\250', '\251', '\300', '\117', '\320', '\241', '\007',
226 '\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027',
227 '\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033',
228 '\060', '\061', '\032', '\063', '\064', '\065', '\066', '\010',
229 '\070', '\071', '\072', '\073', '\004', '\024', '\076', '\341',
230 '\101', '\102', '\103', '\104', '\105', '\106', '\107', '\110',
231 '\111', '\121', '\122', '\123', '\124', '\125', '\126', '\127',
232 '\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147',
233 '\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165',
234 '\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215',
235 '\216', '\217', '\220', '\232', '\233', '\234', '\235', '\236',
236 '\237', '\240', '\252', '\253', '\254', '\255', '\256', '\257',
237 '\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
238 '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
239 '\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333',
240 '\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355',
241 '\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377'
244 static char const ebcdic_to_ascii[] =
246 '\000', '\001', '\002', '\003', '\234', '\011', '\206', '\177',
247 '\227', '\215', '\216', '\013', '\014', '\015', '\016', '\017',
248 '\020', '\021', '\022', '\023', '\235', '\205', '\010', '\207',
249 '\030', '\031', '\222', '\217', '\034', '\035', '\036', '\037',
250 '\200', '\201', '\202', '\203', '\204', '\012', '\027', '\033',
251 '\210', '\211', '\212', '\213', '\214', '\005', '\006', '\007',
252 '\220', '\221', '\026', '\223', '\224', '\225', '\226', '\004',
253 '\230', '\231', '\232', '\233', '\024', '\025', '\236', '\032',
254 '\040', '\240', '\241', '\242', '\243', '\244', '\245', '\246',
255 '\247', '\250', '\133', '\056', '\074', '\050', '\053', '\041',
256 '\046', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
257 '\260', '\261', '\135', '\044', '\052', '\051', '\073', '\136',
258 '\055', '\057', '\262', '\263', '\264', '\265', '\266', '\267',
259 '\270', '\271', '\174', '\054', '\045', '\137', '\076', '\077',
260 '\272', '\273', '\274', '\275', '\276', '\277', '\300', '\301',
261 '\302', '\140', '\072', '\043', '\100', '\047', '\075', '\042',
262 '\303', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
263 '\150', '\151', '\304', '\305', '\306', '\307', '\310', '\311',
264 '\312', '\152', '\153', '\154', '\155', '\156', '\157', '\160',
265 '\161', '\162', '\313', '\314', '\315', '\316', '\317', '\320',
266 '\321', '\176', '\163', '\164', '\165', '\166', '\167', '\170',
267 '\171', '\172', '\322', '\323', '\324', '\325', '\326', '\327',
268 '\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
269 '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
270 '\173', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
271 '\110', '\111', '\350', '\351', '\352', '\353', '\354', '\355',
272 '\175', '\112', '\113', '\114', '\115', '\116', '\117', '\120',
273 '\121', '\122', '\356', '\357', '\360', '\361', '\362', '\363',
274 '\134', '\237', '\123', '\124', '\125', '\126', '\127', '\130',
275 '\131', '\132', '\364', '\365', '\366', '\367', '\370', '\371',
276 '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
277 '\070', '\071', '\372', '\373', '\374', '\375', '\376', '\377'
/* Print usage information and exit with STATUS.
   When STATUS is nonzero, only a one-line "Try --help" hint is written
   to stderr; when it is zero, the full help text is written to stdout.
   Either way the function ends by calling exit (STATUS).  */
280 void
281 usage (int status)
283 if (status != 0)
284 fprintf (stderr, _("Try `%s --help' for more information.\n"),
285 program_name);
286 else
288 printf (_("Usage: %s [OPTION]...\n"), program_name);
289 fputs (_("\
290 Copy a file, converting and formatting according to the options.\n\
292 bs=BYTES force ibs=BYTES and obs=BYTES\n\
293 cbs=BYTES convert BYTES bytes at a time\n\
294 conv=KEYWORDS convert the file as per the comma separated keyword list\n\
295 count=BLOCKS copy only BLOCKS input blocks\n\
296 ibs=BYTES read BYTES bytes at a time\n\
297 "), stdout);
298 fputs (_("\
299 if=FILE read from FILE instead of stdin\n\
300 obs=BYTES write BYTES bytes at a time\n\
301 of=FILE write to FILE instead of stdout\n\
302 seek=BLOCKS skip BLOCKS obs-sized blocks at start of output\n\
303 skip=BLOCKS skip BLOCKS ibs-sized blocks at start of input\n\
304 "), stdout);
305 fputs (HELP_OPTION_DESCRIPTION, stdout);
306 fputs (VERSION_OPTION_DESCRIPTION, stdout);
307 fputs (_("\
309 BLOCKS and BYTES may be followed by the following multiplicative suffixes:\n\
310 xM M, c 1, w 2, b 512, kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
311 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y.\n\
312 Each KEYWORD may be:\n\
314 "), stdout);
315 fputs (_("\
316 ascii from EBCDIC to ASCII\n\
317 ebcdic from ASCII to EBCDIC\n\
318 ibm from ASCII to alternated EBCDIC\n\
319 block pad newline-terminated records with spaces to cbs-size\n\
320 unblock replace trailing spaces in cbs-size records with newline\n\
321 lcase change upper case to lower case\n\
322 "), stdout);
323 fputs (_("\
324 notrunc do not truncate the output file\n\
325 ucase change lower case to upper case\n\
326 swab swap every pair of input bytes\n\
327 noerror continue after read errors\n\
328 sync pad every input block with NULs to ibs-size; when used\n\
329 with block or unblock, pad with spaces rather than NULs\n\
330 "), stdout);
331 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
333 exit (status);
336 static void
337 translate_charset (char const *new_trans)
339 int i;
341 for (i = 0; i < 256; i++)
342 trans_table[i] = new_trans[trans_table[i]];
343 translation_needed = 1;
/* Count the bits that are set in I.  Each pass through the loop clears
   the lowest set bit, so the loop runs once per set bit.  */

static int
bit_count (register int i)
{
  int ones = 0;

  while (i != 0)
    {
      i &= i - 1;
      ones++;
    }

  return ones;
}
358 static void
359 print_stats (void)
361 char buf[2][INT_BUFSIZE_BOUND (uintmax_t)];
362 fprintf (stderr, _("%s+%s records in\n"),
363 umaxtostr (r_full, buf[0]), umaxtostr (r_partial, buf[1]));
364 fprintf (stderr, _("%s+%s records out\n"),
365 umaxtostr (w_full, buf[0]), umaxtostr (w_partial, buf[1]));
366 if (r_truncate > 0)
368 fprintf (stderr, "%s %s\n",
369 umaxtostr (r_truncate, buf[0]),
370 (r_truncate == 1
371 ? _("truncated record")
372 : _("truncated records")));
376 static void
377 cleanup (void)
379 print_stats ();
380 if (close (STDIN_FILENO) < 0)
381 error (EXIT_FAILURE, errno,
382 _("closing input file %s"), quote (input_file));
383 if (close (STDOUT_FILENO) < 0)
384 error (EXIT_FAILURE, errno,
385 _("closing output file %s"), quote (output_file));
/* Run cleanup (), then terminate the process with exit status CODE.  */
static inline void
quit (int code)
{
  cleanup ();
  exit (code);
}
395 static RETSIGTYPE
396 interrupt_handler (int sig)
398 #ifdef SA_NOCLDSTOP
399 struct sigaction sigact;
401 sigact.sa_handler = SIG_DFL;
402 sigemptyset (&sigact.sa_mask);
403 sigact.sa_flags = 0;
404 sigaction (sig, &sigact, NULL);
405 #else
406 signal (sig, SIG_DFL);
407 #endif
408 cleanup ();
409 raise (sig);
412 static RETSIGTYPE
413 siginfo_handler (int sig ATTRIBUTE_UNUSED)
415 print_stats ();
418 /* Encapsulate portability mess of establishing signal handlers. */
420 static void
421 install_handler (int sig_num, RETSIGTYPE (*sig_handler) (int sig))
423 #ifdef SA_NOCLDSTOP
424 struct sigaction sigact;
425 sigaction (sig_num, NULL, &sigact);
426 if (sigact.sa_handler != SIG_IGN)
428 sigact.sa_handler = sig_handler;
429 sigemptyset (&sigact.sa_mask);
430 sigact.sa_flags = 0;
431 sigaction (sig_num, &sigact, NULL);
433 #else
434 if (signal (sig_num, SIG_IGN) != SIG_IGN)
435 signal (sig_num, sig_handler);
436 #endif
/* Open FILENAME with OPTIONS and MODE so that it ends up on file
   descriptor DESIRED_FD.  DESIRED_FD is closed first.  If `open' hands
   back a different descriptor, it is duplicated onto DESIRED_FD and
   the temporary descriptor is closed.  Return DESIRED_FD on success,
   -1 if the open, the dup2 or the close of the temporary fails.  */
static int
open_fd (int desired_fd, char const *filename, int options, mode_t mode)
{
  int result = desired_fd;
  int opened;

  close (desired_fd);

  opened = open (filename, options, mode);
  if (opened < 0)
    return -1;

  if (opened == desired_fd)
    return desired_fd;

  if (dup2 (opened, desired_fd) != desired_fd)
    result = -1;

  return close (opened) == 0 ? result : -1;
}
461 /* Write, then empty, the output buffer `obuf'. */
463 static void
464 write_output (void)
466 size_t nwritten = full_write (STDOUT_FILENO, obuf, output_blocksize);
467 if (nwritten != output_blocksize)
469 error (0, errno, _("writing to %s"), quote (output_file));
470 if (nwritten != 0)
471 w_partial++;
472 quit (1);
474 else
475 w_full++;
476 oc = 0;
479 /* Interpret one "conv=..." option.
480 As a by product, this function replaces each `,' in STR with a NUL byte. */
482 static void
483 parse_conversion (char *str)
485 char *new;
486 int i;
490 new = strchr (str, ',');
491 if (new != NULL)
492 *new++ = '\0';
493 for (i = 0; conversions[i].convname != NULL; i++)
494 if (STREQ (conversions[i].convname, str))
496 conversions_mask |= conversions[i].conversion;
497 break;
499 if (conversions[i].convname == NULL)
501 error (0, 0, _("invalid conversion: %s"), quote (str));
502 usage (EXIT_FAILURE);
504 str = new;
505 } while (new != NULL);
508 /* Return the value of STR, interpreted as a non-negative decimal integer,
509 optionally multiplied by various values.
510 Assign nonzero to *INVALID if STR does not represent a number in
511 this format. */
513 static uintmax_t
514 parse_integer (const char *str, int *invalid)
516 uintmax_t n;
517 char *suffix;
518 enum strtol_error e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0");
520 if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x')
522 uintmax_t multiplier = parse_integer (suffix + 1, invalid);
524 if (multiplier != 0 && n * multiplier / multiplier != n)
526 *invalid = 1;
527 return 0;
530 n *= multiplier;
532 else if (e != LONGINT_OK)
534 *invalid = 1;
535 return 0;
538 return n;
541 static void
542 scanargs (int argc, char **argv)
544 int i;
546 --argc;
547 ++argv;
549 for (i = optind; i < argc; i++)
551 char *name, *val;
553 name = argv[i];
554 val = strchr (name, '=');
555 if (val == NULL)
557 error (0, 0, _("unrecognized option %s"), quote (name));
558 usage (EXIT_FAILURE);
560 *val++ = '\0';
562 if (STREQ (name, "if"))
563 input_file = val;
564 else if (STREQ (name, "of"))
565 output_file = val;
566 else if (STREQ (name, "conv"))
567 parse_conversion (val);
568 else
570 int invalid = 0;
571 uintmax_t n = parse_integer (val, &invalid);
573 if (STREQ (name, "ibs"))
575 /* Ensure that each blocksize is <= SSIZE_MAX. */
576 invalid |= SSIZE_MAX < n;
577 input_blocksize = n;
578 invalid |= input_blocksize != n || input_blocksize == 0;
579 conversions_mask |= C_TWOBUFS;
581 else if (STREQ (name, "obs"))
583 /* Ensure that each blocksize is <= SSIZE_MAX. */
584 invalid |= SSIZE_MAX < n;
585 output_blocksize = n;
586 invalid |= output_blocksize != n || output_blocksize == 0;
587 conversions_mask |= C_TWOBUFS;
589 else if (STREQ (name, "bs"))
591 /* Ensure that each blocksize is <= SSIZE_MAX. */
592 invalid |= SSIZE_MAX < n;
593 output_blocksize = input_blocksize = n;
594 invalid |= output_blocksize != n || output_blocksize == 0;
596 else if (STREQ (name, "cbs"))
598 conversion_blocksize = n;
599 invalid |= (conversion_blocksize != n
600 || conversion_blocksize == 0);
602 else if (STREQ (name, "skip"))
603 skip_records = n;
604 else if (STREQ (name, "seek"))
605 seek_records = n;
606 else if (STREQ (name, "count"))
607 max_records = n;
608 else
610 error (0, 0, _("unrecognized option %s=%s"),
611 quote_n (0, name), quote_n (1, val));
612 usage (EXIT_FAILURE);
615 if (invalid)
616 error (EXIT_FAILURE, 0, _("invalid number %s"), quote (val));
620 /* If bs= was given, both `input_blocksize' and `output_blocksize' will
621 have been set to positive values. If either has not been set,
622 bs= was not given, so make sure two buffers are used. */
623 if (input_blocksize == 0 || output_blocksize == 0)
624 conversions_mask |= C_TWOBUFS;
625 if (input_blocksize == 0)
626 input_blocksize = DEFAULT_BLOCKSIZE;
627 if (output_blocksize == 0)
628 output_blocksize = DEFAULT_BLOCKSIZE;
629 if (conversion_blocksize == 0)
630 conversions_mask &= ~(C_BLOCK | C_UNBLOCK);
633 /* Fix up translation table. */
635 static void
636 apply_translations (void)
638 int i;
640 #define MX(a) (bit_count (conversions_mask & (a)))
641 if ((MX (C_ASCII | C_EBCDIC | C_IBM) > 1)
642 || (MX (C_BLOCK | C_UNBLOCK) > 1)
643 || (MX (C_LCASE | C_UCASE) > 1)
644 || (MX (C_UNBLOCK | C_SYNC) > 1))
646 error (EXIT_FAILURE, 0, _("\
647 only one conv in {ascii,ebcdic,ibm}, {lcase,ucase}, {block,unblock}, {unblock,sync}"));
649 #undef MX
651 if (conversions_mask & C_ASCII)
652 translate_charset (ebcdic_to_ascii);
654 if (conversions_mask & C_UCASE)
656 for (i = 0; i < 256; i++)
657 if (ISLOWER (trans_table[i]))
658 trans_table[i] = TOUPPER (trans_table[i]);
659 translation_needed = 1;
661 else if (conversions_mask & C_LCASE)
663 for (i = 0; i < 256; i++)
664 if (ISUPPER (trans_table[i]))
665 trans_table[i] = TOLOWER (trans_table[i]);
666 translation_needed = 1;
669 if (conversions_mask & C_EBCDIC)
671 translate_charset (ascii_to_ebcdic);
672 newline_character = ascii_to_ebcdic['\n'];
673 space_character = ascii_to_ebcdic[' '];
675 else if (conversions_mask & C_IBM)
677 translate_charset (ascii_to_ibm);
678 newline_character = ascii_to_ibm['\n'];
679 space_character = ascii_to_ibm[' '];
683 /* Apply the character-set translations specified by the user
684 to the NREAD bytes in BUF. */
686 static void
687 translate_buffer (char *buf, size_t nread)
689 char *cp;
690 size_t i;
692 for (i = nread, cp = buf; i; i--, cp++)
693 *cp = trans_table[(unsigned char) *cp];
/* If nonzero, the last char from the previous call to `swab_buffer'
   is saved in `saved_char'.  */
static int char_is_saved = 0;

/* Odd char from previous call.  */
static char saved_char;

/* Swap each pair of bytes among the *NREAD bytes at BUF, first
   prepending the byte held over from the previous call, if any.
   If the resulting count is odd, the last byte is held over for the
   next call.  On return *NREAD is the number of swapped bytes and the
   return value points at the first of them.

   The caller must provide one writable byte before BUF (used for the
   held-over byte) and one after the data (the swap shifts data up by
   one position).  */

static char *
swab_buffer (char *buf, size_t *nread)
{
  char *bufstart = buf;
  char *cp;
  size_t pairs;		/* size_t, so large blocks are not truncated.  */

  /* Is a char left from last time?  */
  if (char_is_saved)
    {
      *--bufstart = saved_char;
      (*nread)++;
      char_is_saved = 0;
    }

  if (*nread & 1)
    {
      /* An odd number of chars are in the buffer.  */
      saved_char = bufstart[--*nread];
      char_is_saved = 1;
    }

  /* Do the byte-swapping by moving every second character two
     positions toward the end, working from the end of the buffer
     toward the beginning.  This way we only move half of the data.  */

  cp = bufstart + *nread;	/* Start one char past the last.  */
  for (pairs = *nread / 2; pairs != 0; pairs--, cp -= 2)
    *cp = *(cp - 2);

  /* The swapped data now begins one byte later than it did.  */
  return ++bufstart;
}
740 /* This is a wrapper for lseek. It detects and warns about a kernel
741 bug that makes lseek a no-op for tape devices, even though the kernel
742 lseek return value suggests that the function succeeded.
744 The parameters are the same as those of the lseek function, but
745 with the addition of FILENAME, the name of the file associated with
746 descriptor FDESC. The file name is used solely in the warning that's
747 printed when the bug is detected. Return the same value that lseek
748 would have returned, but when the lseek bug is detected, return -1
749 to indicate that lseek failed.
751 The offending behavior has been confirmed with an Exabyte SCSI tape
752 drive accessed via /dev/nst0 on both Linux-2.2.17 and Linux-2.4.16. */
754 #ifdef __linux__
756 # include <sys/mtio.h>
/* Two tape status snapshots describe the same position when their
   residue count, file number and block number all match.  */
758 # define MT_SAME_POSITION(P, Q) \
759 ((P).mt_resid == (Q).mt_resid \
760 && (P).mt_fileno == (Q).mt_fileno \
761 && (P).mt_blkno == (Q).mt_blkno)
763 static off_t
764 skip_via_lseek (char const *filename, int fdesc, off_t offset, int whence)
766 struct mtget s1;
767 struct mtget s2;
768 off_t new_position;
769 int got_original_tape_position;
/* Take a tape status snapshot before seeking.  If this ioctl fails,
   the position comparison below is skipped.  */
771 got_original_tape_position = (ioctl (fdesc, MTIOCGET, &s1) == 0);
772 /* known bad device type */
773 /* && s.mt_type == MT_ISSCSI2 */
775 new_position = lseek (fdesc, offset, whence);
/* lseek reported success, yet a second snapshot shows the same tape
   position as before: warn and report the seek as failed.  */
776 if (0 <= new_position
777 && got_original_tape_position
778 && ioctl (fdesc, MTIOCGET, &s2) == 0
779 && MT_SAME_POSITION (s1, s2))
781 error (0, 0, _("warning: working around lseek kernel bug for file (%s)\n\
782 of mt_type=0x%0lx -- see <sys/mtio.h> for the list of types"),
783 filename, s2.mt_type);
784 new_position = -1;
787 return new_position;
789 #else
/* Without __linux__, call lseek directly; the Filename argument is
   not used.  */
790 # define skip_via_lseek(Filename, Fd, Offset, Whence) lseek (Fd, Offset, Whence)
791 #endif
793 /* Throw away RECORDS blocks of BLOCKSIZE bytes on file descriptor FDESC,
794 which is open with read permission for FILE. Store up to BLOCKSIZE
795 bytes of the data at a time in BUF, if necessary. RECORDS must be
796 nonzero. */
798 static void
799 skip (int fdesc, char const *file, uintmax_t records, size_t blocksize,
800 char *buf)
802 off_t offset = records * blocksize;
804 /* Try lseek and if an error indicates it was an inappropriate operation --
805 or if the the file offset is not representable as an off_t --
806 fall back on using read. */
808 if ((uintmax_t) offset / blocksize != records
809 || skip_via_lseek (file, fdesc, offset, SEEK_CUR) < 0)
811 while (records--)
813 size_t nread = safe_read (fdesc, buf, blocksize);
814 if (nread == SAFE_READ_ERROR)
816 error (0, errno, _("reading %s"), quote (file));
817 quit (1);
819 /* POSIX doesn't say what to do when dd detects it has been
820 asked to skip past EOF, so I assume it's non-fatal.
821 FIXME: maybe give a warning. */
822 if (nread == 0)
823 break;
828 /* Copy NREAD bytes of BUF, with no conversions. */
830 static void
831 copy_simple (char const *buf, int nread)
833 int nfree; /* Number of unused bytes in `obuf'. */
834 const char *start = buf; /* First uncopied char in BUF. */
838 nfree = output_blocksize - oc;
839 if (nfree > nread)
840 nfree = nread;
842 memcpy (obuf + oc, start, nfree);
844 nread -= nfree; /* Update the number of bytes left to copy. */
845 start += nfree;
846 oc += nfree;
847 if (oc >= output_blocksize)
848 write_output ();
850 while (nread > 0);
853 /* Copy NREAD bytes of BUF, doing conv=block
854 (pad newline-terminated records to `conversion_blocksize',
855 replacing the newline with trailing spaces). */
857 static void
858 copy_with_block (char const *buf, size_t nread)
860 size_t i;
862 for (i = nread; i; i--, buf++)
864 if (*buf == newline_character)
866 if (col < conversion_blocksize)
868 size_t j;
869 for (j = col; j < conversion_blocksize; j++)
870 output_char (space_character);
872 col = 0;
874 else
876 if (col == conversion_blocksize)
877 r_truncate++;
878 else if (col < conversion_blocksize)
879 output_char (*buf);
880 col++;
885 /* Copy NREAD bytes of BUF, doing conv=unblock
886 (replace trailing spaces in `conversion_blocksize'-sized records
887 with a newline). */
889 static void
890 copy_with_unblock (char const *buf, size_t nread)
892 size_t i;
893 char c;
894 static int pending_spaces = 0;
896 for (i = 0; i < nread; i++)
898 c = buf[i];
900 if (col++ >= conversion_blocksize)
902 col = pending_spaces = 0; /* Wipe out any pending spaces. */
903 i--; /* Push the char back; get it later. */
904 output_char (newline_character);
906 else if (c == space_character)
907 pending_spaces++;
908 else
910 /* `c' is the character after a run of spaces that were not
911 at the end of the conversion buffer. Output them. */
912 while (pending_spaces)
914 output_char (space_character);
915 --pending_spaces;
917 output_char (c);
922 /* The main loop. */
924 static int
925 dd_copy (void)
927 char *ibuf, *bufstart; /* Input buffer. */
928 char *real_buf; /* real buffer address before alignment */
929 char *real_obuf;
930 size_t nread; /* Bytes read in the current block. */
931 int exit_status = 0;
932 size_t page_size = getpagesize ();
933 size_t n_bytes_read;
935 /* Leave at least one extra byte at the beginning and end of `ibuf'
936 for conv=swab, but keep the buffer address even. But some peculiar
937 device drivers work only with word-aligned buffers, so leave an
938 extra two bytes. */
940 /* Some devices require alignment on a sector or page boundary
941 (e.g. character disk devices). Align the input buffer to a
942 page boundary to cover all bases. Note that due to the swab
943 algorithm, we must have at least one byte in the page before
944 the input buffer; thus we allocate 2 pages of slop in the
945 real buffer. 8k above the blocksize shouldn't bother anyone.
947 The page alignment is necessary on any linux system that supports
948 either the SGI raw I/O patch or Steven Tweedies raw I/O patch.
949 It is necessary when accessing raw (i.e. character special) disk
950 devices on Unixware or other SVR4-derived system. */
952 real_buf = xmalloc (input_blocksize
953 + 2 * SWAB_ALIGN_OFFSET
954 + 2 * page_size - 1);
955 ibuf = real_buf;
956 ibuf += SWAB_ALIGN_OFFSET; /* allow space for swab */
958 ibuf = PTR_ALIGN (ibuf, page_size);
960 if (conversions_mask & C_TWOBUFS)
962 /* Page-align the output buffer, too. */
963 real_obuf = xmalloc (output_blocksize + page_size - 1);
964 obuf = PTR_ALIGN (real_obuf, page_size);
966 else
968 real_obuf = NULL;
969 obuf = ibuf;
972 if (skip_records != 0)
973 skip (STDIN_FILENO, input_file, skip_records, input_blocksize, ibuf);
975 if (seek_records != 0)
977 /* FIXME: this loses for
978 % ./dd if=dd seek=1 |:
979 ./dd: standard output: Bad file descriptor
980 0+0 records in
981 0+0 records out
984 skip (STDOUT_FILENO, output_file, seek_records, output_blocksize, obuf);
987 if (max_records == 0)
988 quit (exit_status);
990 while (1)
992 if (r_partial + r_full >= max_records)
993 break;
995 /* Zero the buffer before reading, so that if we get a read error,
996 whatever data we are able to read is followed by zeros.
997 This minimizes data loss. */
998 if ((conversions_mask & C_SYNC) && (conversions_mask & C_NOERROR))
999 memset (ibuf,
1000 (conversions_mask & (C_BLOCK | C_UNBLOCK)) ? ' ' : '\0',
1001 input_blocksize);
1003 nread = safe_read (STDIN_FILENO, ibuf, input_blocksize);
1005 if (nread == 0)
1006 break; /* EOF. */
1008 if (nread == SAFE_READ_ERROR)
1010 error (0, errno, _("reading %s"), quote (input_file));
1011 if (conversions_mask & C_NOERROR)
1013 print_stats ();
1014 /* Seek past the bad block if possible. */
1015 lseek (STDIN_FILENO, (off_t) input_blocksize, SEEK_CUR);
1016 if (conversions_mask & C_SYNC)
1017 /* Replace the missing input with null bytes and
1018 proceed normally. */
1019 nread = 0;
1020 else
1021 continue;
1023 else
1025 /* Write any partial block. */
1026 exit_status = 2;
1027 break;
1031 n_bytes_read = nread;
1033 if (n_bytes_read < input_blocksize)
1035 r_partial++;
1036 if (conversions_mask & C_SYNC)
1038 if (!(conversions_mask & C_NOERROR))
1039 /* If C_NOERROR, we zeroed the block before reading. */
1040 memset (ibuf + n_bytes_read,
1041 (conversions_mask & (C_BLOCK | C_UNBLOCK)) ? ' ' : '\0',
1042 input_blocksize - n_bytes_read);
1043 n_bytes_read = input_blocksize;
1046 else
1047 r_full++;
1049 if (ibuf == obuf) /* If not C_TWOBUFS. */
1051 size_t nwritten = full_write (STDOUT_FILENO, obuf, n_bytes_read);
1052 if (nwritten != n_bytes_read)
1054 error (0, errno, _("writing %s"), quote (output_file));
1055 quit (1);
1057 else if (n_bytes_read == input_blocksize)
1058 w_full++;
1059 else
1060 w_partial++;
1061 continue;
1064 /* Do any translations on the whole buffer at once. */
1066 if (translation_needed)
1067 translate_buffer (ibuf, n_bytes_read);
1069 if (conversions_mask & C_SWAB)
1070 bufstart = swab_buffer (ibuf, &n_bytes_read);
1071 else
1072 bufstart = ibuf;
1074 if (conversions_mask & C_BLOCK)
1075 copy_with_block (bufstart, n_bytes_read);
1076 else if (conversions_mask & C_UNBLOCK)
1077 copy_with_unblock (bufstart, n_bytes_read);
1078 else
1079 copy_simple (bufstart, n_bytes_read);
1082 /* If we have a char left as a result of conv=swab, output it. */
1083 if (char_is_saved)
1085 if (conversions_mask & C_BLOCK)
1086 copy_with_block (&saved_char, 1);
1087 else if (conversions_mask & C_UNBLOCK)
1088 copy_with_unblock (&saved_char, 1);
1089 else
1090 output_char (saved_char);
1093 if ((conversions_mask & C_BLOCK) && col > 0)
1095 /* If the final input line didn't end with a '\n', pad
1096 the output block to `conversion_blocksize' chars. */
1097 size_t i;
1098 for (i = col; i < conversion_blocksize; i++)
1099 output_char (space_character);
1102 if ((conversions_mask & C_UNBLOCK) && col == conversion_blocksize)
1103 /* Add a final '\n' if there are exactly `conversion_blocksize'
1104 characters in the final record. */
1105 output_char (newline_character);
1107 /* Write out the last block. */
1108 if (oc != 0)
1110 size_t nwritten = full_write (STDOUT_FILENO, obuf, oc);
1111 if (nwritten != 0)
1112 w_partial++;
1113 if (nwritten != oc)
1115 error (0, errno, _("writing %s"), quote (output_file));
1116 quit (1);
1120 free (real_buf);
1121 if (real_obuf)
1122 free (real_obuf);
1124 return exit_status;
1127 /* This is gross, but necessary, because of the way close_stdout
1128 works and because this program closes STDOUT_FILENO directly. */
1129 static void (*closeout_func) (void) = close_stdout;
1131 static void
1132 close_stdout_wrapper (void)
1134 if (closeout_func)
1135 (*closeout_func) ();
1139 main (int argc, char **argv)
1141 int i;
1142 int exit_status;
1144 program_name = argv[0];
1145 setlocale (LC_ALL, "");
1146 bindtextdomain (PACKAGE, LOCALEDIR);
1147 textdomain (PACKAGE);
1149 /* Arrange to close stdout if parse_long_options exits. */
1150 atexit (close_stdout_wrapper);
1152 parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, VERSION,
1153 AUTHORS, usage);
1155 /* Don't close stdout on exit from here on. */
1156 closeout_func = NULL;
1158 /* Initialize translation table to identity translation. */
1159 for (i = 0; i < 256; i++)
1160 trans_table[i] = i;
1162 /* Decode arguments. */
1163 scanargs (argc, argv);
1165 apply_translations ();
1167 if (input_file != NULL)
1169 if (open_fd (STDIN_FILENO, input_file, O_RDONLY, 0) < 0)
1170 error (EXIT_FAILURE, errno, _("opening %s"), quote (input_file));
1172 else
1173 input_file = _("standard input");
1175 if (output_file != NULL)
1177 mode_t perms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
1178 int opts
1179 = (O_CREAT
1180 | (seek_records || (conversions_mask & C_NOTRUNC) ? 0 : O_TRUNC));
1182 /* Open the output file with *read* access only if we might
1183 need to read to satisfy a `seek=' request. If we can't read
1184 the file, go ahead with write-only access; it might work. */
1185 if ((! seek_records
1186 || open_fd (STDOUT_FILENO, output_file, O_RDWR | opts, perms) < 0)
1187 && open_fd (STDOUT_FILENO, output_file, O_WRONLY | opts, perms) < 0)
1188 error (EXIT_FAILURE, errno, _("opening %s"), quote (output_file));
1190 #if HAVE_FTRUNCATE
1191 if (seek_records != 0 && !(conversions_mask & C_NOTRUNC))
1193 struct stat stdout_stat;
1194 off_t o = seek_records * output_blocksize;
1195 if ((uintmax_t) o / output_blocksize != seek_records)
1196 error (EXIT_FAILURE, 0, _("file offset out of range"));
1198 if (fstat (STDOUT_FILENO, &stdout_stat) != 0)
1199 error (EXIT_FAILURE, errno, _("cannot fstat %s"),
1200 quote (output_file));
1202 /* Complain only when ftruncate fails on a regular file, a
1203 directory, or a shared memory object, as the 2000-08
1204 POSIX draft specifies ftruncate's behavior only for these
1205 file types. For example, do not complain when Linux 2.4
1206 ftruncate fails on /dev/fd0. */
1207 if (ftruncate (STDOUT_FILENO, o) != 0
1208 && (S_ISREG (stdout_stat.st_mode)
1209 || S_ISDIR (stdout_stat.st_mode)
1210 || S_TYPEISSHM (&stdout_stat)))
1212 char buf[INT_BUFSIZE_BOUND (off_t)];
1213 error (EXIT_FAILURE, errno,
1214 _("advancing past %s bytes in output file %s"),
1215 offtostr (o, buf), quote (output_file));
1218 #endif
1220 else
1222 output_file = _("standard output");
1225 install_handler (SIGINT, interrupt_handler);
1226 install_handler (SIGQUIT, interrupt_handler);
1227 install_handler (SIGPIPE, interrupt_handler);
1228 install_handler (SIGINFO, siginfo_handler);
1230 exit_status = dd_copy ();
1232 quit (exit_status);