2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "archive_platform.h"
29 __FBSDID("$FreeBSD: src/lib/libarchive/archive_read_support_format_ar.c,v 1.6 2007/05/29 01:00:19 kientzle Exp $");
31 #ifdef HAVE_SYS_STAT_H
45 #include "archive_entry.h"
46 #include "archive_private.h"
47 #include "archive_read_private.h"
50 off_t entry_bytes_remaining
;
58 * Define structure of the "ar" header.
60 #define AR_name_offset 0
61 #define AR_name_size 16
62 #define AR_date_offset 16
63 #define AR_date_size 12
64 #define AR_uid_offset 28
66 #define AR_gid_offset 34
68 #define AR_mode_offset 40
69 #define AR_mode_size 8
70 #define AR_size_offset 48
71 #define AR_size_size 10
72 #define AR_fmag_offset 58
73 #define AR_fmag_size 2
/*
 * Test whether x is an ASCII decimal digit by comparing it directly
 * against '0' and '9'.
 *
 * The whole expansion is parenthesized so the macro composes correctly
 * with surrounding operators (for example !isdigit(c)); without the
 * outer parentheses a leading '!' would bind only to the first operand.
 *
 * The argument is evaluated twice, so do not pass an expression with
 * side effects.
 */
#define isdigit(x) ((x) >= '0' && (x) <= '9')
77 static int archive_read_format_ar_bid(struct archive_read
*a
);
78 static int archive_read_format_ar_cleanup(struct archive_read
*a
);
79 static int archive_read_format_ar_read_data(struct archive_read
*a
,
80 const void **buff
, size_t *size
, off_t
*offset
);
81 static int archive_read_format_ar_skip(struct archive_read
*a
);
82 static int archive_read_format_ar_read_header(struct archive_read
*a
,
83 struct archive_entry
*e
);
84 static uint64_t ar_atol8(const char *p
, unsigned char_cnt
);
85 static uint64_t ar_atol10(const char *p
, unsigned char_cnt
);
86 static int ar_parse_gnu_filename_table(struct archive_read
*, struct ar
*,
87 const void *, size_t);
88 static int ar_parse_common_header(struct ar
*ar
, struct archive_entry
*,
92 archive_read_support_format_ar(struct archive
*_a
)
94 struct archive_read
*a
= (struct archive_read
*)_a
;
98 ar
= (struct ar
*)malloc(sizeof(*ar
));
100 archive_set_error(&a
->archive
, ENOMEM
,
101 "Can't allocate ar data");
102 return (ARCHIVE_FATAL
);
104 memset(ar
, 0, sizeof(*ar
));
107 r
= __archive_read_register_format(a
,
109 archive_read_format_ar_bid
,
110 archive_read_format_ar_read_header
,
111 archive_read_format_ar_read_data
,
112 archive_read_format_ar_skip
,
113 archive_read_format_ar_cleanup
);
115 if (r
!= ARCHIVE_OK
) {
123 archive_read_format_ar_cleanup(struct archive_read
*a
)
127 ar
= (struct ar
*)(a
->format
->data
);
131 (a
->format
->data
) = NULL
;
136 archive_read_format_ar_bid(struct archive_read
*a
)
142 if (a
->archive
.archive_format
!= 0 &&
143 (a
->archive
.archive_format
& ARCHIVE_FORMAT_BASE_MASK
) !=
147 ar
= (struct ar
*)(a
->format
->data
);
150 * Verify the 8-byte file signature.
151 * TODO: Do we need to check more than this?
153 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &h
, 8);
156 if (strncmp((const char*)h
, "!<arch>\n", 8) == 0) {
163 archive_read_format_ar_read_header(struct archive_read
*a
,
164 struct archive_entry
*entry
)
166 char filename
[AR_name_size
+ 1];
168 uint64_t number
; /* Used to hold parsed numbers before validation. */
170 size_t bsd_name_length
, entry_size
;
176 ar
= (struct ar
*)(a
->format
->data
);
178 if (a
->archive
.file_position
== 0) {
180 * We are now at the beginning of the archive,
181 * so we first need to consume the ar global header.
183 (a
->decompressor
->consume
)(a
, 8);
184 /* Set a default format code for now. */
185 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR
;
188 /* Read the header for the next file entry. */
189 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &b
, 60);
190 if (bytes_read
< 60) {
192 return (ARCHIVE_EOF
);
194 (a
->decompressor
->consume
)(a
, 60);
197 /* Verify the magic signature on the file header. */
198 if (strncmp(h
+ AR_fmag_offset
, "`\n", 2) != 0) {
199 archive_set_error(&a
->archive
, EINVAL
,
200 "Consistency check failed");
201 return (ARCHIVE_WARN
);
204 /* Copy filename into work buffer. */
205 strncpy(filename
, h
+ AR_name_offset
, AR_name_size
);
206 filename
[AR_name_size
] = '\0';
209 * Guess the format variant based on the filename.
211 if (a
->archive
.archive_format
== ARCHIVE_FORMAT_AR
) {
212 /* We don't already know the variant, so let's guess. */
214 * Biggest clue is presence of '/': GNU starts special
215 * filenames with '/', appends '/' as terminator to
216 * non-special names, so anything with '/' should be
217 * GNU except for BSD long filenames.
219 if (strncmp(filename
, "#1/", 3) == 0)
220 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR_BSD
;
221 else if (strchr(filename
, '/') != NULL
)
222 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR_GNU
;
223 else if (strncmp(filename
, "__.SYMDEF", 9) == 0)
224 a
->archive
.archive_format
= ARCHIVE_FORMAT_AR_BSD
;
226 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
227 * if name exactly fills 16-byte field? If so, we
228 * can't assume entries without '/' are BSD. XXX
232 /* Update format name from the code. */
233 if (a
->archive
.archive_format
== ARCHIVE_FORMAT_AR_GNU
)
234 a
->archive
.archive_format_name
= "ar (GNU/SVR4)";
235 else if (a
->archive
.archive_format
== ARCHIVE_FORMAT_AR_BSD
)
236 a
->archive
.archive_format_name
= "ar (BSD)";
238 a
->archive
.archive_format_name
= "ar";
241 * Remove trailing spaces from the filename. GNU and BSD
242 * variants both pad filename area out with spaces.
243 * This will only be wrong if GNU/SVR4 'ar' implementations
244 * omit trailing '/' for 16-char filenames and we have
245 * a 16-char filename that ends in ' '.
247 p
= filename
+ AR_name_size
- 1;
248 while (p
>= filename
&& *p
== ' ') {
254 * Remove trailing slash unless first character is '/'.
255 * (BSD entries never end in '/', so this will only trim
256 * GNU-format entries. GNU special entries start with '/'
257 * and are not terminated in '/', so we don't trim anything
258 * that starts with '/'.)
260 if (filename
[0] != '/' && *p
== '/')
264 * '//' is the GNU filename table.
265 * Later entries can refer to names in this table.
267 if (strcmp(filename
, "//") == 0) {
268 /* This must come before any call to _read_ahead. */
269 ar_parse_common_header(ar
, entry
, h
);
270 archive_entry_copy_pathname(entry
, filename
);
271 archive_entry_set_mode(entry
,
272 S_IFREG
| (archive_entry_mode(entry
) & 0777));
273 /* Get the size of the filename table. */
274 number
= ar_atol10(h
+ AR_size_offset
, AR_size_size
);
275 if (number
> SIZE_MAX
) {
276 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
277 "Filename table too large");
278 return (ARCHIVE_FATAL
);
280 entry_size
= (size_t)number
;
281 /* Read the filename table into memory. */
282 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &b
, entry_size
);
284 return (ARCHIVE_FATAL
);
285 if ((size_t)bytes_read
< entry_size
) {
286 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
287 "Truncated input file");
288 return (ARCHIVE_FATAL
);
291 * Don't consume the contents, so the client will
292 * also get a shot at reading it.
295 /* Parse the filename table. */
296 return (ar_parse_gnu_filename_table(a
, ar
, b
, entry_size
));
300 * GNU variant handles long filenames by storing /<number>
301 * to indicate a name stored in the filename table.
303 if (filename
[0] == '/' && isdigit(filename
[1])) {
304 number
= ar_atol10(h
+ AR_name_offset
+ 1, AR_name_size
- 1);
306 * If we can't look up the real name, warn and return
307 * the entry with the wrong name.
309 if (ar
->strtab
== NULL
|| number
> ar
->strtab_size
) {
310 archive_set_error(&a
->archive
, EINVAL
,
311 "Can't find long filename for entry");
312 archive_entry_copy_pathname(entry
, filename
);
313 /* Parse the time, owner, mode, size fields. */
314 ar_parse_common_header(ar
, entry
, h
);
315 return (ARCHIVE_WARN
);
318 archive_entry_copy_pathname(entry
, &ar
->strtab
[(size_t)number
]);
319 /* Parse the time, owner, mode, size fields. */
320 return (ar_parse_common_header(ar
, entry
, h
));
324 * BSD handles long filenames by storing "#1/" followed by the
325 * length of filename as a decimal number, then prepends the
326 * filename to the file contents.
328 if (strncmp(filename
, "#1/", 3) == 0) {
329 /* Parse the time, owner, mode, size fields. */
330 /* This must occur before _read_ahead is called again. */
331 ar_parse_common_header(ar
, entry
, h
);
333 /* Parse the size of the name, adjust the file size. */
334 number
= ar_atol10(h
+ AR_name_offset
+ 3, AR_name_size
- 3);
335 if ((off_t
)number
> ar
->entry_bytes_remaining
) {
336 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
337 "Bad input file size");
338 return (ARCHIVE_FATAL
);
340 bsd_name_length
= (size_t)number
;
341 ar
->entry_bytes_remaining
-= bsd_name_length
;
342 /* Adjust file size reported to client. */
343 archive_entry_set_size(entry
, ar
->entry_bytes_remaining
);
345 /* Read the long name into memory. */
346 bytes_read
= (a
->decompressor
->read_ahead
)(a
, &b
, bsd_name_length
);
348 return (ARCHIVE_FATAL
);
349 if ((size_t)bytes_read
< bsd_name_length
) {
350 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
351 "Truncated input file");
352 return (ARCHIVE_FATAL
);
354 (a
->decompressor
->consume
)(a
, bsd_name_length
);
356 /* Store it in the entry. */
357 p
= (char *)malloc(bsd_name_length
+ 1);
359 archive_set_error(&a
->archive
, ENOMEM
,
360 "Can't allocate fname buffer");
361 return (ARCHIVE_FATAL
);
363 strncpy(p
, b
, bsd_name_length
);
364 p
[bsd_name_length
] = '\0';
365 archive_entry_copy_pathname(entry
, p
);
371 * "/" is the SVR4/GNU archive symbol table.
373 if (strcmp(filename
, "/") == 0) {
374 archive_entry_copy_pathname(entry
, "/");
375 /* Parse the time, owner, mode, size fields. */
376 r
= ar_parse_common_header(ar
, entry
, h
);
377 /* Force the file type to a regular file. */
378 archive_entry_set_mode(entry
,
379 S_IFREG
| (archive_entry_mode(entry
) & 0777));
384 * "__.SYMDEF" is a BSD archive symbol table.
386 if (strcmp(filename
, "__.SYMDEF") == 0) {
387 archive_entry_copy_pathname(entry
, filename
);
388 /* Parse the time, owner, mode, size fields. */
389 return (ar_parse_common_header(ar
, entry
, h
));
393 * Otherwise, this is a standard entry. The filename
394 * has already been trimmed as much as possible, based
395 * on our current knowledge of the format.
397 archive_entry_copy_pathname(entry
, filename
);
398 return (ar_parse_common_header(ar
, entry
, h
));
402 ar_parse_common_header(struct ar
*ar
, struct archive_entry
*entry
,
407 /* Copy remaining header */
408 archive_entry_set_mtime(entry
,
409 (time_t)ar_atol10(h
+ AR_date_offset
, AR_date_size
), 0L);
410 archive_entry_set_uid(entry
,
411 (uid_t
)ar_atol10(h
+ AR_uid_offset
, AR_uid_size
));
412 archive_entry_set_gid(entry
,
413 (gid_t
)ar_atol10(h
+ AR_gid_offset
, AR_gid_size
));
414 archive_entry_set_mode(entry
,
415 (mode_t
)ar_atol8(h
+ AR_mode_offset
, AR_mode_size
));
416 n
= ar_atol10(h
+ AR_size_offset
, AR_size_size
);
418 ar
->entry_offset
= 0;
419 ar
->entry_padding
= n
% 2;
420 archive_entry_set_size(entry
, n
);
421 ar
->entry_bytes_remaining
= n
;
426 archive_read_format_ar_read_data(struct archive_read
*a
,
427 const void **buff
, size_t *size
, off_t
*offset
)
432 ar
= (struct ar
*)(a
->format
->data
);
434 if (ar
->entry_bytes_remaining
> 0) {
435 bytes_read
= (a
->decompressor
->read_ahead
)(a
, buff
, 1);
436 if (bytes_read
== 0) {
437 archive_set_error(&a
->archive
, ARCHIVE_ERRNO_MISC
,
438 "Truncated ar archive");
439 return (ARCHIVE_FATAL
);
442 return (ARCHIVE_FATAL
);
443 if (bytes_read
> ar
->entry_bytes_remaining
)
444 bytes_read
= (ssize_t
)ar
->entry_bytes_remaining
;
446 *offset
= ar
->entry_offset
;
447 ar
->entry_offset
+= bytes_read
;
448 ar
->entry_bytes_remaining
-= bytes_read
;
449 (a
->decompressor
->consume
)(a
, (size_t)bytes_read
);
452 while (ar
->entry_padding
> 0) {
453 bytes_read
= (a
->decompressor
->read_ahead
)(a
, buff
, 1);
455 return (ARCHIVE_FATAL
);
456 if (bytes_read
> ar
->entry_padding
)
457 bytes_read
= (ssize_t
)ar
->entry_padding
;
458 (a
->decompressor
->consume
)(a
, (size_t)bytes_read
);
459 ar
->entry_padding
-= bytes_read
;
463 *offset
= ar
->entry_offset
;
464 return (ARCHIVE_EOF
);
469 archive_read_format_ar_skip(struct archive_read
*a
)
474 const void *b
; /* Dummy variables */
478 ar
= (struct ar
*)(a
->format
->data
);
479 if (a
->decompressor
->skip
== NULL
) {
480 while (r
== ARCHIVE_OK
)
481 r
= archive_read_format_ar_read_data(a
, &b
, &s
, &o
);
485 bytes_skipped
= (a
->decompressor
->skip
)(a
, ar
->entry_bytes_remaining
+
487 if (bytes_skipped
< 0)
488 return (ARCHIVE_FATAL
);
490 ar
->entry_bytes_remaining
= 0;
491 ar
->entry_padding
= 0;
497 ar_parse_gnu_filename_table(struct archive_read
*a
, struct ar
*ar
,
498 const void *h
, size_t size
)
502 if (ar
->strtab
!= NULL
) {
503 archive_set_error(&a
->archive
, EINVAL
,
504 "More than one string tables exist");
505 return (ARCHIVE_WARN
);
509 archive_set_error(&a
->archive
, EINVAL
, "Invalid string table");
510 return (ARCHIVE_WARN
);
513 ar
->strtab_size
= size
;
514 ar
->strtab
= malloc(size
);
515 if (ar
->strtab
== NULL
) {
516 archive_set_error(&a
->archive
, ENOMEM
,
517 "Can't allocate string table buffer");
518 return (ARCHIVE_FATAL
);
521 (void)memcpy(ar
->strtab
, h
, size
);
522 for (p
= ar
->strtab
; p
< ar
->strtab
+ size
- 1; ++p
) {
526 goto bad_string_table
;
531 * Sanity check, last two chars must be `/\n' or '\n\n',
532 * depending on whether the string table is padded by a '\n'
533 * (string table produced by GNU ar always has an even size).
535 if (p
!= ar
->strtab
+ size
&& *p
!= '\n')
536 goto bad_string_table
;
538 /* Enforce zero termination. */
539 ar
->strtab
[size
- 1] = '\0';
544 archive_set_error(&a
->archive
, EINVAL
,
545 "Invalid string table");
548 return (ARCHIVE_WARN
);
552 ar_atol8(const char *p
, unsigned char_cnt
)
554 uint64_t l
, limit
, last_digit_limit
;
555 unsigned int digit
, base
;
558 limit
= UINT64_MAX
/ base
;
559 last_digit_limit
= UINT64_MAX
% base
;
561 while ((*p
== ' ' || *p
== '\t') && char_cnt
-- > 0)
566 while (*p
>= '0' && digit
< base
&& char_cnt
-- > 0) {
567 if (l
>limit
|| (l
== limit
&& digit
> last_digit_limit
)) {
568 l
= UINT64_MAX
; /* Truncate on overflow. */
571 l
= (l
* base
) + digit
;
578 ar_atol10(const char *p
, unsigned char_cnt
)
580 uint64_t l
, limit
, last_digit_limit
;
581 unsigned int base
, digit
;
584 limit
= UINT64_MAX
/ base
;
585 last_digit_limit
= UINT64_MAX
% base
;
587 while ((*p
== ' ' || *p
== '\t') && char_cnt
-- > 0)
591 while (*p
>= '0' && digit
< base
&& char_cnt
-- > 0) {
592 if (l
> limit
|| (l
== limit
&& digit
> last_digit_limit
)) {
593 l
= UINT64_MAX
; /* Truncate on overflow. */
596 l
= (l
* base
) + digit
;