1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #include "data/val-type.h"
29 #include "libpspp/cast.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/float-format.h"
32 #include "libpspp/integer-format.h"
33 #include "libpspp/misc.h"
36 #include "gl/minmax.h"
37 #include "gl/progname.h"
38 #include "gl/version-etc.h"
39 #include "gl/xalloc.h"
52 const char *file_name
;
55 int n_variable_records
, n_variables
;
58 size_t n_var_widths
, allocated_var_widths
;
60 enum integer_format integer_format
;
61 enum float_format float_format
;
63 enum compression compression
;
67 static void read_header (struct sfm_reader
*);
68 static void read_variable_record (struct sfm_reader
*);
69 static void read_value_label_record (struct sfm_reader
*);
70 static void read_document_record (struct sfm_reader
*);
71 static void read_extension_record (struct sfm_reader
*);
72 static void read_machine_integer_info (struct sfm_reader
*,
73 size_t size
, size_t count
);
74 static void read_machine_float_info (struct sfm_reader
*,
75 size_t size
, size_t count
);
76 static void read_extra_product_info (struct sfm_reader
*,
77 size_t size
, size_t count
);
78 static void read_mrsets (struct sfm_reader
*, size_t size
, size_t count
);
79 static void read_display_parameters (struct sfm_reader
*,
80 size_t size
, size_t count
);
81 static void read_long_var_name_map (struct sfm_reader
*r
,
82 size_t size
, size_t count
);
83 static void read_long_string_map (struct sfm_reader
*r
,
84 size_t size
, size_t count
);
85 static void read_datafile_attributes (struct sfm_reader
*r
,
86 size_t size
, size_t count
);
87 static void read_variable_attributes (struct sfm_reader
*r
,
88 size_t size
, size_t count
);
89 static void read_ncases64 (struct sfm_reader
*, size_t size
, size_t count
);
90 static void read_character_encoding (struct sfm_reader
*r
,
91 size_t size
, size_t count
);
92 static void read_long_string_value_labels (struct sfm_reader
*r
,
93 size_t size
, size_t count
);
94 static void read_long_string_missing_values (struct sfm_reader
*r
,
95 size_t size
, size_t count
);
96 static void read_unknown_extension (struct sfm_reader
*,
97 size_t size
, size_t count
);
98 static void read_simple_compressed_data (struct sfm_reader
*, int max_cases
);
99 static void read_zlib_compressed_data (struct sfm_reader
*);
101 static struct text_record
*open_text_record (
102 struct sfm_reader
*, size_t size
);
103 static void close_text_record (struct text_record
*);
104 static bool read_variable_to_value_pair (struct text_record
*,
105 char **key
, char **value
);
106 static char *text_tokenize (struct text_record
*, int delimiter
);
107 static bool text_match (struct text_record
*text
, int c
);
108 static const char *text_parse_counted_string (struct text_record
*);
109 static size_t text_pos (const struct text_record
*);
110 static const char *text_get_all (const struct text_record
*);
112 static void usage (void);
113 static void sys_warn (struct sfm_reader
*, const char *, ...)
114 PRINTF_FORMAT (2, 3);
115 static void sys_error (struct sfm_reader
*, const char *, ...)
119 static void read_bytes (struct sfm_reader
*, void *, size_t);
120 static bool try_read_bytes (struct sfm_reader
*, void *, size_t);
121 static int read_int (struct sfm_reader
*);
122 static int64_t read_int64 (struct sfm_reader
*);
123 static double read_float (struct sfm_reader
*);
124 static void read_string (struct sfm_reader
*, char *, size_t);
125 static void skip_bytes (struct sfm_reader
*, size_t);
126 static void trim_spaces (char *);
128 static void print_string (const char *s
, size_t len
);
131 main (int argc
, char *argv
[])
137 set_program_name (argv
[0]);
141 static const struct option long_options
[] =
143 { "data", optional_argument
, NULL
, 'd' },
144 { "help", no_argument
, NULL
, 'h' },
145 { "version", no_argument
, NULL
, 'v' },
146 { NULL
, 0, NULL
, 0 },
151 c
= getopt_long (argc
, argv
, "d::hv", long_options
, NULL
);
158 max_cases
= optarg
? atoi (optarg
) : INT_MAX
;
162 version_etc (stdout
, "pspp-dump-sav", PACKAGE_NAME
, PACKAGE_VERSION
,
163 "Ben Pfaff", "John Darrington", NULL_SENTINEL
);
176 error (1, 0, "at least one non-option argument is required; "
177 "use --help for help");
179 for (i
= optind
; i
< argc
; i
++)
183 r
.file_name
= argv
[i
];
184 r
.file
= fopen (r
.file_name
, "rb");
186 error (EXIT_FAILURE
, errno
, "error opening `%s'", r
.file_name
);
187 r
.n_variable_records
= 0;
190 r
.allocated_var_widths
= 0;
192 r
.compression
= COMP_NONE
;
194 if (argc
- optind
> 1)
195 printf ("Reading \"%s\":\n", r
.file_name
);
198 while ((rec_type
= read_int (&r
)) != 999)
203 read_variable_record (&r
);
207 read_value_label_record (&r
);
211 sys_error (&r
, "Misplaced type 4 record.");
214 read_document_record (&r
);
218 read_extension_record (&r
);
222 sys_error (&r
, "Unrecognized record type %d.", rec_type
);
225 printf ("%08llx: end-of-dictionary record "
226 "(first byte of data at %08llx)\n",
227 (long long int) ftello (r
.file
),
228 (long long int) ftello (r
.file
) + 4);
230 if (r
.compression
== COMP_SIMPLE
)
233 read_simple_compressed_data (&r
, max_cases
);
235 else if (r
.compression
== COMP_ZLIB
)
236 read_zlib_compressed_data (&r
);
245 read_header (struct sfm_reader
*r
)
248 char eye_catcher
[61];
249 uint8_t raw_layout_code
[4];
252 int32_t weight_index
;
255 char creation_date
[10];
256 char creation_time
[9];
260 read_string (r
, rec_type
, sizeof rec_type
);
261 read_string (r
, eye_catcher
, sizeof eye_catcher
);
263 if (!strcmp ("$FL2", rec_type
))
265 else if (!strcmp ("$FL3", rec_type
))
268 sys_error (r
, "This is not an SPSS system file.");
270 /* Identify integer format. */
271 read_bytes (r
, raw_layout_code
, sizeof raw_layout_code
);
272 if ((!integer_identify (2, raw_layout_code
, sizeof raw_layout_code
,
274 && !integer_identify (3, raw_layout_code
, sizeof raw_layout_code
,
276 || (r
->integer_format
!= INTEGER_MSB_FIRST
277 && r
->integer_format
!= INTEGER_LSB_FIRST
))
278 sys_error (r
, "This is not an SPSS system file.");
279 layout_code
= integer_get (r
->integer_format
,
280 raw_layout_code
, sizeof raw_layout_code
);
282 read_int (r
); /* Nominal case size (not actually useful). */
283 compressed
= read_int (r
);
284 weight_index
= read_int (r
);
285 ncases
= read_int (r
);
290 r
->compression
= COMP_NONE
;
291 else if (compressed
== 1)
292 r
->compression
= COMP_SIMPLE
;
293 else if (compressed
!= 0)
294 sys_error (r
, "SAV file header has invalid compression value "
295 "%"PRId32
".", compressed
);
300 r
->compression
= COMP_ZLIB
;
302 sys_error (r
, "ZSAV file header has invalid compression value "
303 "%"PRId32
".", compressed
);
306 /* Identify floating-point format and obtain compression bias. */
307 read_bytes (r
, raw_bias
, sizeof raw_bias
);
308 if (float_identify (100.0, raw_bias
, sizeof raw_bias
, &r
->float_format
) == 0)
310 sys_warn (r
, "Compression bias is not the usual value of 100, or system "
311 "file uses unrecognized floating-point format.");
312 if (r
->integer_format
== INTEGER_MSB_FIRST
)
313 r
->float_format
= FLOAT_IEEE_DOUBLE_BE
;
315 r
->float_format
= FLOAT_IEEE_DOUBLE_LE
;
317 r
->bias
= float_get_double (r
->float_format
, raw_bias
);
319 read_string (r
, creation_date
, sizeof creation_date
);
320 read_string (r
, creation_time
, sizeof creation_time
);
321 read_string (r
, file_label
, sizeof file_label
);
322 trim_spaces (file_label
);
325 printf ("File header record:\n");
326 printf ("\t%17s: %s\n", "Product name", eye_catcher
);
327 printf ("\t%17s: %"PRId32
"\n", "Layout code", layout_code
);
328 printf ("\t%17s: %"PRId32
" (%s)\n", "Compressed",
330 r
->compression
== COMP_NONE
? "no compression"
331 : r
->compression
== COMP_SIMPLE
? "simple compression"
332 : r
->compression
== COMP_ZLIB
? "ZLIB compression"
334 printf ("\t%17s: %"PRId32
"\n", "Weight index", weight_index
);
335 printf ("\t%17s: %"PRId32
"\n", "Number of cases", ncases
);
336 printf ("\t%17s: %.*g\n", "Compression bias", DBL_DIG
+ 1, r
->bias
);
337 printf ("\t%17s: %s\n", "Creation date", creation_date
);
338 printf ("\t%17s: %s\n", "Creation time", creation_time
);
339 printf ("\t%17s: \"%s\"\n", "File label", file_label
);
343 format_name (int format
)
345 switch ((format
>> 16) & 0xff)
348 case 2: return "AHEX";
349 case 3: return "COMMA";
350 case 4: return "DOLLAR";
353 case 7: return "PIBHEX";
355 case 9: return "PIB";
356 case 10: return "PK";
357 case 11: return "RB";
358 case 12: return "RBHEX";
362 case 20: return "DATE";
363 case 21: return "TIME";
364 case 22: return "DATETIME";
365 case 23: return "ADATE";
366 case 24: return "JDATE";
367 case 25: return "DTIME";
368 case 26: return "WKDAY";
369 case 27: return "MONTH";
370 case 28: return "MOYR";
371 case 29: return "QYR";
372 case 30: return "WKYR";
373 case 31: return "PCT";
374 case 32: return "DOT";
375 case 33: return "CCA";
376 case 34: return "CCB";
377 case 35: return "CCC";
378 case 36: return "CCD";
379 case 37: return "CCE";
380 case 38: return "EDATE";
381 case 39: return "SDATE";
382 default: return "invalid";
386 /* Reads a variable (type 2) record from R and adds the
387 corresponding variable to DICT.
388 Also skips past additional variable records for long string
391 read_variable_record (struct sfm_reader
*r
)
394 int has_variable_label
;
395 int missing_value_code
;
400 printf ("%08llx: variable record #%d\n",
401 (long long int) ftello (r
->file
), r
->n_variable_records
++);
403 width
= read_int (r
);
404 has_variable_label
= read_int (r
);
405 missing_value_code
= read_int (r
);
406 print_format
= read_int (r
);
407 write_format
= read_int (r
);
408 read_string (r
, name
, sizeof name
);
409 name
[strcspn (name
, " ")] = '\0';
414 if (r
->n_var_widths
>= r
->allocated_var_widths
)
415 r
->var_widths
= x2nrealloc (r
->var_widths
, &r
->allocated_var_widths
,
416 sizeof *r
->var_widths
);
417 r
->var_widths
[r
->n_var_widths
++] = width
;
419 printf ("\tWidth: %d (%s)\n",
422 : width
== 0 ? "numeric"
423 : "long string continuation record");
424 printf ("\tVariable label: %d\n", has_variable_label
);
425 printf ("\tMissing values code: %d (%s)\n", missing_value_code
,
426 (missing_value_code
== 0 ? "no missing values"
427 : missing_value_code
== 1 ? "one missing value"
428 : missing_value_code
== 2 ? "two missing values"
429 : missing_value_code
== 3 ? "three missing values"
430 : missing_value_code
== -2 ? "one missing value range"
431 : missing_value_code
== -3 ? "one missing value, one range"
433 printf ("\tPrint format: %06x (%s%d.%d)\n",
434 print_format
, format_name (print_format
),
435 (print_format
>> 8) & 0xff, print_format
& 0xff);
436 printf ("\tWrite format: %06x (%s%d.%d)\n",
437 write_format
, format_name (write_format
),
438 (write_format
>> 8) & 0xff, write_format
& 0xff);
439 printf ("\tName: %s\n", name
);
441 /* Get variable label, if any. */
442 if (has_variable_label
!= 0 && has_variable_label
!= 1)
443 sys_error (r
, "Variable label indicator field is not 0 or 1.");
444 if (has_variable_label
== 1)
446 long long int offset
= ftello (r
->file
);
452 /* Read up to 255 bytes of label. */
453 label
= xmalloc (len
+ 1);
454 read_string (r
, label
, len
+ 1);
455 printf("\t%08llx Variable label: \"%s\"\n", offset
, label
);
458 /* Skip label padding up to multiple of 4 bytes. */
459 skip_bytes (r
, ROUND_UP (len
, 4) - len
);
462 /* Set missing values. */
463 if (missing_value_code
!= 0)
467 printf ("\t%08llx Missing values:", (long long int) ftello (r
->file
));
470 if (missing_value_code
< -3 || missing_value_code
> 3
471 || missing_value_code
== -1)
472 sys_error (r
, "Numeric missing value indicator field is not "
473 "-3, -2, 0, 1, 2, or 3.");
474 if (missing_value_code
< 0)
476 double low
= read_float (r
);
477 double high
= read_float (r
);
478 printf (" %.*g...%.*g", DBL_DIG
+ 1, low
, DBL_DIG
+ 1, high
);
479 missing_value_code
= -missing_value_code
- 2;
481 for (i
= 0; i
< missing_value_code
; i
++)
482 printf (" %.*g", DBL_DIG
+ 1, read_float (r
));
486 if (missing_value_code
< 1 || missing_value_code
> 3)
487 sys_error (r
, "String missing value indicator field is not "
489 for (i
= 0; i
< missing_value_code
; i
++)
492 read_string (r
, string
, sizeof string
);
493 printf (" \"%s\"", string
);
501 print_untyped_value (struct sfm_reader
*r
, char raw_value
[8])
506 value
= float_get_double (r
->float_format
, raw_value
);
507 for (n_printable
= 0; n_printable
< 8; n_printable
++)
508 if (!isprint (raw_value
[n_printable
]))
511 printf ("%.*g/\"%.*s\"", DBL_DIG
+ 1, value
, n_printable
, raw_value
);
/* Reads a value label (type 3) record from R, followed by the type 4
   record that lists the variables the labels apply to, and prints both. */
517 read_value_label_record (struct sfm_reader
*r
)
519 int label_cnt
, var_cnt
;
522 printf ("%08llx: value labels record\n", (long long int) ftello (r
->file
));
524 /* Read number of labels. */
525 label_cnt
= read_int (r
);
526 for (i
= 0; i
< label_cnt
; i
++)
529 unsigned char label_len
;
533 read_bytes (r
, raw_value
, sizeof raw_value
);
535 /* Read label length. */
536 read_bytes (r
, &label_len
, sizeof label_len
);
537 padded_len
= ROUND_UP (label_len
+ 1, 8);
539 /* Read label, padding. */
540 read_bytes (r
, label
, padded_len
- 1);
541 label
[label_len
] = 0;
544 print_untyped_value (r
, raw_value
);
545 printf (": \"%s\"\n", label
);
548 /* Now, read the type 4 record that has the list of variables
549 to which the value labels are to be applied. */
551 /* Read record type of type 4 record. */
552 if (read_int (r
) != 4)
553 sys_error (r
, "Variable index record (type 4) does not immediately "
554 "follow value label record (type 3) as it should.");
556 /* Read number of variables associated with value label from type 4
558 printf ("\t%08llx: apply to variables", (long long int) ftello (r
->file
));
559 var_cnt
= read_int (r
);
560 for (i
= 0; i
< var_cnt
; i
++)
561 printf (" #%d", read_int (r
));
566 read_document_record (struct sfm_reader
*r
)
571 printf ("%08llx: document record\n", (long long int) ftello (r
->file
));
572 n_lines
= read_int (r
);
573 printf ("\t%d lines of documents\n", n_lines
);
575 for (i
= 0; i
< n_lines
; i
++)
578 printf ("\t%08llx: ", (long long int) ftello (r
->file
));
579 read_string (r
, line
, sizeof line
);
581 printf ("line %d: \"%s\"\n", i
, line
);
586 read_extension_record (struct sfm_reader
*r
)
588 long long int offset
= ftello (r
->file
);
589 int subtype
= read_int (r
);
590 size_t size
= read_int (r
);
591 size_t count
= read_int (r
);
592 size_t bytes
= size
* count
;
594 printf ("%08llx: Record 7, subtype %d, size=%zu, count=%zu\n",
595 offset
, subtype
, size
, count
);
600 read_machine_integer_info (r
, size
, count
);
604 read_machine_float_info (r
, size
, count
);
608 /* DATE variable information. We don't use it yet, but we
614 read_mrsets (r
, size
, count
);
618 read_extra_product_info (r
, size
, count
);
622 read_display_parameters (r
, size
, count
);
626 read_long_var_name_map (r
, size
, count
);
630 read_long_string_map (r
, size
, count
);
634 read_ncases64 (r
, size
, count
);
638 read_datafile_attributes (r
, size
, count
);
642 read_variable_attributes (r
, size
, count
);
646 read_character_encoding (r
, size
, count
);
650 read_long_string_value_labels (r
, size
, count
);
654 read_long_string_missing_values (r
, size
, count
);
658 sys_warn (r
, "Unrecognized record type 7, subtype %d.", subtype
);
659 read_unknown_extension (r
, size
, count
);
663 skip_bytes (r
, bytes
);
667 read_machine_integer_info (struct sfm_reader
*r
, size_t size
, size_t count
)
669 long long int offset
= ftello (r
->file
);
670 int version_major
= read_int (r
);
671 int version_minor
= read_int (r
);
672 int version_revision
= read_int (r
);
673 int machine_code
= read_int (r
);
674 int float_representation
= read_int (r
);
675 int compression_code
= read_int (r
);
676 int integer_representation
= read_int (r
);
677 int character_code
= read_int (r
);
679 printf ("%08llx: machine integer info\n", offset
);
680 if (size
!= 4 || count
!= 8)
681 sys_error (r
, "Bad size (%zu) or count (%zu) field on record type 7, "
682 "subtype 3.", size
, count
);
684 printf ("\tVersion: %d.%d.%d\n",
685 version_major
, version_minor
, version_revision
);
686 printf ("\tMachine code: %d\n", machine_code
);
687 printf ("\tFloating point representation: %d (%s)\n",
688 float_representation
,
689 float_representation
== 1 ? "IEEE 754"
690 : float_representation
== 2 ? "IBM 370"
691 : float_representation
== 3 ? "DEC VAX"
693 printf ("\tCompression code: %d\n", compression_code
);
694 printf ("\tEndianness: %d (%s)\n", integer_representation
,
695 integer_representation
== 1 ? "big"
696 : integer_representation
== 2 ? "little" : "unknown");
697 printf ("\tCharacter code: %d\n", character_code
);
700 /* Read record type 7, subtype 4. */
702 read_machine_float_info (struct sfm_reader
*r
, size_t size
, size_t count
)
704 long long int offset
= ftello (r
->file
);
705 double sysmis
= read_float (r
);
706 double highest
= read_float (r
);
707 double lowest
= read_float (r
);
709 printf ("%08llx: machine float info\n", offset
);
710 if (size
!= 8 || count
!= 3)
711 sys_error (r
, "Bad size (%zu) or count (%zu) on extension 4.",
714 printf ("\tsysmis: %.*g (%a)\n", DBL_DIG
+ 1, sysmis
, sysmis
);
715 if (sysmis
!= SYSMIS
)
716 sys_warn (r
, "File specifies unexpected value %.*g (%a) as %s.",
717 DBL_DIG
+ 1, sysmis
, sysmis
, "SYSMIS");
719 printf ("\thighest: %.*g (%a)\n", DBL_DIG
+ 1, highest
, highest
);
720 if (highest
!= HIGHEST
)
721 sys_warn (r
, "File specifies unexpected value %.*g (%a) as %s.",
722 DBL_DIG
+ 1, highest
, highest
, "HIGHEST");
724 printf ("\tlowest: %.*g (%a)\n", DBL_DIG
+ 1, lowest
, lowest
);
725 if (lowest
!= LOWEST
&& lowest
!= SYSMIS
)
726 sys_warn (r
, "File specifies unexpected value %.*g (%a) as %s.",
727 DBL_DIG
+ 1, lowest
, lowest
, "LOWEST");
731 read_extra_product_info (struct sfm_reader
*r
,
732 size_t size
, size_t count
)
734 struct text_record
*text
;
737 printf ("%08llx: extra product info\n", (long long int) ftello (r
->file
));
738 text
= open_text_record (r
, size
* count
);
739 s
= text_get_all (text
);
740 print_string (s
, strlen (s
));
741 close_text_record (text
);
744 /* Read record type 7, subtype 7. */
746 read_mrsets (struct sfm_reader
*r
, size_t size
, size_t count
)
748 struct text_record
*text
;
750 printf ("%08llx: multiple response sets\n",
751 (long long int) ftello (r
->file
));
752 text
= open_text_record (r
, size
* count
);
756 enum { MRSET_MC
, MRSET_MD
} type
;
757 bool cat_label_from_counted_values
= false;
758 bool label_from_var_label
= false;
761 const char *variables
;
763 while (text_match (text
, '\n'))
766 name
= text_tokenize (text
, '=');
770 if (text_match (text
, 'C'))
774 if (!text_match (text
, ' '))
776 sys_warn (r
, "missing space following 'C' at offset %zu "
777 "in mrsets record", text_pos (text
));
781 else if (text_match (text
, 'D'))
785 else if (text_match (text
, 'E'))
790 cat_label_from_counted_values
= true;
792 if (!text_match (text
, ' '))
794 sys_warn (r
, "Missing space following `%c' at offset %zu "
795 "in MRSETS record", 'E', text_pos (text
));
799 number
= text_tokenize (text
, ' ');
800 if (!strcmp (number
, "11"))
801 label_from_var_label
= true;
802 else if (strcmp (number
, "1"))
803 sys_warn (r
, "Unexpected label source value `%s' "
804 "following `E' at offset %zu in MRSETS record",
805 number
, text_pos (text
));
810 sys_warn (r
, "missing `C', `D', or `E' at offset %zu "
811 "in mrsets record", text_pos (text
));
815 if (type
== MRSET_MD
)
817 counted
= text_parse_counted_string (text
);
822 label
= text_parse_counted_string (text
);
826 variables
= text_tokenize (text
, '\n');
828 printf ("\t\"%s\": multiple %s set",
829 name
, type
== MRSET_MC
? "category" : "dichotomy");
831 printf (", counted value \"%s\"", counted
);
832 if (cat_label_from_counted_values
)
833 printf (", category labels from counted values");
834 if (label
[0] != '\0')
835 printf (", label \"%s\"", label
);
836 if (label_from_var_label
)
837 printf (", label from variable label");
838 if (variables
!= NULL
)
839 printf(", variables \"%s\"\n", variables
);
841 printf(", no variables\n");
843 close_text_record (text
);
846 /* Read record type 7, subtype 11. */
848 read_display_parameters (struct sfm_reader
*r
, size_t size
, size_t count
)
854 printf ("%08llx: variable display parameters\n",
855 (long long int) ftello (r
->file
));
858 sys_warn (r
, "Bad size %zu on extension 11.", size
);
859 skip_bytes (r
, size
* count
);
863 n_vars
= r
->n_variables
;
864 if (count
== 3 * n_vars
)
865 includes_width
= true;
866 else if (count
== 2 * n_vars
)
867 includes_width
= false;
870 sys_warn (r
, "Extension 11 has bad count %zu (for %zu variables.",
872 skip_bytes (r
, size
* count
);
876 for (i
= 0; i
< n_vars
; ++i
)
878 int measure
= read_int (r
);
879 int width
= includes_width
? read_int (r
) : 0;
880 int align
= read_int (r
);
882 printf ("\tVar #%zu: measure=%d (%s)", i
, measure
,
883 (measure
== 1 ? "nominal"
884 : measure
== 2 ? "ordinal"
885 : measure
== 3 ? "scale"
888 printf (", width=%d", width
);
889 printf (", align=%d (%s)\n", align
,
891 : align
== 1 ? "right"
892 : align
== 2 ? "centre"
897 /* Reads record type 7, subtype 13, which gives the long name
898 that corresponds to each short name. */
900 read_long_var_name_map (struct sfm_reader
*r
, size_t size
, size_t count
)
902 struct text_record
*text
;
906 printf ("%08llx: long variable names (short => long)\n",
907 (long long int) ftello (r
->file
));
908 text
= open_text_record (r
, size
* count
);
909 while (read_variable_to_value_pair (text
, &var
, &long_name
))
910 printf ("\t%s => %s\n", var
, long_name
);
911 close_text_record (text
);
/* Reads record type 7, subtype 14, which gives the real length
   of each very long string, and prints each variable => length pair. */
917 read_long_string_map (struct sfm_reader
*r
, size_t size
, size_t count
)
919 struct text_record
*text
;
923 printf ("%08llx: very long strings (variable => length)\n",
924 (long long int) ftello (r
->file
));
925 text
= open_text_record (r
, size
* count
);
926 while (read_variable_to_value_pair (text
, &var
, &length_s
))
927 printf ("\t%s => %d\n", var
, atoi (length_s
));
928 close_text_record (text
);
932 read_attributes (struct sfm_reader
*r
, struct text_record
*text
,
933 const char *variable
)
940 key
= text_tokenize (text
, '(');
944 for (index
= 1; ; index
++)
946 /* Parse the value. */
947 const char *value
= text_tokenize (text
, '\n');
950 sys_warn (r
, "%s: Error parsing attribute value %s[%d]",
951 variable
, key
, index
);
954 if (strlen (value
) < 2
955 || value
[0] != '\'' || value
[strlen (value
) - 1] != '\'')
956 sys_warn (r
, "%s: Attribute value %s[%d] is not quoted: %s",
957 variable
, key
, index
, value
);
959 printf ("\t%s: %s[%d] = \"%.*s\"\n",
960 variable
, key
, index
, (int) strlen (value
) - 2, value
+ 1);
962 /* Was this the last value for this attribute? */
963 if (text_match (text
, ')'))
967 if (text_match (text
, '/'))
972 /* Read extended number of cases record. */
974 read_ncases64 (struct sfm_reader
*r
, size_t size
, size_t count
)
976 int64_t unknown
, ncases64
;
980 sys_warn (r
, "Bad size %zu for extended number of cases.", size
);
981 skip_bytes (r
, size
* count
);
986 sys_warn (r
, "Bad count %zu for extended number of cases.", size
);
987 skip_bytes (r
, size
* count
);
990 unknown
= read_int64 (r
);
991 ncases64
= read_int64 (r
);
992 printf ("%08llx: extended number of cases: "
993 "unknown=%"PRId64
", ncases64=%"PRId64
"\n",
994 (long long int) ftello (r
->file
), unknown
, ncases64
);
998 read_datafile_attributes (struct sfm_reader
*r
, size_t size
, size_t count
)
1000 struct text_record
*text
;
1002 printf ("%08llx: datafile attributes\n", (long long int) ftello (r
->file
));
1003 text
= open_text_record (r
, size
* count
);
1004 read_attributes (r
, text
, "datafile");
1005 close_text_record (text
);
1009 read_character_encoding (struct sfm_reader
*r
, size_t size
, size_t count
)
1011 long long int posn
= ftello (r
->file
);
1012 char *encoding
= xcalloc (size
, count
+ 1);
1013 read_string (r
, encoding
, count
+ 1);
1015 printf ("%08llx: Character Encoding: %s\n", posn
, encoding
);
1019 read_long_string_value_labels (struct sfm_reader
*r
, size_t size
, size_t count
)
1021 long long int start
= ftello (r
->file
);
1023 printf ("%08llx: long string value labels\n", start
);
1024 while (ftello (r
->file
) - start
< size
* count
)
1026 long long posn
= ftello (r
->file
);
1027 char var_name
[ID_MAX_LEN
+ 1];
1033 /* Read variable name. */
1034 var_name_len
= read_int (r
);
1035 if (var_name_len
> ID_MAX_LEN
)
1036 sys_error (r
, "Variable name length in long string value label "
1037 "record (%d) exceeds %d-byte limit.",
1038 var_name_len
, ID_MAX_LEN
);
1039 read_string (r
, var_name
, var_name_len
+ 1);
1041 /* Read width, number of values. */
1042 width
= read_int (r
);
1043 n_values
= read_int (r
);
1045 printf ("\t%08llx: %s, width %d, %d values\n",
1046 posn
, var_name
, width
, n_values
);
1049 for (i
= 0; i
< n_values
; i
++)
1057 posn
= ftello (r
->file
);
1060 value_length
= read_int (r
);
1061 value
= xmalloc (value_length
+ 1);
1062 read_string (r
, value
, value_length
+ 1);
1065 label_length
= read_int (r
);
1066 label
= xmalloc (label_length
+ 1);
1067 read_string (r
, label
, label_length
+ 1);
1069 printf ("\t\t%08llx: \"%s\" (%d bytes) => \"%s\" (%d bytes)\n",
1070 posn
, value
, value_length
, label
, label_length
);
1079 read_long_string_missing_values (struct sfm_reader
*r
,
1080 size_t size
, size_t count
)
1082 long long int start
= ftello (r
->file
);
1084 printf ("%08llx: long string missing values\n", start
);
1085 while (ftello (r
->file
) - start
< size
* count
)
1087 long long posn
= ftello (r
->file
);
1088 char var_name
[ID_MAX_LEN
+ 1];
1089 uint8_t n_missing_values
;
1093 /* Read variable name. */
1094 var_name_len
= read_int (r
);
1095 if (var_name_len
> ID_MAX_LEN
)
1096 sys_error (r
, "Variable name length in long string value label "
1097 "record (%d) exceeds %d-byte limit.",
1098 var_name_len
, ID_MAX_LEN
);
1099 read_string (r
, var_name
, var_name_len
+ 1);
1101 /* Read number of values. */
1102 read_bytes (r
, &n_missing_values
, 1);
1104 printf ("\t%08llx: %s, %d missing values:",
1105 posn
, var_name
, n_missing_values
);
1108 for (i
= 0; i
< n_missing_values
; i
++)
1113 posn
= ftello (r
->file
);
1116 value_length
= read_int (r
);
1117 value
= xmalloc (value_length
+ 1);
1118 read_string (r
, value
, value_length
+ 1);
1120 printf (" \"%s\"", value
);
1129 hex_dump (size_t offset
, const void *buffer_
, size_t buffer_size
)
1131 const uint8_t *buffer
= buffer_
;
1133 while (buffer_size
> 0)
1135 size_t n
= MIN (buffer_size
, 16);
1138 printf ("%04zx", offset
);
1139 for (i
= 0; i
< 16; i
++)
1142 printf ("%c%02x", i
== 8 ? '-' : ' ', buffer
[i
]);
1148 for (i
= 0; i
< 16; i
++)
1150 unsigned char c
= i
< n
? buffer
[i
] : ' ';
1151 putchar (isprint (c
) ? c
: '.');
1161 /* Reads and prints any type 7 record that we don't understand. */
1163 read_unknown_extension (struct sfm_reader
*r
, size_t size
, size_t count
)
1165 unsigned char *buffer
;
1168 if (size
== 0 || count
> 65536 / size
)
1169 skip_bytes (r
, size
* count
);
1172 buffer
= xmalloc (size
);
1173 for (i
= 0; i
< count
; i
++)
1175 read_bytes (r
, buffer
, size
);
1176 hex_dump (i
* size
, buffer
, size
);
1182 buffer
= xmalloc (count
);
1183 read_bytes (r
, buffer
, count
);
1184 print_string (CHAR_CAST (char *, buffer
), count
);
1190 read_variable_attributes (struct sfm_reader
*r
, size_t size
, size_t count
)
1192 struct text_record
*text
;
1194 printf ("%08llx: variable attributes\n", (long long int) ftello (r
->file
));
1195 text
= open_text_record (r
, size
* count
);
1198 const char *variable
= text_tokenize (text
, ':');
1199 if (variable
== NULL
|| !read_attributes (r
, text
, variable
))
1202 close_text_record (text
);
1206 read_simple_compressed_data (struct sfm_reader
*r
, int max_cases
)
1208 enum { N_OPCODES
= 8 };
1209 uint8_t opcodes
[N_OPCODES
];
1210 long long int opcode_ofs
;
1216 printf ("\n%08llx: compressed data:\n", (long long int) ftello (r
->file
));
1218 opcode_idx
= N_OPCODES
;
1221 for (case_num
= 0; case_num
< max_cases
; case_num
++)
1223 printf ("%08llx: case %d's uncompressible data begins\n",
1224 (long long int) ftello (r
->file
), case_num
);
1225 for (i
= 0; i
< r
->n_var_widths
; )
1227 int width
= r
->var_widths
[i
];
1231 if (opcode_idx
>= N_OPCODES
)
1233 opcode_ofs
= ftello (r
->file
);
1236 if (!try_read_bytes (r
, opcodes
, 8))
1240 read_bytes (r
, opcodes
, 8);
1243 opcode
= opcodes
[opcode_idx
];
1244 printf ("%08llx: variable %d: opcode %d: ",
1245 opcode_ofs
+ opcode_idx
, i
, opcode
);
1250 printf ("%.*g", DBL_DIG
+ 1, opcode
- r
->bias
);
1252 printf (", but this is a string variable (width=%d)", width
);
1258 printf ("ignored padding\n");
1262 printf ("end of data\n");
1266 read_bytes (r
, raw_value
, 8);
1267 printf ("uncompressible data: ");
1268 print_untyped_value (r
, raw_value
);
1276 printf (", but this is a numeric variable");
1284 printf (", but this is a string variable (width=%d)", width
);
1296 read_zlib_compressed_data (struct sfm_reader
*r
)
1299 long long int this_ofs
, next_ofs
, next_len
;
1300 long long int bias
, zero
;
1301 long long int expected_uncmp_ofs
, expected_cmp_ofs
;
1302 unsigned int block_size
, n_blocks
;
1306 ofs
= ftello (r
->file
);
1307 printf ("\n%08llx: ZLIB compressed data header:\n", ofs
);
1309 this_ofs
= read_int64 (r
);
1310 next_ofs
= read_int64 (r
);
1311 next_len
= read_int64 (r
);
1313 printf ("\tzheader_ofs: 0x%llx\n", this_ofs
);
1314 if (this_ofs
!= ofs
)
1315 printf ("\t\t(Expected 0x%llx.)\n", ofs
);
1316 printf ("\tztrailer_ofs: 0x%llx\n", next_ofs
);
1317 printf ("\tztrailer_len: %lld\n", next_len
);
1318 if (next_len
< 24 || next_len
% 24)
1319 printf ("\t\t(Trailer length is not a positive multiple of 24.)\n");
1321 printf ("\n%08llx: 0x%llx bytes of ZLIB compressed data\n",
1322 ofs
+ 8 * 3, next_ofs
- (ofs
+ 8 * 3));
1324 skip_bytes (r
, next_ofs
- (ofs
+ 8 * 3));
1326 printf ("\n%08llx: ZLIB trailer fixed header:\n", next_ofs
);
1327 bias
= read_int64 (r
);
1328 zero
= read_int64 (r
);
1329 block_size
= read_int (r
);
1330 n_blocks
= read_int (r
);
1331 printf ("\tbias: %lld\n", bias
);
1332 printf ("\tzero: 0x%llx\n", zero
);
1334 printf ("\t\t(Expected 0.)\n");
1335 printf ("\tblock_size: 0x%x\n", block_size
);
1336 if (block_size
!= 0x3ff000)
1337 printf ("\t\t(Expected 0x3ff000.)\n");
1338 printf ("\tn_blocks: %u\n", n_blocks
);
1339 if (n_blocks
!= next_len
/ 24 - 1)
1340 printf ("\t\t(Expected %llu.)\n", next_len
/ 24 - 1);
1342 expected_uncmp_ofs
= ofs
;
1343 expected_cmp_ofs
= ofs
+ 24;
1344 for (i
= 0; i
< n_blocks
; i
++)
1346 long long int blockinfo_ofs
= ftello (r
->file
);
1347 unsigned long long int uncompressed_ofs
= read_int64 (r
);
1348 unsigned long long int compressed_ofs
= read_int64 (r
);
1349 unsigned int uncompressed_size
= read_int (r
);
1350 unsigned int compressed_size
= read_int (r
);
1352 printf ("\n%08llx: ZLIB block descriptor %d\n", blockinfo_ofs
, i
+ 1);
1354 printf ("\tuncompressed_ofs: 0x%llx\n", uncompressed_ofs
);
1355 if (uncompressed_ofs
!= expected_uncmp_ofs
)
1356 printf ("\t\t(Expected 0x%llx.)\n", ofs
);
1358 printf ("\tcompressed_ofs: 0x%llx\n", compressed_ofs
);
1359 if (compressed_ofs
!= expected_cmp_ofs
)
1360 printf ("\t\t(Expected 0x%llx.)\n", ofs
+ 24);
1362 printf ("\tuncompressed_size: 0x%x\n", uncompressed_size
);
1363 if (i
< n_blocks
- 1 && uncompressed_size
!= block_size
)
1364 printf ("\t\t(Expected 0x%x.)\n", block_size
);
1366 printf ("\tcompressed_size: 0x%x\n", compressed_size
);
1367 if (i
== n_blocks
- 1 && compressed_ofs
+ compressed_size
!= next_ofs
)
1368 printf ("\t\t(This was expected to be 0x%llx.)\n",
1369 next_ofs
- compressed_size
);
1371 expected_uncmp_ofs
+= uncompressed_size
;
1372 expected_cmp_ofs
+= compressed_size
;
/* Helpers for reading records that consist of structured text
   strings. */

/* Parsing state for one text record. */
struct text_record
  {
    struct sfm_reader *reader;  /* Reader the record was read from. */
    char *buffer;               /* Record contents, null-terminated. */
    size_t size;                /* Number of content bytes in BUFFER. */
    size_t pos;                 /* Offset of the next unparsed byte. */
  };
1388 /* Reads SIZE bytes into a text record for R,
1389 and returns the new text record. */
1390 static struct text_record
*
1391 open_text_record (struct sfm_reader
*r
, size_t size
)
1393 struct text_record
*text
= xmalloc (sizeof *text
);
1394 char *buffer
= xmalloc (size
+ 1);
1395 read_bytes (r
, buffer
, size
);
1396 buffer
[size
] = '\0';
1398 text
->buffer
= buffer
;
1404 /* Closes TEXT and frees its storage.
1405 Not really needed, because the pool will free the text record anyway,
1406 but can be used to free it earlier. */
1408 close_text_record (struct text_record
*text
)
1410 free (text
->buffer
);
1415 text_tokenize (struct text_record
*text
, int delimiter
)
1417 size_t start
= text
->pos
;
1418 while (text
->pos
< text
->size
1419 && text
->buffer
[text
->pos
] != delimiter
1420 && text
->buffer
[text
->pos
] != '\0')
1422 if (start
== text
->pos
)
1424 text
->buffer
[text
->pos
++] = '\0';
1425 return &text
->buffer
[start
];
1429 text_match (struct text_record
*text
, int c
)
1431 if (text
->pos
< text
->size
&& text
->buffer
[text
->pos
] == c
)
1440 /* Reads a integer value expressed in decimal, then a space, then a string that
1441 consists of exactly as many bytes as specified by the integer, then a space,
1442 from TEXT. Returns the string, null-terminated, as a subset of TEXT's
1443 buffer (so the caller should not free the string). */
1445 text_parse_counted_string (struct text_record
*text
)
1453 while (isdigit ((unsigned char) text
->buffer
[text
->pos
]))
1454 n
= (n
* 10) + (text
->buffer
[text
->pos
++] - '0');
1455 if (start
== text
->pos
)
1457 sys_error (text
->reader
, "expecting digit at offset %zu in record",
1462 if (!text_match (text
, ' '))
1464 sys_error (text
->reader
, "expecting space at offset %zu in record",
1469 if (text
->pos
+ n
> text
->size
)
1471 sys_error (text
->reader
, "%zu-byte string starting at offset %zu "
1472 "exceeds record length %zu", n
, text
->pos
, text
->size
);
1476 s
= &text
->buffer
[text
->pos
];
1479 sys_error (text
->reader
, "expecting space at offset %zu following "
1480 "%zu-byte string", text
->pos
+ n
, n
);
1488 /* Reads a variable=value pair from TEXT.
1489 Looks up the variable in DICT and stores it into *VAR.
1490 Stores a null-terminated value into *VALUE. */
1492 read_variable_to_value_pair (struct text_record
*text
,
1493 char **key
, char **value
)
1495 *key
= text_tokenize (text
, '=');
1496 *value
= text_tokenize (text
, '\t');
1497 if (!*key
|| !*value
)
1500 while (text
->pos
< text
->size
1501 && (text
->buffer
[text
->pos
] == '\t'
1502 || text
->buffer
[text
->pos
] == '\0'))
1507 /* Returns the current byte offset inside the TEXT's string. */
1509 text_pos (const struct text_record
*text
)
1515 text_get_all (const struct text_record
*text
)
1517 return text
->buffer
;
1524 %s, a utility for dissecting system files.\n\
1525 Usage: %s [OPTION]... SYSFILE...\n\
1526 where each SYSFILE is the name of a system file.\n\
1529 --data[=MAXCASES] print (up to MAXCASES cases of) compressed data\n\
1530 --help display this help and exit\n\
1531 --version output version information and exit\n",
1532 program_name
, program_name
);
1535 /* Displays a corruption message. */
1537 sys_msg (struct sfm_reader
*r
, const char *format
, va_list args
)
1539 printf ("\"%s\" near offset 0x%llx: ",
1540 r
->file_name
, (long long int) ftello (r
->file
));
1541 vprintf (format
, args
);
/* Displays a warning, formatted printf-style from FORMAT, tagged with R's
   file name and current file position.  Reading then continues. */
static void
sys_warn (struct sfm_reader *r, const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  sys_msg (r, format, ap);
  va_end (ap);
}
/* Displays an error, formatted printf-style from FORMAT, tagged with R's
   file name and current file position, then terminates the program with
   exit status EXIT_FAILURE.  Does not return. */
static void
sys_error (struct sfm_reader *r, const char *format, ...)
{
  va_list ap;

  va_start (ap, format);
  sys_msg (r, format, ap);
  va_end (ap);

  exit (EXIT_FAILURE);
}
1571 /* Reads BYTE_CNT bytes into BUF.
1572 Returns true if exactly BYTE_CNT bytes are successfully read.
1573 Aborts if an I/O error or a partial read occurs.
1574 If EOF_IS_OK, then an immediate end-of-file causes false to be
1575 returned; otherwise, immediate end-of-file causes an abort
1578 read_bytes_internal (struct sfm_reader
*r
, bool eof_is_ok
,
1579 void *buf
, size_t byte_cnt
)
1581 size_t bytes_read
= fread (buf
, 1, byte_cnt
, r
->file
);
1582 if (bytes_read
== byte_cnt
)
1584 else if (ferror (r
->file
))
1585 sys_error (r
, "System error: %s.", strerror (errno
));
1586 else if (!eof_is_ok
|| bytes_read
!= 0)
1587 sys_error (r
, "Unexpected end of file.");
/* Reads exactly BYTE_CNT bytes from R into BUF.
   An I/O error or end-of-file, even before the first byte, is reported
   through sys_error(). */
static void
read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  const bool eof_is_ok = false;

  read_bytes_internal (r, eof_is_ok, buf, byte_cnt);
}
/* Attempts to read BYTE_CNT bytes from R into BUF.
   Returns true if exactly BYTE_CNT bytes were read, false if end-of-file
   was reached before any byte was read.
   An I/O error or a partial read is reported through sys_error(). */
static bool
try_read_bytes (struct sfm_reader *r, void *buf, size_t byte_cnt)
{
  const bool eof_is_ok = true;

  return read_bytes_internal (r, eof_is_ok, buf, byte_cnt);
}
1610 /* Reads a 32-bit signed integer from R and returns its value in
1613 read_int (struct sfm_reader
*r
)
1616 read_bytes (r
, integer
, sizeof integer
);
1617 return integer_get (r
->integer_format
, integer
, sizeof integer
);
1620 /* Reads a 64-bit signed integer from R and returns its value in
1623 read_int64 (struct sfm_reader
*r
)
1626 read_bytes (r
, integer
, sizeof integer
);
1627 return integer_get (r
->integer_format
, integer
, sizeof integer
);
1630 /* Reads a 64-bit floating-point number from R and returns its
1631 value in host format. */
1633 read_float (struct sfm_reader
*r
)
1636 read_bytes (r
, number
, sizeof number
);
1637 return float_get_double (r
->float_format
, number
);
/* Reads exactly SIZE - 1 bytes from R into BUFFER
   and stores a null byte into BUFFER[SIZE - 1].
   SIZE must be positive. */
static void
read_string (struct sfm_reader *r, char *buffer, size_t size)
{
  size_t n_data;

  assert (size > 0);
  n_data = size - 1;
  read_bytes (r, buffer, n_data);
  buffer[n_data] = '\0';
}
/* Skips BYTES bytes forward in R by reading them, at most one
   scratch buffer's worth at a time, and discarding the data. */
static void
skip_bytes (struct sfm_reader *r, size_t bytes)
{
  size_t left;

  for (left = bytes; left > 0; )
    {
      char buffer[1024];
      size_t chunk = left < sizeof buffer ? left : sizeof buffer;

      read_bytes (r, buffer, chunk);
      left -= chunk;
    }
}
/* Removes trailing spaces from S in place by moving its null terminator
   back to just after the last byte that is not a space.  Only ' ' counts
   as a space; other white space is kept. */
static void
trim_spaces (char *s)
{
  size_t len = strlen (s);

  while (len > 0 && s[len - 1] == ' ')
    len--;
  s[len] = '\0';
}
/* Prints the LEN bytes at S on stdout.

   If the data contains a null byte, it is passed to hex_dump().
   Otherwise it is printed as text followed by a new-line: a backslash is
   doubled, new-lines and printable bytes are written as is, and any other
   byte is written as a backslash followed by two hex digits. */
static void
print_string (const char *s, size_t len)
{
  size_t i;

  if (memchr (s, 0, len) != NULL)
    {
      hex_dump (0, s, len);
      return;
    }

  for (i = 0; i < len; i++)
    {
      unsigned char c = s[i];

      if (c == '\\')
        printf ("\\\\");
      else if (c == '\n' || isprint (c))
        putchar (c);
      else
        printf ("\\%02x", c);
    }
  putchar ('\n');
}