1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
29 #include "libpspp/assertion.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/float-format.h"
32 #include "libpspp/hash-functions.h"
33 #include "libpspp/hmap.h"
34 #include "libpspp/integer-format.h"
36 #include "gl/c-ctype.h"
38 #include "gl/intprops.h"
39 #include "gl/progname.h"
40 #include "gl/xalloc.h"
41 #include "gl/xbinary-io.h"
/* Forward declarations for the output-buffer helpers whose definitions
   appear further down in this file. */
50 static void buffer_put (struct buffer
*, const void *, size_t);
51 static void *buffer_put_uninit (struct buffer
*, size_t);
/* Tokenizer state shared with the parser.  `token' is what the parser
   compares against the T_* constants; tok_integer and tok_float carry
   numeric payloads; tok_string and tok_strlen carry string payloads.
   tok_allocated is the capacity handed to x2realloc when tok_string has to
   grow. */
77 static enum token_type token
;
78 static unsigned long long int tok_integer
;
79 static double tok_float
;
80 static char *tok_string
;
81 static size_t tok_strlen
, tok_allocated
;
/* Hash-map linkage; the HMAP_FOR_EACH* loops in this file name this member
   when walking `struct symbol' entries. */
86 struct hmap_node hmap_node
;
/* Table that symbol_find searches and inserts into, using
   hash_string (name, 0) as the hash. */
91 static struct hmap symbol_table
= HMAP_INITIALIZER (symbol_table
);
93 /* --be, --le: Integer and floating-point formats. */
/* Both start out as big-endian / MSB-first; parse_options assigns them
   again when it handles an endianness option. */
94 static enum float_format float_format
= FLOAT_IEEE_DOUBLE_BE
;
95 static enum integer_format integer_format
= INTEGER_MSB_FIRST
;
97 /* Input file and current position. */
/* fatal () prints these two as the "FILE:LINE: " prefix of its message. */
99 static const char *input_file_name
;
100 static int line_number
;
/* Writes a diagnostic to stderr: first a "FILE:LINE: " prefix built from
   input_file_name and line_number, then MESSAGE formatted printf-style with
   the variadic arguments.
   NOTE(review): the statements that follow the vfprintf call are not
   visible in this excerpt; presumably the function then terminates the
   program -- confirm. */
102 static void PRINTF_FORMAT (1, 2)
103 fatal (const char *message
, ...)
/* Location prefix. */
107 fprintf (stderr
, "%s:%d: ", input_file_name
, line_number
);
/* The caller's format string and arguments. */
108 va_start (args
, message
);
109 vfprintf (stderr
, message
, args
);
119 if (tok_strlen
>= tok_allocated
)
120 tok_string
= x2realloc (tok_string
, &tok_allocated
);
122 tok_string
[tok_strlen
] = c
;
142 while ((c
= getc (input
)) != '\n' && c
!= EOF
)
148 while (isspace (c
) || c
== '<' || c
== '>');
154 fatal ("unexpected end of input");
157 else if (isdigit (c
) || c
== '-')
164 while (isdigit (c
) || isalpha (c
) || c
== '.');
168 if (!strcmp (tok_string
, "-"))
175 if (strchr (tok_string
, '.') == NULL
)
178 tok_integer
= strtoull (tok_string
, &tail
, 0);
183 tok_float
= strtod (tok_string
, &tail
);
186 fatal ("invalid numeric syntax \"%s\"", tok_string
);
192 while ((c
= getc (input
)) != '"')
195 fatal ("new-line inside string");
210 else if (isalpha (c
) || c
== '@' || c
== '_')
217 while (isdigit (c
) || isalpha (c
) || c
== '.' || c
== '_');
226 if (tok_string
[0] == '@')
232 if (!strcmp (tok_string
, "i8"))
234 else if (!strcmp (tok_string
, "i16"))
236 else if (!strcmp (tok_string
, "i64"))
238 else if (tok_string
[0] == 's')
241 tok_integer
= atoi (tok_string
+ 1);
243 else if (!strcmp (tok_string
, "SYSMIS"))
246 tok_float
= -DBL_MAX
;
248 else if (!strcmp (tok_string
, "PCSYSMIS"))
250 else if (!strcmp (tok_string
, "LOWEST"))
253 tok_float
= float_get_lowest ();
255 else if (!strcmp (tok_string
, "HIGHEST"))
260 else if (!strcmp (tok_string
, "ENDIAN"))
263 tok_integer
= integer_format
== INTEGER_MSB_FIRST
? 1 : 2;
265 else if (!strcmp (tok_string
, "COUNT"))
267 else if (!strcmp (tok_string
, "COUNT8"))
269 else if (!strcmp (tok_string
, "hex"))
272 fatal ("invalid token `%s'", tok_string
);
275 fatal ("invalid input byte `%c'", c
);
/* Copies the N bytes at DATA into the N-byte region that
   buffer_put_uninit (BUFFER, N) returns. */
279 buffer_put (struct buffer
*buffer
, const void *data
, size_t n
)
281 memcpy (buffer_put_uninit (buffer
, n
), data
, n
);
/* Returns a pointer to the last N bytes of BUFFER's data, that is,
   &data[size - N].  The bytes are not initialized here; the caller fills
   them in.
   NOTE(review): the statement that increases buffer->size by N is not
   visible in this excerpt; the capacity check and the return expression
   below both presume that size already includes the N new bytes --
   confirm. */
285 buffer_put_uninit (struct buffer
*buffer
, size_t n
)
/* Grow only when the required size exceeds the current capacity. */
288 if (buffer
->size
> buffer
->allocated
)
/* New capacity is twice the required size.  xrealloc may move the data, so
   pointers previously obtained from this buffer must not be reused after
   this call. */
290 buffer
->allocated
= buffer
->size
* 2;
291 buffer
->data
= xrealloc (buffer
->data
, buffer
->allocated
);
/* Start of the N-byte region at the end of the buffer. */
293 return &buffer
->data
[buffer
->size
- n
];
296 /* Returns the integer value of hex digit C. */
/* Lookup table: each hex digit sits at the index equal to its value. */
300 const char s
[] = "0123456789abcdef";
/* C is lowercased with c_tolower before the search, so 'A'..'F' locate the
   same entries as 'a'..'f'.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the result is the offset of cp within s -- confirm. */
301 const char *cp
= strchr (s
, c_tolower ((unsigned char) c
));
311 %s, SAv Construction Kit\n\
312 usage: %s [OPTIONS] INPUT\n\
314 --be big-endian output format (default)\n\
315 --le little-endian output format\n\
316 --help print this help message and exit\n\
318 The input is a sequence of data items, each followed by a semicolon.\n\
319 Each data item is converted to the output format and written on\n\
320 stdout. A data item is one of the following\n\
322 - An integer in decimal, in hexadecimal prefixed by 0x, or in octal\n\
323 prefixed by 0. Output as a 32-bit binary integer.\n\
325 - A floating-point number. Output in 64-bit IEEE 754 format.\n\
327 - A string enclosed in double quotes. Output literally. There is\n\
328 no syntax for \"escapes\". Strings may not contain new-lines.\n\
330 - A literal of the form s<number> followed by a quoted string as\n\
331 above. Output as the string's contents followed by enough spaces\n\
332 to fill up <number> bytes. For example, s8 \"foo\" is output as\n\
333 the \"foo\" followed by 5 spaces.\n\
335 - The literal \"i8\", \"i16\", or \"i64\" followed by an integer. Output\n\
336 as a binary integer with the specified number of bits.\n\
338 - One of the literals SYSMIS, LOWEST, or HIGHEST. Output as a\n\
339 64-bit IEEE 754 float of the appropriate PSPP value.\n\
341 - PCSYSMIS. Output as SPSS/PC+ system-missing value.\n\
343 - The literal ENDIAN. Output as a 32-bit binary integer, either\n\
344 with value 1 if --be is in effect or 2 if --le is in effect.\n\
346 - A pair of parentheses enclosing a sequence of data items, each\n\
347 followed by a semicolon (the last semicolon is optional).\n\
348 Output as the enclosed data items in sequence.\n\
350 - The literal COUNT or COUNT8 followed by a sequence of parenthesized\n\
351 data items, as above. Output as a 32-bit or 8-bit binary integer whose\n\
352 value is the number of bytes enclosed within the parentheses, followed\n\
353 by the enclosed data items themselves.\n\
355 optionally followed by an asterisk and a positive integer, which\n\
356 specifies a repeat count for the data item.\n",
357 program_name
, program_name
);
/* Parses the command line in ARGC/ARGV with getopt_long and updates
   float_format / integer_format according to the endianness options.
   main () assigns this function's return value to input_file_name.
   NOTE(review): the loop, the switch/case labels and the return statement
   are not visible in this excerpt. */
362 parse_options (int argc
, char **argv
)
/* The first option code is UCHAR_MAX + 1, i.e. just above the range of any
   single character value. */
367 OPT_BE
= UCHAR_MAX
+ 1,
/* Long options only; none takes an argument.  With a NULL flag pointer,
   getopt_long returns the code in the last field. */
371 static const struct option options
[] =
373 {"be", no_argument
, NULL
, OPT_BE
},
374 {"le", no_argument
, NULL
, OPT_LE
},
375 {"help", no_argument
, NULL
, OPT_HELP
},
/* The empty short-option string means no single-letter options are
   accepted. */
379 int c
= getopt_long (argc
, argv
, "", options
, NULL
);
/* Big-endian selection: IEEE double BE and MSB-first integers. */
386 float_format
= FLOAT_IEEE_DOUBLE_BE
;
387 integer_format
= INTEGER_MSB_FIRST
;
/* Little-endian selection: IEEE double LE and LSB-first integers. */
391 float_format
= FLOAT_IEEE_DOUBLE_LE
;
392 integer_format
= INTEGER_LSB_FIRST
;
/* After option parsing exactly one argument must remain. */
411 if (optind
+ 1 != argc
)
412 error (1, 0, "exactly one non-option argument required; "
413 "use --help for help");
/* Looks up NAME in symbol_table.  Candidates with the same
   hash_string (NAME, 0) value are compared by strcmp.  The visible tail of
   the function allocates a new symbol, inserts it under that hash, stores a
   copy of NAME, and sets its offset to UINT_MAX.  Elsewhere in this file an
   offset of UINT_MAX is treated as "label not defined yet".
   NOTE(review): the statement executed on a strcmp match and the final
   return are not visible in this excerpt. */
417 static struct symbol
*
418 symbol_find (const char *name
)
420 struct symbol
*symbol
;
/* Hash is computed once and reused for both the search and the insert. */
425 hash
= hash_string (name
, 0);
426 HMAP_FOR_EACH_WITH_HASH (symbol
, struct symbol
, hmap_node
,
428 if (!strcmp (name
, symbol
->name
))
/* New entry: the name is duplicated with xstrdup, so the symbol does not
   alias the caller's string. */
431 symbol
= xmalloc (sizeof *symbol
);
432 hmap_insert (&symbol_table
, &symbol
->hmap_node
, hash
);
433 symbol
->name
= xstrdup (name
);
434 symbol
->offset
= UINT_MAX
;
/* Parses a single data item, starting from the current token, and emits its
   encoded bytes into OUTPUT through buffer_put / buffer_put_uninit.  The
   function dispatches on the token type; after the item it handles an
   optional `*' repeat count and then expects `;' or an enclosing `)'.
   NOTE(review): many lines of this function (braces, calls that advance the
   tokenizer, loop openers) are not visible in this excerpt; the comments
   below describe only the visible statements. */
439 parse_data_item (struct buffer
*output
)
/* Size of OUTPUT before this item.  Used later to locate the bytes this
   item produced (COUNT back-patching and `*' repetition). */
441 size_t old_size
= output
->size
;
/* Plain integer: 4 bytes in the selected integer_format. */
443 if (token
== T_INTEGER
)
445 integer_put (tok_integer
, integer_format
,
446 buffer_put_uninit (output
, 4), 4);
/* Floating-point number: converted from the native double representation to
   float_format, occupying 8 bytes. */
449 else if (token
== T_FLOAT
)
451 float_convert (FLOAT_NATIVE_DOUBLE
, &tok_float
,
452 float_format
, buffer_put_uninit (output
, 8));
/* PCSYSMIS: a fixed 8-byte pattern copied verbatim, independent of
   float_format. */
455 else if (token
== T_PCSYSMIS
)
457 static const uint8_t pcsysmis
[] =
458 { 0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff, };
459 buffer_put (output
, pcsysmis
, sizeof pcsysmis
);
/* i8: an integer token is required and is emitted as one byte.  The
   `while (...);' below closes a loop whose opening lines are not visible
   here (presumably a do-while that consumes consecutive integers --
   confirm). */
462 else if (token
== T_I8
)
469 if (token
!= T_INTEGER
)
470 fatal ("integer expected after `i8'");
472 buffer_put (output
, &byte
, 1);
475 while (token
== T_INTEGER
);
/* i16: same shape as i8, but each integer occupies 2 bytes in
   integer_format. */
477 else if (token
== T_I16
)
482 if (token
!= T_INTEGER
)
483 fatal ("integer expected after `i16'");
484 integer_put (tok_integer
, integer_format
,
485 buffer_put_uninit (output
, 2), 2);
488 while (token
== T_INTEGER
);
/* i64: same shape again, 8 bytes per integer. */
490 else if (token
== T_I64
)
495 if (token
!= T_INTEGER
)
496 fatal ("integer expected after `i64'");
497 integer_put (tok_integer
, integer_format
,
498 buffer_put_uninit (output
, 8), 8);
501 while (token
== T_INTEGER
);
/* Quoted string: its tok_strlen bytes are emitted as-is. */
503 else if (token
== T_STRING
)
505 buffer_put (output
, tok_string
, tok_strlen
);
/* s<number>: a string must follow.  The string is emitted and the remaining
   n - tok_strlen bytes are filled with spaces.
   NOTE(review): the declaration of n, the length check guarding the second
   fatal call, and the final memset argument are not visible here. */
508 else if (token
== T_S
)
515 if (token
!= T_STRING
)
516 fatal ("string expected");
518 fatal ("%zu-byte string is longer than pad length %d",
521 buffer_put (output
, tok_string
, tok_strlen
);
522 memset (buffer_put_uninit (output
, n
- tok_strlen
), ' ',
/* Parenthesized group: nested items are parsed recursively until the
   closing parenthesis token. */
526 else if (token
== T_LPAREN
)
530 while (token
!= T_RPAREN
)
531 parse_data_item (output
);
/* COUNT: reserve a 4-byte placeholder, parse the parenthesized items, then
   back-patch the placeholder with the number of bytes emitted after it. */
535 else if (token
== T_COUNT
)
537 buffer_put_uninit (output
, 4);
540 if (token
!= T_LPAREN
)
541 fatal ("`(' expected after COUNT");
544 while (token
!= T_RPAREN
)
545 parse_data_item (output
);
/* The placeholder is addressed as output->data + old_size rather than via
   the pointer returned earlier, because the nested writes may have
   reallocated the buffer. */
548 integer_put (output
->size
- old_size
- 4, integer_format
,
549 output
->data
+ old_size
, 4);
/* COUNT8: as COUNT, but with a 1-byte placeholder and count. */
551 else if (token
== T_COUNT8
)
553 buffer_put_uninit (output
, 1);
556 if (token
!= T_LPAREN
)
557 fatal ("`(' expected after COUNT8");
560 while (token
!= T_RPAREN
)
561 parse_data_item (output
);
564 integer_put (output
->size
- old_size
- 1, integer_format
,
565 output
->data
+ old_size
, 1);
/* hex: a string must follow.  It is scanned byte by byte: whitespace takes
   the first branch (its body is not visible here), a pair of hex digits
   becomes one output byte (high * 16 + low), anything else is an error. */
567 else if (token
== T_HEX
)
573 if (token
!= T_STRING
)
574 fatal ("string expected");
576 for (p
= tok_string
; *p
; p
++)
578 if (isspace ((unsigned char) *p
))
580 else if (isxdigit ((unsigned char) p
[0])
581 && isxdigit ((unsigned char) p
[1]))
583 int high
= hexit_value (p
[0]);
584 int low
= hexit_value (p
[1]);
585 uint8_t byte
= high
* 16 + low
;
586 buffer_put (output
, &byte
, 1);
590 fatal ("invalid format in hex string");
/* Label definition: the first sighting records the current output size as
   the label's offset.  A later sighting is accepted only at the same
   offset; main () parses the input a second time when labels exist, so
   seeing a label again at an unchanged offset is expected. */
594 else if (token
== T_LABEL
)
596 struct symbol
*sym
= symbol_find (tok_string
);
597 if (sym
->offset
== UINT_MAX
)
598 sym
->offset
= output
->size
;
599 else if (sym
->offset
!= output
->size
)
600 fatal ("%s: can't redefine label for offset %u with offset %zu",
601 tok_string
, sym
->offset
, output
->size
);
/* @label expression: starts from the label's offset, then processes a
   chain of `+' / `-' operands, each either another label's offset or an
   integer.  The result is emitted as a 4-byte integer.
   NOTE(review): the statements that apply `op' to `value' are not visible
   in this excerpt. */
605 else if (token
== T_AT
)
607 unsigned int value
= symbol_find (tok_string
)->offset
;
610 while (token
== T_MINUS
|| token
== T_PLUS
)
612 enum token_type op
= token
;
613 unsigned int operand
;
616 operand
= symbol_find (tok_string
)->offset
;
617 else if (token
== T_INTEGER
)
618 operand
= tok_integer
;
620 fatal ("expecting @label");
628 integer_put (value
, integer_format
, buffer_put_uninit (output
, 4), 4);
/* No branch matched the current token. */
631 fatal ("syntax error");
/* Optional repeat count: `* K' makes the item appear K times in total. */
633 if (token
== T_ASTERISK
)
/* Number of bytes the item just produced. */
635 size_t n
= output
->size
- old_size
;
/* tok_integer is unsigned, so `< 1' rejects exactly zero. */
640 if (token
!= T_INTEGER
|| tok_integer
< 1)
641 fatal ("positive integer expected after `*'");
/* Reserve room for the K - 1 extra copies in one step, then copy the first
   instance into it repeatedly.  The source is re-read from output->data
   after the reservation, since the buffer may have been reallocated. */
642 p
= buffer_put_uninit (output
, (tok_integer
- 1) * n
);
643 while (--tok_integer
> 0)
645 memcpy (p
, output
->data
+ old_size
, n
);
/* Item terminator: a semicolon takes the first branch (body not visible
   here); otherwise only a closing parenthesis is acceptable. */
652 if (token
== T_SEMICOLON
)
654 else if (token
!= T_RPAREN
)
655 fatal ("`;' expected");
/* Program entry point: parses options, opens the input, parses every data
   item into an in-memory buffer, and finally writes the buffer to stdout.
   NOTE(review): several lines (the "-" branch body, buffer
   initialization/reset, tokenizer priming, and the end of the function) are
   not visible in this excerpt. */
659 main (int argc
, char **argv
)
661 struct buffer output
;
663 set_program_name (argv
[0]);
/* parse_options returns the single non-option argument. */
664 input_file_name
= parse_options (argc
, argv
);
/* A file name of "-" is special-cased (branch body not visible here);
   otherwise the named file is opened for reading. */
666 if (!strcmp (input_file_name
, "-"))
670 input
= fopen (input_file_name
, "r");
672 error (1, errno
, "%s: open failed", input_file_name
);
/* Refuse to dump binary output onto a terminal. */
675 if (isatty (STDOUT_FILENO
))
676 error (1, 0, "not writing binary data to a terminal; redirect to a file");
680 output
.allocated
= 0;
/* First pass: parse items until end of input. */
684 while (token
!= T_EOF
)
685 parse_data_item (&output
);
/* If any labels were seen, check that each one got an offset, then rewind
   the input and parse everything again. */
687 if (!hmap_is_empty (&symbol_table
))
689 struct symbol
*symbol
;
691 HMAP_FOR_EACH (symbol
, struct symbol
, hmap_node
, &symbol_table
)
692 if (symbol
->offset
== UINT_MAX
)
693 error (1, 0, "label %s used but never defined", symbol
->name
);
/* NOTE(review): the message below mentions stdin, although `input' may be
   a named file opened by fopen above. */
696 if (fseek (input
, 0, SEEK_SET
) != 0)
697 error (1, 0, "failed to rewind stdin for second pass");
/* Second pass. */
701 while (token
!= T_EOF
)
702 parse_data_item (&output
);
/* Emit the whole buffer as a single item on binary-mode stdout.
   NOTE(review): the return value of fwrite is not checked here. */
708 xset_binary_mode (fileno (stdout
), O_BINARY
);
709 fwrite (output
.data
, output
.size
, 1, stdout
);