po: Update ca,es,de,nl,uk translations from translationproject.org.
[pspp.git] / tests / data / sack.c
blob0a1ada7fdaab03ed351768b7ae8e4f53e5e860ad
/* PSPP - a program for statistical analysis.
   Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include <ctype.h>
20 #include <errno.h>
21 #include <float.h>
22 #include <getopt.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <unistd.h>
29 #include "libpspp/assertion.h"
30 #include "libpspp/compiler.h"
31 #include "libpspp/float-format.h"
32 #include "libpspp/hash-functions.h"
33 #include "libpspp/hmap.h"
34 #include "libpspp/integer-format.h"
36 #include "gl/c-ctype.h"
37 #include "gl/error.h"
38 #include "gl/intprops.h"
39 #include "gl/progname.h"
40 #include "gl/xalloc.h"
41 #include "gl/xbinary-io.h"
/* A growable array of bytes, used to accumulate the program's output. */
struct buffer
  {
    uint8_t *data;              /* Storage for the bytes (may be NULL if empty). */
    size_t size;                /* Number of bytes currently in use. */
    size_t allocated;           /* Number of bytes of storage behind DATA. */
  };
/* Forward declarations for the buffer helpers defined further down. */
static void buffer_put (struct buffer *buffer, const void *data, size_t n);
static void *buffer_put_uninit (struct buffer *buffer, size_t n);
/* Kinds of tokens that get_token() can produce. */
enum token_type
  {
    T_EOF,                      /* End of input. */
    T_INTEGER,                  /* Number without a `.', or ENDIAN. */
    T_FLOAT,                    /* Number with a `.', SYSMIS, LOWEST, HIGHEST. */
    T_PCSYSMIS,                 /* PCSYSMIS keyword. */
    T_STRING,                   /* Double-quoted string. */
    T_SEMICOLON,                /* `;' */
    T_ASTERISK,                 /* `*' */
    T_LPAREN,                   /* `(' */
    T_RPAREN,                   /* `)' */
    T_I8,                       /* i8 keyword. */
    T_I16,                      /* i16 keyword. */
    T_I64,                      /* i64 keyword. */
    T_S,                        /* s<number> pad-length prefix. */
    T_COUNT,                    /* COUNT keyword. */
    T_COUNT8,                   /* COUNT8 keyword. */
    T_HEX,                      /* hex keyword. */
    T_LABEL,                    /* Identifier followed by `:'. */
    T_AT,                       /* Identifier that starts with `@'. */
    T_MINUS,                    /* A lone `-'. */
    T_PLUS,                     /* `+' */
  };
77 static enum token_type token;
78 static unsigned long long int tok_integer;
79 static double tok_float;
80 static char *tok_string;
81 static size_t tok_strlen, tok_allocated;
83 /* Symbol table. */
84 struct symbol
86 struct hmap_node hmap_node;
87 const char *name;
88 unsigned int offset;
91 static struct hmap symbol_table = HMAP_INITIALIZER (symbol_table);
93 /* --be, --le: Integer and floating-point formats. */
94 static enum float_format float_format = FLOAT_IEEE_DOUBLE_BE;
95 static enum integer_format integer_format = INTEGER_MSB_FIRST;
97 /* Input file and current position. */
98 static FILE *input;
99 static const char *input_file_name;
100 static int line_number;
102 static void PRINTF_FORMAT (1, 2)
103 fatal (const char *message, ...)
105 va_list args;
107 fprintf (stderr, "%s:%d: ", input_file_name, line_number);
108 va_start (args, message);
109 vfprintf (stderr, message, args);
110 va_end (args);
111 putc ('\n', stderr);
113 exit (EXIT_FAILURE);
116 static void
117 add_char__ (int c)
119 if (tok_strlen >= tok_allocated)
120 tok_string = x2realloc (tok_string, &tok_allocated);
122 tok_string[tok_strlen] = c;
125 static void
126 add_char (int c)
128 add_char__ (c);
129 tok_strlen++;
132 static void
133 get_token (void)
135 int c;
139 c = getc (input);
140 if (c == '#')
142 while ((c = getc (input)) != '\n' && c != EOF)
143 continue;
145 if (c == '\n')
146 line_number++;
148 while (isspace (c) || c == '<' || c == '>');
150 tok_strlen = 0;
151 if (c == EOF)
153 if (token == T_EOF)
154 fatal ("unexpected end of input");
155 token = T_EOF;
157 else if (isdigit (c) || c == '-')
161 add_char (c);
162 c = getc (input);
164 while (isdigit (c) || isalpha (c) || c == '.');
165 add_char__ ('\0');
166 ungetc (c, input);
168 if (!strcmp (tok_string, "-"))
169 token = T_MINUS;
170 else
172 char *tail;
174 errno = 0;
175 if (strchr (tok_string, '.') == NULL)
177 token = T_INTEGER;
178 tok_integer = strtoull (tok_string, &tail, 0);
180 else
182 token = T_FLOAT;
183 tok_float = strtod (tok_string, &tail);
185 if (errno || *tail)
186 fatal ("invalid numeric syntax \"%s\"", tok_string);
189 else if (c == '"')
191 token = T_STRING;
192 while ((c = getc (input)) != '"')
194 if (c == '\n')
195 fatal ("new-line inside string");
196 add_char (c);
198 add_char__ ('\0');
200 else if (c == ';')
201 token = T_SEMICOLON;
202 else if (c == '*')
203 token = T_ASTERISK;
204 else if (c == '+')
205 token = T_PLUS;
206 else if (c == '(')
207 token = T_LPAREN;
208 else if (c == ')')
209 token = T_RPAREN;
210 else if (isalpha (c) || c == '@' || c == '_')
214 add_char (c);
215 c = getc (input);
217 while (isdigit (c) || isalpha (c) || c == '.' || c == '_');
218 add_char ('\0');
220 if (c == ':')
222 token = T_LABEL;
223 return;
225 ungetc (c, input);
226 if (tok_string[0] == '@')
228 token = T_AT;
229 return;
232 if (!strcmp (tok_string, "i8"))
233 token = T_I8;
234 else if (!strcmp (tok_string, "i16"))
235 token = T_I16;
236 else if (!strcmp (tok_string, "i64"))
237 token = T_I64;
238 else if (tok_string[0] == 's')
240 token = T_S;
241 tok_integer = atoi (tok_string + 1);
243 else if (!strcmp (tok_string, "SYSMIS"))
245 token = T_FLOAT;
246 tok_float = -DBL_MAX;
248 else if (!strcmp (tok_string, "PCSYSMIS"))
249 token = T_PCSYSMIS;
250 else if (!strcmp (tok_string, "LOWEST"))
252 token = T_FLOAT;
253 tok_float = float_get_lowest ();
255 else if (!strcmp (tok_string, "HIGHEST"))
257 token = T_FLOAT;
258 tok_float = DBL_MAX;
260 else if (!strcmp (tok_string, "ENDIAN"))
262 token = T_INTEGER;
263 tok_integer = integer_format == INTEGER_MSB_FIRST ? 1 : 2;
265 else if (!strcmp (tok_string, "COUNT"))
266 token = T_COUNT;
267 else if (!strcmp (tok_string, "COUNT8"))
268 token = T_COUNT8;
269 else if (!strcmp (tok_string, "hex"))
270 token = T_HEX;
271 else
272 fatal ("invalid token `%s'", tok_string);
274 else
275 fatal ("invalid input byte `%c'", c);
/* Appends the N bytes starting at DATA to BUFFER. */
static void
buffer_put (struct buffer *buffer, const void *data, size_t n)
{
  void *dst = buffer_put_uninit (buffer, n);
  memcpy (dst, data, n);
}
284 static void *
285 buffer_put_uninit (struct buffer *buffer, size_t n)
287 buffer->size += n;
288 if (buffer->size > buffer->allocated)
290 buffer->allocated = buffer->size * 2;
291 buffer->data = xrealloc (buffer->data, buffer->allocated);
293 return &buffer->data[buffer->size - n];
/* Returns the integer value of hex digit C, in either case.
   C must be a hex digit; callers check this with isxdigit() first. */
static int
hexit_value (int c)
{
  static const char digits[] = "0123456789abcdef";
  const char *match = strchr (digits, c_tolower ((unsigned char) c));

  assert (match != NULL);
  return match - digits;
}
/* Prints a help message that describes the command-line options and the
   input syntax to stdout, substituting program_name for both %s directives,
   then exits with status EXIT_SUCCESS, so it does not return.

   NOTE(review): the last paragraph of the help text begins mid-sentence
   ("optionally followed by an asterisk ..."); its leading clause appears to
   be missing -- confirm the intended wording. */
307 static void
308 usage (void)
310 printf ("\
311 %s, SAv Construction Kit\n\
312 usage: %s [OPTIONS] INPUT\n\
313 \nOptions:\n\
314 --be big-endian output format (default)\n\
315 --le little-endian output format\n\
316 --help print this help message and exit\n\
318 The input is a sequence of data items, each followed by a semicolon.\n\
319 Each data item is converted to the output format and written on\n\
320 stdout. A data item is one of the following\n\
322 - An integer in decimal, in hexadecimal prefixed by 0x, or in octal\n\
323 prefixed by 0. Output as a 32-bit binary integer.\n\
325 - A floating-point number. Output in 64-bit IEEE 754 format.\n\
327 - A string enclosed in double quotes. Output literally. There is\n\
328 no syntax for \"escapes\". Strings may not contain new-lines.\n\
330 - A literal of the form s<number> followed by a quoted string as\n\
331 above. Output as the string's contents followed by enough spaces\n\
332 to fill up <number> bytes. For example, s8 \"foo\" is output as\n\
333 the \"foo\" followed by 5 spaces.\n\
335 - The literal \"i8\", \"i16\", or \"i64\" followed by an integer. Output\n\
336 as a binary integer with the specified number of bits.\n\
338 - One of the literals SYSMIS, LOWEST, or HIGHEST. Output as a\n\
339 64-bit IEEE 754 float of the appropriate PSPP value.\n\
341 - PCSYSMIS. Output as SPSS/PC+ system-missing value.\n\
343 - The literal ENDIAN. Output as a 32-bit binary integer, either\n\
344 with value 1 if --be is in effect or 2 if --le is in effect.\n\
346 - A pair of parentheses enclosing a sequence of data items, each\n\
347 followed by a semicolon (the last semicolon is optional).\n\
348 Output as the enclosed data items in sequence.\n\
350 - The literal COUNT or COUNT8 followed by a sequence of parenthesized\n\
351 data items, as above. Output as a 32-bit or 8-bit binary integer whose\n\
352 value is the number of bytes enclosed within the parentheses, followed\n\
353 by the enclosed data items themselves.\n\
355 optionally followed by an asterisk and a positive integer, which\n\
356 specifies a repeat count for the data item.\n",
357 program_name, program_name);
358 exit (EXIT_SUCCESS);
361 static const char *
362 parse_options (int argc, char **argv)
364 for (;;)
366 enum {
367 OPT_BE = UCHAR_MAX + 1,
368 OPT_LE,
369 OPT_HELP
371 static const struct option options[] =
373 {"be", no_argument, NULL, OPT_BE},
374 {"le", no_argument, NULL, OPT_LE},
375 {"help", no_argument, NULL, OPT_HELP},
376 {NULL, 0, NULL, 0},
379 int c = getopt_long (argc, argv, "", options, NULL);
380 if (c == -1)
381 break;
383 switch (c)
385 case OPT_BE:
386 float_format = FLOAT_IEEE_DOUBLE_BE;
387 integer_format = INTEGER_MSB_FIRST;
388 break;
390 case OPT_LE:
391 float_format = FLOAT_IEEE_DOUBLE_LE;
392 integer_format = INTEGER_LSB_FIRST;
393 break;
395 case OPT_HELP:
396 usage ();
398 case 0:
399 break;
401 case '?':
402 exit (EXIT_FAILURE);
403 break;
405 default:
406 NOT_REACHED ();
411 if (optind + 1 != argc)
412 error (1, 0, "exactly one non-option argument required; "
413 "use --help for help");
414 return argv[optind];
417 static struct symbol *
418 symbol_find (const char *name)
420 struct symbol *symbol;
421 unsigned int hash;
423 if (name[0] == '@')
424 name++;
425 hash = hash_string (name, 0);
426 HMAP_FOR_EACH_WITH_HASH (symbol, struct symbol, hmap_node,
427 hash, &symbol_table)
428 if (!strcmp (name, symbol->name))
429 return symbol;
431 symbol = xmalloc (sizeof *symbol);
432 hmap_insert (&symbol_table, &symbol->hmap_node, hash);
433 symbol->name = xstrdup (name);
434 symbol->offset = UINT_MAX;
435 return symbol;
438 static void
439 parse_data_item (struct buffer *output)
441 size_t old_size = output->size;
443 if (token == T_INTEGER)
445 integer_put (tok_integer, integer_format,
446 buffer_put_uninit (output, 4), 4);
447 get_token ();
449 else if (token == T_FLOAT)
451 float_convert (FLOAT_NATIVE_DOUBLE, &tok_float,
452 float_format, buffer_put_uninit (output, 8));
453 get_token ();
455 else if (token == T_PCSYSMIS)
457 static const uint8_t pcsysmis[] =
458 { 0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff, };
459 buffer_put (output, pcsysmis, sizeof pcsysmis);
460 get_token ();
462 else if (token == T_I8)
464 uint8_t byte;
466 get_token ();
469 if (token != T_INTEGER)
470 fatal ("integer expected after `i8'");
471 byte = tok_integer;
472 buffer_put (output, &byte, 1);
473 get_token ();
475 while (token == T_INTEGER);
477 else if (token == T_I16)
479 get_token ();
482 if (token != T_INTEGER)
483 fatal ("integer expected after `i16'");
484 integer_put (tok_integer, integer_format,
485 buffer_put_uninit (output, 2), 2);
486 get_token ();
488 while (token == T_INTEGER);
490 else if (token == T_I64)
492 get_token ();
495 if (token != T_INTEGER)
496 fatal ("integer expected after `i64'");
497 integer_put (tok_integer, integer_format,
498 buffer_put_uninit (output, 8), 8);
499 get_token ();
501 while (token == T_INTEGER);
503 else if (token == T_STRING)
505 buffer_put (output, tok_string, tok_strlen);
506 get_token ();
508 else if (token == T_S)
510 int n;
512 n = tok_integer;
513 get_token ();
515 if (token != T_STRING)
516 fatal ("string expected");
517 if (tok_strlen > n)
518 fatal ("%zu-byte string is longer than pad length %d",
519 tok_strlen, n);
521 buffer_put (output, tok_string, tok_strlen);
522 memset (buffer_put_uninit (output, n - tok_strlen), ' ',
523 n - tok_strlen);
524 get_token ();
526 else if (token == T_LPAREN)
528 get_token ();
530 while (token != T_RPAREN)
531 parse_data_item (output);
533 get_token ();
535 else if (token == T_COUNT)
537 buffer_put_uninit (output, 4);
539 get_token ();
540 if (token != T_LPAREN)
541 fatal ("`(' expected after COUNT");
542 get_token ();
544 while (token != T_RPAREN)
545 parse_data_item (output);
546 get_token ();
548 integer_put (output->size - old_size - 4, integer_format,
549 output->data + old_size, 4);
551 else if (token == T_COUNT8)
553 buffer_put_uninit (output, 1);
555 get_token ();
556 if (token != T_LPAREN)
557 fatal ("`(' expected after COUNT8");
558 get_token ();
560 while (token != T_RPAREN)
561 parse_data_item (output);
562 get_token ();
564 integer_put (output->size - old_size - 1, integer_format,
565 output->data + old_size, 1);
567 else if (token == T_HEX)
569 const char *p;
571 get_token ();
573 if (token != T_STRING)
574 fatal ("string expected");
576 for (p = tok_string; *p; p++)
578 if (isspace ((unsigned char) *p))
579 continue;
580 else if (isxdigit ((unsigned char) p[0])
581 && isxdigit ((unsigned char) p[1]))
583 int high = hexit_value (p[0]);
584 int low = hexit_value (p[1]);
585 uint8_t byte = high * 16 + low;
586 buffer_put (output, &byte, 1);
587 p++;
589 else
590 fatal ("invalid format in hex string");
592 get_token ();
594 else if (token == T_LABEL)
596 struct symbol *sym = symbol_find (tok_string);
597 if (sym->offset == UINT_MAX)
598 sym->offset = output->size;
599 else if (sym->offset != output->size)
600 fatal ("%s: can't redefine label for offset %u with offset %zu",
601 tok_string, sym->offset, output->size);
602 get_token ();
603 return;
605 else if (token == T_AT)
607 unsigned int value = symbol_find (tok_string)->offset;
608 get_token ();
610 while (token == T_MINUS || token == T_PLUS)
612 enum token_type op = token;
613 unsigned int operand;
614 get_token ();
615 if (token == T_AT)
616 operand = symbol_find (tok_string)->offset;
617 else if (token == T_INTEGER)
618 operand = tok_integer;
619 else
620 fatal ("expecting @label");
621 get_token ();
623 if (op == T_PLUS)
624 value += operand;
625 else
626 value -= operand;
628 integer_put (value, integer_format, buffer_put_uninit (output, 4), 4);
630 else
631 fatal ("syntax error");
633 if (token == T_ASTERISK)
635 size_t n = output->size - old_size;
636 char *p;
638 get_token ();
640 if (token != T_INTEGER || tok_integer < 1)
641 fatal ("positive integer expected after `*'");
642 p = buffer_put_uninit (output, (tok_integer - 1) * n);
643 while (--tok_integer > 0)
645 memcpy (p, output->data + old_size, n);
646 p += n;
649 get_token ();
652 if (token == T_SEMICOLON)
653 get_token ();
654 else if (token != T_RPAREN)
655 fatal ("`;' expected");
659 main (int argc, char **argv)
661 struct buffer output;
663 set_program_name (argv[0]);
664 input_file_name = parse_options (argc, argv);
666 if (!strcmp (input_file_name, "-"))
667 input = stdin;
668 else
670 input = fopen (input_file_name, "r");
671 if (input == NULL)
672 error (1, errno, "%s: open failed", input_file_name);
675 if (isatty (STDOUT_FILENO))
676 error (1, 0, "not writing binary data to a terminal; redirect to a file");
678 output.data = NULL;
679 output.size = 0;
680 output.allocated = 0;
682 line_number = 1;
683 get_token ();
684 while (token != T_EOF)
685 parse_data_item (&output);
687 if (!hmap_is_empty (&symbol_table))
689 struct symbol *symbol;
691 HMAP_FOR_EACH (symbol, struct symbol, hmap_node, &symbol_table)
692 if (symbol->offset == UINT_MAX)
693 error (1, 0, "label %s used but never defined", symbol->name);
695 output.size = 0;
696 if (fseek (input, 0, SEEK_SET) != 0)
697 error (1, 0, "failed to rewind stdin for second pass");
699 line_number = 1;
700 get_token ();
701 while (token != T_EOF)
702 parse_data_item (&output);
705 if (input != stdin)
706 fclose (input);
708 xset_binary_mode (fileno (stdout), O_BINARY);
709 fwrite (output.data, output.size, 1, stdout);
710 free (output.data);
712 return 0;