update devspec.en_US/1.0.general.md.
[devspec.git] / devspec.en_US / project / recutils / utils / csv2rec.c
blobfbb013fde1b57f59085d1dec0903177ed3073808
1 /* -*- mode: C -*-
3 * File: csv2rec.c
4 * Date: Fri Aug 20 16:35:25 2010
6 * GNU recutils - csv to rec converter.
8 */
10 /* Copyright (C) 2010-2019 Jose E. Marchesi */
12 /* This program is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation, either version 3 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program. If not, see <http://www.gnu.org/licenses/>.
26 #include <config.h>
28 #include <getopt.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <xalloc.h>
32 #include <gettext.h>
33 #define _(str) gettext (str)
35 #include <csv.h>
36 #include <rec.h>
37 #include <recutl.h>
39 /* Forward declarations. */
40 static void parse_args (int argc, char **argv);
41 static rec_db_t process_csv (void);
42 static int is_space (unsigned char c);
43 static int is_term (unsigned char c);
44 static void field_cb (void *s, size_t len, void *data);
45 static void record_cb (int c, void *data);
48 * Types
51 struct csv2rec_ctx
53 rec_db_t db;
54 rec_rset_t rset;
55 rec_record_t record;
57 size_t num_fields;
58 size_t lineno;
60 #define ALLOC_FIELDS 256
61 bool header_p;
62 size_t num_field_names;
63 char **field_names;
67 * Global variables
/* Settings filled in by parse_args from the command line.  */

/* Record type given with -t/--type, or NULL when no type is used.  */
char *csv2rec_record_type = NULL;

/* Path of the csv file to read, or NULL to read standard input.  */
char *csv2rec_csv_file = NULL;

/* Set by -s/--strict: enable strict mode in the csv parser.  */
bool csv2rec_strict = false;

/* Set by -e/--omit-empty: do not emit fields whose value is empty.  */
bool csv2rec_omit_empty = false;
76 * Command line options management
/* Identifiers returned by getopt_long for the long options.
   COMMON_ARGS is not defined in this file; presumably it supplies
   the identifiers of the options shared by all the utilities --
   confirm against recutl.h.  */
enum
{
  COMMON_ARGS,
  RECORD_TYPE_ARG,   /* --type */
  STRICT_ARG,        /* --strict */
  OMIT_EMPTY_ARG     /* --omit-empty */
};
87 static const struct option GNU_longOptions[] =
89 COMMON_LONG_ARGS,
90 {"type", required_argument, NULL, RECORD_TYPE_ARG},
91 {"strict", no_argument, NULL, STRICT_ARG},
92 {"omit-empty", no_argument, NULL, OMIT_EMPTY_ARG},
93 {NULL, 0, NULL, 0}
97 * Functions.
/* Print the --help text for csv2rec on standard output: synopsis,
   one-line description, the options specific to this utility, and
   then the common help text and footer provided by
   recutl_print_help_common and recutl_print_help_footer.  */
void
recutl_print_help (void)
{
  /* TRANSLATORS: --help output, csv2rec synopsis.
     no-wrap */
  printf (_("Usage: csv2rec [OPTIONS]... [CSV_FILE]\n"));

  /* TRANSLATORS: --help output, csv2rec short description.
     no-wrap */
  fputs (_("Convert csv data into rec data.\n"), stdout);

  puts ("");
  /* TRANSLATORS: --help output, csv2rec options.
     no-wrap */
  fputs (_("-t, --type=TYPE type name for the converted records; if this\n"
           "parameter is ommited then no type is used.\n"
           "-s, --strict be strict parsing the csv file.\n"
           "-e, --omit-empty omit empty fields.\n"),
         stdout);

  recutl_print_help_common ();
  puts ("");
  recutl_print_help_footer ();
}
127 static void
128 parse_args (int argc,
129 char **argv)
131 int ret;
132 char c;
134 while ((ret = getopt_long (argc,
135 argv,
136 "t:se",
137 GNU_longOptions,
138 NULL)) != -1)
140 c = ret;
141 switch (c)
143 COMMON_ARGS_CASES
144 case RECORD_TYPE_ARG:
145 case 't':
147 csv2rec_record_type = xstrdup (optarg);
148 break;
150 case STRICT_ARG:
151 case 's':
153 csv2rec_strict = true;
154 break;
156 case OMIT_EMPTY_ARG:
157 case 'e':
159 csv2rec_omit_empty = true;
160 break;
162 default:
164 exit (EXIT_FAILURE);
169 /* Read the name of the csv file, if any. */
170 if (optind < argc)
172 if ((argc - optind) != 1)
174 recutl_print_help ();
175 exit (EXIT_FAILURE);
178 csv2rec_csv_file = argv[optind++];
182 static int
183 is_space (unsigned char c)
185 return (c == CSV_SPACE) || (c == CSV_TAB);
188 static int
189 is_term (unsigned char c)
191 return (c == CSV_CR) || (c == CSV_LF);
194 void
195 field_cb (void *s, size_t len, void *data)
197 char *str;
198 char *field_name;
199 rec_field_t field;
200 struct csv2rec_ctx *ctx;
201 size_t i;
203 ctx = (struct csv2rec_ctx *) data;
204 str = xmalloc (len + 1);
205 memcpy (str, s, len);
206 str[len] = '\0';
208 if (ctx->header_p)
210 /* Add a new field name to ctx.field_names. */
212 if ((ctx->num_field_names % ALLOC_FIELDS) == 0)
213 ctx->field_names =
214 realloc (ctx->field_names, ((ctx->num_field_names / ALLOC_FIELDS) + 1) * (sizeof(char *) * ALLOC_FIELDS));
216 /* Normalize the name: spaces and tabs are turned into dashes
217 '_'. */
218 for (i = 0; i < strlen (str); i++)
220 if ((str[i] == ' ') || (str[i] == '\t'))
222 str[i] = '_';
226 /* Verify that it is a valid field name. */
227 field_name = str;
228 if (!rec_field_name_p (field_name))
230 recutl_fatal (_("invalid field name '%s' in header\n"),
231 str);
233 ctx->field_names[ctx->num_field_names++] = str;
235 else
237 /* Create a new field and insert it in the current record. */
239 if (!ctx->record)
241 /* Create a new record. */
242 ctx->record = rec_record_new ();
243 if (!ctx->record)
244 recutl_out_of_memory ();
247 if (!csv2rec_omit_empty || (strlen(str) > 0))
249 if (ctx->num_fields > ctx->num_field_names)
251 char *source = csv2rec_csv_file;
253 if (!source)
255 source = "stdin";
258 fprintf (stderr,
259 _("%s: %lu: this line contains %lu fields, but %lu header fields were read\n"),
260 source,
261 ctx->lineno, ctx->num_field_names, ctx->num_fields);
262 exit (EXIT_FAILURE);
264 field = rec_field_new (ctx->field_names[ctx->num_fields], str);
265 rec_mset_append (rec_record_mset (ctx->record), MSET_FIELD, (void *) field, MSET_ANY);
268 ctx->num_fields++;
272 void
273 record_cb (int c, void *data)
275 struct csv2rec_ctx *ctx;
276 ctx = (struct csv2rec_ctx *) data;
278 ctx->lineno++;
280 if (ctx->header_p)
282 ctx->header_p = false;
284 else
286 if (!ctx->rset)
288 /* Create a new record set. */
289 ctx->rset = rec_rset_new ();
290 if (!ctx->rset)
291 recutl_out_of_memory ();
293 /* Add a type, if needed. */
294 if (csv2rec_record_type)
296 rec_rset_set_type (ctx->rset, csv2rec_record_type);
299 /* Add it to the database. */
300 if (!ctx->db)
302 ctx->db = rec_db_new ();
303 if (!ctx->db)
304 recutl_out_of_memory ();
306 rec_db_insert_rset (ctx->db, ctx->rset, rec_db_size (ctx->db));
309 /* Add the current record to the record set. */
310 rec_mset_append (rec_rset_mset (ctx->rset), MSET_RECORD, (void *) ctx->record, MSET_ANY);
311 ctx->record = NULL;
313 /* Reset the field counter. */
314 ctx->num_fields = 0;
318 static rec_db_t
319 process_csv (void)
321 struct csv2rec_ctx ctx;
322 FILE *in;
323 struct csv_parser p;
324 unsigned char options = 0;
325 char buf[1024];
326 size_t bytes_read = 0;
328 /* Initialize the data in the context. */
329 ctx.db = NULL;
330 ctx.rset = NULL;
331 ctx.record = NULL;
332 ctx.header_p = true;
333 ctx.field_names = NULL;
334 ctx.num_field_names = 0;
335 ctx.num_fields = 0;
336 ctx.lineno = 0;
338 /* Set the files to read/write from/to.
340 If a filename was specified, read the csv file from there.
341 Otherwise use the standard input. The output is written to the
342 standard output in any case. */
343 if (csv2rec_csv_file)
345 if (!(in = fopen (csv2rec_csv_file, "r")))
347 recutl_fatal (_("cannot read file %s\n"), csv2rec_csv_file);
350 else
352 in = stdin;
355 /* Initialize the csv library. */
356 if (csv_init (&p, options) != 0)
358 recutl_fatal (_("failed to initialize csv parser\n"));
361 /* Set some properties of the parser. */
362 if (csv2rec_strict)
364 options |= CSV_STRICT;
365 csv_set_opts (&p, options);
368 csv_set_space_func (&p, is_space);
369 csv_set_term_func (&p, is_term);
371 /* Parse the input file in chunks of data. */
372 while ((bytes_read = fread (buf, 1, 1024, in)) > 0)
374 if (csv_parse (&p, buf, bytes_read, field_cb, record_cb, &ctx) != bytes_read)
376 recutl_fatal (_("error while parsing CSV file: %s\n"),
377 csv_strerror (csv_error (&p)));
382 return ctx.db;
386 main (int argc, char *argv[])
388 int ret;
389 rec_db_t db;
390 rec_writer_t writer;
392 recutl_init ("csv2rec");
394 parse_args (argc, argv);
395 db = process_csv ();
396 ret = EXIT_SUCCESS;
398 if (db)
400 writer = rec_writer_new (stdout);
401 rec_write_db (writer, db);
403 rec_writer_destroy (writer);
404 rec_db_destroy (db);
406 else
408 ret = EXIT_FAILURE;
411 return ret;
414 /* End of csv2rec.c */