4 * Date: Fri Aug 20 16:35:25 2010
6 * GNU recutils - csv to rec converter.
10 /* Copyright (C) 2010-2019 Jose E. Marchesi */
12 /* This program is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation, either version 3 of the License, or
15 * (at your option) any later version.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program. If not, see <http://www.gnu.org/licenses/>.
33 #define _(str) gettext (str)
39 /* Forward declarations. */
/* Command line processing: runs getopt_long and fills in the
   csv2rec_* option variables.  */
40 static void parse_args (int argc
, char **argv
);
/* Returns a rec_db_t.  NOTE(review): presumably the routine that
   feeds the input to csv_parse and hands back the resulting
   database -- confirm against its definition.  */
41 static rec_db_t
process_csv (void);
/* Predicate installed with csv_set_space_func: true for CSV_SPACE
   and CSV_TAB.  */
42 static int is_space (unsigned char c
);
/* Predicate installed with csv_set_term_func: true for CSV_CR and
   CSV_LF.  */
43 static int is_term (unsigned char c
);
/* Per-field callback passed to csv_parse.  */
44 static void field_cb (void *s
, size_t len
, void *data
);
/* Per-record callback passed to csv_parse.  */
45 static void record_cb (int c
, void *data
);
/* Growth step, in entries, of the field_names array: field_cb
   requests room for another ALLOC_FIELDS pointers each time the
   number of stored names is a multiple of this value.  */
60 #define ALLOC_FIELDS 256
62 size_t num_field_names
;
/* Record type name taken from the --type option (a copy of optarg).
   When non-NULL it is applied to the record set with
   rec_rset_set_type.  */
70 char *csv2rec_record_type
= NULL
;
/* Name of the csv file given on the command line.  When non-NULL
   the file is opened with fopen for reading.  */
71 char *csv2rec_csv_file
= NULL
;
/* Set to true by parse_args.  NOTE(review): presumably this is what
   turns on CSV_STRICT in the parser options -- confirm.  */
72 bool csv2rec_strict
= false;
/* Set to true by parse_args.  When true, field_cb only creates a
   field if the value has a non-zero length.  */
73 bool csv2rec_omit_empty
= false;
76 * Command line options management
/* Table of long options.  The entries below declare --type as
   taking a mandatory argument and --strict and --omit-empty as
   taking none; each maps to its *_ARG constant.  */
87 static const struct option GNU_longOptions
[] =
90 {"type", required_argument
, NULL
, RECORD_TYPE_ARG
},
91 {"strict", no_argument
, NULL
, STRICT_ARG
},
92 {"omit-empty", no_argument
, NULL
, OMIT_EMPTY_ARG
},
/* Emit the --help text of csv2rec: the usage synopsis, the short
   description and the csv2rec specific options (-t, -s, -e), then
   the shared parts through recutl_print_help_common and
   recutl_print_help_footer.  Takes no arguments.  */
101 recutl_print_help (void)
103 /* TRANSLATORS: --help output, csv2rec synopsis.
106 Usage: csv2rec [OPTIONS]... [CSV_FILE]\n"));
108 /* TRANSLATORS: --help output, csv2rec short description.
111 Convert csv data into rec data.\n"), stdout
);
114 /* TRANSLATORS: --help output, csv2rec options.
117 -t, --type=TYPE type name for the converted records; if this\n\
118 parameter is ommited then no type is used.\n\
119 -s, --strict be strict parsing the csv file.\n\
120 -e, --omit-empty omit empty fields.\n"), stdout
);
122 recutl_print_help_common ();
124 recutl_print_help_footer ();
/* Process the command line with getopt_long.

   RECORD_TYPE_ARG stores an xstrdup copy of optarg in
   csv2rec_record_type; csv2rec_strict and csv2rec_omit_empty are
   switched to true (NOTE(review): presumably under STRICT_ARG and
   OMIT_EMPTY_ARG -- confirm).  After the options, the argument at
   argv[optind] is taken as csv2rec_csv_file; the help text is
   printed when the number of remaining arguments is not exactly
   one.  */
128 parse_args (int argc
,
134 while ((ret
= getopt_long (argc
,
144 case RECORD_TYPE_ARG
:
147 csv2rec_record_type
= xstrdup (optarg
);
153 csv2rec_strict
= true;
159 csv2rec_omit_empty
= true;
169 /* Read the name of the csv file, if any. */
172 if ((argc
- optind
) != 1)
174 recutl_print_help ();
178 csv2rec_csv_file
= argv
[optind
++];
/* Return non-zero when C is CSV_SPACE or CSV_TAB, zero otherwise.
   Installed in the csv parser through csv_set_space_func.  */
183 is_space (unsigned char c
)
185 return (c
== CSV_SPACE
) || (c
== CSV_TAB
);
/* Return non-zero when C is CSV_CR or CSV_LF, zero otherwise.
   Installed in the csv parser through csv_set_term_func.  */
189 is_term (unsigned char c
)
191 return (c
== CSV_CR
) || (c
== CSV_LF
);
/* Field callback passed to csv_parse.

   S points to LEN bytes of field text and DATA is the
   struct csv2rec_ctx being filled.  The text is copied into a
   buffer obtained from xmalloc and is then used either as a header
   field name (stored in ctx->field_names) or as the value of a new
   field appended to ctx->record.

   NOTE(review): which of the two paths runs is presumably decided
   by ctx->header_p -- confirm.  */
195 field_cb (void *s
, size_t len
, void *data
)
200 struct csv2rec_ctx
*ctx
;
203 ctx
= (struct csv2rec_ctx
*) data
;
/* LEN + 1 bytes are requested but only LEN are copied.
   NOTE(review): the spare byte is presumably for the NUL
   terminator, since strlen is applied to STR later -- confirm the
   terminator is actually stored.  */
204 str
= xmalloc (len
+ 1);
205 memcpy (str
, s
, len
);
210 /* Add a new field name to ctx.field_names. */
/* The array grows in steps of ALLOC_FIELDS pointers: a new size is
   requested whenever the current count is a multiple of
   ALLOC_FIELDS, which includes the very first name (count 0).
   NOTE(review): confirm that the value returned by realloc is
   stored back into ctx->field_names and checked for NULL.  */
212 if ((ctx
->num_field_names
% ALLOC_FIELDS
) == 0)
214 realloc (ctx
->field_names
, ((ctx
->num_field_names
/ ALLOC_FIELDS
) + 1) * (sizeof(char *) * ALLOC_FIELDS
));
216 /* Normalize the name: spaces and tabs are turned into dashes
218 for (i
= 0; i
< strlen (str
); i
++)
220 if ((str
[i
] == ' ') || (str
[i
] == '\t'))
226 /* Verify that it is a valid field name. */
228 if (!rec_field_name_p (field_name
))
230 recutl_fatal (_("invalid field name '%s' in header\n"),
/* The copied buffer itself is stored in the array (no further
   duplication), and the name count is advanced.  */
233 ctx
->field_names
[ctx
->num_field_names
++] = str
;
237 /* Create a new field and insert it in the current record. */
241 /* Create a new record. */
242 ctx
->record
= rec_record_new ();
244 recutl_out_of_memory ();
/* With csv2rec_omit_empty set, zero-length values produce no
   field.  */
247 if (!csv2rec_omit_empty
|| (strlen(str
) > 0))
/* NOTE(review): names occupy indexes 0 .. num_field_names - 1, and
   the lookup further down reads field_names[num_fields].  With a
   strict greater-than test, num_fields == num_field_names is let
   through and would index one past the stored names -- check
   whether this comparison should be >=.  */
249 if (ctx
->num_fields
> ctx
->num_field_names
)
251 char *source
= csv2rec_csv_file
;
/* NOTE(review): the message reads "<fields on this line> ...
   <header fields>", but num_field_names is passed before
   num_fields, so the two counts look swapped.  Also,
   num_field_names is a size_t while the conversion is %lu; these
   only match where size_t is unsigned long.  */
259 _("%s: %lu: this line contains %lu fields, but %lu header fields were read\n"),
261 ctx
->lineno
, ctx
->num_field_names
, ctx
->num_fields
);
/* The field is named after the header stored at the same position
   as the current field counter, and appended to the record.
   NOTE(review): STR is not released on any path seen here; check
   whether rec_field_new copies its value argument.  */
264 field
= rec_field_new (ctx
->field_names
[ctx
->num_fields
], str
);
265 rec_mset_append (rec_record_mset (ctx
->record
), MSET_FIELD
, (void *) field
, MSET_ANY
);
/* Record callback passed to csv_parse.

   DATA is the struct csv2rec_ctx being filled.  The callback clears
   ctx->header_p, creates the record set (typed with
   csv2rec_record_type when that is set) and the database, inserts
   the record set into the database, and appends ctx->record to the
   record set.

   NOTE(review): C is presumably the character that ended the
   record, as reported by the csv library -- confirm.  The
   conditions guarding each step should be checked against the full
   function.  */
273 record_cb (int c
, void *data
)
275 struct csv2rec_ctx
*ctx
;
276 ctx
= (struct csv2rec_ctx
*) data
;
282 ctx
->header_p
= false;
288 /* Create a new record set. */
289 ctx
->rset
= rec_rset_new ();
291 recutl_out_of_memory ();
293 /* Add a type, if needed. */
294 if (csv2rec_record_type
)
296 rec_rset_set_type (ctx
->rset
, csv2rec_record_type
);
299 /* Add it to the database. */
302 ctx
->db
= rec_db_new ();
304 recutl_out_of_memory ();
/* The position argument is the current size of the database.
   NOTE(review): presumably this places the record set after the
   ones already present -- confirm.  */
306 rec_db_insert_rset (ctx
->db
, ctx
->rset
, rec_db_size (ctx
->db
));
309 /* Add the current record to the record set. */
310 rec_mset_append (rec_rset_mset (ctx
->rset
), MSET_RECORD
, (void *) ctx
->record
, MSET_ANY
);
313 /* Reset the field counter. */
/* NOTE(review): presumably the body of process_csv -- confirm.
   It prepares a struct csv2rec_ctx, opens csv2rec_csv_file when one
   was given, initializes and configures the csv parser, and then
   feeds the input to csv_parse with field_cb and record_cb as
   callbacks and the context as their data argument.  Failures to
   open the file, to initialize the parser or to parse a chunk are
   reported with recutl_fatal.  */
321 struct csv2rec_ctx ctx
;
324 unsigned char options
= 0;
326 size_t bytes_read
= 0;
328 /* Initialize the data in the context. */
333 ctx
.field_names
= NULL
;
334 ctx
.num_field_names
= 0;
338 /* Set the files to read/write from/to.
340 If a filename was specified, read the csv file from there.
341 Otherwise use the standard input. The output is written to the
342 standard output in any case. */
343 if (csv2rec_csv_file
)
345 if (!(in
= fopen (csv2rec_csv_file
, "r")))
347 recutl_fatal (_("cannot read file %s\n"), csv2rec_csv_file
);
355 /* Initialize the csv library. */
/* OPTIONS is still 0 at this point; option bits are applied
   afterwards through csv_set_opts.  */
356 if (csv_init (&p
, options
) != 0)
358 recutl_fatal (_("failed to initialize csv parser\n"));
361 /* Set some properties of the parser. */
364 options
|= CSV_STRICT
;
365 csv_set_opts (&p
, options
);
/* Use the local predicates for blank and line-terminator
   characters.  */
368 csv_set_space_func (&p
, is_space
);
369 csv_set_term_func (&p
, is_term
);
371 /* Parse the input file in chunks of data. */
/* Up to 1024 bytes are read per iteration, and a csv_parse result
   different from the number of bytes handed in is treated as a
   parse error.  NOTE(review): confirm that BUF holds at least 1024
   bytes, that csv_fini is called after the loop so a final
   unterminated record is not lost, and that a read error (ferror)
   is distinguished from end of file.  */
372 while ((bytes_read
= fread (buf
, 1, 1024, in
)) > 0)
374 if (csv_parse (&p
, buf
, bytes_read
, field_cb
, record_cb
, &ctx
) != bytes_read
)
376 recutl_fatal (_("error while parsing CSV file: %s\n"),
377 csv_strerror (csv_error (&p
)));
/* Program entry point.  Initializes the utility with
   recutl_init ("csv2rec"), processes the command line with
   parse_args, then writes DB to the standard output through a
   writer created with rec_writer_new and destroys the writer.
   NOTE(review): DB is presumably the value returned by
   process_csv -- confirm.  */
386 main (int argc
, char *argv
[])
392 recutl_init ("csv2rec");
394 parse_args (argc
, argv
);
400 writer
= rec_writer_new (stdout
);
401 rec_write_db (writer
, db
);
403 rec_writer_destroy (writer
);
414 /* End of csv2rec.c */