2 * libcsv - parse and write csv data
4 * Original Author: Robert Gamble.
6 * Copyright (C) 2008 Robert Gamble
7 * Copyright (C) 2010-2015 Jose E. Marchesi
10 /* This program is free software: you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License version
12 * 3 as published by the Free Software Foundation.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program. If not, see <http://www.gnu.org/licenses/>.
23 #if ___STDC_VERSION__ >= 199901L
26 # define SIZE_MAX ((size_t)-1) /* C89 doesn't have stdint.h or SIZE_MAX */
33 #define LIBCSV_VERSION "3.0.0-recutils"
35 #define ROW_NOT_BEGUN 0
36 #define FIELD_NOT_BEGUN 1
38 #define FIELD_MIGHT_HAVE_ENDED 3
42 ROW_NOT_BEGUN There have not been any fields encountered for this row
43 FIELD_NOT_BEGUN There have been fields but we are currently not in one
44 FIELD_BEGUN We are in a field
45 FIELD_MIGHT_HAVE_ENDED
46 We encountered a double quote inside a quoted field; either the
47 field has ended or the quote is literal
50 #define MEM_BLK_SIZE 128
52 #define SUBMIT_FIELD(p) \
55 entry_pos -= spaces; \
56 if (p->options & CSV_APPEND_NULL) \
57 ((p)->entry_buf[entry_pos+1]) = '\0'; \
59 cb1(p->entry_buf, entry_pos, data); \
60 pstate = FIELD_NOT_BEGUN; \
61 entry_pos = quoted = spaces = 0; \
64 #define SUBMIT_ROW(p, c) \
68 pstate = ROW_NOT_BEGUN; \
69 entry_pos = quoted = spaces = 0; \
72 #define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c))
74 static char *csv_errors
[] = {"success",
75 "error parsing data while strict checking enabled",
76 "memory exhausted while increasing buffer size",
77 "data size too large",
78 "invalid status code"};
81 csv_error(struct csv_parser
*p
)
83 /* Return the current status of the parser */
88 csv_strerror(int status
)
90 /* Return a textual description of status */
91 if (status
>= CSV_EINVALID
|| status
< 0)
92 return csv_errors
[CSV_EINVALID
];
94 return csv_errors
[status
];
98 csv_get_opts(struct csv_parser
*p
)
100 /* Return the currently set options of parser */
108 csv_set_opts(struct csv_parser
*p
, unsigned char options
)
110 /* Set the options */
114 p
->options
= options
;
119 csv_init(struct csv_parser
*p
, unsigned char options
)
121 /* Initialize a csv_parser object; returns 0 on success, -1 on error */
126 p
->pstate
= ROW_NOT_BEGUN
;
132 p
->options
= options
;
133 p
->quote_char
= CSV_QUOTE
;
134 p
->delim_char
= CSV_COMMA
;
137 p
->blk_size
= MEM_BLK_SIZE
;
138 p
->malloc_func
= NULL
;
139 p
->realloc_func
= realloc
;
146 csv_free(struct csv_parser
*p
)
148 /* Free the entry_buffer of csv_parser object */
153 p
->free_func(p
->entry_buf
);
162 csv_fini(struct csv_parser
*p
, void (*cb1
)(void *, size_t, void *), void (*cb2
)(int c
, void *), void *data
)
164 /* Finalize parsing. Needed, for example, when the file does not end in a newline */
165 int quoted
= p
->quoted
;
166 int pstate
= p
->pstate
;
167 size_t spaces
= p
->spaces
;
168 size_t entry_pos
= p
->entry_pos
;
174 if (p
->pstate
== FIELD_BEGUN
&& p
->quoted
&& p
->options
& CSV_STRICT
&& p
->options
& CSV_STRICT_FINI
) {
175 /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */
176 p
->status
= CSV_EPARSE
;
181 case FIELD_MIGHT_HAVE_ENDED
:
182 p
->entry_pos
-= p
->spaces
+ 1; /* get rid of spaces and original quote */
184 case FIELD_NOT_BEGUN
:
186 quoted
= p
->quoted
, pstate
= p
->pstate
;
187 spaces
= p
->spaces
, entry_pos
= p
->entry_pos
;
190 case ROW_NOT_BEGUN
: /* Already ended properly */
195 p
->spaces
= p
->quoted
= p
->entry_pos
= p
->status
= 0;
196 p
->pstate
= ROW_NOT_BEGUN
;
202 csv_set_delim(struct csv_parser
*p
, unsigned char c
)
204 /* Set the delimiter */
205 if (p
) p
->delim_char
= c
;
209 csv_set_quote(struct csv_parser
*p
, unsigned char c
)
211 /* Set the quote character */
212 if (p
) p
->quote_char
= c
;
216 csv_get_delim(struct csv_parser
*p
)
218 /* Get the delimiter */
219 return p
->delim_char
;
223 csv_get_quote(struct csv_parser
*p
)
225 /* Get the quote character */
226 return p
->quote_char
;
230 csv_set_space_func(struct csv_parser
*p
, int (*f
)(unsigned char))
232 /* Set the space function */
233 if (p
) p
->is_space
= f
;
237 csv_set_term_func(struct csv_parser
*p
, int (*f
)(unsigned char))
239 /* Set the term function */
240 if (p
) p
->is_term
= f
;
244 csv_set_realloc_func(struct csv_parser
*p
, void *(*f
)(void *, size_t))
246 /* Set the realloc function used to increase buffer size */
247 if (p
&& f
) p
->realloc_func
= f
;
251 csv_set_free_func(struct csv_parser
*p
, void (*f
)(void *))
253 /* Set the free function used to free the buffer */
254 if (p
&& f
) p
->free_func
= f
;
258 csv_set_blk_size(struct csv_parser
*p
, size_t size
)
260 /* Set the block size used to increment buffer size */
261 if (p
) p
->blk_size
= size
;
265 csv_get_buffer_size(struct csv_parser
*p
)
267 /* Get the size of the entry buffer */
269 return p
->entry_size
;
274 csv_increase_buffer(struct csv_parser
*p
)
276 /* Increase the size of the entry buffer. Attempt to increase the size by
277 * p->blk_size; if that would exceed SIZE_MAX, try to increase the current
278 * buffer size to SIZE_MAX instead. If allocation fails, halve the increment
279 * and try again until successful or the increment size is zero.
282 size_t to_add
= p
->blk_size
;
285 if ( p
->entry_size
>= SIZE_MAX
- to_add
)
286 to_add
= SIZE_MAX
- p
->entry_size
;
289 p
->status
= CSV_ETOOBIG
;
293 while ((vp
= p
->realloc_func(p
->entry_buf
, p
->entry_size
+ to_add
)) == NULL
) {
296 p
->status
= CSV_ENOMEM
;
301 /* Update entry buffer pointer and entry_size if successful */
303 p
->entry_size
+= to_add
;
308 csv_parse(struct csv_parser
*p
, const void *s
, size_t len
, void (*cb1
)(void *, size_t, void *), void (*cb2
)(int c
, void *), void *data
)
310 unsigned const char *us
= s
; /* Access input data as array of unsigned char */
311 unsigned char c
; /* The character we are currently processing */
312 size_t pos
= 0; /* The number of characters we have processed in this call */
314 /* Store key fields into local variables for performance */
315 unsigned char delim
= p
->delim_char
;
316 unsigned char quote
= p
->quote_char
;
317 int (*is_space
)(unsigned char) = p
->is_space
;
318 int (*is_term
)(unsigned char) = p
->is_term
;
319 int quoted
= p
->quoted
;
320 int pstate
= p
->pstate
;
321 size_t spaces
= p
->spaces
;
322 size_t entry_pos
= p
->entry_pos
;
325 if (!p
->entry_buf
&& pos
< len
) {
326 /* Buffer hasn't been allocated yet and len > 0 */
327 if (csv_increase_buffer(p
) != 0) {
328 p
->quoted
= quoted
, p
->pstate
= pstate
, p
->spaces
= spaces
, p
->entry_pos
= entry_pos
;
334 /* Check memory usage, increase buffer if necessary */
335 if (entry_pos
== ((p
->options
& CSV_APPEND_NULL
) ? p
->entry_size
- 1 : p
->entry_size
) ) {
336 if (csv_increase_buffer(p
) != 0) {
337 p
->quoted
= quoted
, p
->pstate
= pstate
, p
->spaces
= spaces
, p
->entry_pos
= entry_pos
;
346 case FIELD_NOT_BEGUN
:
347 if (is_space
? is_space(c
) : c
== CSV_SPACE
|| c
== CSV_TAB
) { /* Space or Tab */
349 } else if (is_term
? is_term(c
) : c
== CSV_CR
|| c
== CSV_LF
) { /* Carriage Return or Line Feed */
350 if (pstate
== FIELD_NOT_BEGUN
) {
352 SUBMIT_ROW(p
, (unsigned char)c
);
353 } else { /* ROW_NOT_BEGUN */
354 /* Don't submit empty rows by default */
355 if (p
->options
& CSV_REPALL_NL
) {
356 SUBMIT_ROW(p
, (unsigned char)c
);
360 } else if (c
== delim
) { /* Comma */
363 } else if (c
== quote
) { /* Quote */
364 pstate
= FIELD_BEGUN
;
366 } else { /* Anything else */
367 pstate
= FIELD_BEGUN
;
373 if (c
== quote
) { /* Quote */
376 pstate
= FIELD_MIGHT_HAVE_ENDED
;
378 /* STRICT ERROR - double quote inside non-quoted field */
379 if (p
->options
& CSV_STRICT
) {
380 p
->status
= CSV_EPARSE
;
381 p
->quoted
= quoted
, p
->pstate
= pstate
, p
->spaces
= spaces
, p
->entry_pos
= entry_pos
;
387 } else if (c
== delim
) { /* Comma */
393 } else if (is_term
? is_term(c
) : c
== CSV_CR
|| c
== CSV_LF
) { /* Carriage Return or Line Feed */
396 SUBMIT_ROW(p
, (unsigned char)c
);
400 } else if (!quoted
&& (is_space
? is_space(c
) : c
== CSV_SPACE
|| c
== CSV_TAB
)) { /* Tab or space for non-quoted field */
403 } else { /* Anything else */
408 case FIELD_MIGHT_HAVE_ENDED
:
409 /* This only happens when a quote character is encountered in a quoted field */
410 if (c
== delim
) { /* Comma */
411 entry_pos
-= spaces
+ 1; /* get rid of spaces and original quote */
413 } else if (is_term
? is_term(c
) : c
== CSV_CR
|| c
== CSV_LF
) { /* Carriage Return or Line Feed */
414 entry_pos
-= spaces
+ 1; /* get rid of spaces and original quote */
416 SUBMIT_ROW(p
, (unsigned char)c
);
417 } else if (is_space
? is_space(c
) : c
== CSV_SPACE
|| c
== CSV_TAB
) { /* Space or Tab */
420 } else if (c
== quote
) { /* Quote */
422 /* STRICT ERROR - unescaped double quote */
423 if (p
->options
& CSV_STRICT
) {
424 p
->status
= CSV_EPARSE
;
425 p
->quoted
= quoted
, p
->pstate
= pstate
, p
->spaces
= spaces
, p
->entry_pos
= entry_pos
;
431 /* Two quotes in a row */
432 pstate
= FIELD_BEGUN
;
434 } else { /* Anything else */
435 /* STRICT ERROR - unescaped double quote */
436 if (p
->options
& CSV_STRICT
) {
437 p
->status
= CSV_EPARSE
;
438 p
->quoted
= quoted
, p
->pstate
= pstate
, p
->spaces
= spaces
, p
->entry_pos
= entry_pos
;
441 pstate
= FIELD_BEGUN
;
450 p
->quoted
= quoted
, p
->pstate
= pstate
, p
->spaces
= spaces
, p
->entry_pos
= entry_pos
;
455 csv_write (void *dest
, size_t dest_size
, const void *src
, size_t src_size
)
457 unsigned char *cdest
= dest
;
458 const unsigned char *csrc
= src
;
473 if (dest_size
> chars
)
475 if (chars
< SIZE_MAX
) chars
++;
477 if (dest_size
> chars
)
479 if (chars
< SIZE_MAX
) chars
++;
484 if (dest_size
> chars
)
486 if (chars
< SIZE_MAX
) chars
++;
492 csv_fwrite (FILE *fp
, const void *src
, size_t src_size
)
494 const unsigned char *csrc
= src
;
496 if (fp
== NULL
|| src
== NULL
)
499 if (fputc('"', fp
) == EOF
)
504 if (fputc('"', fp
) == EOF
)
507 if (fputc(*csrc
, fp
) == EOF
)
513 if (fputc('"', fp
) == EOF
) {
521 csv_write2 (void *dest
, size_t dest_size
, const void *src
, size_t src_size
, unsigned char quote
)
523 unsigned char *cdest
= dest
;
524 const unsigned char *csrc
= src
;
538 if (*csrc
== quote
) {
539 if (dest_size
> chars
)
541 if (chars
< SIZE_MAX
) chars
++;
543 if (dest_size
> chars
)
545 if (chars
< SIZE_MAX
) chars
++;
550 if (dest_size
> chars
)
552 if (chars
< SIZE_MAX
) chars
++;
558 csv_fwrite2 (FILE *fp
, const void *src
, size_t src_size
, unsigned char quote
)
560 const unsigned char *csrc
= src
;
562 if (fp
== NULL
|| src
== NULL
)
565 if (fputc(quote
, fp
) == EOF
)
569 if (*csrc
== quote
) {
570 if (fputc(quote
, fp
) == EOF
)
573 if (fputc(*csrc
, fp
) == EOF
)
579 if (fputc(quote
, fp
) == EOF
) {
586 /* End of libcsv.c */