1 /* csv - read write comma separated value format
2 * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
25 /* We (Juergen Haas and Tomasz Motylewski) execute our rights given above
26 * to distribute and sublicence this file (csv.c) and csv.h, csv_defines.h
27 * under General Public Licence version 2 or any later version.
29 * This file is derived from libmba : A library of generic C modules
30 * http://www.ioplex.com/~miallen/libmba/dl/libmba-0.8.9.tar.gz
34 \brief Parsing support functions for the pick and place parser
40 #endif /* HAVE_CONFIG_H */
53 #include "csv_defines.h"
56 #define ST_TAILSPACE 3
57 #define ST_END_QUOTE 4
58 #define istspace iswspace
77 snextch(struct sinput
*in
)
82 if ((ch
= fgetc(in
->in
)) == EOF
) {
84 GERB_MESSAGE("errno:%d", errno
);
93 ch
= (unsigned char) *(in
->src
)++;
103 wnextch(struct winput
*in
)
118 csv_parse_str(struct sinput
*in
, char *buf
, size_t bn
, char *row
[], int rn
, int sep
, int flags
)
120 int trim
, quotes
, ch
, state
, r
, j
, t
, inquotes
;
122 trim
= flags
& CSV_TRIM
;
123 quotes
= flags
& CSV_QUOTES
;
128 memset(row
, 0, sizeof(char *) * rn
);
130 while (rn
&& bn
&& (ch
= snextch(in
)) > 0) {
133 if (ch
!= '\n' && ch
!= sep
&& isspace(ch
)) {
139 } else if (quotes
&& ch
== '"') {
149 state
= ST_END_QUOTE
;
152 } else if (ch
== sep
|| ch
== '\n') {
153 row
[r
++] = buf
; rn
--;
164 } else if (quotes
&& ch
== '"') {
166 GERB_MESSAGE("%d: unexpected quote in element",errno
);
170 if (!trim
|| isspace(ch
) == 0) {
176 if (ch
== sep
|| ch
== '\n') {
177 row
[r
++] = buf
; rn
--;
187 } else if (quotes
&& ch
== '"' && state
!= ST_TAILSPACE
) {
188 buf
[j
++] = '"'; bn
--; /* nope, just an escaped quote */
192 } else if (isspace(ch
)) {
193 state
= ST_TAILSPACE
;
197 GERB_MESSAGE("%d: bad end quote in element", errno
);
202 /* treat EOF as EOL, so the last record is accepted even when
203 \n is not present. Some users parse strings, not lines */
204 if(state
== ST_TAILSPACE
|| state
== ST_END_QUOTE
205 || (state
== ST_COLLECT
&& ! inquotes
)) {
206 row
[r
++] = buf
; rn
--;
218 GERB_MESSAGE("E2BIG %d ", errno
);
224 GERB_MESSAGE("EILSEQ %d ", errno
);
230 // return error if we can't read the minimum number of fields
239 csv_parse_wcs(struct winput
*in
, wchar_t *buf
, size_t bn
, wchar_t *row
[], int rn
, wint_t sep
, int flags
)
241 int trim
, quotes
, state
, r
, j
, t
, inquotes
;
244 trim
= flags
& CSV_TRIM
;
245 quotes
= flags
& CSV_QUOTES
;
250 memset(row
, 0, sizeof(wchar_t *) * rn
);
252 while (rn
&& bn
&& (ch
= wnextch(in
)) > 0) {
255 if (ch
!= L
'\n' && ch
!= sep
&& iswspace(ch
)) {
261 } else if (quotes
&& ch
== L
'"') {
271 state
= ST_END_QUOTE
;
274 } else if (ch
== sep
|| ch
== L
'\n') {
275 row
[r
++] = buf
; rn
--;
276 buf
[t
] = L
'\0'; bn
--;
285 } else if (quotes
&& ch
== L
'"') {
287 GERB_MESSAGE("%d: unexpected quote in element", errno
);
291 if (!trim
|| iswspace(ch
) == 0) {
297 if (ch
== sep
|| ch
== L
'\n') {
298 row
[r
++] = buf
; rn
--;
299 buf
[j
] = L
'\0'; bn
--;
308 } else if (quotes
&& ch
== L
'"' && state
!= ST_TAILSPACE
) {
309 buf
[j
++] = L
'"'; bn
--; /* nope, just an escaped quote */
313 } else if (iswspace(ch
)) {
314 state
= ST_TAILSPACE
;
318 GERB_MESSAGE("%d: bad end quote in element ", errno
);
323 /* treat EOF as EOL, so the last record is accepted even when
324 \n is not present. Some users parse strings, not lines */
325 if(state
== ST_TAILSPACE
|| state
== ST_END_QUOTE
326 || (state
== ST_COLLECT
&& ! inquotes
)) {
327 row
[r
++] = buf
; rn
--;
328 buf
[j
] = L
'\0'; bn
--;
339 GERB_MESSAGE("%d", errno
);
345 GERB_MESSAGE("%d", errno
);
353 }/*csv_parse_wcs*/
357 csv_row_parse_wcs(const wchar_t *src
, size_t sn
, wchar_t *buf
, size_t bn
, wchar_t *row
[], int rn
, int sep
, int trim
)
363 return csv_parse_wcs(&input
, buf
, bn
, row
, rn
, (wint_t)sep
, trim
);
364 }/*csv_row_parse_wcs*/
368 csv_row_parse_str(const char *src
, size_t sn
, char *buf
, size_t bn
, char *row
[], int rn
, int sep
, int trim
)
375 return csv_parse_str(&input
, buf
, bn
, row
, rn
, sep
, trim
);
376 }/*csv_row_parse_str*/
380 csv_row_fread(FILE *in
, char *buf
, size_t bn
, char *row
[], int numcols
, int sep
, int trim
)
385 return csv_parse_str(&input
, buf
, bn
, row
, numcols
, sep
, trim
);