src/csv.c

   1 /* csv - read write comma separated value format
   2  * Copyright (c) 2003 Michael B. Allen <mba2000 ioplex.com>
   3  *
   4  * The MIT License
   5  *
   6  * Permission is hereby granted, free of charge, to any person obtaining a
   7  * copy of this software and associated documentation files (the "Software"),
   8  * to deal in the Software without restriction, including without limitation
   9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10  * and/or sell copies of the Software, and to permit persons to whom the
  11  * Software is furnished to do so, subject to the following conditions:
  12  *
  13  * The above copyright notice and this permission notice shall be included
  14  * in all copies or substantial portions of the Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  22  * OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 /* We (Juergen Haas and Tomasz Motylewski) execute our rights given above
  26  * to distribute and sublicence this file (csv.c) and csv.h, csv_defines.h
  27  * under General Public Licence version 2 or any later version.
  28  *
  29  * This file is derived from libmba : A library of generic C modules
  30  * http://www.ioplex.com/~miallen/libmba/dl/libmba-0.8.9.tar.gz
  31  */
  32
  33 /** \file csv.c
  34     \brief Parsing support functions for the pick and place parser
  35     \ingroup libgerbv
  36 */
  37
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif /* HAVE_CONFIG_H */
  41
  42
  43 #include <stdlib.h>
  44 #include <string.h>
  45 #include <stdio.h>
  46 #include <ctype.h>
  47 #include <errno.h>
  48 #include <wchar.h>
  49 #include <wctype.h>
  50
  51 #include "gerbv.h"
  52 #include "csv.h"
  53 #include "csv_defines.h"
/* States of the field parser used by csv_parse_str() and csv_parse_wcs(). */
#define ST_START     1  /* at the start of a field, before any significant character */
#define ST_COLLECT   2  /* collecting the characters of a field */
#define ST_TAILSPACE 3  /* skipping whitespace that follows a closing quote */
#define ST_END_QUOTE 4  /* just saw '"' inside a quoted field: closing or escaped quote */
/* Alias for iswspace(); not referenced by the code visible in this file. */
#define istspace iswspace
  59
  60
/* Byte input cursor consumed by snextch(): characters come from the stdio
 * stream `in` when it is non-NULL, otherwise from the memory range
 * src[0 .. sn). */
struct sinput {
        FILE *in;          /* stream to read from, or NULL to read from src */
        const char *src;   /* next unread byte of the in-memory input */
        size_t sn;         /* number of bytes still available at src */
        size_t count;      /* number of characters handed out so far */
};
  67
  68
/* Wide-character input cursor consumed by wnextch(); always backed by the
 * memory range src[0 .. sn). */
struct winput {
        const wchar_t *src;   /* next unread wide character */
        size_t sn;            /* number of wide characters still available at src */
        size_t count;         /* number of wide characters handed out so far */
};
  74
  75
  76 static int
  77 snextch(struct sinput *in)
  78 {
  79         int ch;
  80
  81         if (in->in) {
  82                 if ((ch = fgetc(in->in)) == EOF) {
  83                         if (ferror(in->in)) {
  84                                 GERB_MESSAGE("errno:%d", errno);
  85                                 return -1;
  86                         }
  87                         return 0;
  88                 }
  89         } else {
  90                 if (in->sn == 0) {
  91                         return 0;
  92                 }
  93                 ch = (unsigned char) *(in->src)++;
  94                 in->sn--;
  95         }
  96         in->count++;
  97
  98         return ch;
  99 }/* snextch */
 100
 101
 102 static int
 103 wnextch(struct winput *in)
 104 {
 105         int ch;
 106
 107         if (in->sn == 0) {
 108                 return 0;
 109         }
 110         ch = *(in->src)++;
 111         in->sn--;
 112         in->count++;
 113
 114         return ch;
 115 }/* wnextch */
 116
 117 static int
 118 csv_parse_str(struct sinput *in, char *buf, size_t bn, char *row[], int rn, int sep, int flags)
 119 {
 120         int trim, quotes, ch, state, r, j, t, inquotes;
 121
 122         trim = flags & CSV_TRIM;
 123         quotes = flags & CSV_QUOTES;
 124         state = ST_START;
 125         inquotes = 0;
 126         ch = r = j = t = 0;
 127
 128         memset(row, 0, sizeof(char *) * rn);
 129
 130         while (rn && bn && (ch = snextch(in)) > 0) {
 131                 switch (state) {
 132                         case ST_START:
 133                                 if (ch != '\n' && ch != sep && isspace(ch)) {
 134                                         if (!trim) {
 135                                                 buf[j++] = ch; bn--;
 136                                                 t = j;
 137                                         }
 138                                         break;
 139                                 } else if (quotes && ch == '"') {
 140                                         j = t = 0;
 141                                         state = ST_COLLECT;
 142                                         inquotes = 1;
 143                                         break;
 144                                 }
 145                                 state = ST_COLLECT;
 146                         case ST_COLLECT:
 147                                 if (inquotes) {
 148                                         if (ch == '"') {
 149                                                 state = ST_END_QUOTE;
 150                                                 break;
 151                                         }
 152                                 } else if (ch == sep || ch == '\n') {
 153                                         row[r++] = buf; rn--;
 154                                         buf[t] = '\0'; bn--;
 155                                         buf += t + 1;
 156                                         j = t = 0;
 157
 158                                         state = ST_START;
 159                                         inquotes = 0;
 160                                         if (ch == '\n') {
 161                                                 rn = 0;
 162                                         }
 163                                         break;
 164                                 } else if (quotes && ch == '"') {
 165                                         errno = EILSEQ;
 166                                         GERB_MESSAGE("%d: unexpected quote in element",errno);
 167                                         return -1;
 168                                 }
 169                                 buf[j++] = ch; bn--;
 170                                 if (!trim || isspace(ch) == 0) {
 171                                         t = j;
 172                                 }
 173                                 break;
 174                         case ST_TAILSPACE:
 175                         case ST_END_QUOTE:
 176                                 if (ch == sep || ch == '\n') {
 177                                         row[r++] = buf; rn--;
 178                                         buf[j] = '\0'; bn--;
 179                                         buf += j + 1;
 180                                         j = t =  0;
 181                                         state = ST_START;
 182                                         inquotes = 0;
 183                                         if (ch == '\n') {
 184                                                 rn = 0;
 185                                         }
 186                                         break;
 187                                 } else if (quotes && ch == '"' && state != ST_TAILSPACE) {
 188                                         buf[j++] = '"'; bn--;            /* nope, just an escaped quote */
 189                                         t = j;
 190                                         state = ST_COLLECT;
 191                                         break;
 192                                 } else if (isspace(ch)) {
 193                                         state = ST_TAILSPACE;
 194                                         break;
 195                                 }
 196                                 errno = EILSEQ;
 197                                 GERB_MESSAGE("%d: bad end quote in element", errno);
 198                                 return -1;
 199                 }
 200         }
 201         if (ch <= 0) {
 202                 /* treat EOF as EOL, so the last record is accepted even when
 203                    \n is not present. Some users parse strings, not lines */
 204                 if(state == ST_TAILSPACE || state == ST_END_QUOTE
 205                         || (state == ST_COLLECT && ! inquotes)) {
 206                         row[r++] = buf; rn--;
 207                         buf[j] = '\0'; bn--;
 208                         buf += j + 1;
 209                         inquotes = 0;
 210                         rn = 0;
 211                 } else {
 212         //              AMSG("");
 213                         return -1;
 214                 }
 215         }
 216         if (bn == 0) {
 217                 errno = E2BIG;
 218                 GERB_MESSAGE("E2BIG %d ", errno);
 219                 return -1;
 220         }
 221         if (rn) {
 222                 if (inquotes) {
 223                         errno = EILSEQ;
 224                         GERB_MESSAGE("EILSEQ %d ", errno);
 225                         return -1;
 226                 }
 227                 row[r] = buf;
 228                 buf[t] = '\0';
 229         }
 230         // return error if we can't read the minimum number of fields
 231         if (r < 4) {
 232                 return -1;
 233         }
 234         return in->count;
 235 }/* csv_parse_str */
 236
 237
 238 static int
 239 csv_parse_wcs(struct winput *in, wchar_t *buf, size_t bn, wchar_t *row[], int rn, wint_t sep, int flags)
 240 {
 241         int trim, quotes, state, r, j, t, inquotes;
 242         wint_t ch;
 243
 244         trim = flags & CSV_TRIM;
 245         quotes = flags & CSV_QUOTES;
 246         state = ST_START;
 247         inquotes = 0;
 248         ch = r = j = t = 0;
 249
 250         memset(row, 0, sizeof(wchar_t *) * rn);
 251
 252         while (rn && bn && (ch = wnextch(in)) > 0) {
 253                 switch (state) {
 254                         case ST_START:
 255                                 if (ch != L'\n' && ch != sep && iswspace(ch)) {
 256                                         if (!trim) {
 257                                                 buf[j++] = ch; bn--;
 258                                                 t = j;
 259                                         }
 260                                         break;
 261                                 } else if (quotes && ch == L'"') {
 262                                         j = t = 0;
 263                                         state = ST_COLLECT;
 264                                         inquotes = 1;
 265                                         break;
 266                                 }
 267                                 state = ST_COLLECT;
 268                         case ST_COLLECT:
 269                                 if (inquotes) {
 270                                         if (ch == L'"') {
 271                                                 state = ST_END_QUOTE;
 272                                                 break;
 273                                         }
 274                                 } else if (ch == sep || ch == L'\n') {
 275                                         row[r++] = buf; rn--;
 276                                         buf[t] = L'\0'; bn--;
 277                                         buf += t + 1;
 278                                         j = t = 0;
 279                                         state = ST_START;
 280                                         inquotes = 0;
 281                                         if (ch == L'\n') {
 282                                                 rn = 0;
 283                                         }
 284                                         break;
 285                                 } else if (quotes && ch == L'"') {
 286                                         errno = EILSEQ;
 287                                         GERB_MESSAGE("%d: unexpected quote in element", errno);
 288                                         return -1;
 289                                 }
 290                                 buf[j++] = ch; bn--;
 291                                 if (!trim || iswspace(ch) == 0) {
 292                                         t = j;
 293                                 }
 294                                 break;
 295                         case ST_TAILSPACE:
 296                         case ST_END_QUOTE:
 297                                 if (ch == sep || ch == L'\n') {
 298                                         row[r++] = buf; rn--;
 299                                         buf[j] = L'\0'; bn--;
 300                                         buf += j + 1;
 301                                         j = t =  0;
 302                                         state = ST_START;
 303                                         inquotes = 0;
 304                                         if (ch == L'\n') {
 305                                                 rn = 0;
 306                                         }
 307                                         break;
 308                                 } else if (quotes && ch == L'"' && state != ST_TAILSPACE) {
 309                                         buf[j++] = L'"'; bn--;           /* nope, just an escaped quote */
 310                                         t = j;
 311                                         state = ST_COLLECT;
 312                                         break;
 313                                 } else if (iswspace(ch)) {
 314                                         state = ST_TAILSPACE;
 315                                         break;
 316                                 }
 317                                 errno = EILSEQ;
 318                                 GERB_MESSAGE("%d: bad end quote in element ", errno);
 319                                 return -1;
 320                 }
 321         }
 322         if (ch <= 0) {
 323                 /* treat EOF as EOL, so the last record is accepted even when
 324                    \n is not present. Some users parse strings, not lines */
 325                 if(state == ST_TAILSPACE || state == ST_END_QUOTE
 326                         || (state == ST_COLLECT && ! inquotes)) {
 327                         row[r++] = buf; rn--;
 328                         buf[j] = L'\0'; bn--;
 329                         buf += j + 1;
 330                         inquotes = 0;
 331                         rn = 0;
 332                 } else {
 333         //              AMSG("");
 334                         return -1;
 335                 }
 336         }
 337         if (bn == 0) {
 338                 errno = E2BIG;
 339         GERB_MESSAGE("%d", errno);
 340                 return -1;
 341         }
 342         if (rn) {
 343                 if (inquotes) {
 344                         errno = EILSEQ;
 345                 GERB_MESSAGE("%d", errno);
 346                         return -1;
 347                 }
 348                 row[r] = buf;
 349                 buf[t] = L'\0';
 350         }
 351
 352         return in->count;
 353 }/*csv_row_parse_wcs*/
 354
 355
 356 int
 357 csv_row_parse_wcs(const wchar_t *src, size_t sn, wchar_t *buf, size_t bn, wchar_t *row[], int rn, int sep, int trim)
 358 {
 359         struct winput input;
 360         input.src = src;
 361         input.sn = sn;
 362         input.count = 0;
 363         return csv_parse_wcs(&input, buf, bn, row, rn, (wint_t)sep, trim);
 364 }/*csv_row_parse_wcs*/
 365
 366
 367 int
 368 csv_row_parse_str(const char *src, size_t sn, char *buf, size_t bn, char *row[], int rn, int sep, int trim)
 369 {
 370         struct sinput input;
 371         input.in = NULL;
 372         input.src = src;
 373         input.sn = sn;
 374         input.count = 0;
 375         return csv_parse_str(&input, buf, bn, row, rn, sep, trim);
 376 }/*csv_row_parse_str*/
 377
 378
 379 int
 380 csv_row_fread(FILE *in, char *buf, size_t bn, char *row[], int numcols, int sep, int trim)
 381 {
 382         struct sinput input;
 383         input.in = in;
 384         input.count = 0;
 385         return csv_parse_str(&input, buf, bn, row, numcols, sep, trim);
 386 }/*csv_row_fread*/
 387