/* PSPP - a program for statistical analysis.
   Copyright (C) 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>

#include "data/csv-file-writer.h"

#include <errno.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "data/calendar.h"
#include "data/case.h"
#include "data/casewriter-provider.h"
#include "data/casewriter.h"
#include "data/data-out.h"
#include "data/dictionary.h"
#include "data/file-handle-def.h"
#include "data/format.h"
#include "data/make-file.h"
#include "data/missing-values.h"
#include "data/settings.h"
#include "data/value-labels.h"
#include "data/variable.h"
#include "libpspp/assertion.h"
#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/str.h"

#include "gl/ftoastr.h"
#include "gl/minmax.h"
#include "gl/unlocked-io.h"
#include "gl/xalloc.h"

#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) (msgid)
56 /* A variable in a CSV file. */
59 int width
; /* Variable width (0 to 32767). */
60 int case_index
; /* Index into case. */
61 struct fmt_spec format
; /* Print format. */
62 struct missing_values missing
; /* User-missing values, if recoding. */
63 struct val_labs
*val_labs
; /* Value labels, if any and they are in use. */
66 /* Comma-separated value (CSV) file writer. */
69 struct file_handle
*fh
; /* File handle. */
70 struct fh_lock
*lock
; /* Mutual exclusion for file. */
71 FILE *file
; /* File stream. */
72 struct replace_file
*rf
; /* Ticket for replacing output file. */
74 struct csv_writer_options opts
;
76 char *encoding
; /* Encoding used by variables. */
79 struct csv_var
*csv_vars
; /* Variables. */
80 size_t n_csv_vars
; /* Number of variables. */
83 static const struct casewriter_class csv_file_casewriter_class
;
85 static void write_var_names (struct csv_writer
*, const struct dictionary
*);
87 static bool write_error (const struct csv_writer
*);
88 static bool close_writer (struct csv_writer
*);
90 /* Initializes OPTS with default options for writing a CSV file. */
92 csv_writer_options_init (struct csv_writer_options
*opts
)
94 opts
->recode_user_missing
= false;
95 opts
->include_var_names
= false;
96 opts
->use_value_labels
= false;
97 opts
->use_print_formats
= false;
98 opts
->decimal
= settings_get_decimal_char (FMT_F
);
99 opts
->delimiter
= ',';
100 opts
->qualifier
= '"';
103 /* Opens the CSV file designated by file handle FH for writing cases from
104 dictionary DICT according to the given OPTS.
106 No reference to D is retained, so it may be modified or
107 destroyed at will after this function returns. */
109 csv_writer_open (struct file_handle
*fh
, const struct dictionary
*dict
,
110 const struct csv_writer_options
*opts
)
112 struct csv_writer
*w
;
115 /* Create and initialize writer. */
116 w
= xmalloc (sizeof *w
);
124 w
->encoding
= xstrdup (dict_get_encoding (dict
));
126 w
->n_csv_vars
= dict_get_var_cnt (dict
);
127 w
->csv_vars
= xnmalloc (w
->n_csv_vars
, sizeof *w
->csv_vars
);
128 for (i
= 0; i
< w
->n_csv_vars
; i
++)
130 const struct variable
*var
= dict_get_var (dict
, i
);
131 struct csv_var
*cv
= &w
->csv_vars
[i
];
133 cv
->width
= var_get_width (var
);
134 cv
->case_index
= var_get_case_index (var
);
136 cv
->format
= *var_get_print_format (var
);
137 if (opts
->recode_user_missing
)
138 mv_copy (&cv
->missing
, var_get_missing_values (var
));
140 mv_init (&cv
->missing
, cv
->width
);
142 if (opts
->use_value_labels
)
143 cv
->val_labs
= val_labs_clone (var_get_value_labels (var
));
148 /* Open file handle as an exclusive writer. */
149 /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock()
150 that identify types of files. */
151 w
->lock
= fh_lock (fh
, FH_REF_FILE
, N_("CSV file"), FH_ACC_WRITE
, true);
155 /* Create the file on disk. */
156 w
->rf
= replace_file_start (fh
, "w", 0666, &w
->file
);
159 msg (ME
, _("Error opening `%s' for writing as a system file: %s."),
160 fh_get_file_name (fh
), strerror (errno
));
164 if (opts
->include_var_names
)
165 write_var_names (w
, dict
);
170 return casewriter_create (dict_get_proto (dict
),
171 &csv_file_casewriter_class
, w
);
179 csv_field_needs_quoting (struct csv_writer
*w
, const char *s
, size_t len
)
183 for (p
= s
; p
< &s
[len
]; p
++)
184 if (*p
== w
->opts
.qualifier
|| *p
== w
->opts
.delimiter
185 || *p
== '\n' || *p
== '\r')
192 csv_output_buffer (struct csv_writer
*w
, const char *s
, size_t len
)
194 if (csv_field_needs_quoting (w
, s
, len
))
198 putc (w
->opts
.qualifier
, w
->file
);
199 for (p
= s
; p
< &s
[len
]; p
++)
201 /* We are writing the output file in text mode, so transform any
202 explicit CR-LF line breaks into LF only, to allow the C library to
203 use correct system-specific new-lines. */
204 if (*p
== '\r' && p
[1] == '\n')
207 if (*p
== w
->opts
.qualifier
)
208 putc (w
->opts
.qualifier
, w
->file
);
211 putc (w
->opts
.qualifier
, w
->file
);
214 fwrite (s
, 1, len
, w
->file
);
/* Writes the null-terminated string S to W's file as one CSV field, quoting
   it if necessary.  The terminating null byte is not written. */
static void
csv_output_string (struct csv_writer *w, const char *s)
{
  csv_output_buffer (w, s, strlen (s));
}
224 write_var_names (struct csv_writer
*w
, const struct dictionary
*d
)
228 for (i
= 0; i
< w
->n_csv_vars
; i
++)
231 putc (w
->opts
.delimiter
, w
->file
);
232 csv_output_string (w
, var_get_name (dict_get_var (d
, i
)));
234 putc ('\n', w
->file
);
238 csv_output_format (struct csv_writer
*w
, const struct csv_var
*cv
,
239 const union value
*value
)
241 char *s
= data_out (value
, w
->encoding
, &cv
->format
);
242 struct substring ss
= ss_cstr (s
);
243 if (cv
->format
.type
!= FMT_A
)
244 ss_trim (&ss
, ss_cstr (" "));
246 ss_rtrim (&ss
, ss_cstr (" "));
247 csv_output_buffer (w
, ss
.string
, ss
.length
);
/* Treats NUMBER as a count of seconds, converts the corresponding count of
   days (NUMBER divided by the number of seconds in a day) into a Gregorian
   date with calendar_offset_to_gregorian(), and stores the year, month, and
   day in *Y, *M, and *D.

   Returns the remainder of NUMBER modulo one day, that is, the time of day in
   seconds. */
static double
extract_date (double number, int *y, int *m, int *d)
{
  int yd;                       /* Day of year; required by the callee but
                                   not used here. */

  calendar_offset_to_gregorian (number / 60. / 60. / 24., y, m, d, &yd);
  return fmod (number, 60. * 60. * 24.);
}
261 extract_time (double number
, double *H
, int *M
, int *S
)
263 *H
= floor (number
/ 60. / 60.);
264 number
= fmod (number
, 60. * 60.);
266 *M
= floor (number
/ 60.);
267 number
= fmod (number
, 60.);
273 csv_write_var__ (struct csv_writer
*w
, const struct csv_var
*cv
,
274 const union value
*value
)
278 label
= val_labs_find (cv
->val_labs
, value
);
280 csv_output_string (w
, label
);
281 else if (cv
->width
== 0 && value
->f
== SYSMIS
)
282 csv_output_buffer (w
, " ", 1);
283 else if (w
->opts
.use_print_formats
)
284 csv_output_format (w
, cv
, value
);
287 char s
[MAX (DBL_STRLEN_BOUND
, 128)];
290 switch (cv
->format
.type
)
314 dtoastr (s
, sizeof s
, 0, 0, value
->f
);
315 cp
= strpbrk (s
, ".,");
317 *cp
= w
->opts
.decimal
;
334 extract_date (value
->f
, &y
, &m
, &d
);
335 snprintf (s
, sizeof s
, "%02d/%02d/%04d", m
, d
, y
);
347 extract_time (extract_date (value
->f
, &y
, &m
, &d
), &H
, &M
, &S
);
348 snprintf (s
, sizeof s
, "%02d/%02d/%04d %02.0f:%02d:%02d",
359 extract_time (fabs (value
->f
), &H
, &M
, &S
);
360 snprintf (s
, sizeof s
, "%s%02.0f:%02d:%02d",
361 value
->f
< 0 ? "-" : "", H
, M
, S
);
367 csv_output_format (w
, cv
, value
);
370 case FMT_NUMBER_OF_FORMATS
:
373 csv_output_string (w
, s
);
378 csv_write_var (struct csv_writer
*w
, const struct csv_var
*cv
,
379 const union value
*value
)
381 if (mv_is_value_missing (&cv
->missing
, value
, MV_USER
))
385 value_init (&missing
, cv
->width
);
386 value_set_missing (&missing
, cv
->width
);
387 csv_write_var__ (w
, cv
, &missing
);
388 value_destroy (&missing
, cv
->width
);
391 csv_write_var__ (w
, cv
, value
);
395 csv_write_case (struct csv_writer
*w
, const struct ccase
*c
)
399 for (i
= 0; i
< w
->n_csv_vars
; i
++)
401 const struct csv_var
*cv
= &w
->csv_vars
[i
];
404 putc (w
->opts
.delimiter
, w
->file
);
405 csv_write_var (w
, cv
, case_data_idx (c
, cv
->case_index
));
407 putc ('\n', w
->file
);
410 /* Writes case C to CSV file W. */
412 csv_file_casewriter_write (struct casewriter
*writer
, void *w_
,
415 struct csv_writer
*w
= w_
;
417 if (ferror (w
->file
))
419 casewriter_force_error (writer
);
424 csv_write_case (w
, c
);
/* Destroys CSV file writer W.  If closing it fails, WRITER is forced into an
   error state. */
static void
csv_file_casewriter_destroy (struct casewriter *writer, void *w_)
{
  struct csv_writer *w = w_;

  if (!close_writer (w))
    casewriter_force_error (writer);
}
437 /* Returns true if an I/O error has occurred on WRITER, false otherwise. */
439 write_error (const struct csv_writer
*writer
)
441 return ferror (writer
->file
);
444 /* Closes a CSV file after we're done with it.
445 Returns true if successful, false if an I/O error occurred. */
447 close_writer (struct csv_writer
*w
)
460 if (fclose (w
->file
) == EOF
)
464 msg (ME
, _("An I/O error occurred writing CSV file `%s'."),
465 fh_get_file_name (w
->fh
));
467 if (ok
? !replace_file_commit (w
->rf
) : !replace_file_abort (w
->rf
))
476 for (i
= 0; i
< w
->n_csv_vars
; i
++)
478 struct csv_var
*cv
= &w
->csv_vars
[i
];
479 mv_destroy (&cv
->missing
);
480 val_labs_destroy (cv
->val_labs
);
489 /* CSV file writer casewriter class. */
490 static const struct casewriter_class csv_file_casewriter_class
=
492 csv_file_casewriter_write
,
493 csv_file_casewriter_destroy
,