1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/casereader.h"
23 #include "data/casereader-provider.h"
24 #include "data/casewriter.h"
25 #include "data/variable.h"
26 #include "data/dictionary.h"
27 #include "libpspp/taint.h"
28 #include "libpspp/message.h"
30 #include "gl/xalloc.h"
33 #define _(msgid) gettext (msgid)
/* A casereader that filters data coming from another casereader.

   INCLUDE is called with each case read from SUBREADER, plus AUX, and
   decides whether the case passes through.  DESTROY, if non-null, is
   called with AUX when the filtering casereader is destroyed. */
struct casereader_filter
  {
    struct casereader *subreader; /* The reader to filter. */
    bool (*include) (const struct ccase *, void *aux); /* Keep this case? */
    bool (*destroy) (void *aux);  /* Cleanup hook for AUX, or NULL. */
    void *aux;                    /* Passed to INCLUDE and DESTROY. */
    struct casewriter *exclude; /* Writer that gets filtered cases, or NULL. */
  };
46 static const struct casereader_class casereader_filter_class
;
48 /* Creates and returns a casereader whose content is a filtered
49 version of the data in SUBREADER. Only the cases for which
50 INCLUDE returns true will appear in the returned casereader,
51 in the original order.
53 If EXCLUDE is non-null, then cases for which INCLUDE returns
54 false are written to EXCLUDE. These cases will not
55 necessarily be fully written to EXCLUDE until the filtering casereader's
56 cases have been fully read or, if that never occurs, until the
57 filtering casereader is destroyed.
59 When the filtering casereader is destroyed, DESTROY will be
60 called to allow any state maintained by INCLUDE to be freed.
62 After this function is called, SUBREADER must not ever again
63 be referenced directly. It will be destroyed automatically
64 when the filtering casereader is destroyed. */
66 casereader_create_filter_func (struct casereader
*subreader
,
67 bool (*include
) (const struct ccase
*,
69 bool (*destroy
) (void *aux
),
71 struct casewriter
*exclude
)
73 struct casereader_filter
*filter
= xmalloc (sizeof *filter
);
74 struct casereader
*reader
;
75 filter
->subreader
= casereader_rename (subreader
);
76 filter
->include
= include
;
77 filter
->destroy
= destroy
;
79 filter
->exclude
= exclude
;
80 reader
= casereader_create_sequential (
81 NULL
, casereader_get_proto (filter
->subreader
), CASENUMBER_MAX
,
82 &casereader_filter_class
, filter
);
83 taint_propagate (casereader_get_taint (filter
->subreader
),
84 casereader_get_taint (reader
));
88 /* Internal read function for filtering casereader. */
90 casereader_filter_read (struct casereader
*reader UNUSED
, void *filter_
)
93 struct casereader_filter
*filter
= filter_
;
96 struct ccase
*c
= casereader_read (filter
->subreader
);
99 else if (filter
->include (c
, filter
->aux
))
101 else if (filter
->exclude
!= NULL
)
102 casewriter_write (filter
->exclude
, c
);
108 /* Internal destruction function for filtering casereader. */
110 casereader_filter_destroy (struct casereader
*reader
, void *filter_
)
112 struct casereader_filter
*filter
= filter_
;
114 /* Make sure we've written everything to the excluded cases
115 casewriter, if there is one. */
116 if (filter
->exclude
!= NULL
)
119 while ((c
= casereader_read (filter
->subreader
)) != NULL
)
120 if (filter
->include (c
, filter
->aux
))
123 casewriter_write (filter
->exclude
, c
);
126 casereader_destroy (filter
->subreader
);
127 if (filter
->destroy
!= NULL
&& !filter
->destroy (filter
->aux
))
128 casereader_force_error (reader
);
132 /* Filtering casereader class. */
133 static const struct casereader_class casereader_filter_class
=
135 casereader_filter_read
,
136 casereader_filter_destroy
,
138 /* We could in fact delegate clone to the subreader, if the
139 filter function is required to have no memory and if we
140 added reference counting. But it might be useful to have
141 filter functions with memory and in any case this would
142 require a little extra work. */
148 /* Casereader for filtering valid weights. */
/* Weight-filtering data. */
struct casereader_filter_weight
  {
    const struct variable *weight_var; /* Weight variable. */
    bool *warn_on_invalid;      /* Points to a flag that stays true until the
                                   invalid-weight warning has been issued. */
    bool local_warn_on_invalid; /* warn_on_invalid might point here. */
  };
158 static bool casereader_filter_weight_include (const struct ccase
*, void *);
159 static bool casereader_filter_weight_destroy (void *);
161 /* Creates and returns a casereader that filters cases from
162 READER by valid weights, that is, any cases with user- or
163 system-missing, zero, or negative weights are dropped. The
164 weight variable's information is taken from DICT. If DICT
165 does not have a weight variable, then no cases are filtered
168 When a case with an invalid weight is encountered,
169 *WARN_ON_INVALID is checked. If it is true, then an error
170 message is issued and *WARN_ON_INVALID is set false. If
171 WARN_ON_INVALID is a null pointer, then an internal bool that
172 is initially true is used instead of a caller-supplied bool.
174 If EXCLUDE is non-null, then dropped cases are written to
175 EXCLUDE. These cases will not necessarily be fully written to
176 EXCLUDE until the filtering casereader's cases have been fully
177 read or, if that never occurs, until the filtering casereader
180 After this function is called, READER must not ever again be
181 referenced directly. It will be destroyed automatically when
182 the filtering casereader is destroyed. */
184 casereader_create_filter_weight (struct casereader
*reader
,
185 const struct dictionary
*dict
,
186 bool *warn_on_invalid
,
187 struct casewriter
*exclude
)
189 struct variable
*weight_var
= dict_get_weight (dict
);
190 if (weight_var
!= NULL
)
192 struct casereader_filter_weight
*cfw
= xmalloc (sizeof *cfw
);
193 cfw
->weight_var
= weight_var
;
194 cfw
->warn_on_invalid
= (warn_on_invalid
196 : &cfw
->local_warn_on_invalid
);
197 cfw
->local_warn_on_invalid
= true;
198 reader
= casereader_create_filter_func (reader
,
199 casereader_filter_weight_include
,
200 casereader_filter_weight_destroy
,
204 reader
= casereader_rename (reader
);
208 /* Internal "include" function for weight-filtering
211 casereader_filter_weight_include (const struct ccase
*c
, void *cfw_
)
213 struct casereader_filter_weight
*cfw
= cfw_
;
214 double value
= case_num (c
, cfw
->weight_var
);
215 if (value
>= 0.0 && !var_is_num_missing (cfw
->weight_var
, value
))
219 if (*cfw
->warn_on_invalid
)
221 msg (SW
, _("At least one case in the data read had a weight value "
222 "that was user-missing, system-missing, zero, or "
223 "negative. These case(s) were ignored."));
224 *cfw
->warn_on_invalid
= false;
230 /* Internal "destroy" function for weight-filtering casereader. */
233 casereader_filter_weight_destroy (void *cfw_
)
235 struct casereader_filter_weight
*cfw
= cfw_
;
240 /* Casereader for filtering missing values. */
242 /* Missing-value filtering data. */
243 struct casereader_filter_missing
245 struct variable
**vars
; /* Variables whose values to filter. */
246 size_t n_vars
; /* Number of variables. */
247 enum mv_class
class; /* Types of missing values to filter. */
248 casenumber
*n_missing
;
251 static bool casereader_filter_missing_include (const struct ccase
*, void *);
252 static bool casereader_filter_missing_destroy (void *);
254 /* Creates and returns a casereader that filters out cases from
255 READER that have a missing value in the given CLASS for any of
256 the N_VARS variables in VARS. Only cases that have
257 non-missing values for all of these variables are passed
260 Ownership of VARS is retained by the caller.
262 If EXCLUDE is non-null, then dropped cases are written to
263 EXCLUDE. These cases will not necessarily be fully written to
264 EXCLUDE until the filtering casereader's cases have been fully
265 read or, if that never occurs, until the filtering casereader
268 If N_MISSING is non-null, then after reading, it will be filled
269 with the total number of dropped cases.
271 After this function is called, READER must not ever again
272 be referenced directly. It will be destroyed automatically
273 when the filtering casereader is destroyed. */
275 casereader_create_filter_missing (struct casereader
*reader
,
276 const struct variable
*const *vars
, size_t n_vars
,
278 casenumber
*n_missing
,
279 struct casewriter
*exclude
)
281 if (n_vars
> 0 && class)
283 struct casereader_filter_missing
*cfm
= xmalloc (sizeof *cfm
);
284 cfm
->vars
= xmemdup (vars
, sizeof *vars
* n_vars
);
285 cfm
->n_vars
= n_vars
;
287 cfm
->n_missing
= n_missing
;
288 if (n_missing
) *n_missing
= 0;
289 return casereader_create_filter_func (reader
,
290 casereader_filter_missing_include
,
291 casereader_filter_missing_destroy
,
296 return casereader_rename (reader
);
299 /* Internal "include" function for missing value-filtering
302 casereader_filter_missing_include (const struct ccase
*c
, void *cfm_
)
304 const struct casereader_filter_missing
*cfm
= cfm_
;
307 for (i
= 0; i
< cfm
->n_vars
; i
++)
309 struct variable
*var
= cfm
->vars
[i
];
310 const union value
*value
= case_data (c
, var
);
311 if (var_is_value_missing (var
, value
) & cfm
->class)
321 /* Internal "destroy" function for missing value-filtering casereader. */
324 casereader_filter_missing_destroy (void *cfm_
)
326 struct casereader_filter_missing
*cfm
= cfm_
;
332 /* Case-counting casereader. */
334 static bool casereader_counter_include (const struct ccase
*, void *);
336 /* Creates and returns a new casereader that counts the number of
337 cases that have been read from it. *COUNTER is initially set
338 to INITIAL_VALUE, then incremented by 1 each time a case is read.
340 Counting casereaders must be used very cautiously: if a
341 counting casereader is cloned or if the casereader_peek
342 function is used on it, then the counter's value can be higher
343 than expected because of the buffering that goes on behind the
346 The counter is only incremented as cases are actually read
347 from the casereader. In particular, if the casereader is
348 destroyed before all cases have been read from the casereader,
349 cases never read will not be included in the count.
351 After this function is called, READER must not ever again
352 be referenced directly. It will be destroyed automatically
353 when the filtering casereader is destroyed. */
355 casereader_create_counter (struct casereader
*reader
, casenumber
*counter
,
356 casenumber initial_value
)
358 *counter
= initial_value
;
359 return casereader_create_filter_func (reader
, casereader_counter_include
,
360 NULL
, counter
, NULL
);
363 /* Internal "include" function for counting casereader. */
365 casereader_counter_include (const struct ccase
*c UNUSED
, void *counter_
)
367 casenumber
*counter
= counter_
;