Independent Samples T-Test Dialog: Fix Crash
[pspp.git] / src / libpspp / sparse-xarray.c
blob5168d87922f46fb62b317b89172f4d6831f9acc8
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "libpspp/sparse-xarray.h"
21 #include <limits.h>
22 #include <stdint.h>
23 #include <stdlib.h>
24 #include <string.h>
26 #include "libpspp/assertion.h"
27 #include "libpspp/ext-array.h"
28 #include "libpspp/misc.h"
29 #include "libpspp/range-set.h"
30 #include "libpspp/sparse-array.h"
32 #include "gl/md4.h"
33 #include "gl/minmax.h"
34 #include "gl/xalloc.h"
36 /* A sparse array of arrays of bytes. */
37 struct sparse_xarray
39 size_t n_bytes; /* Number of bytes per row. */
40 uint8_t *default_row; /* Defaults for unwritten rows. */
41 unsigned long int max_memory_rows; /* Max rows before dumping to disk. */
42 struct sparse_array *memory; /* Backing, if stored in memory. */
43 struct ext_array *disk; /* Backing, if stored on disk. */
44 struct range_set *disk_rows; /* Allocated rows, if on disk. */
47 static bool UNUSED
48 range_is_valid (const struct sparse_xarray *sx, size_t ofs, size_t n)
50 return n <= sx->n_bytes && ofs <= sx->n_bytes && ofs + n <= sx->n_bytes;
53 /* Creates and returns a new sparse array of arrays of bytes.
54 Each row in the sparse array will consist of N_BYTES bytes.
55 If fewer than MAX_MEMORY_ROWS rows are added to the array,
56 then it will be kept in memory; if more than that are added,
57 then it will be stored on disk. */
58 struct sparse_xarray *
59 sparse_xarray_create (size_t n_bytes, size_t max_memory_rows)
61 struct sparse_xarray *sx = xmalloc (sizeof *sx);
62 sx->n_bytes = n_bytes;
63 sx->default_row = xzalloc (n_bytes);
64 sx->max_memory_rows = max_memory_rows;
65 sx->memory = sparse_array_create (sizeof (uint8_t *));
66 sx->disk = NULL;
67 sx->disk_rows = NULL;
68 return sx;
71 /* Creates and returns a new sparse array of rows that contains
72 the same data as OLD. Returns a null pointer if cloning
73 fails. */
74 struct sparse_xarray *
75 sparse_xarray_clone (const struct sparse_xarray *old)
77 struct sparse_xarray *new = xmalloc (sizeof *new);
79 new->n_bytes = old->n_bytes;
81 new->default_row = xmemdup (old->default_row, old->n_bytes);
83 new->max_memory_rows = old->max_memory_rows;
85 if (old->memory != NULL)
87 unsigned long int idx;
88 uint8_t **old_row;
90 new->memory = sparse_array_create (sizeof (uint8_t *));
91 for (old_row = sparse_array_first (old->memory, &idx); old_row != NULL;
92 old_row = sparse_array_next (old->memory, idx, &idx))
94 uint8_t **new_row = sparse_array_insert (new->memory, idx);
95 *new_row = xmemdup (*old_row, new->n_bytes);
98 else
99 new->memory = NULL;
101 if (old->disk != NULL)
103 const struct range_set_node *node;
104 void *tmp = xmalloc (old->n_bytes);
106 new->disk = ext_array_create ();
107 new->disk_rows = range_set_clone (old->disk_rows, NULL);
108 RANGE_SET_FOR_EACH (node, old->disk_rows)
110 unsigned long int start = range_set_node_get_start (node);
111 unsigned long int end = range_set_node_get_end (node);
112 unsigned long int idx;
114 for (idx = start; idx < end; idx++)
116 off_t offset = (off_t) idx * old->n_bytes;
117 if (!ext_array_read (old->disk, offset, old->n_bytes, tmp)
118 || !ext_array_write (new->disk, offset, old->n_bytes, tmp))
120 free (tmp);
121 sparse_xarray_destroy (new);
122 return NULL;
126 free (tmp);
128 else
130 new->disk = NULL;
131 new->disk_rows = NULL;
134 return new;
137 static void
138 free_memory_rows (struct sparse_xarray *sx)
140 if (sx->memory != NULL)
142 unsigned long int idx;
143 uint8_t **row;
144 for (row = sparse_array_first (sx->memory, &idx); row != NULL;
145 row = sparse_array_next (sx->memory, idx, &idx))
146 free (*row);
147 sparse_array_destroy (sx->memory);
148 sx->memory = NULL;
152 /* Destroys sparse array of rows SX. */
153 void
154 sparse_xarray_destroy (struct sparse_xarray *sx)
156 if (sx != NULL)
158 free (sx->default_row);
159 free_memory_rows (sx);
160 ext_array_destroy (sx->disk);
161 range_set_destroy (sx->disk_rows);
162 free (sx);
166 /* Returns the number of bytes in each row in SX. */
167 size_t
168 sparse_xarray_get_n_columns (const struct sparse_xarray *sx)
170 return sx->n_bytes;
173 /* Returns the number of rows in SX. */
174 size_t
175 sparse_xarray_get_n_rows (const struct sparse_xarray *sx)
177 if (sx->memory)
179 unsigned long int idx;
180 return sparse_array_last (sx->memory, &idx) != NULL ? idx + 1 : 0;
182 else
184 const struct range_set_node *last = range_set_last (sx->disk_rows);
185 return last != NULL ? range_set_node_get_end (last) : 0;
189 /* Dumps the rows in SX, which must currently be stored in
190 memory, to disk. Returns true if successful, false on I/O
191 error. */
192 static bool
193 dump_sparse_xarray_to_disk (struct sparse_xarray *sx)
195 unsigned long int idx;
196 uint8_t **row;
198 assert (sx->memory != NULL);
199 assert (sx->disk == NULL);
201 sx->disk = ext_array_create ();
202 sx->disk_rows = range_set_create ();
204 for (row = sparse_array_first (sx->memory, &idx); row != NULL;
205 row = sparse_array_next (sx->memory, idx, &idx))
207 if (!ext_array_write (sx->disk, (off_t) idx * sx->n_bytes, sx->n_bytes,
208 *row))
210 ext_array_destroy (sx->disk);
211 sx->disk = NULL;
212 range_set_destroy (sx->disk_rows);
213 sx->disk_rows = NULL;
214 return false;
216 range_set_set1 (sx->disk_rows, idx, 1);
218 free_memory_rows (sx);
219 return true;
222 /* Returns true if any data has ever been written to ROW in SX,
223 false otherwise. */
224 bool
225 sparse_xarray_contains_row (const struct sparse_xarray *sx,
226 unsigned long int row)
228 return (sx->memory != NULL
229 ? sparse_array_get (sx->memory, row) != NULL
230 : range_set_contains (sx->disk_rows, row));
233 /* Reads columns COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in
234 the given ROW in SX, into the VALUE_CNT values in VALUES.
235 Returns true if successful, false on I/O error. */
236 bool
237 sparse_xarray_read (const struct sparse_xarray *sx, unsigned long int row,
238 size_t start, size_t n, void *data)
240 assert (range_is_valid (sx, start, n));
242 if (sx->memory != NULL)
244 uint8_t **p = sparse_array_get (sx->memory, row);
245 if (p != NULL)
247 memcpy (data, *p + start, n);
248 return true;
251 else
253 if (range_set_contains (sx->disk_rows, row))
254 return ext_array_read (sx->disk, (off_t) row * sx->n_bytes + start,
255 n, data);
258 memcpy (data, sx->default_row + start, n);
259 return true;
262 /* Implements sparse_xarray_write for an on-disk sparse_xarray. */
263 static bool
264 write_disk_row (struct sparse_xarray *sx, unsigned long int row,
265 size_t start, size_t n, const void *data)
267 off_t ofs = (off_t) row * sx->n_bytes;
268 if (range_set_contains (sx->disk_rows, row))
269 return ext_array_write (sx->disk, ofs + start, n, data);
270 else
272 range_set_set1 (sx->disk_rows, row, 1);
273 return (ext_array_write (sx->disk, ofs, start, sx->default_row)
274 && ext_array_write (sx->disk, ofs + start, n, data)
275 && ext_array_write (sx->disk, ofs + start + n,
276 sx->n_bytes - start - n,
277 sx->default_row + start + n));
281 /* Writes the VALUE_CNT values in VALUES into columns
282 COLUMNS...(COLUMNS + VALUE_CNT), exclusive, in the given ROW
283 in SX.
284 Returns true if successful, false on I/O error. */
285 bool
286 sparse_xarray_write (struct sparse_xarray *sx, unsigned long int row,
287 size_t start, size_t n, const void *data)
289 assert (range_is_valid (sx, start, n));
290 if (sx->memory != NULL)
292 uint8_t **p = sparse_array_get (sx->memory, row);
293 if (p == NULL)
295 if (sparse_array_count (sx->memory) < sx->max_memory_rows)
297 p = sparse_array_insert (sx->memory, row);
298 *p = xmemdup (sx->default_row, sx->n_bytes);
300 else
302 if (!dump_sparse_xarray_to_disk (sx))
303 return false;
304 return write_disk_row (sx, row, start, n, data);
307 memcpy (*p + start, data, n);
308 return true;
310 else
311 return write_disk_row (sx, row, start, n, data);
314 /* Writes the VALUE_CNT values in VALUES to columns
315 START_COLUMN...(START_COLUMN + VALUE_CNT), exclusive, in every
316 row in SX, even those rows that have not yet been written.
317 Returns true if successful, false on I/O error.
319 The runtime of this function is linear in the number of rows
320 in SX that have already been written. */
321 bool
322 sparse_xarray_write_columns (struct sparse_xarray *sx, size_t start,
323 size_t n, const void *data)
325 assert (range_is_valid (sx, start, n));
327 /* Set defaults. */
328 memcpy (sx->default_row + start, data, n);
330 /* Set individual rows. */
331 if (sx->memory != NULL)
333 unsigned long int idx;
334 uint8_t **p;
336 for (p = sparse_array_first (sx->memory, &idx); p != NULL;
337 p = sparse_array_next (sx->memory, idx, &idx))
338 memcpy (*p + start, data, n);
340 else
342 const struct range_set_node *node;
344 RANGE_SET_FOR_EACH (node, sx->disk_rows)
346 unsigned long int start_row = range_set_node_get_start (node);
347 unsigned long int end_row = range_set_node_get_end (node);
348 unsigned long int row;
350 for (row = start_row; row < end_row; row++)
352 off_t offset = (off_t) row * sx->n_bytes;
353 if (!ext_array_write (sx->disk, offset + start, n, data))
354 break;
358 if (ext_array_error (sx->disk))
359 return false;
361 return true;
364 static unsigned long int
365 scan_first (const struct sparse_xarray *sx)
367 if (sx->memory)
369 unsigned long int idx;
370 return sparse_array_first (sx->memory, &idx) ? idx : ULONG_MAX;
372 else
373 return range_set_scan (sx->disk_rows, 0);
376 static unsigned long int
377 scan_next (const struct sparse_xarray *sx, unsigned long int start)
379 if (sx->memory)
381 unsigned long int idx;
382 return sparse_array_next (sx->memory, start, &idx) ? idx : ULONG_MAX;
384 else
385 return range_set_scan (sx->disk_rows, start + 1);
388 /* Only works for rows for which sparse_xarray_contains_row()
389 would return true. */
390 static uint8_t *
391 get_row (const struct sparse_xarray *sx, unsigned long int idx,
392 uint8_t *buffer)
394 if (sx->memory)
396 uint8_t **p = sparse_array_get (sx->memory, idx);
397 return *p;
399 else if (ext_array_read (sx->disk, (off_t) idx * sx->n_bytes,
400 sx->n_bytes, buffer))
401 return buffer;
402 else
403 return NULL;
406 /* Iterates over all the rows in SX and DX, passing each pair of
407 rows with equal indexes to CB. CB's modifications, if any, to
408 destination rows are written back to DX.
410 All rows that are actually in use in SX or in DX or both are
411 passed to CB. If a row is in use in SX but not in DX, or vice
412 versa, then the "default" row (as set by
413 sparse_xarray_write_columns) is passed as the contents of the
414 other row.
416 CB is also called once with the default row from SX and the
417 default row from DX. Modifying the data passed as the default
418 row from DX will change DX's default row.
420 Returns true if successful, false if I/O on SX or DX fails or
421 if CB returns false. On failure, the contents of DX are
422 undefined. */
423 bool
424 sparse_xarray_copy (const struct sparse_xarray *sx, struct sparse_xarray *dx,
425 bool (*cb) (const void *src, void *dst, void *aux),
426 void *aux)
428 bool success = true;
430 if (!cb (sx->default_row, dx->default_row, aux))
431 return false;
433 if (sx == dx)
435 if (sx->memory)
437 unsigned long int idx;
438 uint8_t **row;
440 for (row = sparse_array_first (sx->memory, &idx); row != NULL;
441 row = sparse_array_next (sx->memory, idx, &idx))
443 success = cb (*row, *row, aux);
444 if (!success)
445 break;
448 else if (sx->disk)
450 const struct range_set_node *node;
451 void *tmp = xmalloc (sx->n_bytes);
453 RANGE_SET_FOR_EACH (node, sx->disk_rows)
455 unsigned long int start = range_set_node_get_start (node);
456 unsigned long int end = range_set_node_get_end (node);
457 unsigned long int row;
459 for (row = start; row < end; row++)
461 off_t offset = (off_t) row * sx->n_bytes;
462 success = (ext_array_read (sx->disk, offset, sx->n_bytes,
463 tmp)
464 && cb (tmp, tmp, aux)
465 && ext_array_write (dx->disk, offset,
466 dx->n_bytes, tmp));
467 if (!success)
468 break;
472 free (tmp);
475 else
477 unsigned long int src_idx = scan_first (sx);
478 unsigned long int dst_idx = scan_first (dx);
480 uint8_t *tmp_src_row = xmalloc (sx->n_bytes);
481 uint8_t *tmp_dst_row = xmalloc (dx->n_bytes);
483 for (;;)
485 unsigned long int idx;
486 const uint8_t *src_row;
487 uint8_t *dst_row;
489 /* Determine the index of the row to process. If
490 src_idx == dst_idx, then the row has been written in
491 both SX and DX. Otherwise, it has been written in
492 only the sparse_xarray corresponding to the smaller
493 index, and has the default contents in the other. */
494 idx = MIN (src_idx, dst_idx);
495 if (idx == ULONG_MAX)
496 break;
498 /* Obtain a copy of the source row as src_row. */
499 if (idx == src_idx)
500 src_row = get_row (sx, idx, tmp_src_row);
501 else
502 src_row = sx->default_row;
504 /* Obtain the destination row as dst_row. */
505 if (idx == dst_idx)
506 dst_row = get_row (dx, idx, tmp_dst_row);
507 else if (dx->memory
508 && sparse_array_count (dx->memory) < dx->max_memory_rows)
510 uint8_t **p = sparse_array_insert (dx->memory, idx);
511 dst_row = *p = xmemdup (dx->default_row, dx->n_bytes);
513 else
515 memcpy (tmp_dst_row, dx->default_row, dx->n_bytes);
516 dst_row = tmp_dst_row;
519 /* Run the callback. */
520 success = cb (src_row, dst_row, aux);
521 if (!success)
522 break;
524 /* Write back the destination row, if necessary. */
525 if (dst_row == tmp_dst_row)
527 success = sparse_xarray_write (dx, idx, 0, dx->n_bytes, dst_row);
528 if (!success)
529 break;
531 else
533 /* Nothing to do: we modified the destination row in-place. */
536 /* Advance to the next row. */
537 if (src_idx == idx)
538 src_idx = scan_next (sx, src_idx);
539 if (dst_idx == idx)
540 dst_idx = scan_next (dx, dst_idx);
543 free (tmp_src_row);
544 free (tmp_dst_row);
547 return success;
550 /* Returns a hash value for SX suitable for use with the model
551 checker. The value in BASIS is folded into the hash.
553 The returned hash value is *not* suitable for storage and
554 retrieval of sparse_xarrays that have identical contents,
555 because it will return different hash values for
556 sparse_xarrays that have the same contents (and it's slow).
558 We use MD4 because it is much faster than MD5 or SHA-1 but its
559 collision resistance is just as good. */
560 unsigned int
561 sparse_xarray_model_checker_hash (const struct sparse_xarray *sx,
562 unsigned int basis)
564 unsigned int hash[DIV_RND_UP (20, sizeof (unsigned int))];
565 struct md4_ctx ctx;
567 md4_init_ctx (&ctx);
568 md4_process_bytes (&basis, sizeof basis, &ctx);
569 md4_process_bytes (&sx->n_bytes, sizeof sx->n_bytes, &ctx);
570 md4_process_bytes (sx->default_row, sx->n_bytes, &ctx);
572 if (sx->memory)
574 unsigned long int idx;
575 uint8_t **row;
577 md4_process_bytes ("m", 1, &ctx);
578 md4_process_bytes (&sx->max_memory_rows, sizeof sx->max_memory_rows,
579 &ctx);
580 for (row = sparse_array_first (sx->memory, &idx); row != NULL;
581 row = sparse_array_next (sx->memory, idx, &idx))
583 md4_process_bytes (&idx, sizeof idx, &ctx);
584 md4_process_bytes (*row, sx->n_bytes, &ctx);
587 else
589 const struct range_set_node *node;
590 void *tmp = xmalloc (sx->n_bytes);
592 md4_process_bytes ("d", 1, &ctx);
593 RANGE_SET_FOR_EACH (node, sx->disk_rows)
595 unsigned long int start = range_set_node_get_start (node);
596 unsigned long int end = range_set_node_get_end (node);
597 unsigned long int idx;
599 for (idx = start; idx < end; idx++)
601 off_t offset = (off_t) idx * sx->n_bytes;
602 if (!ext_array_read (sx->disk, offset, sx->n_bytes, tmp))
603 NOT_REACHED ();
604 md4_process_bytes (&idx, sizeof idx, &ctx);
605 md4_process_bytes (tmp, sx->n_bytes, &ctx);
608 free (tmp);
610 md4_finish_ctx (&ctx, hash);
611 return hash[0];