1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 - How to calculate significance of some symmetric and directional measures?
20 - How to calculate ASE for symmetric Somers ' d?
21 - How to calculate ASE for Goodman and Kruskal's tau?
22 - How to calculate approx. T of symmetric uncertainty coefficient?
30 #include <gsl/gsl_cdf.h>
34 #include "data/case.h"
35 #include "data/casegrouper.h"
36 #include "data/casereader.h"
37 #include "data/data-out.h"
38 #include "data/dataset.h"
39 #include "data/dictionary.h"
40 #include "data/format.h"
41 #include "data/value-labels.h"
42 #include "data/variable.h"
43 #include "language/command.h"
44 #include "language/stats/freq.h"
45 #include "language/dictionary/split-file.h"
46 #include "language/lexer/lexer.h"
47 #include "language/lexer/variable-parser.h"
48 #include "libpspp/array.h"
49 #include "libpspp/assertion.h"
50 #include "libpspp/compiler.h"
51 #include "libpspp/hash-functions.h"
52 #include "libpspp/hmap.h"
53 #include "libpspp/hmapx.h"
54 #include "libpspp/message.h"
55 #include "libpspp/misc.h"
56 #include "libpspp/pool.h"
57 #include "libpspp/str.h"
58 #include "output/tab.h"
59 #include "output/chart-item.h"
60 #include "output/charts/barchart.h"
62 #include "gl/minmax.h"
63 #include "gl/xalloc.h"
67 #define _(msgid) gettext (msgid)
68 #define N_(msgid) msgid
76 missing=miss:!table/include/report;
77 count=roundwhat:asis/case/!cell,
78 roundhow:!round/truncate;
79 +write[wr_]=none,cells,all;
80 +format=val:!avalue/dvalue,
82 tabl:!tables/notables,
86 +cells[cl_]=count,expected,row,column,total,residual,sresidual,
88 +statistics[st_]=chisq,phi,cc,lambda,uc,none,btau,ctau,risk,gamma,d,
94 /* Number of chi-square statistics. */
97 /* Number of symmetric statistics. */
100 /* Number of directional statistics. */
101 #define N_DIRECTIONAL 13
104 /* Indexes into the 'vars' member of struct pivot_table and
105 struct crosstab member. */
108 ROW_VAR
= 0, /* Row variable. */
109 COL_VAR
= 1 /* Column variable. */
110 /* Higher indexes cause multiple tables to be output. */
113 /* A crosstabulation of 2 or more variables. */
116 struct crosstabs_proc
*proc
;
117 struct fmt_spec weight_format
; /* Format for weight variable. */
118 double missing
; /* Weight of missing cases. */
120 /* Variables (2 or more). */
122 const struct variable
**vars
;
124 /* Constants (0 or more). */
126 const struct variable
**const_vars
;
127 union value
*const_values
;
131 struct freq
**entries
;
134 /* Column values, number of columns. */
138 /* Row values, number of rows. */
142 /* Number of statistically interesting columns/rows
143 (columns/rows with data in them). */
144 int ns_cols
, ns_rows
;
146 /* Matrix contents. */
147 double
*mat
; /* Matrix proper. */
148 double
*row_tot
; /* Row totals. */
149 double
*col_tot
; /* Column totals. */
150 double total
; /* Grand total. */
153 /* Integer mode variable info. */
156 struct hmap_node hmap_node
; /* In struct crosstabs_proc var_ranges map. */
157 const struct variable
*var
; /* The variable. */
158 int min
; /* Minimum value. */
159 int max
; /* Maximum value + 1. */
160 int count
; /* max - min. */
163 struct crosstabs_proc
165 const struct dictionary
*dict
;
166 enum
{ INTEGER
, GENERAL
} mode
;
167 enum mv_class exclude
;
171 struct fmt_spec weight_format
;
173 /* Variables specifies on VARIABLES. */
174 const struct variable
**variables
;
176 struct hmap var_ranges
;
179 struct pivot_table
*pivots
;
183 int n_cells
; /* Number of cells requested. */
184 unsigned int cells
; /* Bit k is 1 if cell k is requested. */
185 int a_cells
[CRS_CL_count
]; /* 0...n_cells-1 are the requested cells. */
187 /* Rounding of cells. */
188 bool round_case_weights
; /* Round case weights? */
189 bool round_cells
; /* If !round_case_weights, round cells? */
190 bool round_down
; /* Round down? (otherwise to nearest) */
193 unsigned int statistics
; /* Bit k is 1 if statistic k is requested. */
195 bool descending
; /* True if descending sort order is requested. */
198 const struct var_range
*get_var_range (const struct crosstabs_proc
*,
199 const struct variable
*);
201 static bool
should_tabulate_case (const struct pivot_table
*,
202 const struct ccase
*, enum mv_class exclude
);
203 static void
tabulate_general_case (struct pivot_table
*, const struct ccase
*,
205 static void
tabulate_integer_case (struct pivot_table
*, const struct ccase
*,
207 static void
postcalc (struct crosstabs_proc
*);
208 static void
submit (struct pivot_table
*, struct tab_table
*);
211 round_weight (const struct crosstabs_proc
*proc
, double weight
)
213 return proc
->round_down ?
floor (weight
) : floor (weight
+ 0.5);
216 /* Parses and executes the CROSSTABS procedure. */
218 cmd_crosstabs (struct lexer
*lexer
, struct dataset
*ds
)
220 const struct variable
*wv
= dict_get_weight (dataset_dict (ds
));
221 struct var_range
*range
, *next_range
;
222 struct crosstabs_proc proc
;
223 struct casegrouper
*grouper
;
224 struct casereader
*input
, *group
;
225 struct cmd_crosstabs cmd
;
226 struct pivot_table
*pt
;
231 proc
.dict
= dataset_dict (ds
);
232 proc
.bad_warn
= true
;
233 proc
.variables
= NULL;
234 proc
.n_variables
= 0;
235 hmap_init (&proc
.var_ranges
);
238 proc
.descending
= false
;
239 proc
.weight_format
= wv ?
*var_get_print_format (wv
) : F_8_0
;
241 if (!parse_crosstabs (lexer
, ds
, &cmd
, &proc
))
243 result
= CMD_FAILURE
;
247 proc
.mode
= proc
.n_variables ? INTEGER
: GENERAL
;
248 proc
.barchart
= cmd
.sbc_barchart
> 0;
250 proc
.descending
= cmd
.val
== CRS_DVALUE
;
252 proc
.round_case_weights
= cmd
.sbc_count
&& cmd
.roundwhat
== CRS_CASE
;
253 proc
.round_cells
= cmd
.sbc_count
&& cmd
.roundwhat
== CRS_CELL
;
254 proc
.round_down
= cmd
.roundhow
== CRS_TRUNCATE
;
258 proc
.cells
= 1u << CRS_CL_COUNT
;
259 else if (cmd
.a_cells
[CRS_CL_ALL
])
260 proc
.cells
= UINT_MAX
;
264 for (i
= 0; i
< CRS_CL_count
; i
++)
266 proc
.cells |
= 1u << i
;
268 proc
.cells
= ((1u << CRS_CL_COUNT
)
270 |
(1u << CRS_CL_COLUMN
)
271 |
(1u << CRS_CL_TOTAL
));
273 proc
.cells
&= ((1u << CRS_CL_count
) - 1);
274 proc
.cells
&= ~((1u << CRS_CL_NONE
) |
(1u << CRS_CL_ALL
));
276 for (i
= 0; i
< CRS_CL_count
; i
++)
277 if (proc
.cells
& (1u << i
))
278 proc
.a_cells
[proc
.n_cells
++] = i
;
281 if (cmd
.a_statistics
[CRS_ST_ALL
])
282 proc
.statistics
= UINT_MAX
;
283 else if (cmd
.sbc_statistics
)
288 for (i
= 0; i
< CRS_ST_count
; i
++)
289 if (cmd
.a_statistics
[i
])
290 proc
.statistics |
= 1u << i
;
291 if (proc
.statistics
== 0)
292 proc
.statistics |
= 1u << CRS_ST_CHISQ
;
298 proc
.exclude
= (cmd
.miss
== CRS_TABLE ? MV_ANY
299 : cmd
.miss
== CRS_INCLUDE ? MV_SYSTEM
301 if (proc
.mode
== GENERAL
&& proc
.exclude
== MV_NEVER
)
303 msg (SE
, _("Missing mode %s not allowed in general mode. "
304 "Assuming %s."), "REPORT", "MISSING=TABLE");
305 proc
.exclude
= MV_ANY
;
309 proc
.pivot
= cmd
.pivot
== CRS_PIVOT
;
311 input
= casereader_create_filter_weight (proc_open (ds
), dataset_dict (ds
),
313 grouper
= casegrouper_create_splits (input
, dataset_dict (ds
));
314 while (casegrouper_get_next_group (grouper
, &group
))
318 /* Output SPLIT FILE variables. */
319 c
= casereader_peek (group
, 0);
322 output_split_file_values (ds
, c
);
326 /* Initialize hash tables. */
327 for (pt
= &proc
.pivots
[0]; pt
< &proc
.pivots
[proc
.n_pivots
]; pt
++)
328 hmap_init (&pt
->data
);
331 for (; (c
= casereader_read (group
)) != NULL; case_unref (c
))
332 for (pt
= &proc
.pivots
[0]; pt
< &proc
.pivots
[proc
.n_pivots
]; pt
++)
334 double weight
= dict_get_case_weight (dataset_dict (ds
), c
,
336 if (cmd
.roundwhat
== CRS_CASE
)
338 weight
= round_weight (&proc
, weight
);
342 if (should_tabulate_case (pt
, c
, proc
.exclude
))
344 if (proc
.mode
== GENERAL
)
345 tabulate_general_case (pt
, c
, weight
);
347 tabulate_integer_case (pt
, c
, weight
);
350 pt
->missing
+= weight
;
352 casereader_destroy (group
);
357 ok
= casegrouper_destroy (grouper
);
358 ok
= proc_commit (ds
) && ok
;
360 result
= ok ? CMD_SUCCESS
: CMD_CASCADING_FAILURE
;
363 free (proc
.variables
);
364 HMAP_FOR_EACH_SAFE (range
, next_range
, struct var_range
, hmap_node
,
367 hmap_delete (&proc
.var_ranges
, &range
->hmap_node
);
370 for (pt
= &proc
.pivots
[0]; pt
< &proc
.pivots
[proc
.n_pivots
]; pt
++)
373 free (pt
->const_vars
);
374 /* We must not call value_destroy on const_values because
375 it is a wild pointer; it never pointed to anything owned
378 The rest of the data was allocated and destroyed at a
379 lower level already. */
386 /* Parses the TABLES subcommand. */
388 crs_custom_tables (struct lexer
*lexer
, struct dataset
*ds
,
389 struct cmd_crosstabs
*cmd UNUSED
, void
*proc_
)
391 struct crosstabs_proc
*proc
= proc_
;
392 struct const_var_set
*var_set
;
394 const struct variable
***by = NULL;
396 size_t
*by_nvar
= NULL;
401 /* Ensure that this is a TABLES subcommand. */
402 if (!lex_match_id (lexer
, "TABLES")
403 && (lex_token (lexer
) != T_ID ||
404 dict_lookup_var (dataset_dict (ds
), lex_tokcstr (lexer
)) == NULL)
405 && lex_token (lexer
) != T_ALL
)
407 lex_match (lexer
, T_EQUALS
);
409 if (proc
->variables
!= NULL)
410 var_set
= const_var_set_create_from_array (proc
->variables
,
413 var_set
= const_var_set_create_from_dict (dataset_dict (ds
));
414 assert (var_set
!= NULL);
418 by = xnrealloc (by, n_by
+ 1, sizeof
*by);
419 by_nvar
= xnrealloc (by_nvar
, n_by
+ 1, sizeof
*by_nvar
);
420 if (!parse_const_var_set_vars (lexer
, var_set
, &by[n_by
], &by_nvar
[n_by
],
421 PV_NO_DUPLICATE | PV_NO_SCRATCH
))
423 if (xalloc_oversized (nx
, by_nvar
[n_by
]))
425 msg (SE
, _("Too many cross-tabulation variables or dimensions."));
431 if (!lex_match (lexer
, T_BY
))
440 by_iter
= xcalloc (n_by
, sizeof
*by_iter
);
441 proc
->pivots
= xnrealloc (proc
->pivots
,
442 proc
->n_pivots
+ nx
, sizeof
*proc
->pivots
);
443 for (i
= 0; i
< nx
; i
++)
445 struct pivot_table
*pt
= &proc
->pivots
[proc
->n_pivots
++];
449 pt
->weight_format
= proc
->weight_format
;
452 pt
->vars
= xmalloc (n_by
* sizeof
*pt
->vars
);
454 pt
->const_vars
= NULL;
455 pt
->const_values
= NULL;
457 for (j
= 0; j
< n_by
; j
++)
458 pt
->vars
[j
] = by[j
][by_iter
[j
]];
460 for (j
= n_by
- 1; j
>= 0; j
--)
462 if (++by_iter
[j
] < by_nvar
[j
])
471 /* All return paths lead here. */
472 for (i
= 0; i
< n_by
; i
++)
477 const_var_set_destroy (var_set
);
482 /* Parses the VARIABLES subcommand. */
484 crs_custom_variables (struct lexer
*lexer
, struct dataset
*ds
,
485 struct cmd_crosstabs
*cmd UNUSED
, void
*proc_
)
487 struct crosstabs_proc
*proc
= proc_
;
490 msg (SE
, _("%s must be specified before %s."), "VARIABLES", "TABLES");
494 lex_match (lexer
, T_EQUALS
);
498 size_t orig_nv
= proc
->n_variables
;
503 if (!parse_variables_const (lexer
, dataset_dict (ds
),
504 &proc
->variables
, &proc
->n_variables
,
505 (PV_APPEND | PV_NUMERIC
506 | PV_NO_DUPLICATE | PV_NO_SCRATCH
)))
509 if (!lex_force_match (lexer
, T_LPAREN
))
512 if (!lex_force_int (lexer
))
514 min
= lex_integer (lexer
);
517 lex_match (lexer
, T_COMMA
);
519 if (!lex_force_int (lexer
))
521 max
= lex_integer (lexer
);
524 msg (SE
, _("Maximum value (%ld) less than minimum value (%ld)."),
530 if (!lex_force_match (lexer
, T_RPAREN
))
533 for (i
= orig_nv
; i
< proc
->n_variables
; i
++)
535 const struct variable
*var
= proc
->variables
[i
];
536 struct var_range
*vr
= xmalloc (sizeof
*vr
);
541 vr
->count
= max
- min
+ 1;
542 hmap_insert (&proc
->var_ranges
, &vr
->hmap_node
,
543 hash_pointer (var
, 0));
546 if (lex_token (lexer
) == T_SLASH
)
553 free (proc
->variables
);
554 proc
->variables
= NULL;
555 proc
->n_variables
= 0;
559 /* Data file processing. */
561 const struct var_range
*
562 get_var_range (const struct crosstabs_proc
*proc
, const struct variable
*var
)
564 if (!hmap_is_empty (&proc
->var_ranges
))
566 const struct var_range
*range
;
568 HMAP_FOR_EACH_IN_BUCKET (range
, struct var_range
, hmap_node
,
569 hash_pointer (var
, 0), &proc
->var_ranges
)
570 if (range
->var
== var
)
578 should_tabulate_case (const struct pivot_table
*pt
, const struct ccase
*c
,
579 enum mv_class exclude
)
582 for (j
= 0; j
< pt
->n_vars
; j
++)
584 const struct variable
*var
= pt
->vars
[j
];
585 const struct var_range
*range
= get_var_range (pt
->proc
, var
);
587 if (var_is_value_missing (var
, case_data (c
, var
), exclude
))
592 double num
= case_num (c
, var
);
593 if (num
< range
->min || num
>= range
->max
+ 1.)
601 tabulate_integer_case (struct pivot_table
*pt
, const struct ccase
*c
,
609 for (j
= 0; j
< pt
->n_vars
; j
++)
611 /* Throw away fractional parts of values. */
612 hash
= hash_int (case_num (c
, pt
->vars
[j
]), hash
);
615 HMAP_FOR_EACH_WITH_HASH (te
, struct freq
, node
, hash
, &pt
->data
)
617 for (j
= 0; j
< pt
->n_vars
; j
++)
618 if ((int
) case_num (c
, pt
->vars
[j
]) != (int
) te
->values
[j
].f
)
621 /* Found an existing entry. */
628 /* No existing entry. Create a new one. */
629 te
= xmalloc (table_entry_size (pt
->n_vars
));
631 for (j
= 0; j
< pt
->n_vars
; j
++)
632 te
->values
[j
].f
= (int
) case_num (c
, pt
->vars
[j
]);
633 hmap_insert (&pt
->data
, &te
->node
, hash
);
637 tabulate_general_case (struct pivot_table
*pt
, const struct ccase
*c
,
645 for (j
= 0; j
< pt
->n_vars
; j
++)
647 const struct variable
*var
= pt
->vars
[j
];
648 hash
= value_hash (case_data (c
, var
), var_get_width (var
), hash
);
651 HMAP_FOR_EACH_WITH_HASH (te
, struct freq
, node
, hash
, &pt
->data
)
653 for (j
= 0; j
< pt
->n_vars
; j
++)
655 const struct variable
*var
= pt
->vars
[j
];
656 if (!value_equal (case_data (c
, var
), &te
->values
[j
],
657 var_get_width (var
)))
661 /* Found an existing entry. */
668 /* No existing entry. Create a new one. */
669 te
= xmalloc (table_entry_size (pt
->n_vars
));
671 for (j
= 0; j
< pt
->n_vars
; j
++)
673 const struct variable
*var
= pt
->vars
[j
];
674 value_clone (&te
->values
[j
], case_data (c
, var
), var_get_width (var
));
676 hmap_insert (&pt
->data
, &te
->node
, hash
);
679 /* Post-data reading calculations. */
681 static int
compare_table_entry_vars_3way (const struct freq
*a
,
682 const struct freq
*b
,
683 const struct pivot_table
*pt
,
685 static int
compare_table_entry_3way (const void
*ap_
, const void
*bp_
,
687 static int
compare_table_entry_3way_inv (const void
*ap_
, const void
*bp_
,
690 static void
enum_var_values (const struct pivot_table
*, int var_idx
,
691 union value
**valuesp
, int
*n_values
, bool descending
);
692 static void
output_pivot_table (struct crosstabs_proc
*,
693 struct pivot_table
*);
694 static void
make_pivot_table_subset (struct pivot_table
*pt
,
695 size_t row0
, size_t row1
,
696 struct pivot_table
*subset
);
697 static void
make_summary_table (struct crosstabs_proc
*);
698 static bool
find_crosstab (struct pivot_table
*, size_t
*row0p
, size_t
*row1p
);
701 postcalc (struct crosstabs_proc
*proc
)
704 /* Round hash table entries, if requested
706 If this causes any of the cell counts to fall to zero, delete those
708 if (proc
->round_cells
)
709 for (struct pivot_table
*pt
= proc
->pivots
;
710 pt
< &proc
->pivots
[proc
->n_pivots
]; pt
++)
712 struct freq
*e
, *next
;
713 HMAP_FOR_EACH_SAFE (e
, next
, struct freq
, node
, &pt
->data
)
715 e
->count
= round_weight (proc
, e
->count
);
718 hmap_delete (&pt
->data
, &e
->node
);
724 /* Convert hash tables into sorted arrays of entries. */
725 for (struct pivot_table
*pt
= proc
->pivots
;
726 pt
< &proc
->pivots
[proc
->n_pivots
]; pt
++)
730 pt
->n_entries
= hmap_count (&pt
->data
);
731 pt
->entries
= xnmalloc (pt
->n_entries
, sizeof
*pt
->entries
);
733 HMAP_FOR_EACH (e
, struct freq
, node
, &pt
->data
)
734 pt
->entries
[i
++] = e
;
735 hmap_destroy (&pt
->data
);
737 sort (pt
->entries
, pt
->n_entries
, sizeof
*pt
->entries
,
738 proc
->descending ? compare_table_entry_3way_inv
: compare_table_entry_3way
,
743 make_summary_table (proc
);
745 /* Output each pivot table. */
746 for (struct pivot_table
*pt
= proc
->pivots
;
747 pt
< &proc
->pivots
[proc
->n_pivots
]; pt
++)
749 if (proc
->pivot || pt
->n_vars
== 2)
750 output_pivot_table (proc
, pt
);
753 size_t row0
= 0, row1
= 0;
754 while (find_crosstab (pt
, &row0
, &row1
))
756 struct pivot_table subset
;
757 make_pivot_table_subset (pt
, row0
, row1
, &subset
);
758 output_pivot_table (proc
, &subset
);
763 (barchart_create (pt
->vars
, pt
->n_vars
, _("Count"), false
, pt
->entries
, pt
->n_entries
));
766 /* Free output and prepare for next split file. */
767 for (struct pivot_table
*pt
= proc
->pivots
;
768 pt
< &proc
->pivots
[proc
->n_pivots
]; pt
++)
772 /* Free the members that were allocated in this function(and the values
773 owned by the entries.
775 The other pointer members are either both allocated and destroyed at a
776 lower level (in output_pivot_table), or both allocated and destroyed
777 at a higher level (in crs_custom_tables and free_proc,
779 for (size_t i
= 0; i
< pt
->n_vars
; i
++)
781 int width
= var_get_width (pt
->vars
[i
]);
782 if (value_needs_init (width
))
786 for (j
= 0; j
< pt
->n_entries
; j
++)
787 value_destroy (&pt
->entries
[j
]->values
[i
], width
);
791 for (size_t i
= 0; i
< pt
->n_entries
; i
++)
792 free (pt
->entries
[i
]);
798 make_pivot_table_subset (struct pivot_table
*pt
, size_t row0
, size_t row1
,
799 struct pivot_table
*subset
)
804 assert (pt
->n_consts
== 0);
805 subset
->missing
= pt
->missing
;
807 subset
->vars
= pt
->vars
;
808 subset
->n_consts
= pt
->n_vars
- 2;
809 subset
->const_vars
= pt
->vars
+ 2;
810 subset
->const_values
= &pt
->entries
[row0
]->values
[2];
812 subset
->entries
= &pt
->entries
[row0
];
813 subset
->n_entries
= row1
- row0
;
817 compare_table_entry_var_3way (const struct freq
*a
,
818 const struct freq
*b
,
819 const struct pivot_table
*pt
,
822 return value_compare_3way (&a
->values
[idx
], &b
->values
[idx
],
823 var_get_width (pt
->vars
[idx
]));
827 compare_table_entry_vars_3way (const struct freq
*a
,
828 const struct freq
*b
,
829 const struct pivot_table
*pt
,
834 for (i
= idx1
- 1; i
>= idx0
; i
--)
836 int cmp
= compare_table_entry_var_3way (a
, b
, pt
, i
);
843 /* Compare the struct freq at *AP to the one at *BP and
844 return a strcmp()-type result. */
846 compare_table_entry_3way (const void
*ap_
, const void
*bp_
, const void
*pt_
)
848 const struct freq
*const *ap
= ap_
;
849 const struct freq
*const *bp
= bp_
;
850 const struct freq
*a
= *ap
;
851 const struct freq
*b
= *bp
;
852 const struct pivot_table
*pt
= pt_
;
855 cmp
= compare_table_entry_vars_3way (a
, b
, pt
, 2, pt
->n_vars
);
859 cmp
= compare_table_entry_var_3way (a
, b
, pt
, ROW_VAR
);
863 return compare_table_entry_var_3way (a
, b
, pt
, COL_VAR
);
866 /* Inverted version of compare_table_entry_3way */
868 compare_table_entry_3way_inv (const void
*ap_
, const void
*bp_
, const void
*pt_
)
870 return -compare_table_entry_3way (ap_
, bp_
, pt_
);
874 find_first_difference (const struct pivot_table
*pt
, size_t row
)
877 return pt
->n_vars
- 1;
880 const struct freq
*a
= pt
->entries
[row
];
881 const struct freq
*b
= pt
->entries
[row
- 1];
884 for (col
= pt
->n_vars
- 1; col
>= 0; col
--)
885 if (compare_table_entry_var_3way (a
, b
, pt
, col
))
891 /* Output a table summarizing the cases processed. */
893 make_summary_table (struct crosstabs_proc
*proc
)
895 struct tab_table
*summary
;
896 struct pivot_table
*pt
;
900 summary
= tab_create (7, 3 + proc
->n_pivots
);
901 tab_set_format (summary
, RC_WEIGHT
, &proc
->weight_format
);
902 tab_title (summary
, _("Summary."));
903 tab_headers (summary
, 1, 0, 3, 0);
904 tab_joint_text (summary
, 1, 0, 6, 0, TAB_CENTER
, _("Cases"));
905 tab_joint_text (summary
, 1, 1, 2, 1, TAB_CENTER
, _("Valid"));
906 tab_joint_text (summary
, 3, 1, 4, 1, TAB_CENTER
, _("Missing"));
907 tab_joint_text (summary
, 5, 1, 6, 1, TAB_CENTER
, _("Total"));
908 tab_hline (summary
, TAL_1
, 1, 6, 1);
909 tab_hline (summary
, TAL_1
, 1, 6, 2);
910 tab_vline (summary
, TAL_1
, 3, 1, 1);
911 tab_vline (summary
, TAL_1
, 5, 1, 1);
912 for (i
= 0; i
< 3; i
++)
914 tab_text (summary
, 1 + i
* 2, 2, TAB_RIGHT
, _("N"));
915 tab_text (summary
, 2 + i
* 2, 2, TAB_RIGHT
, _("Percent"));
917 tab_offset (summary
, 0, 3);
919 ds_init_empty (&name
);
920 for (pt
= &proc
->pivots
[0]; pt
< &proc
->pivots
[proc
->n_pivots
]; pt
++)
926 tab_hline (summary
, TAL_1
, 0, 6, 0);
929 for (i
= 0; i
< pt
->n_vars
; i
++)
932 ds_put_cstr (&name
, " * ");
933 ds_put_cstr (&name
, var_to_string (pt
->vars
[i
]));
935 tab_text (summary
, 0, 0, TAB_LEFT
, ds_cstr (&name
));
938 for (i
= 0; i
< pt
->n_entries
; i
++)
939 valid
+= pt
->entries
[i
]->count
;
944 for (i
= 0; i
< 3; i
++)
946 tab_double (summary
, i
* 2 + 1, 0, TAB_RIGHT
, n
[i
], NULL, RC_WEIGHT
);
947 tab_text_format (summary
, i
* 2 + 2, 0, TAB_RIGHT
, "%.1f%%",
951 tab_next_row (summary
);
955 submit (NULL, summary
);
960 static struct tab_table
*create_crosstab_table (struct crosstabs_proc
*,
961 struct pivot_table
*);
962 static struct tab_table
*create_chisq_table (struct crosstabs_proc
*proc
, struct pivot_table
*);
963 static struct tab_table
*create_sym_table (struct crosstabs_proc
*proc
, struct pivot_table
*);
964 static struct tab_table
*create_risk_table (struct crosstabs_proc
*proc
, struct pivot_table
*);
965 static struct tab_table
*create_direct_table (struct crosstabs_proc
*proc
, struct pivot_table
*);
966 static void
display_dimensions (struct crosstabs_proc
*, struct pivot_table
*,
967 struct tab_table
*, int first_difference
);
968 static void
display_crosstabulation (struct crosstabs_proc
*,
969 struct pivot_table
*,
971 static void
display_chisq (struct pivot_table
*, struct tab_table
*,
972 bool
*showed_fisher
);
973 static void
display_symmetric (struct crosstabs_proc
*, struct pivot_table
*,
975 static void
display_risk (struct pivot_table
*, struct tab_table
*);
976 static void
display_directional (struct crosstabs_proc
*, struct pivot_table
*,
978 static void
table_value_missing (struct crosstabs_proc
*proc
,
979 struct tab_table
*table
, int c
, int r
,
980 unsigned char opt
, const union value
*v
,
981 const struct variable
*var
);
982 static void
delete_missing (struct pivot_table
*);
983 static void
build_matrix (struct pivot_table
*);
985 /* Output pivot table PT in the context of PROC. */
987 output_pivot_table (struct crosstabs_proc
*proc
, struct pivot_table
*pt
)
989 struct tab_table
*table
= NULL; /* Crosstabulation table. */
990 struct tab_table
*chisq
= NULL; /* Chi-square table. */
991 bool showed_fisher
= false
;
992 struct tab_table
*sym
= NULL; /* Symmetric measures table. */
993 struct tab_table
*risk
= NULL; /* Risk estimate table. */
994 struct tab_table
*direct
= NULL; /* Directional measures table. */
997 enum_var_values (pt
, COL_VAR
, &pt
->cols
, &pt
->n_cols
, proc
->descending
);
1004 ds_init_cstr (&vars
, var_to_string (pt
->vars
[0]));
1005 for (i
= 1; i
< pt
->n_vars
; i
++)
1006 ds_put_format (&vars
, " * %s", var_to_string (pt
->vars
[i
]));
1008 /* TRANSLATORS: The %s here describes a crosstabulation. It takes the
1009 form "var1 * var2 * var3 * ...". */
1010 msg (SW
, _("Crosstabulation %s contained no non-missing cases."),
1019 table
= create_crosstab_table (proc
, pt
);
1020 if (proc
->statistics
& (1u << CRS_ST_CHISQ
))
1021 chisq
= create_chisq_table (proc
, pt
);
1022 if (proc
->statistics
& ((1u << CRS_ST_PHI
) |
(1u << CRS_ST_CC
)
1023 |
(1u << CRS_ST_BTAU
) |
(1u << CRS_ST_CTAU
)
1024 |
(1u << CRS_ST_GAMMA
) |
(1u << CRS_ST_CORR
)
1025 |
(1u << CRS_ST_KAPPA
)))
1026 sym
= create_sym_table (proc
, pt
);
1027 if (proc
->statistics
& (1u << CRS_ST_RISK
))
1028 risk
= create_risk_table (proc
, pt
);
1029 if (proc
->statistics
& ((1u << CRS_ST_LAMBDA
) |
(1u << CRS_ST_UC
)
1030 |
(1u << CRS_ST_D
) |
(1u << CRS_ST_ETA
)))
1031 direct
= create_direct_table (proc
, pt
);
1034 while (find_crosstab (pt
, &row0
, &row1
))
1036 struct pivot_table x
;
1037 int first_difference
;
1039 make_pivot_table_subset (pt
, row0
, row1
, &x
);
1041 /* Find all the row variable values. */
1042 enum_var_values (&x
, ROW_VAR
, &x
.rows
, &x
.n_rows
, proc
->descending
);
1044 if (size_overflow_p (xtimes (xtimes (x
.n_rows
, x
.n_cols
),
1047 x
.row_tot
= xmalloc (x
.n_rows
* sizeof
*x
.row_tot
);
1048 x
.col_tot
= xmalloc (x
.n_cols
* sizeof
*x
.col_tot
);
1049 x
.mat
= xmalloc (x
.n_rows
* x
.n_cols
* sizeof
*x
.mat
);
1051 /* Allocate table space for the matrix. */
1053 && tab_row (table
) + (x
.n_rows
+ 1) * proc
->n_cells
> tab_nr (table
))
1054 tab_realloc (table
, -1,
1055 MAX (tab_nr (table
) + (x
.n_rows
+ 1) * proc
->n_cells
,
1056 tab_nr (table
) * pt
->n_entries
/ x
.n_entries
));
1060 /* Find the first variable that differs from the last subtable. */
1061 first_difference
= find_first_difference (pt
, row0
);
1064 display_dimensions (proc
, &x
, table
, first_difference
);
1065 display_crosstabulation (proc
, &x
, table
);
1068 if (proc
->exclude
== MV_NEVER
)
1069 delete_missing (&x
);
1073 display_dimensions (proc
, &x
, chisq
, first_difference
);
1074 display_chisq (&x
, chisq
, &showed_fisher
);
1078 display_dimensions (proc
, &x
, sym
, first_difference
);
1079 display_symmetric (proc
, &x
, sym
);
1083 display_dimensions (proc
, &x
, risk
, first_difference
);
1084 display_risk (&x
, risk
);
1088 display_dimensions (proc
, &x
, direct
, first_difference
);
1089 display_directional (proc
, &x
, direct
);
1092 /* Free the parts of x that are not owned by pt. In
1093 particular we must not free x.cols, which is the same as
1094 pt->cols, which is freed at the end of this function. */
1102 submit (NULL, table
);
1107 tab_resize (chisq
, 4 + (pt
->n_vars
- 2), -1);
1113 submit (pt
, direct
);
1119 build_matrix (struct pivot_table
*x
)
1121 const int col_var_width
= var_get_width (x
->vars
[COL_VAR
]);
1122 const int row_var_width
= var_get_width (x
->vars
[ROW_VAR
]);
1129 for (p
= x
->entries
; p
< &x
->entries
[x
->n_entries
]; p
++)
1131 const struct freq
*te
= *p
;
1133 while (!value_equal (&x
->rows
[row
], &te
->values
[ROW_VAR
], row_var_width
))
1135 for (; col
< x
->n_cols
; col
++)
1141 while (!value_equal (&x
->cols
[col
], &te
->values
[COL_VAR
], col_var_width
))
1148 if (++col
>= x
->n_cols
)
1154 while (mp
< &x
->mat
[x
->n_cols
* x
->n_rows
])
1156 assert (mp
== &x
->mat
[x
->n_cols
* x
->n_rows
]);
1158 /* Column totals, row totals, ns_rows. */
1160 for (col
= 0; col
< x
->n_cols
; col
++)
1161 x
->col_tot
[col
] = 0.0;
1162 for (row
= 0; row
< x
->n_rows
; row
++)
1163 x
->row_tot
[row
] = 0.0;
1165 for (row
= 0; row
< x
->n_rows
; row
++)
1167 bool row_is_empty
= true
;
1168 for (col
= 0; col
< x
->n_cols
; col
++)
1172 row_is_empty
= false
;
1173 x
->col_tot
[col
] += *mp
;
1174 x
->row_tot
[row
] += *mp
;
1181 assert (mp
== &x
->mat
[x
->n_cols
* x
->n_rows
]);
1185 for (col
= 0; col
< x
->n_cols
; col
++)
1186 for (row
= 0; row
< x
->n_rows
; row
++)
1187 if (x
->mat
[col
+ row
* x
->n_cols
] != 0.0)
1195 for (col
= 0; col
< x
->n_cols
; col
++)
1196 x
->total
+= x
->col_tot
[col
];
1199 static struct tab_table
*
1200 create_crosstab_table (struct crosstabs_proc
*proc
, struct pivot_table
*pt
)
1207 static
const struct tuple names
[] =
1209 {CRS_CL_COUNT
, N_("count")},
1210 {CRS_CL_ROW
, N_("row %")},
1211 {CRS_CL_COLUMN
, N_("column %")},
1212 {CRS_CL_TOTAL
, N_("total %")},
1213 {CRS_CL_EXPECTED
, N_("expected")},
1214 {CRS_CL_RESIDUAL
, N_("residual")},
1215 {CRS_CL_SRESIDUAL
, N_("std. resid.")},
1216 {CRS_CL_ASRESIDUAL
, N_("adj. resid.")},
1218 const int n_names
= sizeof names
/ sizeof
*names
;
1219 const struct tuple
*t
;
1221 struct tab_table
*table
;
1222 struct string title
;
1223 struct pivot_table x
;
1227 make_pivot_table_subset (pt
, 0, 0, &x
);
1229 table
= tab_create (x
.n_consts
+ 1 + x
.n_cols
+ 1,
1230 (x
.n_entries
/ x
.n_cols
) * 3 / 2 * proc
->n_cells
+ 10);
1231 tab_headers (table
, x
.n_consts
+ 1, 0, 2, 0);
1232 tab_set_format (table
, RC_WEIGHT
, &proc
->weight_format
);
1234 /* First header line. */
1235 tab_joint_text (table
, x
.n_consts
+ 1, 0,
1236 (x
.n_consts
+ 1) + (x
.n_cols
- 1), 0,
1237 TAB_CENTER | TAT_TITLE
, var_to_string (x
.vars
[COL_VAR
]));
1239 tab_hline (table
, TAL_1
, x
.n_consts
+ 1,
1240 x
.n_consts
+ 2 + x
.n_cols
- 2, 1);
1242 /* Second header line. */
1243 for (i
= 2; i
< x
.n_consts
+ 2; i
++)
1244 tab_joint_text (table
, x
.n_consts
+ 2 - i
- 1, 0,
1245 x
.n_consts
+ 2 - i
- 1, 1,
1246 TAB_RIGHT | TAT_TITLE
, var_to_string (x
.vars
[i
]));
1247 tab_text (table
, x
.n_consts
+ 2 - 2, 1, TAB_RIGHT | TAT_TITLE
,
1248 var_to_string (x
.vars
[ROW_VAR
]));
1249 for (i
= 0; i
< x
.n_cols
; i
++)
1250 table_value_missing (proc
, table
, x
.n_consts
+ 2 + i
- 1, 1, TAB_RIGHT
,
1251 &x
.cols
[i
], x
.vars
[COL_VAR
]);
1252 tab_text (table
, x
.n_consts
+ 2 + x
.n_cols
- 1, 1, TAB_CENTER
, _("Total"));
1254 tab_hline (table
, TAL_1
, 0, x
.n_consts
+ 2 + x
.n_cols
- 1, 2);
1255 tab_vline (table
, TAL_1
, x
.n_consts
+ 2 + x
.n_cols
- 1, 0, 1);
1258 ds_init_empty (&title
);
1259 for (i
= 0; i
< x
.n_consts
+ 2; i
++)
1262 ds_put_cstr (&title
, " * ");
1263 ds_put_cstr (&title
, var_to_string (x
.vars
[i
]));
1265 for (i
= 0; i
< pt
->n_consts
; i
++)
1267 const struct variable
*var
= pt
->const_vars
[i
];
1270 ds_put_format (&title
, ", %s=", var_to_string (var
));
1272 /* Insert the formatted value of VAR without any leading spaces. */
1273 s
= data_out (&pt
->const_values
[i
], var_get_encoding (var
),
1274 var_get_print_format (var
));
1275 ds_put_cstr (&title
, s
+ strspn (s
, " "));
1279 ds_put_cstr (&title
, " [");
1281 for (t
= names
; t
< &names
[n_names
]; t
++)
1282 if (proc
->cells
& (1u << t
->value
))
1285 ds_put_cstr (&title
, ", ");
1286 ds_put_cstr (&title
, gettext (t
->name
));
1288 ds_put_cstr (&title
, "].");
1290 tab_title (table
, "%s", ds_cstr (&title
));
1291 ds_destroy (&title
);
1293 tab_offset (table
, 0, 2);
1297 static struct tab_table
*
1298 create_chisq_table (struct crosstabs_proc
*proc
, struct pivot_table
*pt
)
1300 struct tab_table
*chisq
;
1302 chisq
= tab_create (6 + (pt
->n_vars
- 2),
1303 pt
->n_entries
/ pt
->n_cols
* 3 / 2 * N_CHISQ
+ 10);
1304 tab_headers (chisq
, 1 + (pt
->n_vars
- 2), 0, 1, 0);
1305 tab_set_format (chisq
, RC_WEIGHT
, &proc
->weight_format
);
1307 tab_title (chisq
, _("Chi-square tests."));
1309 tab_offset (chisq
, pt
->n_vars
- 2, 0);
1310 tab_text (chisq
, 0, 0, TAB_LEFT | TAT_TITLE
, _("Statistic"));
1311 tab_text (chisq
, 1, 0, TAB_RIGHT | TAT_TITLE
, _("Value"));
1312 tab_text (chisq
, 2, 0, TAB_RIGHT | TAT_TITLE
, _("df"));
1313 tab_text (chisq
, 3, 0, TAB_RIGHT | TAT_TITLE
,
1314 _("Asymp. Sig. (2-tailed)"));
1315 tab_text_format (chisq
, 4, 0, TAB_RIGHT | TAT_TITLE
,
1316 _("Exact Sig. (%d-tailed)"), 2);
1317 tab_text_format (chisq
, 5, 0, TAB_RIGHT | TAT_TITLE
,
1318 _("Exact Sig. (%d-tailed)"), 1);
1319 tab_offset (chisq
, 0, 1);
1324 /* Symmetric measures. */
1325 static struct tab_table
*
1326 create_sym_table (struct crosstabs_proc
*proc
, struct pivot_table
*pt
)
1328 struct tab_table
*sym
;
1330 sym
= tab_create (6 + (pt
->n_vars
- 2),
1331 pt
->n_entries
/ pt
->n_cols
* 7 + 10);
1333 tab_set_format (sym
, RC_WEIGHT
, &proc
->weight_format
);
1335 tab_headers (sym
, 2 + (pt
->n_vars
- 2), 0, 1, 0);
1336 tab_title (sym
, _("Symmetric measures."));
1338 tab_offset (sym
, pt
->n_vars
- 2, 0);
1339 tab_text (sym
, 0, 0, TAB_LEFT | TAT_TITLE
, _("Category"));
1340 tab_text (sym
, 1, 0, TAB_LEFT | TAT_TITLE
, _("Statistic"));
1341 tab_text (sym
, 2, 0, TAB_RIGHT | TAT_TITLE
, _("Value"));
1342 tab_text (sym
, 3, 0, TAB_RIGHT | TAT_TITLE
, _("Asymp. Std. Error"));
1343 tab_text (sym
, 4, 0, TAB_RIGHT | TAT_TITLE
, _("Approx. T"));
1344 tab_text (sym
, 5, 0, TAB_RIGHT | TAT_TITLE
, _("Approx. Sig."));
1345 tab_offset (sym
, 0, 1);
1350 /* Risk estimate. */
1351 static struct tab_table
*
1352 create_risk_table (struct crosstabs_proc
*proc
, struct pivot_table
*pt
)
1354 struct tab_table
*risk
;
1356 risk
= tab_create (4 + (pt
->n_vars
- 2), pt
->n_entries
/ pt
->n_cols
* 4 + 10);
1357 tab_headers (risk
, 1 + pt
->n_vars
- 2, 0, 2, 0);
1358 tab_title (risk
, _("Risk estimate."));
1359 tab_set_format (risk
, RC_WEIGHT
, &proc
->weight_format
);
1361 tab_offset (risk
, pt
->n_vars
- 2, 0);
1362 tab_joint_text_format (risk
, 2, 0, 3, 0, TAB_CENTER | TAT_TITLE
,
1363 _("95%% Confidence Interval"));
1364 tab_text (risk
, 0, 1, TAB_LEFT | TAT_TITLE
, _("Statistic"));
1365 tab_text (risk
, 1, 1, TAB_RIGHT | TAT_TITLE
, _("Value"));
1366 tab_text (risk
, 2, 1, TAB_RIGHT | TAT_TITLE
, _("Lower"));
1367 tab_text (risk
, 3, 1, TAB_RIGHT | TAT_TITLE
, _("Upper"));
1368 tab_hline (risk
, TAL_1
, 2, 3, 1);
1369 tab_vline (risk
, TAL_1
, 2, 0, 1);
1370 tab_offset (risk
, 0, 2);
1375 /* Directional measures. */
1376 static struct tab_table
*
1377 create_direct_table (struct crosstabs_proc
*proc
, struct pivot_table
*pt
)
1379 struct tab_table
*direct
;
1381 direct
= tab_create (7 + (pt
->n_vars
- 2),
1382 pt
->n_entries
/ pt
->n_cols
* 7 + 10);
1383 tab_headers (direct
, 3 + (pt
->n_vars
- 2), 0, 1, 0);
1384 tab_title (direct
, _("Directional measures."));
1385 tab_set_format (direct
, RC_WEIGHT
, &proc
->weight_format
);
1387 tab_offset (direct
, pt
->n_vars
- 2, 0);
1388 tab_text (direct
, 0, 0, TAB_LEFT | TAT_TITLE
, _("Category"));
1389 tab_text (direct
, 1, 0, TAB_LEFT | TAT_TITLE
, _("Statistic"));
1390 tab_text (direct
, 2, 0, TAB_LEFT | TAT_TITLE
, _("Type"));
1391 tab_text (direct
, 3, 0, TAB_RIGHT | TAT_TITLE
, _("Value"));
1392 tab_text (direct
, 4, 0, TAB_RIGHT | TAT_TITLE
, _("Asymp. Std. Error"));
1393 tab_text (direct
, 5, 0, TAB_RIGHT | TAT_TITLE
, _("Approx. T"));
1394 tab_text (direct
, 6, 0, TAB_RIGHT | TAT_TITLE
, _("Approx. Sig."));
1395 tab_offset (direct
, 0, 1);
1401 /* Delete missing rows and columns for statistical analysis when
1404 delete_missing (struct pivot_table
*pt
)
1408 for (r
= 0; r
< pt
->n_rows
; r
++)
1409 if (var_is_num_missing (pt
->vars
[ROW_VAR
], pt
->rows
[r
].f
, MV_USER
))
1411 for (c
= 0; c
< pt
->n_cols
; c
++)
1412 pt
->mat
[c
+ r
* pt
->n_cols
] = 0.;
1417 for (c
= 0; c
< pt
->n_cols
; c
++)
1418 if (var_is_num_missing (pt
->vars
[COL_VAR
], pt
->cols
[c
].f
, MV_USER
))
1420 for (r
= 0; r
< pt
->n_rows
; r
++)
1421 pt
->mat
[c
+ r
* pt
->n_cols
] = 0.;
1426 /* Prepare table T for submission, and submit it. */
1428 submit (struct pivot_table
*pt
, struct tab_table
*t
)
1435 tab_resize (t
, -1, 0);
1436 if (tab_nr (t
) == tab_t (t
))
1438 table_unref (&t
->table
);
1441 tab_offset (t
, 0, 0);
1443 for (i
= 2; i
< pt
->n_vars
; i
++)
1444 tab_text (t
, pt
->n_vars
- i
- 1, 0, TAB_RIGHT | TAT_TITLE
,
1445 var_to_string (pt
->vars
[i
]));
1446 tab_box (t
, TAL_2
, TAL_2
, -1, -1, 0, 0, tab_nc (t
) - 1, tab_nr (t
) - 1);
1447 tab_box (t
, -1, -1, -1, TAL_1
, tab_l (t
), tab_t (t
) - 1, tab_nc (t
) - 1,
1449 tab_box (t
, -1, -1, -1, TAL_GAP
, 0, tab_t (t
), tab_l (t
) - 1,
1451 tab_vline (t
, TAL_2
, tab_l (t
), 0, tab_nr (t
) - 1);
1457 find_crosstab (struct pivot_table
*pt
, size_t
*row0p
, size_t
*row1p
)
1459 size_t row0
= *row1p
;
1462 if (row0
>= pt
->n_entries
)
1465 for (row1
= row0
+ 1; row1
< pt
->n_entries
; row1
++)
1467 struct freq
*a
= pt
->entries
[row0
];
1468 struct freq
*b
= pt
->entries
[row1
];
1469 if (compare_table_entry_vars_3way (a
, b
, pt
, 2, pt
->n_vars
) != 0)
1477 /* Compares `union value's A_ and B_ and returns a strcmp()-like
1478 result. WIDTH_ points to an int which is either 0 for a
1479 numeric value or a string width for a string value. */
1481 compare_value_3way (const void
*a_
, const void
*b_
, const void
*width_
)
1483 const union value
*a
= a_
;
1484 const union value
*b
= b_
;
1485 const int
*width
= width_
;
1487 return value_compare_3way (a
, b
, *width
);
1490 /* Inverted version of the above */
1492 compare_value_3way_inv (const void
*a_
, const void
*b_
, const void
*width_
)
1494 return -compare_value_3way (a_
, b_
, width_
);
1498 /* Given an array of ENTRY_CNT table_entry structures starting at
1499 ENTRIES, creates a sorted list of the values that the variable
1500 with index VAR_IDX takes on. The values are returned as a
1501 malloc()'d array stored in *VALUES, with the number of values
1502 stored in *VALUE_CNT.
1504 The caller must eventually free *VALUES, but each pointer in *VALUES points
1505 to existing data not owned by *VALUES itself. */
1507 enum_var_values (const struct pivot_table
*pt
, int var_idx
,
1508 union value
**valuesp
, int
*n_values
, bool descending
)
1510 const struct variable
*var
= pt
->vars
[var_idx
];
1511 const struct var_range
*range
= get_var_range (pt
->proc
, var
);
1512 union value
*values
;
1517 values
= *valuesp
= xnmalloc (range
->count
, sizeof
*values
);
1518 *n_values
= range
->count
;
1519 for (i
= 0; i
< range
->count
; i
++)
1520 values
[i
].f
= range
->min
+ i
;
1524 int width
= var_get_width (var
);
1525 struct hmapx_node
*node
;
1526 const union value
*iter
;
1530 for (i
= 0; i
< pt
->n_entries
; i
++)
1532 const struct freq
*te
= pt
->entries
[i
];
1533 const union value
*value
= &te
->values
[var_idx
];
1534 size_t hash
= value_hash (value
, width
, 0);
1536 HMAPX_FOR_EACH_WITH_HASH (iter
, node
, hash
, &set
)
1537 if (value_equal (iter
, value
, width
))
1540 hmapx_insert (&set
, (union value
*) value
, hash
);
1545 *n_values
= hmapx_count (&set
);
1546 values
= *valuesp
= xnmalloc (*n_values
, sizeof
*values
);
1548 HMAPX_FOR_EACH (iter
, node
, &set
)
1549 values
[i
++] = *iter
;
1550 hmapx_destroy (&set
);
1552 sort (values
, *n_values
, sizeof
*values
,
1553 descending ? compare_value_3way_inv
: compare_value_3way
,
1558 /* Sets cell (C,R) in TABLE, with options OPT, to have a value taken
1559 from V, displayed with print format spec from variable VAR. When
1560 in REPORT missing-value mode, missing values have an M appended. */
1562 table_value_missing (struct crosstabs_proc
*proc
,
1563 struct tab_table
*table
, int c
, int r
, unsigned char opt
,
1564 const union value
*v
, const struct variable
*var
)
1566 const char
*label
= var_lookup_value_label (var
, v
);
1568 tab_text (table
, c
, r
, TAB_LEFT
, label
);
1571 const struct fmt_spec
*print
= var_get_print_format (var
);
1572 if (proc
->exclude
== MV_NEVER
&& var_is_value_missing (var
, v
, MV_USER
))
1574 char
*s
= data_out (v
, dict_get_encoding (proc
->dict
), print
);
1575 tab_text_format (table
, c
, r
, opt
, "%sM", s
+ strspn (s
, " "));
1579 tab_value (table
, c
, r
, opt
, v
, var
, print
);
1583 /* Draws a line across TABLE at the current row to indicate the most
1584 major dimension variable with index FIRST_DIFFERENCE out of N_VARS
1585 that changed, and puts the values that changed into the table. TB
1586 and PT must be the corresponding table_entry and crosstab,
1589 display_dimensions (struct crosstabs_proc
*proc
, struct pivot_table
*pt
,
1590 struct tab_table
*table
, int first_difference
)
1592 tab_hline (table
, TAL_1
, pt
->n_consts
+ pt
->n_vars
- first_difference
- 1, tab_nc (table
) - 1, 0);
1594 for (; first_difference
>= 2; first_difference
--)
1595 table_value_missing (proc
, table
, pt
->n_consts
+ pt
->n_vars
- first_difference
- 1, 0,
1596 TAB_RIGHT
, &pt
->entries
[0]->values
[first_difference
],
1597 pt
->vars
[first_difference
]);
1600 /* Put VALUE into cell (C,R) of TABLE, suffixed with character
1601 SUFFIX if nonzero. If MARK_MISSING is true the entry is
1602 additionally suffixed with a letter `M'. */
1604 format_cell_entry (struct tab_table
*table
, int c
, int r
, double value
,
1605 char suffix
, bool mark_missing
, const struct dictionary
*dict
)
1613 s
= data_out (&v
, dict_get_encoding (dict
), settings_get_format ());
1617 suffixes
[suffix_len
++] = suffix
;
1619 suffixes
[suffix_len
++] = 'M';
1620 suffixes
[suffix_len
] = '\0';
1622 tab_text_format (table
, c
, r
, TAB_RIGHT
, "%s%s",
1623 s
+ strspn (s
, " "), suffixes
);
1628 /* Displays the crosstabulation table. */
1630 display_crosstabulation (struct crosstabs_proc
*proc
, struct pivot_table
*pt
,
1631 struct tab_table
*table
)
1637 for (r
= 0; r
< pt
->n_rows
; r
++)
1638 table_value_missing (proc
, table
, pt
->n_consts
+ pt
->n_vars
- 2,
1639 r
* proc
->n_cells
, TAB_RIGHT
, &pt
->rows
[r
],
1642 tab_text (table
, pt
->n_vars
- 2, pt
->n_rows
* proc
->n_cells
,
1643 TAB_LEFT
, _("Total"));
1645 /* Put in the actual cells. */
1647 tab_offset (table
, pt
->n_consts
+ pt
->n_vars
- 1, -1);
1648 for (r
= 0; r
< pt
->n_rows
; r
++)
1650 if (proc
->n_cells
> 1)
1651 tab_hline (table
, TAL_1
, -1, pt
->n_cols
, 0);
1652 for (c
= 0; c
< pt
->n_cols
; c
++)
1654 bool mark_missing
= false
;
1655 double expected_value
= pt
->row_tot
[r
] * pt
->col_tot
[c
] / pt
->total
;
1656 if (proc
->exclude
== MV_NEVER
1657 && (var_is_num_missing (pt
->vars
[COL_VAR
], pt
->cols
[c
].f
, MV_USER
)
1658 ||
var_is_num_missing (pt
->vars
[ROW_VAR
], pt
->rows
[r
].f
,
1660 mark_missing
= true
;
1661 for (i
= 0; i
< proc
->n_cells
; i
++)
1666 switch (proc
->a_cells
[i
])
1672 v
= *mp
/ pt
->row_tot
[r
] * 100.;
1676 v
= *mp
/ pt
->col_tot
[c
] * 100.;
1680 v
= *mp
/ pt
->total
* 100.;
1683 case CRS_CL_EXPECTED
:
1686 case CRS_CL_RESIDUAL
:
1687 v
= *mp
- expected_value
;
1689 case CRS_CL_SRESIDUAL
:
1690 v
= (*mp
- expected_value
) / sqrt (expected_value
);
1692 case CRS_CL_ASRESIDUAL
:
1693 v
= ((*mp
- expected_value
)
1694 / sqrt (expected_value
1695 * (1. - pt
->row_tot
[r
] / pt
->total
)
1696 * (1. - pt
->col_tot
[c
] / pt
->total
)));
1701 format_cell_entry (table
, c
, i
, v
, suffix
, mark_missing
, proc
->dict
);
1707 tab_offset (table
, -1, tab_row (table
) + proc
->n_cells
);
1711 tab_offset (table
, -1, tab_row (table
) - proc
->n_cells
* pt
->n_rows
);
1712 for (r
= 0; r
< pt
->n_rows
; r
++)
1714 bool mark_missing
= false
;
1716 if (proc
->exclude
== MV_NEVER
1717 && var_is_num_missing (pt
->vars
[ROW_VAR
], pt
->rows
[r
].f
, MV_USER
))
1718 mark_missing
= true
;
1720 for (i
= 0; i
< proc
->n_cells
; i
++)
1725 switch (proc
->a_cells
[i
])
1735 v
= pt
->row_tot
[r
] / pt
->total
* 100.;
1739 v
= pt
->row_tot
[r
] / pt
->total
* 100.;
1742 case CRS_CL_EXPECTED
:
1743 case CRS_CL_RESIDUAL
:
1744 case CRS_CL_SRESIDUAL
:
1745 case CRS_CL_ASRESIDUAL
:
1752 format_cell_entry (table
, pt
->n_cols
, 0, v
, suffix
, mark_missing
, proc
->dict
);
1753 tab_next_row (table
);
1757 /* Column totals, grand total. */
1759 if (proc
->n_cells
> 1)
1760 tab_hline (table
, TAL_1
, -1, pt
->n_cols
, 0);
1761 for (c
= 0; c
<= pt
->n_cols
; c
++)
1763 double ct
= c
< pt
->n_cols ? pt
->col_tot
[c
] : pt
->total
;
1764 bool mark_missing
= false
;
1767 if (proc
->exclude
== MV_NEVER
&& c
< pt
->n_cols
1768 && var_is_num_missing (pt
->vars
[COL_VAR
], pt
->cols
[c
].f
, MV_USER
))
1769 mark_missing
= true
;
1771 for (i
= 0; i
< proc
->n_cells
; i
++)
1776 switch (proc
->a_cells
[i
])
1782 v
= ct
/ pt
->total
* 100.;
1790 v
= ct
/ pt
->total
* 100.;
1793 case CRS_CL_EXPECTED
:
1794 case CRS_CL_RESIDUAL
:
1795 case CRS_CL_SRESIDUAL
:
1796 case CRS_CL_ASRESIDUAL
:
1802 format_cell_entry (table
, c
, i
, v
, suffix
, mark_missing
, proc
->dict
);
1807 tab_offset (table
, -1, tab_row (table
) + last_row
);
1808 tab_offset (table
, 0, -1);
1811 static void
calc_r (struct pivot_table
*,
1812 double
*PT
, double
*Y
, double
*, double
*, double
*);
1813 static void
calc_chisq (struct pivot_table
*,
1814 double
[N_CHISQ
], int
[N_CHISQ
], double
*, double
*);
1816 /* Display chi-square statistics. */
1818 display_chisq (struct pivot_table
*pt
, struct tab_table
*chisq
,
1819 bool
*showed_fisher
)
1821 static
const char
*chisq_stats
[N_CHISQ
] =
1823 N_("Pearson Chi-Square"),
1824 N_("Likelihood Ratio"),
1825 N_("Fisher's Exact Test"),
1826 N_("Continuity Correction"),
1827 N_("Linear-by-Linear Association"),
1829 double chisq_v
[N_CHISQ
];
1830 double fisher1
, fisher2
;
1835 calc_chisq (pt
, chisq_v
, df
, &fisher1
, &fisher2
);
1837 tab_offset (chisq
, pt
->n_consts
+ pt
->n_vars
- 2, -1);
1839 for (i
= 0; i
< N_CHISQ
; i
++)
1841 if ((i
!= 2 && chisq_v
[i
] == SYSMIS
)
1842 ||
(i
== 2 && fisher1
== SYSMIS
))
1845 tab_text (chisq
, 0, 0, TAB_LEFT
, gettext (chisq_stats
[i
]));
1848 tab_double (chisq
, 1, 0, TAB_RIGHT
, chisq_v
[i
], NULL, RC_OTHER
);
1849 tab_double (chisq
, 2, 0, TAB_RIGHT
, df
[i
], NULL, RC_WEIGHT
);
1850 tab_double (chisq
, 3, 0, TAB_RIGHT
,
1851 gsl_cdf_chisq_Q (chisq_v
[i
], df
[i
]), NULL, RC_PVALUE
);
1855 *showed_fisher
= true
;
1856 tab_double (chisq
, 4, 0, TAB_RIGHT
, fisher2
, NULL, RC_PVALUE
);
1857 tab_double (chisq
, 5, 0, TAB_RIGHT
, fisher1
, NULL, RC_PVALUE
);
1859 tab_next_row (chisq
);
1862 tab_text (chisq
, 0, 0, TAB_LEFT
, _("N of Valid Cases"));
1863 tab_double (chisq
, 1, 0, TAB_RIGHT
, pt
->total
, NULL, RC_WEIGHT
);
1864 tab_next_row (chisq
);
1866 tab_offset (chisq
, 0, -1);
1869 static int
calc_symmetric (struct crosstabs_proc
*, struct pivot_table
*,
1870 double
[N_SYMMETRIC
], double
[N_SYMMETRIC
],
1871 double
[N_SYMMETRIC
],
1872 double
[3], double
[3], double
[3]);
1874 /* Display symmetric measures. */
1876 display_symmetric (struct crosstabs_proc
*proc
, struct pivot_table
*pt
,
1877 struct tab_table
*sym
)
1879 static
const char
*categories
[] =
1881 N_("Nominal by Nominal"),
1882 N_("Ordinal by Ordinal"),
1883 N_("Interval by Interval"),
1884 N_("Measure of Agreement"),
1887 static
const char
*stats
[N_SYMMETRIC
] =
1891 N_("Contingency Coefficient"),
1892 N_("Kendall's tau-b"),
1893 N_("Kendall's tau-c"),
1895 N_("Spearman Correlation"),
1900 static
const int stats_categories
[N_SYMMETRIC
] =
1902 0, 0, 0, 1, 1, 1, 1, 2, 3,
1906 double sym_v
[N_SYMMETRIC
], sym_ase
[N_SYMMETRIC
], sym_t
[N_SYMMETRIC
];
1907 double somers_d_v
[3], somers_d_ase
[3], somers_d_t
[3];
1910 if (!calc_symmetric (proc
, pt
, sym_v
, sym_ase
, sym_t
,
1911 somers_d_v
, somers_d_ase
, somers_d_t
))
1914 tab_offset (sym
, pt
->n_consts
+ pt
->n_vars
- 2, -1);
1916 for (i
= 0; i
< N_SYMMETRIC
; i
++)
1918 if (sym_v
[i
] == SYSMIS
)
1921 if (stats_categories
[i
] != last_cat
)
1923 last_cat
= stats_categories
[i
];
1924 tab_text (sym
, 0, 0, TAB_LEFT
, gettext (categories
[last_cat
]));
1927 tab_text (sym
, 1, 0, TAB_LEFT
, gettext (stats
[i
]));
1928 tab_double (sym
, 2, 0, TAB_RIGHT
, sym_v
[i
], NULL, RC_OTHER
);
1929 if (sym_ase
[i
] != SYSMIS
)
1930 tab_double (sym
, 3, 0, TAB_RIGHT
, sym_ase
[i
], NULL, RC_OTHER
);
1931 if (sym_t
[i
] != SYSMIS
)
1932 tab_double (sym
, 4, 0, TAB_RIGHT
, sym_t
[i
], NULL, RC_OTHER
);
1933 /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL, RC_PVALUE);*/
1937 tab_text (sym
, 0, 0, TAB_LEFT
, _("N of Valid Cases"));
1938 tab_double (sym
, 2, 0, TAB_RIGHT
, pt
->total
, NULL, RC_WEIGHT
);
1941 tab_offset (sym
, 0, -1);
1944 static int
calc_risk (struct pivot_table
*,
1945 double
[], double
[], double
[], union value
*);
1947 /* Display risk estimate. */
1949 display_risk (struct pivot_table
*pt
, struct tab_table
*risk
)
1952 double risk_v
[3], lower
[3], upper
[3];
1956 if (!calc_risk (pt
, risk_v
, upper
, lower
, c
))
1959 tab_offset (risk
, pt
->n_consts
+ pt
->n_vars
- 2, -1);
1961 for (i
= 0; i
< 3; i
++)
1963 const struct variable
*cv
= pt
->vars
[COL_VAR
];
1964 const struct variable
*rv
= pt
->vars
[ROW_VAR
];
1965 int cvw
= var_get_width (cv
);
1966 int rvw
= var_get_width (rv
);
1968 if (risk_v
[i
] == SYSMIS
)
1974 if (var_is_numeric (cv
))
1975 sprintf (buf
, _("Odds Ratio for %s (%g / %g)"),
1976 var_to_string (cv
), c
[0].f
, c
[1].f
);
1978 sprintf (buf
, _("Odds Ratio for %s (%.*s / %.*s)"),
1980 cvw
, value_str (&c
[0], cvw
),
1981 cvw
, value_str (&c
[1], cvw
));
1985 if (var_is_numeric (rv
))
1986 sprintf (buf
, _("For cohort %s = %.*g"),
1987 var_to_string (rv
), DBL_DIG
+ 1, pt
->rows
[i
- 1].f
);
1989 sprintf (buf
, _("For cohort %s = %.*s"),
1991 rvw
, value_str (&pt
->rows
[i
- 1], rvw
));
1995 tab_text (risk
, 0, 0, TAB_LEFT
, buf
);
1996 tab_double (risk
, 1, 0, TAB_RIGHT
, risk_v
[i
], NULL, RC_OTHER
);
1997 tab_double (risk
, 2, 0, TAB_RIGHT
, lower
[i
], NULL, RC_OTHER
);
1998 tab_double (risk
, 3, 0, TAB_RIGHT
, upper
[i
], NULL, RC_OTHER
);
1999 tab_next_row (risk
);
2002 tab_text (risk
, 0, 0, TAB_LEFT
, _("N of Valid Cases"));
2003 tab_double (risk
, 1, 0, TAB_RIGHT
, pt
->total
, NULL, RC_WEIGHT
);
2004 tab_next_row (risk
);
2006 tab_offset (risk
, 0, -1);
2009 static int
calc_directional (struct crosstabs_proc
*, struct pivot_table
*,
2010 double
[N_DIRECTIONAL
], double
[N_DIRECTIONAL
],
2011 double
[N_DIRECTIONAL
], double
[N_DIRECTIONAL
]);
2013 /* Display directional measures. */
2015 display_directional (struct crosstabs_proc
*proc
, struct pivot_table
*pt
,
2016 struct tab_table
*direct
)
2018 static
const char
*categories
[] =
2020 N_("Nominal by Nominal"),
2021 N_("Ordinal by Ordinal"),
2022 N_("Nominal by Interval"),
2025 static
const char
*stats
[] =
2028 N_("Goodman and Kruskal tau"),
2029 N_("Uncertainty Coefficient"),
2034 static
const char
*types
[] =
2041 static
const int stats_categories
[N_DIRECTIONAL
] =
2043 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2,
2046 static
const int stats_stats
[N_DIRECTIONAL
] =
2048 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4,
2051 static
const int stats_types
[N_DIRECTIONAL
] =
2053 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 2, 1, 2,
2056 static
const int
*stats_lookup
[] =
2063 static
const char
**stats_names
[] =
2075 double direct_v
[N_DIRECTIONAL
];
2076 double direct_ase
[N_DIRECTIONAL
];
2077 double direct_t
[N_DIRECTIONAL
];
2078 double sig
[N_DIRECTIONAL
];
2082 if (!calc_directional (proc
, pt
, direct_v
, direct_ase
, direct_t
, sig
))
2085 tab_offset (direct
, pt
->n_consts
+ pt
->n_vars
- 2, -1);
2087 for (i
= 0; i
< N_DIRECTIONAL
; i
++)
2089 if (direct_v
[i
] == SYSMIS
)
2095 for (j
= 0; j
< 3; j
++)
2096 if (last
[j
] != stats_lookup
[j
][i
])
2099 tab_hline (direct
, TAL_1
, j
, 6, 0);
2104 int k
= last
[j
] = stats_lookup
[j
][i
];
2109 string
= var_to_string (pt
->vars
[0]);
2111 string
= var_to_string (pt
->vars
[1]);
2113 tab_text_format (direct
, j
, 0, TAB_LEFT
,
2114 gettext (stats_names
[j
][k
]), string
);
2119 tab_double (direct
, 3, 0, TAB_RIGHT
, direct_v
[i
], NULL, RC_OTHER
);
2120 if (direct_ase
[i
] != SYSMIS
)
2121 tab_double (direct
, 4, 0, TAB_RIGHT
, direct_ase
[i
], NULL, RC_OTHER
);
2122 if (direct_t
[i
] != SYSMIS
)
2123 tab_double (direct
, 5, 0, TAB_RIGHT
, direct_t
[i
], NULL, RC_OTHER
);
2124 tab_double (direct
, 6, 0, TAB_RIGHT
, sig
[i
], NULL, RC_PVALUE
);
2125 tab_next_row (direct
);
2128 tab_offset (direct
, 0, -1);
2131 /* Statistical calculations. */
2133 /* Returns the value of the logarithm of gamma (factorial) function for an integer
2136 log_gamma_int (double pt
)
2141 for (i
= 2; i
< pt
; i
++)
2147 /* Calculate P_r as specified in _SPSS Statistical Algorithms_,
2149 static inline double
2150 Pr (int a
, int b
, int c
, int d
)
2152 return exp (log_gamma_int (a
+ b
+ 1.) - log_gamma_int (a
+ 1.)
2153 + log_gamma_int (c
+ d
+ 1.) - log_gamma_int (b
+ 1.)
2154 + log_gamma_int (a
+ c
+ 1.) - log_gamma_int (c
+ 1.)
2155 + log_gamma_int (b
+ d
+ 1.) - log_gamma_int (d
+ 1.)
2156 - log_gamma_int (a
+ b
+ c
+ d
+ 1.));
2159 /* Swap the contents of A and B. */
2161 swap (int
*a
, int
*b
)
2168 /* Calculate significance for Fisher's exact test as specified in
2169 _SPSS Statistical Algorithms_, Appendix 5. */
2171 calc_fisher (int a
, int b
, int c
, int d
, double
*fisher1
, double
*fisher2
)
2176 if (MIN (c
, d
) < MIN (a
, b
))
2177 swap (&a
, &c
), swap (&b
, &d
);
2178 if (MIN (b
, d
) < MIN (a
, c
))
2179 swap (&a
, &b
), swap (&c
, &d
);
2183 swap (&a
, &b
), swap (&c
, &d
);
2185 swap (&a
, &c
), swap (&b
, &d
);
2188 pn1
= Pr (a
, b
, c
, d
);
2190 for (pt
= 1; pt
<= a
; pt
++)
2192 *fisher1
+= Pr (a
- pt
, b
+ pt
, c
+ pt
, d
- pt
);
2195 *fisher2
= *fisher1
;
2197 for (pt
= 1; pt
<= b
; pt
++)
2199 double p
= Pr (a
+ pt
, b
- pt
, c
- pt
, d
+ pt
);
2205 /* Calculates chi-squares into CHISQ. MAT is a matrix with N_COLS
2206 columns with values COLS and N_ROWS rows with values ROWS. Values
2207 in the matrix sum to pt->total. */
2209 calc_chisq (struct pivot_table
*pt
,
2210 double chisq
[N_CHISQ
], int df
[N_CHISQ
],
2211 double
*fisher1
, double
*fisher2
)
2215 chisq
[0] = chisq
[1] = 0.;
2216 chisq
[2] = chisq
[3] = chisq
[4] = SYSMIS
;
2217 *fisher1
= *fisher2
= SYSMIS
;
2219 df
[0] = df
[1] = (pt
->ns_cols
- 1) * (pt
->ns_rows
- 1);
2221 if (pt
->ns_rows
<= 1 || pt
->ns_cols
<= 1)
2223 chisq
[0] = chisq
[1] = SYSMIS
;
2227 for (r
= 0; r
< pt
->n_rows
; r
++)
2228 for (c
= 0; c
< pt
->n_cols
; c
++)
2230 const double expected
= pt
->row_tot
[r
] * pt
->col_tot
[c
] / pt
->total
;
2231 const double freq
= pt
->mat
[pt
->n_cols
* r
+ c
];
2232 const double residual
= freq
- expected
;
2234 chisq
[0] += residual
* residual
/ expected
;
2236 chisq
[1] += freq
* log (expected
/ freq
);
2247 /* Calculate Yates and Fisher exact test. */
2248 if (pt
->ns_cols
== 2 && pt
->ns_rows
== 2)
2250 double f11
, f12
, f21
, f22
;
2256 for (i
= j
= 0; i
< pt
->n_cols
; i
++)
2257 if (pt
->col_tot
[i
] != 0.)
2266 f11
= pt
->mat
[nz_cols
[0]];
2267 f12
= pt
->mat
[nz_cols
[1]];
2268 f21
= pt
->mat
[nz_cols
[0] + pt
->n_cols
];
2269 f22
= pt
->mat
[nz_cols
[1] + pt
->n_cols
];
2274 const double pt_
= fabs (f11
* f22
- f12
* f21
) - 0.5 * pt
->total
;
2277 chisq
[3] = (pt
->total
* pow2 (pt_
)
2278 / (f11
+ f12
) / (f21
+ f22
)
2279 / (f11
+ f21
) / (f12
+ f22
));
2287 calc_fisher (f11
+ .5, f12
+ .5, f21
+ .5, f22
+ .5, fisher1
, fisher2
);
2290 /* Calculate Mantel-Haenszel. */
2291 if (var_is_numeric (pt
->vars
[ROW_VAR
]) && var_is_numeric (pt
->vars
[COL_VAR
]))
2293 double r
, ase_0
, ase_1
;
2294 calc_r (pt
, (double
*) pt
->rows
, (double
*) pt
->cols
, &r
, &ase_0
, &ase_1
);
2296 chisq
[4] = (pt
->total
- 1.) * r
* r
;
2301 /* Calculate the value of Pearson's r. r is stored into R, its T value into
2302 T, and standard error into ERROR. The row and column values must be
2303 passed in PT and Y. */
2305 calc_r (struct pivot_table
*pt
,
2306 double
*PT
, double
*Y
, double
*r
, double
*t
, double
*error
)
2308 double SX
, SY
, S
, T
;
2310 double sum_XYf
, sum_X2Y2f
;
2311 double sum_Xr
, sum_X2r
;
2312 double sum_Yc
, sum_Y2c
;
2315 for (sum_X2Y2f
= sum_XYf
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2316 for (j
= 0; j
< pt
->n_cols
; j
++)
2318 double fij
= pt
->mat
[j
+ i
* pt
->n_cols
];
2319 double product
= PT
[i
] * Y
[j
];
2320 double temp
= fij
* product
;
2322 sum_X2Y2f
+= temp
* product
;
2325 for (sum_Xr
= sum_X2r
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2327 sum_Xr
+= PT
[i
] * pt
->row_tot
[i
];
2328 sum_X2r
+= pow2 (PT
[i
]) * pt
->row_tot
[i
];
2330 Xbar
= sum_Xr
/ pt
->total
;
2332 for (sum_Yc
= sum_Y2c
= 0., i
= 0; i
< pt
->n_cols
; i
++)
2334 sum_Yc
+= Y
[i
] * pt
->col_tot
[i
];
2335 sum_Y2c
+= Y
[i
] * Y
[i
] * pt
->col_tot
[i
];
2337 Ybar
= sum_Yc
/ pt
->total
;
2339 S
= sum_XYf
- sum_Xr
* sum_Yc
/ pt
->total
;
2340 SX
= sum_X2r
- pow2 (sum_Xr
) / pt
->total
;
2341 SY
= sum_Y2c
- pow2 (sum_Yc
) / pt
->total
;
2344 *t
= *r
/ sqrt (1 - pow2 (*r
)) * sqrt (pt
->total
- 2);
2349 for (s
= c
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2350 for (j
= 0; j
< pt
->n_cols
; j
++)
2352 double Xresid
, Yresid
;
2355 Xresid
= PT
[i
] - Xbar
;
2356 Yresid
= Y
[j
] - Ybar
;
2357 temp
= (T
* Xresid
* Yresid
2359 * (Xresid
* Xresid
* SY
+ Yresid
* Yresid
* SX
)));
2360 y
= pt
->mat
[j
+ i
* pt
->n_cols
] * temp
* temp
- c
;
2365 *error
= sqrt (s
) / (T
* T
);
2369 /* Calculate symmetric statistics and their asymptotic standard
2370 errors. Returns 0 if none could be calculated. */
2372 calc_symmetric (struct crosstabs_proc
*proc
, struct pivot_table
*pt
,
2373 double v
[N_SYMMETRIC
], double ase
[N_SYMMETRIC
],
2374 double t
[N_SYMMETRIC
],
2375 double somers_d_v
[3], double somers_d_ase
[3],
2376 double somers_d_t
[3])
2380 q
= MIN (pt
->ns_rows
, pt
->ns_cols
);
2384 for (i
= 0; i
< N_SYMMETRIC
; i
++)
2385 v
[i
] = ase
[i
] = t
[i
] = SYSMIS
;
2387 /* Phi, Cramer's V, contingency coefficient. */
2388 if (proc
->statistics
& ((1u << CRS_ST_PHI
) |
(1u << CRS_ST_CC
)))
2390 double Xp
= 0.; /* Pearson chi-square. */
2393 for (r
= 0; r
< pt
->n_rows
; r
++)
2394 for (c
= 0; c
< pt
->n_cols
; c
++)
2396 const double expected
= pt
->row_tot
[r
] * pt
->col_tot
[c
] / pt
->total
;
2397 const double freq
= pt
->mat
[pt
->n_cols
* r
+ c
];
2398 const double residual
= freq
- expected
;
2400 Xp
+= residual
* residual
/ expected
;
2403 if (proc
->statistics
& (1u << CRS_ST_PHI
))
2405 v
[0] = sqrt (Xp
/ pt
->total
);
2406 v
[1] = sqrt (Xp
/ (pt
->total
* (q
- 1)));
2408 if (proc
->statistics
& (1u << CRS_ST_CC
))
2409 v
[2] = sqrt (Xp
/ (Xp
+ pt
->total
));
2412 if (proc
->statistics
& ((1u << CRS_ST_BTAU
) |
(1u << CRS_ST_CTAU
)
2413 |
(1u << CRS_ST_GAMMA
) |
(1u << CRS_ST_D
)))
2418 double btau_cum
, ctau_cum
, gamma_cum
, d_yx_cum
, d_xy_cum
;
2422 Dr
= Dc
= pow2 (pt
->total
);
2423 for (r
= 0; r
< pt
->n_rows
; r
++)
2424 Dr
-= pow2 (pt
->row_tot
[r
]);
2425 for (c
= 0; c
< pt
->n_cols
; c
++)
2426 Dc
-= pow2 (pt
->col_tot
[c
]);
2428 cum
= xnmalloc (pt
->n_cols
* pt
->n_rows
, sizeof
*cum
);
2429 for (c
= 0; c
< pt
->n_cols
; c
++)
2433 for (r
= 0; r
< pt
->n_rows
; r
++)
2434 cum
[c
+ r
* pt
->n_cols
] = ct
+= pt
->mat
[c
+ r
* pt
->n_cols
];
2443 for (i
= 0; i
< pt
->n_rows
; i
++)
2447 for (j
= 1; j
< pt
->n_cols
; j
++)
2448 Cij
+= pt
->col_tot
[j
] - cum
[j
+ i
* pt
->n_cols
];
2451 for (j
= 1; j
< pt
->n_cols
; j
++)
2452 Dij
+= cum
[j
+ (i
- 1) * pt
->n_cols
];
2456 double fij
= pt
->mat
[j
+ i
* pt
->n_cols
];
2460 if (++j
== pt
->n_cols
)
2462 assert (j
< pt
->n_cols
);
2464 Cij
-= pt
->col_tot
[j
] - cum
[j
+ i
* pt
->n_cols
];
2465 Dij
+= pt
->col_tot
[j
- 1] - cum
[j
- 1 + i
* pt
->n_cols
];
2469 Cij
+= cum
[j
- 1 + (i
- 1) * pt
->n_cols
];
2470 Dij
-= cum
[j
+ (i
- 1) * pt
->n_cols
];
2476 if (proc
->statistics
& (1u << CRS_ST_BTAU
))
2477 v
[3] = (P
- Q
) / sqrt (Dr
* Dc
);
2478 if (proc
->statistics
& (1u << CRS_ST_CTAU
))
2479 v
[4] = (q
* (P
- Q
)) / (pow2 (pt
->total
) * (q
- 1));
2480 if (proc
->statistics
& (1u << CRS_ST_GAMMA
))
2481 v
[5] = (P
- Q
) / (P
+ Q
);
2483 /* ASE for tau-b, tau-c, gamma. Calculations could be
2484 eliminated here, at expense of memory. */
2489 btau_cum
= ctau_cum
= gamma_cum
= d_yx_cum
= d_xy_cum
= 0.;
2490 for (i
= 0; i
< pt
->n_rows
; i
++)
2494 for (j
= 1; j
< pt
->n_cols
; j
++)
2495 Cij
+= pt
->col_tot
[j
] - cum
[j
+ i
* pt
->n_cols
];
2498 for (j
= 1; j
< pt
->n_cols
; j
++)
2499 Dij
+= cum
[j
+ (i
- 1) * pt
->n_cols
];
2503 double fij
= pt
->mat
[j
+ i
* pt
->n_cols
];
2505 if (proc
->statistics
& (1u << CRS_ST_BTAU
))
2507 const double temp
= (2. * sqrt (Dr
* Dc
) * (Cij
- Dij
)
2508 + v
[3] * (pt
->row_tot
[i
] * Dc
2509 + pt
->col_tot
[j
] * Dr
));
2510 btau_cum
+= fij
* temp
* temp
;
2514 const double temp
= Cij
- Dij
;
2515 ctau_cum
+= fij
* temp
* temp
;
2518 if (proc
->statistics
& (1u << CRS_ST_GAMMA
))
2520 const double temp
= Q
* Cij
- P
* Dij
;
2521 gamma_cum
+= fij
* temp
* temp
;
2524 if (proc
->statistics
& (1u << CRS_ST_D
))
2526 d_yx_cum
+= fij
* pow2 (Dr
* (Cij
- Dij
)
2527 - (P
- Q
) * (pt
->total
- pt
->row_tot
[i
]));
2528 d_xy_cum
+= fij
* pow2 (Dc
* (Dij
- Cij
)
2529 - (Q
- P
) * (pt
->total
- pt
->col_tot
[j
]));
2532 if (++j
== pt
->n_cols
)
2534 assert (j
< pt
->n_cols
);
2536 Cij
-= pt
->col_tot
[j
] - cum
[j
+ i
* pt
->n_cols
];
2537 Dij
+= pt
->col_tot
[j
- 1] - cum
[j
- 1 + i
* pt
->n_cols
];
2541 Cij
+= cum
[j
- 1 + (i
- 1) * pt
->n_cols
];
2542 Dij
-= cum
[j
+ (i
- 1) * pt
->n_cols
];
2548 btau_var
= ((btau_cum
2549 - (pt
->total
* pow2 (pt
->total
* (P
- Q
) / sqrt (Dr
* Dc
) * (Dr
+ Dc
))))
2551 if (proc
->statistics
& (1u << CRS_ST_BTAU
))
2553 ase
[3] = sqrt (btau_var
);
2554 t
[3] = v
[3] / (2 * sqrt ((ctau_cum
- (P
- Q
) * (P
- Q
) / pt
->total
)
2557 if (proc
->statistics
& (1u << CRS_ST_CTAU
))
2559 ase
[4] = ((2 * q
/ ((q
- 1) * pow2 (pt
->total
)))
2560 * sqrt (ctau_cum
- (P
- Q
) * (P
- Q
) / pt
->total
));
2561 t
[4] = v
[4] / ase
[4];
2563 if (proc
->statistics
& (1u << CRS_ST_GAMMA
))
2565 ase
[5] = ((4. / ((P
+ Q
) * (P
+ Q
))) * sqrt (gamma_cum
));
2566 t
[5] = v
[5] / (2. / (P
+ Q
)
2567 * sqrt (ctau_cum
- (P
- Q
) * (P
- Q
) / pt
->total
));
2569 if (proc
->statistics
& (1u << CRS_ST_D
))
2571 somers_d_v
[0] = (P
- Q
) / (.5 * (Dc
+ Dr
));
2572 somers_d_ase
[0] = SYSMIS
;
2573 somers_d_t
[0] = (somers_d_v
[0]
2575 * sqrt (ctau_cum
- pow2 (P
- Q
) / pt
->total
)));
2576 somers_d_v
[1] = (P
- Q
) / Dc
;
2577 somers_d_ase
[1] = 2. / pow2 (Dc
) * sqrt (d_xy_cum
);
2578 somers_d_t
[1] = (somers_d_v
[1]
2580 * sqrt (ctau_cum
- pow2 (P
- Q
) / pt
->total
)));
2581 somers_d_v
[2] = (P
- Q
) / Dr
;
2582 somers_d_ase
[2] = 2. / pow2 (Dr
) * sqrt (d_yx_cum
);
2583 somers_d_t
[2] = (somers_d_v
[2]
2585 * sqrt (ctau_cum
- pow2 (P
- Q
) / pt
->total
)));
2591 /* Spearman correlation, Pearson's r. */
2592 if (proc
->statistics
& (1u << CRS_ST_CORR
))
2594 double
*R
= xmalloc (sizeof
*R
* pt
->n_rows
);
2595 double
*C
= xmalloc (sizeof
*C
* pt
->n_cols
);
2598 double y
, t
, c
= 0., s
= 0.;
2603 R
[i
] = s
+ (pt
->row_tot
[i
] + 1.) / 2.;
2604 y
= pt
->row_tot
[i
] - c
;
2608 if (++i
== pt
->n_rows
)
2610 assert (i
< pt
->n_rows
);
2615 double y
, t
, c
= 0., s
= 0.;
2620 C
[j
] = s
+ (pt
->col_tot
[j
] + 1.) / 2;
2621 y
= pt
->col_tot
[j
] - c
;
2625 if (++j
== pt
->n_cols
)
2627 assert (j
< pt
->n_cols
);
2631 calc_r (pt
, R
, C
, &v
[6], &t
[6], &ase
[6]);
2636 calc_r (pt
, (double
*) pt
->rows
, (double
*) pt
->cols
, &v
[7], &t
[7], &ase
[7]);
2639 /* Cohen's kappa. */
2640 if (proc
->statistics
& (1u << CRS_ST_KAPPA
) && pt
->ns_rows
== pt
->ns_cols
)
2642 double ase_under_h0
;
2643 double sum_fii
, sum_rici
, sum_fiiri_ci
, sum_fijri_ci2
, sum_riciri_ci
;
2646 for (sum_fii
= sum_rici
= sum_fiiri_ci
= sum_riciri_ci
= 0., i
= j
= 0;
2647 i
< pt
->ns_rows
; i
++, j
++)
2651 while (pt
->col_tot
[j
] == 0.)
2654 prod
= pt
->row_tot
[i
] * pt
->col_tot
[j
];
2655 sum
= pt
->row_tot
[i
] + pt
->col_tot
[j
];
2657 sum_fii
+= pt
->mat
[j
+ i
* pt
->n_cols
];
2659 sum_fiiri_ci
+= pt
->mat
[j
+ i
* pt
->n_cols
] * sum
;
2660 sum_riciri_ci
+= prod
* sum
;
2662 for (sum_fijri_ci2
= 0., i
= 0; i
< pt
->ns_rows
; i
++)
2663 for (j
= 0; j
< pt
->ns_cols
; j
++)
2665 double sum
= pt
->row_tot
[i
] + pt
->col_tot
[j
];
2666 sum_fijri_ci2
+= pt
->mat
[j
+ i
* pt
->n_cols
] * sum
* sum
;
2669 v
[8] = (pt
->total
* sum_fii
- sum_rici
) / (pow2 (pt
->total
) - sum_rici
);
2671 ase_under_h0
= sqrt ((pow2 (pt
->total
) * sum_rici
2672 + sum_rici
* sum_rici
2673 - pt
->total
* sum_riciri_ci
)
2674 / (pt
->total
* (pow2 (pt
->total
) - sum_rici
) * (pow2 (pt
->total
) - sum_rici
)));
2676 ase
[8] = sqrt (pt
->total
* (((sum_fii
* (pt
->total
- sum_fii
))
2677 / pow2 (pow2 (pt
->total
) - sum_rici
))
2678 + ((2. * (pt
->total
- sum_fii
)
2679 * (2. * sum_fii
* sum_rici
2680 - pt
->total
* sum_fiiri_ci
))
2681 / pow3 (pow2 (pt
->total
) - sum_rici
))
2682 + (pow2 (pt
->total
- sum_fii
)
2683 * (pt
->total
* sum_fijri_ci2
- 4.
2684 * sum_rici
* sum_rici
)
2685 / pow4 (pow2 (pt
->total
) - sum_rici
))));
2687 t
[8] = v
[8] / ase_under_h0
;
2693 /* Calculate risk estimate. */
2695 calc_risk (struct pivot_table
*pt
,
2696 double
*value
, double
*upper
, double
*lower
, union value
*c
)
2698 double f11
, f12
, f21
, f22
;
2704 for (i
= 0; i
< 3; i
++)
2705 value
[i
] = upper
[i
] = lower
[i
] = SYSMIS
;
2708 if (pt
->ns_rows
!= 2 || pt
->ns_cols
!= 2)
2715 for (i
= j
= 0; i
< pt
->n_cols
; i
++)
2716 if (pt
->col_tot
[i
] != 0.)
2725 f11
= pt
->mat
[nz_cols
[0]];
2726 f12
= pt
->mat
[nz_cols
[1]];
2727 f21
= pt
->mat
[nz_cols
[0] + pt
->n_cols
];
2728 f22
= pt
->mat
[nz_cols
[1] + pt
->n_cols
];
2730 c
[0] = pt
->cols
[nz_cols
[0]];
2731 c
[1] = pt
->cols
[nz_cols
[1]];
2734 value
[0] = (f11
* f22
) / (f12
* f21
);
2735 v
= sqrt (1. / f11
+ 1. / f12
+ 1. / f21
+ 1. / f22
);
2736 lower
[0] = value
[0] * exp (-1.960 * v
);
2737 upper
[0] = value
[0] * exp (1.960 * v
);
2739 value
[1] = (f11
* (f21
+ f22
)) / (f21
* (f11
+ f12
));
2740 v
= sqrt ((f12
/ (f11
* (f11
+ f12
)))
2741 + (f22
/ (f21
* (f21
+ f22
))));
2742 lower
[1] = value
[1] * exp (-1.960 * v
);
2743 upper
[1] = value
[1] * exp (1.960 * v
);
2745 value
[2] = (f12
* (f21
+ f22
)) / (f22
* (f11
+ f12
));
2746 v
= sqrt ((f11
/ (f12
* (f11
+ f12
)))
2747 + (f21
/ (f22
* (f21
+ f22
))));
2748 lower
[2] = value
[2] * exp (-1.960 * v
);
2749 upper
[2] = value
[2] * exp (1.960 * v
);
2754 /* Calculate directional measures. */
2756 calc_directional (struct crosstabs_proc
*proc
, struct pivot_table
*pt
,
2757 double v
[N_DIRECTIONAL
], double ase
[N_DIRECTIONAL
],
2758 double t
[N_DIRECTIONAL
], double sig
[N_DIRECTIONAL
])
2763 for (i
= 0; i
< N_DIRECTIONAL
; i
++)
2764 v
[i
] = ase
[i
] = t
[i
] = sig
[i
] = SYSMIS
;
2768 if (proc
->statistics
& (1u << CRS_ST_LAMBDA
))
2770 double
*fim
= xnmalloc (pt
->n_rows
, sizeof
*fim
);
2771 int
*fim_index
= xnmalloc (pt
->n_rows
, sizeof
*fim_index
);
2772 double
*fmj
= xnmalloc (pt
->n_cols
, sizeof
*fmj
);
2773 int
*fmj_index
= xnmalloc (pt
->n_cols
, sizeof
*fmj_index
);
2774 double sum_fim
, sum_fmj
;
2776 int rm_index
, cm_index
;
2779 /* Find maximum for each row and their sum. */
2780 for (sum_fim
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2782 double max
= pt
->mat
[i
* pt
->n_cols
];
2785 for (j
= 1; j
< pt
->n_cols
; j
++)
2786 if (pt
->mat
[j
+ i
* pt
->n_cols
] > max
)
2788 max
= pt
->mat
[j
+ i
* pt
->n_cols
];
2792 sum_fim
+= fim
[i
] = max
;
2793 fim_index
[i
] = index
;
2796 /* Find maximum for each column. */
2797 for (sum_fmj
= 0., j
= 0; j
< pt
->n_cols
; j
++)
2799 double max
= pt
->mat
[j
];
2802 for (i
= 1; i
< pt
->n_rows
; i
++)
2803 if (pt
->mat
[j
+ i
* pt
->n_cols
] > max
)
2805 max
= pt
->mat
[j
+ i
* pt
->n_cols
];
2809 sum_fmj
+= fmj
[j
] = max
;
2810 fmj_index
[j
] = index
;
2813 /* Find maximum row total. */
2814 rm
= pt
->row_tot
[0];
2816 for (i
= 1; i
< pt
->n_rows
; i
++)
2817 if (pt
->row_tot
[i
] > rm
)
2819 rm
= pt
->row_tot
[i
];
2823 /* Find maximum column total. */
2824 cm
= pt
->col_tot
[0];
2826 for (j
= 1; j
< pt
->n_cols
; j
++)
2827 if (pt
->col_tot
[j
] > cm
)
2829 cm
= pt
->col_tot
[j
];
2833 v
[0] = (sum_fim
+ sum_fmj
- cm
- rm
) / (2. * pt
->total
- rm
- cm
);
2834 v
[1] = (sum_fmj
- rm
) / (pt
->total
- rm
);
2835 v
[2] = (sum_fim
- cm
) / (pt
->total
- cm
);
2837 /* ASE1 for Y given PT. */
2842 for (i
= 0; i
< pt
->n_rows
; i
++)
2843 if (cm_index
== fim_index
[i
])
2845 ase
[2] = sqrt ((pt
->total
- sum_fim
) * (sum_fim
+ cm
- 2. * accum
)
2846 / pow3 (pt
->total
- cm
));
2849 /* ASE0 for Y given PT. */
2853 for (accum
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2854 if (cm_index
!= fim_index
[i
])
2855 accum
+= (pt
->mat
[i
* pt
->n_cols
+ fim_index
[i
]]
2856 + pt
->mat
[i
* pt
->n_cols
+ cm_index
]);
2857 t
[2] = v
[2] / (sqrt (accum
- pow2 (sum_fim
- cm
) / pt
->total
) / (pt
->total
- cm
));
2860 /* ASE1 for PT given Y. */
2865 for (j
= 0; j
< pt
->n_cols
; j
++)
2866 if (rm_index
== fmj_index
[j
])
2868 ase
[1] = sqrt ((pt
->total
- sum_fmj
) * (sum_fmj
+ rm
- 2. * accum
)
2869 / pow3 (pt
->total
- rm
));
2872 /* ASE0 for PT given Y. */
2876 for (accum
= 0., j
= 0; j
< pt
->n_cols
; j
++)
2877 if (rm_index
!= fmj_index
[j
])
2878 accum
+= (pt
->mat
[j
+ pt
->n_cols
* fmj_index
[j
]]
2879 + pt
->mat
[j
+ pt
->n_cols
* rm_index
]);
2880 t
[1] = v
[1] / (sqrt (accum
- pow2 (sum_fmj
- rm
) / pt
->total
) / (pt
->total
- rm
));
2883 /* Symmetric ASE0 and ASE1. */
2888 for (accum0
= accum1
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2889 for (j
= 0; j
< pt
->n_cols
; j
++)
2891 int temp0
= (fmj_index
[j
] == i
) + (fim_index
[i
] == j
);
2892 int temp1
= (i
== rm_index
) + (j
== cm_index
);
2893 accum0
+= pt
->mat
[j
+ i
* pt
->n_cols
] * pow2 (temp0
- temp1
);
2894 accum1
+= (pt
->mat
[j
+ i
* pt
->n_cols
]
2895 * pow2 (temp0
+ (v
[0] - 1.) * temp1
));
2897 ase
[0] = sqrt (accum1
- 4. * pt
->total
* v
[0] * v
[0]) / (2. * pt
->total
- rm
- cm
);
2898 t
[0] = v
[0] / (sqrt (accum0
- pow2 (sum_fim
+ sum_fmj
- cm
- rm
) / pt
->total
)
2899 / (2. * pt
->total
- rm
- cm
));
2902 for (i
= 0; i
< 3; i
++)
2903 sig
[i
] = 2 * gsl_cdf_ugaussian_Q (t
[i
]);
2912 double sum_fij2_ri
, sum_fij2_ci
;
2913 double sum_ri2
, sum_cj2
;
2915 for (sum_fij2_ri
= sum_fij2_ci
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2916 for (j
= 0; j
< pt
->n_cols
; j
++)
2918 double temp
= pow2 (pt
->mat
[j
+ i
* pt
->n_cols
]);
2919 sum_fij2_ri
+= temp
/ pt
->row_tot
[i
];
2920 sum_fij2_ci
+= temp
/ pt
->col_tot
[j
];
2923 for (sum_ri2
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2924 sum_ri2
+= pow2 (pt
->row_tot
[i
]);
2926 for (sum_cj2
= 0., j
= 0; j
< pt
->n_cols
; j
++)
2927 sum_cj2
+= pow2 (pt
->col_tot
[j
]);
2929 v
[3] = (pt
->total
* sum_fij2_ci
- sum_ri2
) / (pow2 (pt
->total
) - sum_ri2
);
2930 v
[4] = (pt
->total
* sum_fij2_ri
- sum_cj2
) / (pow2 (pt
->total
) - sum_cj2
);
2934 if (proc
->statistics
& (1u << CRS_ST_UC
))
2936 double UX
, UY
, UXY
, P
;
2937 double ase1_yx
, ase1_xy
, ase1_sym
;
2940 for (UX
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2941 if (pt
->row_tot
[i
] > 0.)
2942 UX
-= pt
->row_tot
[i
] / pt
->total
* log (pt
->row_tot
[i
] / pt
->total
);
2944 for (UY
= 0., j
= 0; j
< pt
->n_cols
; j
++)
2945 if (pt
->col_tot
[j
] > 0.)
2946 UY
-= pt
->col_tot
[j
] / pt
->total
* log (pt
->col_tot
[j
] / pt
->total
);
2948 for (UXY
= P
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2949 for (j
= 0; j
< pt
->n_cols
; j
++)
2951 double entry
= pt
->mat
[j
+ i
* pt
->n_cols
];
2956 P
+= entry
* pow2 (log (pt
->col_tot
[j
] * pt
->row_tot
[i
] / (pt
->total
* entry
)));
2957 UXY
-= entry
/ pt
->total
* log (entry
/ pt
->total
);
2960 for (ase1_yx
= ase1_xy
= ase1_sym
= 0., i
= 0; i
< pt
->n_rows
; i
++)
2961 for (j
= 0; j
< pt
->n_cols
; j
++)
2963 double entry
= pt
->mat
[j
+ i
* pt
->n_cols
];
2968 ase1_yx
+= entry
* pow2 (UY
* log (entry
/ pt
->row_tot
[i
])
2969 + (UX
- UXY
) * log (pt
->col_tot
[j
] / pt
->total
));
2970 ase1_xy
+= entry
* pow2 (UX
* log (entry
/ pt
->col_tot
[j
])
2971 + (UY
- UXY
) * log (pt
->row_tot
[i
] / pt
->total
));
2972 ase1_sym
+= entry
* pow2 ((UXY
2973 * log (pt
->row_tot
[i
] * pt
->col_tot
[j
] / pow2 (pt
->total
)))
2974 - (UX
+ UY
) * log (entry
/ pt
->total
));
2977 v
[5] = 2. * ((UX
+ UY
- UXY
) / (UX
+ UY
));
2978 ase
[5] = (2. / (pt
->total
* pow2 (UX
+ UY
))) * sqrt (ase1_sym
);
2981 v
[6] = (UX
+ UY
- UXY
) / UX
;
2982 ase
[6] = sqrt (ase1_xy
) / (pt
->total
* UX
* UX
);
2983 t
[6] = v
[6] / (sqrt (P
- pt
->total
* pow2 (UX
+ UY
- UXY
)) / (pt
->total
* UX
));
2985 v
[7] = (UX
+ UY
- UXY
) / UY
;
2986 ase
[7] = sqrt (ase1_yx
) / (pt
->total
* UY
* UY
);
2987 t
[7] = v
[7] / (sqrt (P
- pt
->total
* pow2 (UX
+ UY
- UXY
)) / (pt
->total
* UY
));
2991 if (proc
->statistics
& (1u << CRS_ST_D
))
2993 double v_dummy
[N_SYMMETRIC
];
2994 double ase_dummy
[N_SYMMETRIC
];
2995 double t_dummy
[N_SYMMETRIC
];
2996 double somers_d_v
[3];
2997 double somers_d_ase
[3];
2998 double somers_d_t
[3];
3000 if (calc_symmetric (proc
, pt
, v_dummy
, ase_dummy
, t_dummy
,
3001 somers_d_v
, somers_d_ase
, somers_d_t
))
3004 for (i
= 0; i
< 3; i
++)
3006 v
[8 + i
] = somers_d_v
[i
];
3007 ase
[8 + i
] = somers_d_ase
[i
];
3008 t
[8 + i
] = somers_d_t
[i
];
3009 sig
[8 + i
] = 2 * gsl_cdf_ugaussian_Q (fabs (somers_d_t
[i
]));
3015 if (proc
->statistics
& (1u << CRS_ST_ETA
))
3018 double sum_Xr
, sum_X2r
;
3022 for (sum_Xr
= sum_X2r
= 0., i
= 0; i
< pt
->n_rows
; i
++)
3024 sum_Xr
+= pt
->rows
[i
].f
* pt
->row_tot
[i
];
3025 sum_X2r
+= pow2 (pt
->rows
[i
].f
) * pt
->row_tot
[i
];
3027 SX
= sum_X2r
- pow2 (sum_Xr
) / pt
->total
;
3029 for (SXW
= 0., j
= 0; j
< pt
->n_cols
; j
++)
3033 for (cum
= 0., i
= 0; i
< pt
->n_rows
; i
++)
3035 SXW
+= pow2 (pt
->rows
[i
].f
) * pt
->mat
[j
+ i
* pt
->n_cols
];
3036 cum
+= pt
->rows
[i
].f
* pt
->mat
[j
+ i
* pt
->n_cols
];
3039 SXW
-= cum
* cum
/ pt
->col_tot
[j
];
3041 v
[11] = sqrt (1. - SXW
/ SX
);
3045 double sum_Yc
, sum_Y2c
;
3049 for (sum_Yc
= sum_Y2c
= 0., i
= 0; i
< pt
->n_cols
; i
++)
3051 sum_Yc
+= pt
->cols
[i
].f
* pt
->col_tot
[i
];
3052 sum_Y2c
+= pow2 (pt
->cols
[i
].f
) * pt
->col_tot
[i
];
3054 SY
= sum_Y2c
- sum_Yc
* sum_Yc
/ pt
->total
;
3056 for (SYW
= 0., i
= 0; i
< pt
->n_rows
; i
++)
3060 for (cum
= 0., j
= 0; j
< pt
->n_cols
; j
++)
3062 SYW
+= pow2 (pt
->cols
[j
].f
) * pt
->mat
[j
+ i
* pt
->n_cols
];
3063 cum
+= pt
->cols
[j
].f
* pt
->mat
[j
+ i
* pt
->n_cols
];
3066 SYW
-= cum
* cum
/ pt
->row_tot
[i
];
3068 v
[12] = sqrt (1. - SYW
/ SY
);