1 AT_BANNER([LOGISTIC REGRESSION])
3 dnl These examples are adapted from
4 dnl http://www.uvm.edu/~dhowell/gradstat/psych341/lectures/Logistic%20Regression/LogisticReg1.html
8 m4_define([LOGIT_TEST_DATA],
9 [AT_DATA([lr-data.txt], dnl
10 105.00 1.00 33.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
11 106.00 1.00 50.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
12 107.00 1.00 91.00 3.00 2.00 .28 15.00 7.00 .00897 -1.00905 1
13 108.00 1.00 90.00 3.00 2.00 .20 2.00 2.00 .00972 -1.00982 1
14 109.00 1.00 70.00 3.00 3.00 .38 23.00 27.00 .04745 -1.04981 1
15 111.00 2.00 31.00 2.00 2.00 .00 19.00 10.00 .54159 1.84640 1
16 112.00 1.00 91.00 2.00 3.00 .18 6.00 16.00 .00897 -1.00905 1
17 113.00 1.00 81.00 3.00 2.00 .00 3.00 9.00 .01998 -1.02039 1
18 114.00 2.00 15.00 1.00 2.00 .13 19.00 13.00 .81241 1.23090 1
19 116.00 2.00 1.00 1.00 2.00 .88 15.00 7.00 .93102 1.07410 1
20 117.00 1.00 93.00 3.00 2.00 .18 9.00 15.00 .00764 -1.00770 1
21 118.00 2.00 14.00 1.00 3.00 .15 23.00 18.00 .82447 1.21289 1
22 120.00 1.00 91.00 2.00 2.00 .43 17.00 14.00 .00897 -1.00905 1
23 121.00 1.00 55.00 3.00 2.00 .69 20.00 14.00 .14409 -1.16834 1
24 122.00 1.00 70.00 2.00 3.00 .03 .00 6.00 .04745 -1.04981 1
25 123.00 1.00 25.00 2.00 2.00 .45 4.00 10.00 .65789 -2.92301 1
26 125.00 1.00 91.00 2.00 2.00 .13 .00 3.00 .00897 -1.00905 1
27 126.00 1.00 91.00 3.00 3.00 .23 4.00 6.00 .00897 -1.00905 1
28 127.00 1.00 91.00 3.00 2.00 .00 8.00 8.00 .00897 -1.00905 1
29 128.00 2.00 13.00 2.00 2.00 .65 16.00 14.00 .83592 1.19629 1
30 129.00 1.00 50.00 2.00 2.00 .25 20.00 23.00 .20168 -1.25264 1
31 135.00 1.00 90.00 3.00 3.00 .03 5.00 12.00 .00972 -1.00982 1
32 138.00 1.00 70.00 3.00 3.00 .10 1.00 6.00 .04745 -1.04981 1
33 139.00 2.00 19.00 3.00 3.00 .10 11.00 12.00 .75787 1.31949 1
34 149.00 2.00 50.00 3.00 2.00 .03 .00 .00 .20168 4.95826 1
35 204.00 1.00 50.00 3.00 1.00 .13 .00 1.00 .20168 -1.25264 1
36 205.00 1.00 91.00 3.00 3.00 .72 16.00 18.00 .00897 -1.00905 1
37 206.00 2.00 24.00 1.00 1.00 .10 5.00 21.00 .67592 1.47947 1
38 207.00 1.00 80.00 3.00 3.00 .13 6.00 7.00 .02164 -1.02212 1
39 208.00 1.00 87.00 2.00 2.00 .18 9.00 20.00 .01237 -1.01253 1
40 209.00 1.00 70.00 2.00 2.00 .53 15.00 12.00 .04745 -1.04981 1
41 211.00 1.00 55.00 2.00 1.00 .33 8.00 5.00 .14409 -1.16834 1
42 212.00 1.00 56.00 3.00 1.00 .30 6.00 20.00 .13436 -1.15522 1
43 214.00 1.00 54.00 2.00 2.00 .15 .00 16.00 .15439 -1.18258 1
44 215.00 1.00 71.00 3.00 3.00 .35 12.00 12.00 .04391 -1.04592 1
45 217.00 2.00 36.00 1.00 1.00 .10 12.00 8.00 .44049 2.27020 1
46 218.00 1.00 91.00 2.00 2.00 .05 11.00 25.00 .00897 -1.00905 1
47 219.00 1.00 91.00 2.00 2.00 1.23 11.00 24.00 .00897 -1.00905 1
48 220.00 1.00 91.00 2.00 3.00 .08 8.00 11.00 .00897 -1.00905 1
49 221.00 1.00 91.00 2.00 2.00 .33 5.00 11.00 .00897 -1.00905 1
50 222.00 2.00 36.00 2.00 1.00 .18 5.00 3.00 .44049 2.27020 1
51 223.00 1.00 70.00 2.00 3.00 .18 14.00 3.00 .04745 -1.04981 1
52 224.00 1.00 91.00 2.00 2.00 .43 2.00 10.00 .00897 -1.00905 1
53 225.00 1.00 55.00 2.00 1.00 .18 6.00 11.00 .14409 -1.16834 1
54 229.00 2.00 75.00 2.00 2.00 .40 30.00 25.00 .03212 31.12941 1
55 232.00 1.00 91.00 3.00 2.00 .15 6.00 3.00 .00897 -1.00905 1
56 233.00 1.00 70.00 2.00 1.00 .00 11.00 8.00 .04745 -1.04981 1
57 234.00 1.00 54.00 3.00 2.00 .10 .00 .00 .15439 -1.18258 1
58 237.00 1.00 70.00 3.00 2.00 .18 5.00 25.00 .04745 -1.04981 1
59 241.00 1.00 19.00 2.00 3.00 .33 13.00 9.00 .75787 -4.12995 1
60 304.00 2.00 18.00 2.00 2.00 .26 25.00 6.00 .77245 1.29458 1
61 305.00 1.00 88.00 3.00 2.00 1.35 17.00 29.00 .01142 -1.01155 1
62 306.00 1.00 70.00 2.00 3.00 .63 14.00 33.00 .04745 -1.04981 1
63 307.00 1.00 85.00 2.00 2.00 2.65 18.00 14.00 .01452 -1.01474 1
64 308.00 1.00 13.00 2.00 2.00 .23 5.00 5.00 .83592 -6.09442 1
65 309.00 2.00 13.00 2.00 2.00 .23 7.00 17.00 .83592 1.19629 1
66 311.00 2.00 1.00 2.00 2.00 .50 20.00 14.00 .93102 1.07410 1
67 315.00 1.00 19.00 2.00 3.00 .18 1.00 11.00 .75787 -4.12995 1
68 316.00 1.00 88.00 2.00 2.00 .38 12.00 11.00 .01142 -1.01155 2
69 318.00 1.00 88.00 3.00 2.00 .03 5.00 5.00 .01142 -1.01155 3
70 319.00 2.00 18.00 2.00 3.00 .30 15.00 16.00 .77245 1.29458 1
71 321.00 2.00 15.00 2.00 2.00 .63 15.00 18.00 .81241 1.23090 1
72 322.00 1.00 88.00 3.00 2.00 .40 18.00 15.00 .01142 -1.01155 1
73 325.00 2.00 18.00 2.00 3.00 1.00 28.00 18.00 .77245 1.29458 1
74 329.00 1.00 88.00 3.00 2.00 .03 7.00 11.00 .01142 -1.01155 4
75 332.00 2.00 2.00 2.00 2.00 .05 8.00 9.00 .92562 1.08036 1
78 dnl Note: In the above data, cases 305, 316, 318 and 329 have identical values
79 dnl of the 2nd and 3rd variables. We use this for weight testing.
81 AT_SETUP([LOGISTIC REGRESSION basic test])
85 AT_DATA([lr-data.sps], [dnl
88 data list notable file='lr-data.txt'
89 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
92 variables = outcome with survrate
96 AT_CHECK([pspp -O format=csv lr-data.sps], [0],
98 Table: Dependent Variable Encoding
99 Original Value,Internal Value
103 Table: Case Processing Summary
104 Unweighted Cases,N,Percent
105 Included in Analysis,66,100.000
109 note: Estimation terminated at iteration number 6 because parameter estimates changed by less than 0.001
112 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
115 Table: Classification Table
117 ,,,outcome,,"Percentage
119 ,Observed,,1.000,2.000,
120 Step 1,outcome,1.000,43,5,89.583
122 ,Overall Percentage,,,,86.364
124 Table: Variables in the Equation
125 ,,B,S.E.,Wald,df,Sig.,Exp(B)
126 Step 1,survrate,-.081,.019,17.756,1,.000,.922
127 ,Constant,2.684,.811,10.941,1,.001,14.639
133 AT_SETUP([LOGISTIC REGRESSION missing values])
137 AT_DATA([lr-data.sps], [dnl
140 data list notable file='lr-data.txt'
141 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
143 missing values survrate (999) avoid (44444) outcome (99).
146 variables = outcome with survrate avoid
150 AT_CHECK([pspp -O format=csv lr-data.sps > run0], [0], [ignore])
152 dnl Append some cases with missing values into the data.
153 cat >> lr-data.txt << HERE
154 105.00 1.00 999.00 3.00 2.00 .35 17.00 20.00 .50110 -2.00440 1
155 106.00 1.00 999.00 2.00 3.00 .38 7.00 15.00 .20168 -1.25264 1
156 107.00 1.00 5.00 3.00 2.00 .28 44444 34 .00897 -1.00905 1
157 108.00 99 5.00 3.00 2.00 .28 4 34 .00897 -1.00905 1
160 AT_CHECK([pspp -O format=csv lr-data.sps > run1], [0], [ignore])
162 dnl Only the summary information should be different
163 AT_CHECK([diff run0 run1], [1], [dnl
165 < Included in Analysis,66,100.000
166 < Missing Cases,0,.000
169 > Included in Analysis,66,94.286
170 > Missing Cases,4,5.714
178 dnl Check that a weighted dataset is interpreted correctly
179 dnl To do this, the same data set is used, one weighted, one not.
180 dnl The weighted dataset omits certain cases which are identical
181 AT_SETUP([LOGISTIC REGRESSION weights])
185 AT_DATA([lr-data-unweighted.sps], [dnl
188 data list notable file='lr-data.txt'
189 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
192 variables = outcome with survrate
196 AT_DATA([lr-data-weighted.sps], [dnl
199 data list notable file='lr-data.txt'
200 list /id outcome survrate prognos amttreat gsi avoid intrus pre_1 lre_1 w *.
204 * Omit duplicate cases.
205 select if id <> 305 and id <> 316 and id <> 318.
208 variables = outcome with survrate
213 AT_CHECK([pspp -O format=csv lr-data-unweighted.sps > unweighted-result], [0], [ignore])
214 AT_CHECK([pspp -O format=csv lr-data-weighted.sps > weighted-result], [0], [ignore])
216 dnl The only difference should be the summary information, since
217 dnl this displays the unweighted totals.
218 AT_CHECK([diff unweighted-result weighted-result], [1], [dnl
220 < Included in Analysis,66,100.000
222 > Included in Analysis,63,100.000
228 < Step 1,outcome,1.000,43,5,89.583
229 < ,,2.000,4,14,77.778
231 > Step 1,outcome,1.000,43.000,5.000,89.583
232 > ,,2.000,4.000,14.000,77.778
239 dnl Check that the /NOCONST option works as intended.
240 dnl The results this produces are very similar to those
241 dnl in the example at http://www.ats.ucla.edu/stat/SPSS/faq/logregconst.htm
242 AT_SETUP([LOGISTIC REGRESSION without constant])
244 AT_DATA([non-const.sps], [dnl
249 compute female = (#i > 91).
255 compute constant = 1.
257 logistic regression female with constant /noconst.
260 AT_CHECK([pspp -O format=csv non-const.sps], [0],
262 Table: Dependent Variable Encoding
263 Original Value,Internal Value
267 Table: Case Processing Summary
268 Unweighted Cases,N,Percent
269 Included in Analysis,200,100.000
273 note: Estimation terminated at iteration number 2 because parameter estimates changed by less than 0.001
276 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
279 Table: Classification Table
281 ,,,female,,"Percentage
284 Step 1,female,.00,0,91,.000
286 ,Overall Percentage,,,,54.500
288 Table: Variables in the Equation
289 ,,B,S.E.,Wald,df,Sig.,Exp(B)
290 Step 1,constant,.180,.142,1.616,1,.204,1.198
297 dnl Check that if somebody passes a dependent variable which is not dichotomous,
298 dnl then an error is raised.
299 AT_SETUP([LOGISTIC REGRESSION non-dichotomous dep var])
301 AT_DATA([non-dich.sps], [dnl
302 data list notable list /y x1 x2 x3 x4.
309 logistic regression y with x1 x2 x3 x4.
312 AT_CHECK([pspp -O format=csv non-dich.sps], [1],
314 error: Dependent variable's values are not dichotomous.
321 dnl An example to check the behaviour of LOGISTIC REGRESSION with a categorical
322 dnl variable. This example was inspired by the one at:
323 dnl http://www.ats.ucla.edu/stat/spss/dae/logit.htm
324 AT_SETUP([LOGISTIC REGRESSION with categorical])
326 AT_DATA([lr-cat.data], [dnl
729 AT_DATA([lr-cat.sps], [dnl
732 data list notable list file='lr-cat.data' /b1 b2 bcat y.
740 AT_CHECK([pspp -O format=csv lr-cat.sps], [0],
742 Table: Dependent Variable Encoding
743 Original Value,Internal Value
747 Table: Case Processing Summary
748 Unweighted Cases,N,Percent
749 Included in Analysis,400,100.000
753 note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
756 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
759 Table: Categorical Variables' Codings
760 ,,,Parameter coding,,
761 ,,Frequency,(1),(2),(3)
767 Table: Classification Table
771 ,Observed,,4.000,9.000,
772 Step 1,y,4.000,254,19,93.040
774 ,Overall Percentage,,,,71.000
776 Table: Variables in the Equation
777 ,,B,S.E.,Wald,df,Sig.,Exp(B)
778 Step 1,b1,.002,.001,4.284,1,.038,1.002
779 ,b2,.804,.332,5.872,1,.015,2.235
780 ,bcat,,,20.895,3,.000,
781 ,bcat(1),1.551,.418,13.788,1,.000,4.718
782 ,bcat(2),.876,.367,5.706,1,.017,2.401
783 ,bcat(3),.211,.393,.289,1,.591,1.235
784 ,Constant,-5.541,1.138,23.709,1,.000,.004
791 dnl This example is inspired by http://www.ats.ucla.edu/stat/spss/output/logistic.htm
792 AT_SETUP([LOGISTIC REGRESSION with cat var 2])
794 AT_DATA([lr-cat2.data], [dnl
795 60.00 1.00 8.00 50.00
797 57.00 1.00 7.00 53.00
805 68.00 1.00 9.00 69.00
809 57.00 1.00 7.00 61.00
810 55.00 1.00 8.00 50.00
813 50.00 1.00 9.00 66.00
817 47.00 1.00 7.00 34.00
829 68.00 1.00 9.00 69.00
831 63.00 1.00 9.00 61.00
832 65.00 1.00 9.00 61.00
833 63.00 1.00 9.00 53.00
837 52.00 1.00 7.00 56.00
839 47.00 1.00 7.00 53.00
841 50.00 1.00 8.00 55.00
852 68.00 1.00 9.00 55.00
853 47.00 1.00 8.00 50.00
861 55.00 1.00 9.00 49.00
862 68.00 1.00 8.00 50.00
863 52.00 1.00 9.00 63.00
866 66.00 1.00 9.00 61.00
867 65.00 1.00 7.00 58.00
869 68.00 1.00 7.00 59.00
870 60.00 1.00 9.00 61.00
872 57.00 1.00 7.00 54.00
881 63.00 1.00 7.00 63.00
883 57.00 1.00 8.00 63.00
890 65.00 1.00 9.00 63.00
895 63.00 1.00 9.00 55.00
904 47.00 1.00 9.00 69.00
909 50.00 1.00 7.00 63.00
912 73.00 1.00 9.00 61.00
917 57.00 1.00 8.00 55.00
918 53.00 1.00 8.00 57.00
922 57.00 1.00 8.00 58.00
932 73.00 1.00 8.00 69.00
933 71.00 1.00 9.00 58.00
935 63.00 1.00 7.00 54.00
941 65.00 1.00 8.00 55.00
942 76.00 1.00 9.00 67.00
943 71.00 1.00 8.00 66.00
945 47.00 1.00 9.00 63.00
948 54.00 1.00 9.00 55.00
949 55.00 1.00 8.00 58.00
951 55.00 1.00 9.00 63.00
960 65.00 1.00 9.00 66.00
964 63.00 1.00 8.00 72.00
970 73.00 1.00 9.00 58.00
972 63.00 1.00 9.00 69.00
974 65.00 1.00 9.00 66.00
975 73.00 1.00 8.00 63.00
984 60.00 1.00 9.00 50.00
986 73.00 1.00 9.00 55.00
987 52.00 1.00 8.00 47.00
997 AT_DATA([stringcat.sps], [dnl
999 data list notable file='lr-cat2.data' list /read honcomp wiz science *.
1002 recode wiz (7 = "a") (8 = "b") (9 = "c") into ses.
1004 logistic regression honcomp with read science ses
1009 AT_CHECK([pspp -O format=csv stringcat.sps], [0],
1011 Table: Dependent Variable Encoding
1012 Original Value,Internal Value
1016 Table: Case Processing Summary
1017 Unweighted Cases,N,Percent
1018 Included in Analysis,200,100.000
1019 Missing Cases,0,.000
1022 note: Estimation terminated at iteration number 5 because parameter estimates changed by less than 0.001
1024 Table: Model Summary
1025 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
1028 Table: Categorical Variables' Codings
1029 ,,,Parameter coding,
1035 Table: Classification Table
1037 ,,,honcomp,,"Percentage
1039 ,Observed,,.000,1.000,
1040 Step 1,honcomp,.000,132,15,89.796
1041 ,,1.000,26,27,50.943
1042 ,Overall Percentage,,,,79.500
1044 Table: Variables in the Equation
1045 ,,B,S.E.,Wald,df,Sig.,Exp(B)
1046 Step 1,read,.098,.025,15.199,1,.000,1.103
1047 ,science,.066,.027,5.867,1,.015,1.068
1048 ,ses,,,6.690,2,.035,
1049 ,ses(1),.058,.532,.012,1,.913,1.060
1050 ,ses(2),-1.013,.444,5.212,1,.022,.363
1051 ,Constant,-9.561,1.662,33.113,1,.000,.000
1057 dnl Check that it doesn't crash if a categorical variable
1058 dnl has only one distinct value
1059 AT_SETUP([LOGISTIC REGRESSION identical categories])
1061 AT_DATA([crash.sps], [dnl
1062 data list notable list /y x1 x2*.
1068 logistic regression y with x1 x2
1072 AT_CHECK([pspp -O format=csv crash.sps], [1], [ignore])
1077 dnl Test that missing values on the categorical predictors are treated
1079 AT_SETUP([LOGISTIC REGRESSION missing categoricals])
1081 AT_DATA([data.txt], [dnl
1184 AT_DATA([miss.sps], [dnl
1185 data list notable file='data.txt' list /y x1 cat0*.
1187 logistic regression y with x1 cat0
1188 /categorical = cat0.
1191 AT_CHECK([pspp -O format=csv miss.sps > file1], [0], [ignore])
1193 dnl Append a case with a missing categorical.
1194 AT_CHECK([echo '1 34 .' >> data.txt], [0], [ignore])
1196 AT_CHECK([pspp -O format=csv miss.sps > file2], [0], [ignore])
1198 AT_CHECK([diff file1 file2], [1], [dnl
1200 < Included in Analysis,100,100.00
1201 < Missing Cases,0,.00
1204 > Included in Analysis,100,99.01
1205 > Missing Cases,1,.99
1212 dnl Check that the confidence intervals are properly reported.
1213 dnl Use an example with categoricals, because that was buggy at
1214 dnl one point. The data in this example comes from:
1215 dnl http://people.ysu.edu/~gchang/SPSSE/SPSS_lab2Regression.pdf
1216 AT_SETUP([LOGISTIC REGRESSION confidence interval])
1218 AT_DATA([ci.sps], [dnl
1220 data list notable list /disease age sciostat sector savings *.
1421 disease WITH age sciostat sector savings
1422 /categorical = sciostat sector
1426 AT_CHECK([pspp -O format=csv ci.sps], [0], [dnl
1427 Table: Dependent Variable Encoding
1428 Original Value,Internal Value
1432 Table: Case Processing Summary
1433 Unweighted Cases,N,Percent
1434 Included in Analysis,196,100.000
1435 Missing Cases,0,.000
1438 note: Estimation terminated at iteration number 4 because parameter estimates changed by less than 0.001
1440 Table: Model Summary
1441 Step 1,-2 Log likelihood,Cox & Snell R Square,Nagelkerke R Square
1444 Table: Categorical Variables' Codings
1445 ,,,Parameter coding,
1447 sciostat,1.000,77,1,0
1453 Table: Classification Table
1455 ,,,disease,,"Percentage
1457 ,Observed,,.000,1.000,
1458 Step 1,disease,.000,131,8,94.245
1459 ,,1.000,41,16,28.070
1460 ,Overall Percentage,,,,75.000
1462 Table: Variables in the Equation
1463 ,,,,,,,,95% CI for Exp(B),
1464 ,,B,S.E.,Wald,df,Sig.,Exp(B),Lower,Upper
1465 Step 1,age,.027,.009,8.647,1,.003,1.027,1.009,1.045
1466 ,savings,.061,.386,.025,1,.874,1.063,.499,2.264
1467 ,sciostat,,,.440,2,.803,,,
1468 ,sciostat(1),-.278,.434,.409,1,.522,.757,.323,1.775
1469 ,sciostat(2),-.219,.459,.227,1,.634,.803,.327,1.976
1470 ,sector,,,11.974,1,.001,,,
1471 ,sector(1),-1.235,.357,11.974,1,.001,.291,.145,.586
1472 ,Constant,-.814,.452,3.246,1,.072,.443,,