3 Copyright (C) 2005-2017 Mario Rodriguez Riotorto
5 This program is free software; you can redistribute
6 it and/or modify it under the terms of the
7 GNU General Public License as published by
8 the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it
12 will be useful, but WITHOUT ANY WARRANTY;
13 without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details at
16 http://www.gnu.org/copyleft/gpl.html
22 This is a set of Maxima functions for descriptive statistics.
24 This library supports two types of data:
25 a) lists storing univariate samples, like [34,23,76,45,32,...]
31 b) matrices storing multivariate samples, like
32 matrix([34,44,23],[87,23,54],....); in this case, the number
33 of columns equals the dimension of the multivariate random
34 variable, and the number of rows is the sample size.
36 (%i2) matrix([a,b],[c,d],[e,f]);
44 (%o3) [---------, ---------]
47 Lists of multiple samples with equal or different sizes are not
48 directly supported, but you can use the function 'map' as in the
50 (%i4) map(mean,[[a,b,c],[a,b]]);
52 (%o4) [---------, -----]
55 These are the functions implemented in this library,
56 (see comments bellow for interpretation):
60 continuous_freq: frequencies for continuous data
61 discrete_freq: frequencies for discrete data
62 subsample: subsample extraction
64 Univariate descriptive statistics:
66 smin: sample minimum value
67 smax: sample maximum value
69 noncentral_moment: non central moment
70 central_moment: central moment
71 var: variance (divided by n)
72 std: standard deviation based on var
73 var1: variance (divided by n-1)
74 std1: standard deviation based on var1
77 qrange: interquartilic range
78 skewness: skewness coefficient
79 kurtosis: kurtosis coefficient
80 harmonic_mean: harmonic mean
81 geometric_mean: geometric mean
82 cv: variation coefficient
83 mean_deviation: mean deviation
84 median_deviation: median deviation
85 pearson_skewness: Pearson's skewness coefficient
86 quartile_skewness: quartilic skewness coefficient
87 km: Kaplan-Meier estimator of the survival function
88 cdf_empirical: empirical cumulative distribution function
90 Multivariate descriptive statistics:
91 cov: covariance matrix (divided by n)
92 cov1: covariance matrix (divided by n-1)
93 cor: correlation matrix
94 global_variances: gives a list with
98 generalized standard deviation
100 effective standard deviation
101 list_correlations: gives a list with
103 multiple correlation coefficients
104 partial correlation coefficients
107 Statistical diagrams:
117 Johnson, A.J., Wichern, D.W. (1998) Applied Multivariate Statistical
118 Analysis. Prentice Hall.
119 Pe~na, D. (2002) An'alisis de datos multivariantes. McGraw-Hill.
121 Thanks to Robert Dodier and Barton Willis for their help.
123 For questions, suggestions, bugs and the like, feel free
126 riotorto AT yahoo DOT com
127 http://tecnostats.net
132 put('descriptive, 1, 'version) $
134 if not get('draw,'version) then load("draw") $
136 load("descriptive_util.lisp");
139 /* AUXILIARY FUNCTIONS */
143 /* Computes the trace of a matrix */
144 matrixtrace(m):=block([n:length(m)],
145 if matrixp(m) and n=length(m[1])
146 then apply("+",makelist(m[i,i],i,1,n)))$
149 /* True if the argument is a list of numbers, false otherwise. */
150 listofnumbersp(y):=listp(y) and every('identity,map('numberp,y))$
153 /* True if the argument is a list containing */
154 /* no lists, false otherwise. */
155 listofexpr(y):=listp(y) and not some('identity,map('listp,y))$
158 /* True if the argument is a list of lists containing only numbers, */
159 /* false otherwise. */
160 listoflistsp(y):=listp(y) and every('identity,map('listofnumbersp,y))$
163 /* True if the argument is a list of lists, all of */
164 /* them of equal size. */
165 listsofequalsize(y) :=
168 every(lambda([z], length(z) = length(y[1])), y) $
172 /* DATA TRANSFORMATION */
175 /* Sub-sample matrix selection. */
176 /* Example: subsample(m,lambda([v],v[1]<3 and v[4]=A),3,2) */
177 /* gives the 3rd an 2nd components, in this order, of */
178 /* those rows of matrix m whose first component is */
179 /* less than 3 and fourth component equals A. */
180 subsample(mat,cond,[cols]):=
181 block([tempvect, tempmat:[]],
183 then cols: makelist(i,i,1,length(mat[1])),
189 tempvect: endcons(obs[i], tempvect),
190 tempmat: endcons(tempvect, tempmat)),
191 apply('matrix, tempmat))$
194 /* Subtract the mean and divide by the standard deviation */
197 then (m-mean(m)) / std(m)
198 elseif listoflistsp(m)
199 then makelist(standardize(k), k, m)
201 then block([av, dev, m2: copymatrix(m)],
204 for k:1 thru length(m2) do m2[k] : (m2[k]-av) / dev,
206 else error("standardize: unknown data format") $
209 /* Returns a new matrix with transformed, repeated, reordered, or removed columns */
210 transform_sample(m, varnames, expressions) :=
213 then (tm: args(transpose(m)),
214 transpose(apply('matrix, map('ev, psubst(map("=",varnames,tm), expressions)))))
215 else error("transform_sample: argument must be a matrix")) $
218 /* Builds a sample from a table of absolute frequencies. */
219 /* The input table can be a matrix or a list of lists, all of */
220 /* them of equal size. The number of columns or the length of */
221 /* the lists must be greater than 1. The last element of each */
222 /* row or list is interpreted as the absolute frequency. */
223 /* The output is always a sample in matrix form */
228 elseif not listsofequalsize(tbl)
229 then error("Table frequency has not the correct format"),
233 block([butlast: reverse(rest(reverse(a)))],
234 makelist(butlast,i,1,last(a)))),tbl)))) $
241 /* FREQUENCY COUNTERS */
244 /* Divides the range in intervals and counts how many values */
245 /* are inside them. The second argument is optional and either */
246 /* equals the number of classes we want; 10 by default, OR */
247 /* equals a list containing the class limits and the number of */
248 /* classes we want, OR a list containing only the limits. */
249 /* If sample values are all equal, this function returns only */
250 /* one class of amplitude 2 */
251 continuous_freq (lis, [opt]) :=
253 then apply (continuous_freq_array, cons (fillarray (make_array (any, length (lis)), lis), opt))
255 then apply (continuous_freq_array, cons (lis, opt))
256 else error ("continuous_freq: argument must be a list or Lisp array; found: ", lis);
258 continuous_freq_array (lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index,bins],
260 then [[minf, inf], [0]]
261 elseif length(opt) > 0 and setp(opt[1])
262 then /* arbitrary bins */
263 ( lim: sort(listify(opt[1])),
267 then error("continuous_freq: at least two bin numbers are needed.")
268 else ( bins: makelist([lim[i], lim[i + 1]], i, 1, length(lim) - 1),
269 fr: count_array_by_bins (lis, bins),
271 else /* bins of equal amplitude */
273 then /* either a list comprising min, max, and number of classes,
274 * or a list comprising just min and max,
275 * or a number of classes
277 (if length(opt) = 1 and listp(opt[1])
278 then (if length(opt[1]) = 2 or length(opt[1]) = 3
279 then ([mini, maxi]: [opt[1][1], opt[1][2]],
280 if length(opt[1]) = 3
283 elseif length(opt) = 1 and integerp(opt[1])
284 then ([mini, maxi]: vector_min_max (lis),
287 error ("continuous_freq: unrecognized optional argument:", opt))
288 else /* default classes */
289 ([mini, maxi] : vector_min_max (lis),
294 then [[mini-1,maxi+1],[?length(lis)]]
295 else ( for i:1 thru nc do lim:endcons(mini+amp*i,lim),
296 bins : makelist ([lim[i], lim[i + 1]], i, 1, length(lim) - 1),
297 fr : count_array_by_bins (lis, bins),
300 count_by_bins (xx, bins) :=
302 then count_array_by_bins (fillarray (make_array (any, length (xx)), xx), bins)
304 then count_array_by_bins (xx, bins);
306 count_array_by_bins (xx, bins) := block ([counts : makelist (0, length (bins))],
307 xx : ?sort (?copy\-seq (xx), 'orderlessp),
308 for k thru length (bins)
309 do block ([i_first : find_index_first (xx, bins[k][1],
310 if k > 1 and bins[k][1] = bins[k - 1][2] then ">" else ">="),
311 i_last : find_index_last (xx, bins[k][2], "<=")],
312 counts[k] : if i_last = false or i_first = false then 0 else i_last - i_first + 1),
315 /* assume xx is a sorted Lisp array; find least i s.t. xx[i] > x or xx[i] >= x */
317 find_index_first (xx, x, comparison) := block ([n : ?length (xx)],
318 if comparison(xx[n - 1], x)
319 then if comparison(xx[0], x)
321 else find_index_first_1 (xx, x, 0, n - 1, comparison));
323 find_index_first_1 (xx, x, i0, i1, comparison) :=
324 if i1 - i0 <= 1 then i1
326 block ([i : floor (i0 + (i1 - i0) / 2)],
327 if comparison(xx[i], x)
328 then find_index_first_1 (xx, x, i0, i, comparison)
329 else find_index_first_1 (xx, x, i, i1, comparison));
331 /* assume xx is a sorted Lisp array; find greatest i s.t. xx[i] < x or xx[i] <= x */
333 find_index_last (xx, x, comparison) := block ([n : ?length (xx)],
334 if comparison(xx[0], x)
335 then if comparison(xx[n - 1], x)
337 else find_index_last_1 (xx, x, 0, n - 1, comparison));
339 find_index_last_1 (xx, x, i0, i1, comparison) :=
340 if i1 - i0 <= 1 then i0
342 block ([i : floor (i0 + (i1 - i0) / 2)],
343 if comparison(xx[i], x)
344 then find_index_last_1 (xx, x, i, i1, comparison)
345 else find_index_last_1 (xx, x, i0, i, comparison));
348 /* Counts the frequency of each element in 'lis', its elements */
349 /* can be numbers, Maxima expressions or strings. */
353 then discrete_freq_array (fillarray (make_array (any, length (l)), l))
355 then discrete_freq_array (l)
356 else error ("discrete_freq: argument must be a list or Lisp array; found: ", l);
358 discrete_freq_array (a) := block ([u],
359 a : ?sort (?copy\-seq (a), 'orderlessp),
360 u : unique_in_sorted_array (a),
361 [u, map (lambda ([u1], find_index_last (a, u1, lambda ([x,y], not ordergreatp(x, y)))
362 - find_index_first (a, u1, lambda([x,y], not orderlessp(x, y)))
366 /* UNIVARIATE DESCRIPTIVE STATISTICS */
369 /* Arithmetic mean */
371 block ([listarith: true, sum_w, n],
373 [w, sum_w]: if w = [] then [1, n] else [w[1], if listp (w[1]) then lsum (w1, w1, w[1]) else n*w[1]],
374 verify_weights ('mean, w, n),
375 if listp(x) or matrixp(x)
376 then block ([e: apply ("+", w * args(x)) / sum_w],
377 ratsimp_nonnumeric (e))
378 else error("mean: first argument must be a list or matrix; found", x));
380 verify_weights (fn, w, n) :=
381 block ([negative_weights],
383 then (if length(w) # n
384 then error (fn, ": expected", n, "weights, found", length(w))
385 elseif length (negative_weights: sublist (w, lambda ([w1], w1 < 0))) > 0
386 then error (fn, ": expected all weights nonnegative, found", negative_weights)
387 elseif equal (lsum (w1, w1, w), 0)
388 then error (fn, ": expected sum of weights to be positive, found 0"))
389 else (if not (numberp (w) and equal (w, 1))
390 then error (fn, ": weight can only be 1, if not a list; found", w)));
392 /* We want to return algebraic results in a standard form,
393 * but avoid replacing floats and bigfloats with rationals.
394 * keepfloat preserves floats, but not bigfloats, therefore it's necessary
395 * to inspect element by element. This is kind of terrible.
397 ratsimp_nonnumeric (e) :=
401 then map (ratsimp_nonnumeric, e)
402 else block ([keepfloat: true], ratsimp(e));
408 then (t:transpose(x),
409 makelist(lmin(t[i]),i,1,length(t)))
410 else if listofexpr(x)
412 else error("Input to 'smin' must be a list of expressions or a matrix"))$
418 then (t:transpose(x),
419 makelist(lmax(t[i]),i,1,length(t)))
420 else if listofexpr(x)
422 else error("Input to 'smax' must be a list of expressions or a matrix"))$
425 /* mini and maxi are maintained for backward compatibility,
426 but removed from documentation. */
433 then (t:transpose(x),
434 makelist(range(t[i]),i,1,length(t)))
435 else if listofexpr(x)
436 then lmax(x) - lmin(x)
437 else error("Input to 'range' must be a list of expressions or a matrix"))$
440 /* Non central moment of order m */
441 noncentral_moment (x, m, [w]) :=
442 block ([listarith: true],
443 if matrixp(x) or listp(x)
444 then (if matrixp(x) then x: args(x),
445 w: if w = [] then 1 else w[1],
446 mean (map (lambda ([x1], x1^m), x), w))
447 else error ("noncentral_moment: first argument must be a list or matrix; found", x));
450 /* Central moment of order m */
451 central_moment (x, m, [w]) :=
452 block ([listarith: true, mean_x],
453 if matrixp(x) or listp(x)
454 then (if matrixp(x) then x: args(x),
455 w: if w = [] then 1 else w[1],
457 mean (map (lambda ([x1], (x1 - mean_x)^m), x), w))
458 else error ("central_moment: first argument must be a list or matrix; found", x));
461 /* Maximum likelihood estimator of variance */
463 (w: if w = [] then 1 else w[1],
464 central_moment (x, 2, w))$
467 /* Standard deviation as the root square of var */
469 (w: if w = [] then 1 else w[1],
473 /* Unbiased estimator of variance (divided by n-1) */
475 block ([n: length(x),
476 w: if w = [] then 1 else w[1]],
477 var(x, w) * n/(n - 1));
480 /* Standard deviation as the root square of var1 */
482 block ([listarith: true],
483 w: if w = [] then 1 else w[1],
488 median(x):=block([n,s,t],
494 else (s[n/2] + s[n/2 + 1]) / 2 )
496 then (t:transpose(x),
497 makelist(median(t[i]),i,1,length(t)))
498 else error("Input to 'median' must be a list of expressions or a matrix"))$
501 /* p-quantile, with 0<=p<=1. Linear interpolation */
502 quantile(x,p):=block([n,s,pos,int,dif,t],
503 if numberp(p) and p>=0 and p<=1
504 then if listofexpr(x)
512 else (1-dif)*s[int]+dif*s[int+1])
514 then (t:transpose(x),
515 makelist(quantile(t[i],p),i,1,length(t)))
516 else error("First argument of 'quantile' must be a list of expressions or a matrix")
517 else error("Second argument of 'quantile' must be a probability") )$
520 /* Interquartilic range */
521 qrange(x):=block([t],
523 then (t:transpose(x),
524 makelist(qrange(t[i]),i,1,length(t)))
525 else if listofexpr(x)
526 then quantile(x,3/4)-quantile(x,1/4)
527 else error("Input to 'qrange' must be a list of expressions or a matrix"))$
530 /* Skewness coefficient */
531 skewness(x):=block([listarith:true],central_moment(x,3)/std(x)^3)$
534 /* Kurtosis coefficient, sometimes called kurtosis excess (see the -3) */
535 kurtosis(x):=block([listarith:true],central_moment(x,4)/var(x)^2 - 3)$
539 harmonic_mean(x):=block([listarith:true],
540 if listofexpr(x) or matrixp(x)
541 then length(x) / apply("+", 1/args(x))
542 else error("Input to 'harmonic_mean' must be a list of expressions or a matrix"))$
546 geometric_mean(x):=block([listarith:true],
547 if listofexpr(x) or matrixp(x)
548 then apply("*", args(x))^(1/length(x))
549 else error("Input to 'geometric_mean' must be a list of expressions or a matrix"))$
552 /* Variation coefficient */
553 cv(x):=block([listarith:true], std(x) / mean(x))$
557 mean_deviation(x):=block([t,listarith:true],
559 then (t:transpose(x),
560 makelist(mean(abs(t[i]-mean(t[i]))),i,1,length(t)))
561 else if listofexpr(x)
562 then mean(abs(x-mean(x)))
563 else error("Input to 'mean_deviation' must be a list of expressions or a matrix"))$
566 /* Median deviation */
567 median_deviation(x):=block([t,listarith:true],
569 then (t:transpose(x),
570 makelist(median(abs(t[i]-median(t[i]))),i,1,length(t)))
571 else if listofexpr(x)
572 then median(abs(x-median(x)))
573 else error("Input to 'median_deviation' must be a list of expressions or a matrix"))$
576 /* Pearson's skewness */
577 pearson_skewness(x):=block([t,listarith:true],
579 then (t:transpose(x),
580 3*makelist((mean(t[i])-median(t[i]))/std1(t[i]),i,1,length(t)))
581 else if listofexpr(x)
582 then 3*(mean(x)-median(x))/std1(x)
583 else error("Input to 'pearson_skewness' must be a list of expressions or a matrix"))$
586 /* Quartile skewness */
587 quartile_skewness(x):=block([q1,q2,q3,t],
589 then (t:transpose(x),
590 makelist(quartile_skewness(t[i]),i,1,length(t)))
591 else if listofexpr(x)
592 then (q1:quantile(x,1/4),
595 (q3-2*q2+q1)/(q3-q1))
596 else error("Input to 'quartile_skewness' must be a list of expressions or a matrix"))$
599 /* Kaplan-Meier estimator of the survival function S(x) = 1-F(x). */
600 /* First argument x is a list of pairs or a two column matrix. The first */
601 /* component is the observed time, and the second component a censoring */
602 /* index (1=non censored, 0=right censored). */
603 /* The second optional argument of function km is the name of the variable, */
604 /* which is x by default. Example calls are: */
605 /* km([[2,1], [3,1], [5,0], [8,1]]); */
606 /* km(matrix([2,1], [3,1], [5,0], [8,1]), 't); */
608 block([s,ss,t,st,n,d,c,i:2,idx:1,hatS,vn],
609 if length(var) > 0 and atom(var[1])
616 t: subsample(s,lambda([z], second(z)=1),1),
618 then /* only censored data */
620 t: append([0],listify(setify(first(transpose(t))))),
627 (n[i]: n[i-1] - d[i-1] - c[i-1],
628 while idx <= ss and s[idx][1] <= t[i] do
631 elseif s[idx][1] < t[i]
638 hatS[k]: hatS[k-1] * hatS[k],
640 sum(charfun(t[i-1] <= vn and vn < t[i]) * hatS[i-1], i, 2, st) +
641 charfun(t[st] <= vn) * hatS[st] ) $
644 /* Empirical distribution function F(x). */
645 /* First argument x is a list of numbers or a one column matrix. */
646 /* The second optional argument is the name of the variable, x by default. */
647 /* Example calls are: */
648 /* cdf_empirical([1,3,3,5,7,7,7,8,9]); */
649 /* cdf_empirical(transpose(matrix([1,3,3,5,7,7,7,8,9])), 'u); */
650 cdf_empirical(x,[var]):=
651 block([vn,tb,hatF:0,actual,i:1],
652 if length(var) > 0 and atom(var[1])
656 then if length(x[1])=1
657 then x: sort(args(transpose(x)))
658 else error("cdf_empirical: input matrix must have only one column")
660 tb: discrete_freq(x),
661 apply("+", map(lambda([z1,z2], z2*charfun(vn >= z1)),first(tb),second(tb)))/length(x)) $
669 /* MULTIVARIATE DESCRIPTIVE STATISTICS */
672 /* Covariance matrix */
673 cov(x, [w]):=block([n:length(x),dim,m,xi,sum, sum_w],
674 w: if w = [] then 1 else w[1],
675 verify_weights ('cov, w, n),
677 then error("cov: the argument is not a matrix")
678 else (m:matrix(mean(x, w)),
680 sum:zeromatrix(dim,dim),
683 block ([w_i: if w = 1 then 1 else w[i]],
685 sum:sum+w_i*transpose(xi).xi,
687 sum/sum_w - transpose(m).m) )$
690 /* Covariance matrix (divided by n-1). The argument x must be a matrix.
692 * I don't see a sensible way to generalize cov1 when weights are given,
693 * so cov1 doesn't take w as an optional argument, even though cov does.
695 cov1(x):=block([n:length(x)], cov(x)*n/(n-1))$
698 /* A list of global variation measures. The argument x must be a matrix */
699 /* 1) total variance */
700 /* 2) mean variance */
701 /* 3) generalized variance */
702 /* 4) generalized standard deviation */
703 /* 5) effective variance */
704 /* 6) effective standard deviation */
705 /* Admits the following options: */
706 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' */
707 /* must be computed; if false, x is the covariance matrix and 'cov1' */
708 /* is not recalculated. */
709 global_variances(x,[select]):=
710 block([s,p,aux,options,defaults,out:[]],
712 /* this is for backcompatibility */
713 if length(select)=1 and member(select[1], [true,false])
714 then select: ['data=select[1]],
716 /* check user options */
720 aux: ?position(lhs(i),options),
721 if numberp(aux) and aux <= length(options) and aux >= 1
722 then defaults[aux]: rhs(i)),
725 then(if defaults[1] /* does the matrix contain sample records? */
728 p:length(s), /* dimension */
730 out:cons(aux,out), /* total variance */
731 out:endcons(aux/p,out), /* mean variance */
733 out:endcons(aux,out), /* generalized variance */
734 out:endcons(sqrt(aux),out), /* generalized standard deviation */
736 out:endcons(aux,out), /* effective variance */
737 out:endcons(sqrt(aux),out), /* effective standard deviation */
739 else error("global_variances: the argument is not a matrix") )$
742 /* Correlation matrix. The argument x must be a matrix. */
743 /* cor(x,false)==> x is the covariance matrix and 'cov1' */
744 /* is not recalculated */
745 /* Admits the following options: */
746 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' */
747 /* must be computed; if false, x is the covariance matrix and 'cov1' */
748 /* is not recalculated. */
750 /* cor calls cov1 to compute covariance, and cov1 doesn't take w as an
751 * optional argument (see comment above), so neither does cor.
755 block([m,s,d,defaults,options],
757 /* this is for backcompatibility */
758 if length(select)=1 and member(select[1], [true,false])
759 then select: ['data=select[1]],
761 /* check user options */
765 aux: ?position(lhs(i),options),
766 if numberp(aux) and aux <= length(options) and aux >= 1
767 then defaults[aux]: rhs(i)),
770 then(if defaults[1] /* does the matrix contain sample records? */
774 d:sqrt(sum(ematrix(m,m,1/s[i,i],i,i),i,1,m)),
776 else error("cor: the argument is not a matrix") )$
779 /* Returns a list of dependence measures. The argument x must be a matrix. */
780 /* 1) precision matrix */
781 /* 2) multiple correlation */
782 /* 3) partial correlation */
783 /* Admits the following options: */
784 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' */
785 /* must be computed; if false, x is the covariance matrix and 'cov1' */
786 /* is not recalculated. */
787 list_correlations(x,[select]):=
788 block([s,p,s1,d,options,defaults,out:[],listarith:true],
790 /* this is for backcompatibility */
791 if length(select)=1 and member(select[1], [true,false])
792 then select: ['data=select[1]],
794 /* check user options */
798 aux: ?position(lhs(i),options),
799 if numberp(aux) and aux <= length(options) and aux >= 1
800 then defaults[aux]: rhs(i)),
803 then(if defaults[1] /* does the matrix contain sample records? */
806 p:length(s), /* dimension */
809 for i:1 thru p do d[i,i]:1/sqrt(s1[i,i]),
810 [s1, /* precision matrix */
811 1-1/makelist(s[i,i]*s1[i,i],i,1,p), /* mult. corr. */
812 -d.s1.d] /* part. corr. */ )
813 else error("list_correlations: the argument is not a matrix"))$
816 /* Principal components analysis for multivariate data. */
817 /* The argument x must be a matrix. This function returns a list containing: */
818 /* 1) Variances of the principal components in descending order */
819 /* 2) Proportions (%) of total variance explained by principal components */
820 /* 3) Matrix of coefficients for principal components */
821 /* Admits the following options: */
822 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' must be */
823 /* computed; if false, x is the covariance matrix and 'cov1' is not */
825 principal_components(x,[select]):=
826 block([options,defaults,aux,s,p,val,vec,ord,percents,listarith:true],
828 /* check user options */
832 aux: ?position(lhs(i),options),
833 if numberp(aux) and aux <= length(options) and aux >= 1
834 then defaults[aux]: rhs(i)),
836 /* go ahead with calculations */
838 then(if defaults[1] /* does the matrix contain sample records? */
841 p:length(s), /* sample dimension */
842 [val,vec]: eigens_by_jacobi(s),
843 ord: sort(makelist([val[k], col(vec,k)], k, p), lambda([u,v], first(u)>=first(v))),
844 val: map(first, ord),
845 vec: apply(addcol, map(second, ord)),
846 percents: 100 * val / apply("+", val),
847 [val, percents, vec])
848 else error("principal_components: the argument is not a matrix"))$
855 /* PLOTTING FUNCTIONS */
859 block([obase : 16, col : "#"],
862 block([sz : concat(random(16))],
863 substring(sz, slength(sz)))),
867 extract_options(s,[mo]):=
868 block([ss:[],mmo:[]],
870 if member(lhs(k), mo)
871 then mmo: endcons(k,mmo)
872 else ss : endcons(k,ss),
876 /* Plots scatter diagrams. */
877 scatterplot_description(m,[select]):=
879 [select, localopts]: extract_options(select,'nclasses,'htics,'frequency),
880 if listofnumbersp(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
881 then (/* m is a list of numbers or a column or a row matrix */
885 else m: transpose(m)[1],
886 gr2d(select, points(makelist([x,0],x,m))))
887 elseif listp(m) and every('identity,map(lambda([z],listofnumbersp(z) and length(z)=2),m)) or
888 matrixp(m) and length(m[1])=2 then
889 /* m is a two-dimensional sample */
890 gr2d(select, points(args(m)))
891 elseif matrixp(m) and length(m[1])>2 then
892 /* m is an d-dimensional (d>2) sample */
893 block([n: length(m[1]), gr],
899 then gr2d(apply('histogram_description,
900 append([col(m,i)],select,localopts)))
901 else apply('scatterplot_description,
902 append([subsample(m,lambda([v],true),i,j)],select)) ,
905 else error("sorry, can't plot the scatter diagram for these data")) $
906 scatterplot([desc]) := draw(apply('scatterplot_description, desc)) $
907 wxscatterplot([desc]) := wxdraw(apply('scatterplot_description, desc)) $
911 /* Histograms. Argument 'm' must be a list, a one column matrix or a */
912 /* one row matrix. */
914 /* Specific options (defaults in parentheses): */
915 /* - nclasses (10): number of classes, a positive integer, or */
916 /* a set of bin numbers. Also, it can be given */
917 /* the name of one of the three optimal algo- */
918 /* rithms available: 'fd, 'scott, or 'sturges. */
919 /* - frequency (absolute): 'absolute', 'relative', 'density' or */
921 /* - htics (auto): 'auto, 'endpoints, 'intervals, list of labels */
923 /* Draw options affecting this object: */
925 /* - color (used for labels) */
929 /* histogram modifies the following options: */
934 histogram_skyline: false;
936 make_skyline (xx, yy, fill_color, fill_density) :=
937 block ([skyline_interior, skyline_initial, skyline_final, skyline_points, n: length(xx)],
939 skyline_interior: apply (append, makelist ([[xx[k], yy[k - 1]], [xx[k], yy[k]]], k, 2, n - 1)),
940 skyline_initial: [[xx[1], 0], [xx[1], yy[1]]],
941 skyline_final: [[xx[n], yy[n - 1]], [xx[n], 0]],
942 skyline_points: append (skyline_initial, skyline_interior, skyline_final),
946 'fill_color = fill_color,
947 'fill_density = fill_density,
948 polygon (skyline_points),
950 'points_joined = true,
952 'points (skyline_points)]);
954 histogram_description(m,[select]):=
955 block([fr,amp,scen,before,localopts,num_classes,v,lbels, my_fill_color, my_fill_density,
956 /* specific options */
957 my_nclasses:10, my_frequency:'absolute, my_htics:'auto, histogram_bars, histogram_drawing],
958 [before, localopts]: extract_options (select, 'nclasses, 'frequency, 'htics, 'fill_color, 'fill_density),
960 /* This business about finding option values is kind of terrible.
961 * Ideally there would be some logic in the draw package to handle
962 * the hierarchy of option settings.
965 (my_fill_color: assoc ('fill_color, localopts)) # false
966 or (my_fill_color: assoc ('fill_color, get_draw_defaults ())) # false
967 or (my_fill_color: "#ff0000"), /* default from share/draw/grcommon.lisp */
969 (my_fill_density: assoc ('fill_density, localopts)) # false
970 or (my_fill_density: assoc ('fill_density, get_draw_defaults ())) # false
971 or (my_fill_density: 0), /* default "not filled" */
973 for k in localopts do
974 if lhs(k) = 'nclasses
975 then my_nclasses: rhs(k)
976 elseif lhs(k) = 'frequency
977 then my_frequency: rhs(k)
978 elseif lhs(k) = 'htics
979 then my_htics: rhs(k),
981 if listp(my_nclasses)
982 then ( v: length(my_nclasses),
986 then num_classes: third(my_nclasses)
987 else error("histogram: the second argument is not a list of two or three elements") )
988 elseif setp(my_nclasses)
989 then ( num_classes: length(my_nclasses)-1,
991 then error("histogram: at least two bin numbers are needed"))
992 elseif integerp(my_nclasses) and my_nclasses < 1
993 then error("histogram: number of classes must be an integer greater than two")
994 elseif integerp(my_nclasses) and my_nclasses > 0
995 then num_classes: my_nclasses
996 elseif not member(my_nclasses, ['fd, 'scott, 'sturges])
997 /* for optimal cases, computation of num_classes is delayed */
998 then error("histogram: incorrect format for option nclasses"),
999 if not member(my_frequency, ['absolute, 'relative, 'percent, 'density])
1000 then error("histogram: frenquency must be either absolute, relative, density or percent"),
1001 if not member(my_htics, ['auto, 'endpoints, 'intervals]) and
1003 then error("histogram: incorrect format in option htics"),
1005 /* if m is a list of numbers or a column or a row matrix then */
1006 /* plots a simple histogram. */
1007 if listofnumbersp(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
1008 then (/* transform input data into a list */
1012 else m: transpose(m)[1],
1014 /* frequency table */
1015 /* first, calculate num_clases for the optimal cases */
1016 if my_nclasses = 'fd /* Robust Freedman - Diaconis method */
1017 then ( h: qrange(m),
1019 then h: 2 * median(abs(m-median(m))), /* twice median absolute deviation */
1021 then my_nclasses: num_classes: ceiling(range(m) / (2 * h * length(m)^(-1/3)))
1022 else my_nclasses: num_classes: 1 ),
1023 if my_nclasses = 'scott /* Scott's method to be applied under Gaussian assumptions */
1024 then ( if length(m) = 1
1025 then error("histogram: with Scott's method, sample size must be greater than 1"),
1026 h: 3.5 * std1(m) * length(m)^(-1/3),
1028 then my_nclasses: num_classes: ceiling(range(m) / h)
1029 else my_nclasses: num_classes: 1 ),
1030 if my_nclasses = 'sturges /* Sturges' method to be applied under Gaussian assumptions and n < 200 */
1031 then my_nclasses: num_classes:
1032 ceiling(1 + log(length(m)) / 0.6931471805599453), /* <-- log(2) */
1033 fr: float(continuous_freq(m,my_nclasses)),
1034 if member(my_frequency, ['relative, 'percent])
1035 then fr: [first(fr), second(fr) / apply("+", second(fr))],
1036 if my_frequency = 'percent
1037 then fr[2]: fr[2] * 100.0,
1038 amp: makelist(fr[1][i+1]-fr[1][i], i, 1, length(fr[1])-1),
1039 if my_frequency = 'density
1040 then fr: [first(fr), second(fr) / apply("+", second(fr) * amp)],
1042 /* histogram tics */
1045 elseif my_htics = 'endpoints
1046 then my_htics: xtics = setify(fr[1])
1047 elseif my_htics = 'intervals
1048 then ( lbels: [concat("[",fr[1][1],",",fr[1][2],"]")],
1049 lbels: append(lbels, makelist(concat("(",fr[1][k],",",fr[1][k+1],"]"),k,2,num_classes)),
1050 lbels: makelist([lbels[k],fr[1][k]+amp[k]/2],k,1,num_classes),
1051 my_htics: xtics = makeset([a,b],[a,b],lbels) )
1052 elseif listp(my_htics)
1053 then ( if length(my_htics) < num_classes
1054 then my_htics: append(my_htics, makelist("",k,length(my_htics)+1,num_classes)),
1055 lbels: makelist([my_htics[k],fr[1][k]+amp[k]/2],k,1,num_classes),
1056 my_htics: xtics = makeset([a,b],[a,b],lbels) ),
1058 histogram_bars: apply ('bars, makelist ([(fr[1][k] + fr[1][k + 1])/2, fr[2][k], amp[k]], k, 1, length (fr[1]) - 1)),
1060 if histogram_skyline
1061 then make_skyline (fr[1], fr[2], my_fill_color, my_fill_density)
1062 else ['fill_color = my_fill_color, 'fill_density = my_fill_density, histogram_bars],
1064 scen: append ([before,
1065 'xrange = [first(fr[1]), last(fr[1])] + [-1,+1] * mean(amp)/2,
1066 'yrange = lmax(fr[2])*[-0.05, 1.05],
1070 else error("histogram: can't plot the histogram for these data") )$
1072 histogram([desc]) := draw2d(apply(histogram_description, desc)) $
1073 wxhistogram([desc]) := wxdraw2d(apply(histogram_description, desc)) $
1077 /* Plots bar charts for discrete or categorical data, both for one or more */
1078 /* samples. This function admits an arbitrary number of arguments. */
1079 /* The first arguments are lists or matrices with sample data. The rest of */
1080 /* arguments are specific options in equation format (option = value). */
1082 /* Specific options: */
1083 /* - box_width (3/4): a number between zero and 1. */
1084 /* - grouping (clustered): or 'stacked'. */
1085 /* - groups_gap (1): distance between clusters, must be a positive */
1087 /* - bars_colors ([]): list of colors for bars. If the list is shorter */
1088 /* than the number of samples, colors are generated randomly. */
1089 /* - frequency (absolute): 'absolute', 'relative' or 'percent'. */
1090 /* - ordering (orderlessp): 'orderlessp' or 'ordergreatp'. */
1091 /* - sample_keys ([]): entries for legend. */
1092 /* - start_at (0): starting point on the x axis. */
1094 /* Draw options affecting this object: */
1096 /* - color (used for labels) */
1098 /* - fill_density */
1100 /* barsplot modifies the following options: */
1102 barsplot_description([args]):=
1103 block([lastsample: 0, nargs: length(args), freqs: [], samplespace,
1104 sspacesize, nsamples, totalgap, expr, before, localopts,
1105 /* specific options */
1106 my_box_width: 3/4, my_groups_gap: 1, my_frequency: 'absolute,
1107 my_ordering: 'orderlessp, my_bars_colors: [], my_sample_keys: [],
1108 my_grouping: 'clustered, my_start_at: 0 ],
1110 /* looks for data */
1111 for i:1 thru nargs while (listp(args[i]) or matrixp(args[i])) do
1112 lastsample: lastsample + 1,
1114 [before, localopts]:
1116 makelist(args[k],k,lastsample+1,length(args)),
1117 'box_width,'grouping,'groups_gap,'bars_colors,'frequency,'ordering,'sample_keys,'start_at),
1119 for k in localopts do
1120 if lhs(k) = 'box_width
1121 then my_box_width: float(rhs(k))
1122 elseif lhs(k) = 'grouping
1123 then my_grouping: rhs(k)
1124 elseif lhs(k) = 'groups_gap
1125 then my_groups_gap: rhs(k)
1126 elseif lhs(k) = 'bars_colors
1127 then my_bars_colors: rhs(k)
1128 elseif lhs(k) = 'frequency
1129 then my_frequency: rhs(k)
1130 elseif lhs(k) = 'ordering
1131 then my_ordering: rhs(k)
1132 elseif lhs(k) = 'sample_keys
1133 then my_sample_keys: rhs(k)
1134 elseif lhs(k) = 'start_at
1135 then my_start_at: float(rhs(k)),
1137 if my_box_width > 1 or my_box_width < 0
1138 then error("barsplot: illegal value for box_width"),
1139 if not member(my_grouping, ['clustered, 'stacked])
1140 then error("barsplot: unrecognized grouping style"),
1141 if not integerp(my_groups_gap) or my_groups_gap < 1
1142 then error("barsplot: illegal value for groups_gap"),
1143 if not listp(my_bars_colors)
1144 then error("barsplot: illegal value for bars_colors"),
1145 if not member(my_frequency, ['absolute, 'relative, 'percent])
1146 then error("barsplot: frenquency must be either absolute, relative or percent"),
1147 if not member(my_ordering, [orderlessp, ordergreatp])
1148 then error("barsplot: illegal value for ordering"),
1149 if not (my_sample_keys = []
1150 or listp(my_sample_keys) and every('stringp, my_sample_keys))
1151 then error("barsplot: illegal value for sample_keys"),
1152 if not numberp(my_start_at)
1153 then error("barsplot: non numeric value for start_at option"),
1155 /* get absolute frequencies */
1156 for k: 1 thru lastsample do (
1158 then freqs: endcons(discrete_freq(args[k]), freqs)
1159 elseif matrixp(args[k])
1160 then for c in args(transpose(args[k])) do
1161 freqs: endcons(discrete_freq(c), freqs)
1162 else error("barsplot: unknown data format")),
1164 /* transform freqs into a more suitable form */
1165 samplespace: sort(listify(setify(apply('append,map('first,freqs)))), my_ordering),
1166 sspacesize: length(samplespace),
1167 nsamples: length(freqs),
1168 if my_sample_keys = []
1169 then my_sample_keys : makelist("",k,1,nsamples),
1170 if nsamples # length(my_sample_keys)
1171 then error("barsplot: incorrect number of elements in sample_keys"),
1174 block([pos: ?position(k,first(i))],
1177 else second(i)[pos]),
1181 /* transform to relative frequencies, if necessary */
1182 if member(my_frequency, ['relative, 'percent])
1183 then block([samplesizes: apply("+",freqs)],
1184 freqs: map(lambda([z], z/samplesizes), freqs)),
1185 if my_frequency = 'percent
1186 then freqs: 100.0 * freqs,
1188 /* complete my_bars_colors with random colors, if necessary */
1189 if nsamples > length(my_bars_colors)
1190 then my_bars_colors: append(my_bars_colors,
1191 makelist(random_color(),k,length(my_bars_colors)+1,nsamples)),
1192 if my_grouping = 'clustered
1193 then ( /* clustered bars */
1194 totalgap: nsamples + my_groups_gap,
1197 [points([[my_start_at,0]]), xtics = 'none],
1199 ['fill_color = my_bars_colors[m],
1200 'key = my_sample_keys[m],
1203 [my_start_at+(k-1)*totalgap + m, freqs[k][m], my_box_width],
1209 [string(samplespace[k]),
1210 my_start_at+(k-1)*totalgap + (nsamples+1)/2,
1212 k, 1, sspacesize))],
1214 else ( /* stacked bars */
1215 totalgap: my_groups_gap,
1216 freqs: map(lambda([z], reverse(makelist(block([s:0], for i:1 thru k do s: s+z[i], s),k,1,length(z)))), freqs),
1219 [points([[my_start_at,0]]), xtics = 'none],
1221 ['fill_color = my_bars_colors[m],
1222 'key = my_sample_keys[m],
1225 [my_start_at+(k-1)*totalgap, freqs[k][m], my_box_width],
1231 [string(samplespace[k]),
1232 my_start_at+(k-1)*totalgap,
1234 k, 1, sspacesize))],
1236 barsplot([desc]) := draw2d(apply(barsplot_description, desc)) $
1237 wxbarsplot([desc]) := wxdraw2d(apply(barsplot_description, desc)) $
1241 /* Plots pie charts for discrete or categorical data. Argument 'm' must be a */
1242 /* list, a one column matrix or a one row matrix. The rest of arguments are */
1243 /* specific options in equation format (option = value). */
1245 /* Specific options: */
1246 /* - sector_colors ([]): list of colors for sectors. If the list is */
1247 /* shorter than the number of sectors, colors are generated randomly. */
1248 /* - pie_center ([0,0]): a pair of numbers */
1249 /* - pie_radius (1): a positive number. */
1251 /* Draw options affecting this object: */
1253 /* - color (used for labels) */
1254 /* - fill_density */
1256 /* This object modifies the following options: */
1258 piechart_description(m,[select]):=
1259 block([fr,tot,degrees,ini,end:0,alpha,hexcolor,conver:float(%pi/180),
1260 sectors,before,localopts,
1261 /* specific options */
1262 my_sector_colors:[],my_pie_center:[0,0],my_pie_radius:1],
1263 [before, localopts]: extract_options(select,'sector_colors,'pie_center,'pie_radius),
1265 for k in localopts do
1266 if lhs(k) = 'sector_colors
1267 then my_sector_colors: rhs(k)
1268 elseif lhs(k) = 'pie_center
1269 then my_pie_center: float(rhs(k))
1270 elseif lhs(k) = 'pie_radius
1271 then my_pie_radius: float(rhs(k)),
1273 if not listp(my_sector_colors)
1274 then error("piechart: illegal value for sector_colors"),
1275 if not numberp(my_pie_radius) or my_pie_radius <= 0
1276 then error("piechart: radius must be greater than zero"),
1277 if not listp(my_pie_center) or length(my_pie_center) # 2 or
1278 not every(numberp, my_pie_center)
1279 then error("piechart: center must be a list of numbers"),
1281 if listofexpr(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
1282 then (/* transform input data into a list */
1286 else m: transpose(m)[1],
1288 /* frequency table */
1289 fr: discrete_freq(m),
1291 degrees: 360.0 * fr[2] / apply("+", fr[2]),
1293 /* complete my_sector_colors with random colors, if necessary */
1294 if tot > length(my_sector_colors)
1295 then my_sector_colors:
1296 append(my_sector_colors,
1297 makelist(random_color(),k,length(my_sector_colors)+1,tot)),
1299 /* build the object */
1302 end: ini + degrees[i],
1303 alpha: ini+degrees[i]/2.0,
1304 hexcolor: random_color(),
1305 [ 'color = my_sector_colors[i],
1306 'fill_color = my_sector_colors[i],
1307 'key = string(fr[1][i]),
1308 ellipse(my_pie_center[1],my_pie_center[2],
1309 my_pie_radius,my_pie_radius,
1312 else error("piechart: can't plot the piechart for these data") )$
1313 piechart([desc]) := draw2d(apply(piechart_description, desc)) $
1314 wxpiechart([desc]) := wxdraw2d(apply(piechart_description, desc)) $
1318 /* Plots box-whisker diagrams. Argument 'm' must be a list of numbers or a matrix. */
1319 /* The second and consecutive arguments are specific options. */
1321 /* Specific options: */
1322 /* - box_width (3/4): widths for boxes */
1323 /* - box_orientation (vertical): 'vertical' or 'horizontal' */
1324 /* - range (inf): sets outliers boundaries */
1325 /* - outliers_size (1): outliers size */
1327 /* Draw options affecting this object: */
1331 /* This object modifies the following options: */
1332 /* - points_joined */
1339 boxplot_description(m,[select]):=
1340 block([fr,tot,top:0,bot:inf,before,localopts,p,out:[],
1341 /* specific options*/
1342 my_box_width:3/4, my_box_orientation:'vertical, my_range:'inf, my_outliers_size:1],
1343 [before, localopts]: extract_options(select,'box_width,'box_orientation,'range,'outliers_size),
1345 for k in localopts do
1346 if lhs(k) = 'box_width
1347 then my_box_width: float(rhs(k))
1348 elseif lhs(k) = 'box_orientation
1349 then my_box_orientation: rhs(k)
1350 elseif lhs(k) = 'range
1351 then my_range: float(rhs(k))
1352 elseif lhs(k) = 'outliers_size
1353 then my_outliers_size: float(rhs(k)),
1355 if not floatp(my_box_width) or my_box_width < 0 or my_box_width > 1
1356 then error("boxplot: illegal value for option box_width"),
1357 if not member(my_box_orientation, ['vertical, 'horizontal])
1358 then error("boxplot: illegal value for option box_orientation"),
1359 if my_range # 'inf and (not floatp(my_range) or my_range < 0)
1360 then error("boxplot: illegal value for option range"),
1361 if not floatp(my_outliers_size) or my_outliers_size < 0
1362 then error("boxplot: illegal outliers point size"),
1364 /* if m is not a row matrix, transpose it */
1365 if matrixp(m) and length(m)>1
1366 then m: transpose(m),
1367 /* if m is a list of numbers, transform it */
1368 /* to the form [[n1,n2,....]] */
1369 if listofnumbersp(m) then m: [m],
1372 if listoflistsp(m) or matrixp(m)
1376 ['points_joined = true,
1378 'point_type = 'dot],
1384 q1: float(quantile(m[x],0.25)),
1385 q2: float(quantile(m[x],0.5)),
1386 q3: float(quantile(m[x],0.75)),
1387 w: float(boxplot_width),
1390 A,B,C,D,E,F,G,H,I,J,K,L,M,N],
1391 top: max(top, maxi),
1392 bot: min(bot, mini),
1394 then /* ignore possible outliers */
1395 [mi, ma]: float([mini, maxi])
1396 else /* calculate whisker positions */
1398 lowlim: q1-my_range*(q3-q1),
1399 upplim: q3+my_range*(q3-q1)],
1401 if k < lowlim or k > upplim
1402 then /* accumulate new outliers to global variable out */
1403 out: cons([x,k], out)
1404 else in: cons(k, in),
1407 A: [x-w2,q1], B: [x-w2,q3], C: [x+w2,q3],
1408 D: [x+w2,q1], E: [x-w2,q2], F: [x+w2,q2],
1409 G: [x,q3], H: [x,ma], I: [x-w4,ma],
1410 J: [x+w4,ma], K: [x,q1], L: [x,mi],
1411 M: [x-w4,mi], N: [x+w4,mi],
1412 p: [A,B,C,D,E,F,G,H,I,J,K,L,M,N],
1413 if my_box_orientation = 'horizontal
1414 then p: map(reverse,p),
1415 [ points([p[1],p[2],p[3],p[4],p[1]]),
1416 points([p[5],p[6]]),
1417 points([p[7],p[8]]),
1418 points([p[9],p[10]]),
1419 points([p[11],p[12]]),
1420 points([p[13],p[14]]) ]),
1423 /* plot all outliers as isolated points */
1425 then (if my_box_orientation = 'horizontal
1426 then out: map(reverse,out),
1427 ['points_joined = false,
1428 'point_size = my_outliers_size,
1429 'point_type = 'circle,
1433 if my_box_orientation = 'vertical
1434 then [ 'xtics = setify(makelist(k,k,1,tot)),
1435 'xrange = (tot-1)*0.05*[-1,+1.5]+[0.5,tot+0.5],
1436 'yrange = (top-bot)*0.05*[-1,+1]+[bot,top] ]
1437 else [ 'ytics = setify(makelist(k,k,1,tot)),
1438 'yrange = (tot-1)*0.05*[-1,+1.5]+[0.5,tot+0.5],
1439 'xrange = (top-bot)*0.05*[-1,+1]+[bot,top] ] ])
1440 else error("sorry, can't plot the box-whisker plot for these data"))$
1442 boxplot([desc]) := draw2d(apply(boxplot_description, desc)) $
1443 wxboxplot([desc]) := wxdraw2d(apply(boxplot_description, desc)) $
1447 /* Plots stem and leaf diagrams. Argument 'm' must be a list, a one */
1448 /* column matrix or a one row matrix. */
1450 /* Specific options: */
1451 /* - leaf_unit (1): indicates the unit of the leaves; must be a */
1453 stemplot(m,[select]):=
1454 block([localopts,d,s,si,l,lf,index,n,key,offset,
1455 /* specific options*/
1457 [select, localopts]: extract_options(select,'leaf_unit),
1459 for k in localopts do
1460 if lhs(k) = 'leaf_unit then my_leaf_unit: rhs(k),
1462 if numberp(my_leaf_unit) and my_leaf_unit > 0
1463 then my_leaf_unit: 10^round(log(my_leaf_unit)/log(10))
1464 else error("stemplot: illegal value for option leaf_unit"),
1466 if listofexpr(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
1467 then (/* transform input data into a list */
1471 else m: transpose(m)[1],
1473 d: my_leaf_unit*map('round,m/my_leaf_unit),
1474 s: map('floor,d/(10*my_leaf_unit)),
1475 l: map('floor,d/my_leaf_unit),
1477 si: listify(setify(s)),
1478 lf: makelist([],i,1,length(si)),
1479 for k:1 thru length(s) do (
1481 while s[k]>si[index] do index:index+1,
1482 lf[index]:append(lf[index],[l[k]]) ),
1484 offset: slength(concat("",si[length(si)])),
1485 for k:1 thru length(si) do (
1487 for n:1 thru length(lf[k]) do lv:concat(lv,lf[k][n]),
1488 printf(true,concat("~",offset,"d|",lv,"~%"),si[k]) ),
1489 key: 63*my_leaf_unit,
1490 print("key: 6|3 = ",if my_leaf_unit<1 then float(key) else key),
1492 else error("stemplot: can't plot the stemplot for these data") )$
1496 /* Plots star charts for discrete or categorical data, both for one or more*/
1497 /* samples. This function admits an arbitrary number of arguments. */
1498 /* The first arguments are lists or matrices with sample data. The rest of */
1499 /* arguments are specific options in equation format (option = value). */
1501 /* Specific options: */
1502 /* - stars_colors ([]): list of colors for stars. If the list is shorter*/
1503 /* than the number of samples, colors are generated randomly. */
1504 /* - frequency (absolute): 'absolute' or 'relative'. */
1505 /* - ordering (orderlessp): 'orderlessp' or 'ordergreatp'. */
1506 /* - sample_keys ([]): entries for legend. */
1507 /* - star_center ([0,0]): a pair of numbers */
1508 /* - star_radius (1): a positive number. */
1509 /* Draw options affecting this object: */
1513 /* starplot modifies the following options: */
1515 starplot_description([args]):=
1516 block([lastsample: 0, nargs: length(args), freqs: [], samplespace,
1517 sspacesize, nsamples, before, localopts, maxfreq, angle, cpnts,
1518 /* specific options */
1519 my_stars_colors: [], my_frequency: 'absolute, my_ordering: 'orderlessp,
1520 my_sample_keys: [], my_star_center: [0,0], my_star_radius: 1],
1522 /* looks for data */
1523 for i:1 thru nargs while (listp(args[i]) or matrixp(args[i])) do
1524 lastsample: lastsample + 1,
1526 [before, localopts]:
1528 makelist(args[k],k,lastsample+1,length(args)),
1529 'stars_colors,'frequency,'ordering,'sample_keys,'star_center,'star_radius),
1531 for k in localopts do
1532 if lhs(k) = 'stars_colors
1533 then my_stars_colors: rhs(k)
1534 elseif lhs(k) = 'frequency
1535 then my_frequency: rhs(k)
1536 elseif lhs(k) = 'ordering
1537 then my_ordering: rhs(k)
1538 elseif lhs(k) = 'sample_keys
1539 then my_sample_keys: rhs(k)
1540 elseif lhs(k) = 'star_center
1541 then my_star_center: float(rhs(k))
1542 elseif lhs(k) = 'star_radius
1543 then my_star_radius: float(rhs(k)),
1545 if not listp(my_stars_colors)
1546 then error("starplot: illegal value for stars_colors"),
1547 if not member(my_frequency, ['absolute, 'relative])
1548 then error("starplot: frenquency must be either absolute, relative or percent"),
1549 if not member(my_ordering, [orderlessp, ordergreatp])
1550 then error("starplot: illegal value for ordering"),
1551 if not (my_sample_keys = []
1552 or listp(my_sample_keys) and every('stringp, my_sample_keys))
1553 then error("starplot: illegal value for sample_keys"),
1554 if not numberp(my_star_radius) or my_star_radius <= 0
1555 then error("starplot: radius must be greater than zero"),
1556 if not listp(my_star_center) or length(my_star_center) # 2 or
1557 not every(numberp, my_star_center)
1558 then error("starplot: center must be a list of numbers"),
1560 /* get absolute frequencies */
1561 for k: 1 thru lastsample do (
1563 then freqs: endcons(discrete_freq(args[k]), freqs)
1564 elseif matrixp(args[k])
1565 then for c in args(transpose(args[k])) do
1566 freqs: endcons(discrete_freq(c), freqs)
1567 else error("starplot: unknown data format")),
1568 samplespace: sort(listify(setify(apply('append,map('first,freqs)))), my_ordering),
1569 sspacesize: length(samplespace),
1570 nsamples: length(freqs),
1571 angle: float(2*%pi/sspacesize),
1572 if my_sample_keys = []
1573 then my_sample_keys : makelist("",k,1,nsamples),
1574 if nsamples # length(my_sample_keys)
1575 then error("starplot: incorrect number of elements in sample_keys"),
1577 /* transform to relative frequencies, if necessary */
1578 if member(my_frequency, ['relative, 'percent])
1579 then freqs: makelist(block([ssinv: float(1 / apply("+", second(k)))],
1580 if my_frequency = 'percent then ssinv: 100*ssinv,
1581 [first(k), ssinv*second(k)]),
1583 maxfreq: lmax(flatten(map(second, freqs))),
1585 /* complete my_stars_colors with random colors, if necessary */
1586 if nsamples > length(my_stars_colors)
1587 then my_stars_colors: append(my_stars_colors,
1588 makelist(random_color(),k,length(my_stars_colors)+1,nsamples)),
1590 /* calculate circle points */
1593 [this_ang: k*angle],
1594 [cos(this_ang), sin(this_ang)]),
1597 /* return draw object */
1599 /* draw the radii and the circular grid */
1600 ['points_joined = true, 'point_type = 'dot, 'color = 'black],
1602 map(lambda([z], points([my_star_center, z+my_star_center])),
1603 my_star_radius*cpnts),
1605 [apply(label, maplist(cons,
1606 maplist(string, samplespace),
1607 map(lambda([z], z+my_star_center), 1.05*my_star_radius*cpnts)))],
1610 /* draw the stars */
1612 [ 'color = my_stars_colors[s],
1613 'key = my_sample_keys[s],
1617 pos: ?position(samplespace[k], freqs[s][1]),
1620 else my_star_radius * freqs[s][2][pos] /
1621 maxfreq * cpnts[k] + my_star_center),
1623 points(cons(last(pnts), pnts)) ) ],
1626 starplot([desc]) := draw2d(apply(starplot_description, desc)) $
1627 wxstarplot([desc]) := wxdraw2d(apply(starplot_description, desc)) $
1629 /* find_runs -- find consecutive identical values in an array or list.
1630 * Returns a structure runs(...) with fields lengths and values.
1633 defstruct (runs (lengths, values));
1636 if x = [] then runs ([], [])
1637 else block ([dx : map (lambda ([a, b], is(a # b)), rest(x), rest(x, -1)), ii0, ii, dii],
1638 ii0 : sublist_indices (dx, identity),
1639 ii : append ([0], ii0, [length (x)]),
1640 dii : rest(ii) - rest(ii, -1),
1641 runs (dii, makelist (x[i], i, rest (ii))));
1643 find_runs_inverse (r) :=
1644 block ([v: r@values, n: r@lengths],
1645 apply (append, makelist (makelist (v[i], n[i]), i, 1, length(v))));