3 Copyright (C) 2005-2017 Mario Rodriguez Riotorto
5 This program is free software; you can redistribute
6 it and/or modify it under the terms of the
7 GNU General Public License as published by
8 the Free Software Foundation; either version 2
9 of the License, or (at your option) any later version.
11 This program is distributed in the hope that it
12 will be useful, but WITHOUT ANY WARRANTY;
13 without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details at
16 http://www.gnu.org/copyleft/gpl.html
22 This is a set of Maxima functions for descriptive statistics.
24 This library supports two types of data:
25 a) lists storing univariate samples, like [34,23,76,45,32,...]
31 b) matrices storing multivariate samples, like
32 matrix([34,44,23],[87,23,54],....); in this case, the number
33 of columns equals the dimension of the multivariate random
34 variable, and the number of rows is the sample size.
36 (%i2) matrix([a,b],[c,d],[e,f]);
44 (%o3) [---------, ---------]
47 Lists of multiple samples with equal or different sizes are not
48 directly supported, but you can use the function 'map' as in the
50 (%i4) map(mean,[[a,b,c],[a,b]]);
52 (%o4) [---------, -----]
55 These are the functions implemented in this library,
56 (see comments bellow for interpretation):
60 continuous_freq: frequencies for continuous data
61 discrete_freq: frequencies for discrete data
62 subsample: subsample extraction
64 Univariate descriptive statistics:
66 smin: sample minimum value
67 smax: sample maximum value
69 noncentral_moment: non central moment
70 central_moment: central moment
71 var: variance (divided by n)
72 std: standard deviation based on var
73 var1: variance (divided by n-1)
74 std1: standard deviation based on var1
77 qrange: interquartilic range
78 skewness: skewness coefficient
79 kurtosis: kurtosis coefficient
80 harmonic_mean: harmonic mean
81 geometric_mean: geometric mean
82 cv: variation coefficient
83 mean_deviation: mean deviation
84 median_deviation: median deviation
85 pearson_skewness: Pearson's skewness coefficient
86 quartile_skewness: quartilic skewness coefficient
87 km: Kaplan-Meier estimator of the survival function
88 cdf_empirical: empirical cumulative distribution function
90 Multivariate descriptive statistics:
91 cov: covariance matrix (divided by n)
92 cov1: covariance matrix (divided by n-1)
93 cor: correlation matrix
94 global_variances: gives a list with
98 generalized standard deviation
100 effective standard deviation
101 list_correlations: gives a list with
103 multiple correlation coefficients
104 partial correlation coefficients
107 Statistical diagrams:
117 Johnson, A.J., Wichern, D.W. (1998) Applied Multivariate Statistical
118 Analysis. Prentice Hall.
119 Pe~na, D. (2002) An'alisis de datos multivariantes. McGraw-Hill.
121 Thanks to Robert Dodier and Barton Willis for their help.
123 For questions, suggestions, bugs and the like, feel free
126 riotorto AT yahoo DOT com
127 http://tecnostats.net
132 put('descriptive, 1, 'version) $
134 if not get('draw,'version) then load("draw") $
136 load("descriptive_util.lisp");
139 /* AUXILIARY FUCTIONS */
143 /* Computes the trace of a matrix */
144 matrixtrace(m):=block([n:length(m)],
145 if matrixp(m) and n=length(m[1])
146 then apply("+",makelist(m[i,i],i,1,n)))$
149 /* True if the argument is a list of numbers, false otherwise. */
150 listofnumbersp(y):=listp(y) and every('identity,map('numberp,y))$
153 /* True if the argument is a list containing */
154 /* no lists, false otherwise. */
155 listofexpr(y):=listp(y) and not some('identity,map('listp,y))$
158 /* True if the argument is a list of lists containing only numbers, */
159 /* false otherwise. */
160 listoflistsp(y):=listp(y) and every('identity,map('listofnumbersp,y))$
163 /* True if the argument is a list of lists, all of */
164 /* them of equal size. */
165 listsofequalsize(y) :=
168 every(lambda([z], length(z) = length(y[1])), y) $
172 /* DATA TRANSFORMATION */
175 /* Sub-sample matrix selection. */
176 /* Example: subsample(m,lambda([v],v[1]<3 and v[4]=A),3,2) */
177 /* gives the 3rd an 2nd components, in this order, of */
178 /* those rows of matrix m whose first component is */
179 /* less than 3 and fourth component equals A. */
180 subsample(mat,cond,[cols]):=
181 block([tempvect, tempmat:[]],
183 then cols: makelist(i,i,1,length(mat[1])),
189 tempvect: endcons(obs[i], tempvect),
190 tempmat: endcons(tempvect, tempmat)),
191 apply('matrix, tempmat))$
194 /* Subtract the mean and divide by the standard deviation */
197 then (m-mean(m)) / std(m)
198 elseif listoflistsp(m)
199 then makelist(standardize(k), k, m)
201 then block([av, dev, m2: copymatrix(m)],
204 for k:1 thru length(m2) do m2[k] : (m2[k]-av) / dev,
206 else error("standardize: unknown data format") $
209 /* Returns a new matrix with transformed, repeated, reordered, or removed columns */
210 transform_sample(m, varnames, expressions) :=
213 then (tm: args(transpose(m)),
214 transpose(apply('matrix, map('ev, psubst(map("=",varnames,tm), expressions)))))
215 else error("transform_sample: argument must be a matrix")) $
218 /* Builds a sample from a table of absolute frequencies. */
219 /* The input table can be a matrix or a list of lists, all of */
220 /* them of equal size. The number of columns or the length of */
221 /* the lists must be greater than 1. The last element of each */
222 /* row or list is interpreted as the absolute frequency. */
223 /* The output is always a sample in matrix form */
228 elseif not listsofequalsize(tbl)
229 then error("Table frequency has not the correct format"),
233 block([butlast: reverse(rest(reverse(a)))],
234 makelist(butlast,i,1,last(a)))),tbl)))) $
241 /* FREQUENCY COUNTERS */
244 /* Divides the range in intervals and counts how many values */
245 /* are inside them. The second argument is optional and either */
246 /* equals the number of classes we want; 10 by default, OR */
247 /* equals a list containing the class limits and the number of */
248 /* classes we want, OR a list containing only the limits. */
249 /* If sample values are all equal, this function returns only */
250 /* one class of amplitude 2 */
251 continuous_freq (lis, [opt]) :=
253 then apply (continuous_freq_array, cons (fillarray (make_array (any, length (lis)), lis), opt))
255 then apply (continuous_freq_array, cons (lis, opt))
256 else error ("continuous_freq: argument must be a list or Lisp array; found: ", lis);
258 continuous_freq_array (lis,[opt]):=block([nc,mini,maxi,lim,amp,fr,ult,n,k,index,bins],
260 then [[minf, inf], [0]]
261 elseif length(opt) > 0 and setp(opt[1])
262 then /* arbitrary bins */
263 ( lim: sort(listify(opt[1])),
267 then error("continuous_freq: at least two bin numbers are needed.")
268 else ( bins: makelist([lim[i], lim[i + 1]], i, 1, length(lim) - 1),
269 fr: count_array_by_bins (lis, bins),
271 else /* bins of equal amplitude */
273 then /* either a list comprising min, max, and number of classes,
274 * or a list comprising just min and max,
275 * or a number of classes
277 (if length(opt) = 1 and listp(opt[1])
278 then (if length(opt[1]) = 2 or length(opt[1]) = 3
279 then ([mini, maxi]: [opt[1][1], opt[1][2]],
280 if length(opt[1]) = 3
283 elseif length(opt) = 1 and integerp(opt[1])
284 then ([mini, maxi]: vector_min_max (lis),
287 error ("continuous_freq: unrecognized optional argument:", opt))
288 else /* default classes */
289 ([mini, maxi] : vector_min_max (lis),
294 then [[mini-1,maxi+1],[?length(lis)]]
295 else ( for i:1 thru nc do lim:endcons(mini+amp*i,lim),
296 bins : makelist ([lim[i], lim[i + 1]], i, 1, length(lim) - 1),
297 fr : count_array_by_bins (lis, bins),
300 count_by_bins (xx, bins) :=
302 then count_array_by_bins (fillarray (make_array (any, length (xx)), xx), bins)
304 then count_array_by_bins (xx, bins);
306 count_array_by_bins (xx, bins) := block ([counts : makelist (0, length (bins))],
307 xx : ?sort (?copy\-seq (xx), 'orderlessp),
308 for k thru length (bins)
309 do block ([i_first : find_index_first (xx, bins[k][1],
310 if k > 1 and bins[k][1] = bins[k - 1][2] then ">" else ">="),
311 i_last : find_index_last (xx, bins[k][2], "<=")],
312 counts[k] : if i_last = false or i_first = false then 0 else i_last - i_first + 1),
315 /* assume xx is a sorted Lisp array; find least i s.t. xx[i] > x or xx[i] >= x */
317 find_index_first (xx, x, comparison) := block ([n : ?length (xx)],
318 if comparison(xx[n - 1], x)
319 then if comparison(xx[0], x)
321 else find_index_first_1 (xx, x, 0, n - 1, comparison));
323 find_index_first_1 (xx, x, i0, i1, comparison) :=
324 if i1 - i0 <= 1 then i1
326 block ([i : floor (i0 + (i1 - i0) / 2)],
327 if comparison(xx[i], x)
328 then find_index_first_1 (xx, x, i0, i, comparison)
329 else find_index_first_1 (xx, x, i, i1, comparison));
331 /* assume xx is a sorted Lisp array; find greatest i s.t. xx[i] < x or xx[i] <= x */
333 find_index_last (xx, x, comparison) := block ([n : ?length (xx)],
334 if comparison(xx[0], x)
335 then if comparison(xx[n - 1], x)
337 else find_index_last_1 (xx, x, 0, n - 1, comparison));
339 find_index_last_1 (xx, x, i0, i1, comparison) :=
340 if i1 - i0 <= 1 then i0
342 block ([i : floor (i0 + (i1 - i0) / 2)],
343 if comparison(xx[i], x)
344 then find_index_last_1 (xx, x, i, i1, comparison)
345 else find_index_last_1 (xx, x, i0, i, comparison));
348 /* Counts the frequency of each element in 'lis', its elements */
349 /* can be numbers, Maxima expressions or strings. */
353 then discrete_freq_array (fillarray (make_array (any, length (l)), l))
355 then discrete_freq_array (l)
356 else error ("discrete_freq: argument must be a list or Lisp array; found: ", l);
358 discrete_freq_array (a) := block ([u],
359 a : ?sort (?copy\-seq (a), 'orderlessp),
360 u : unique_in_sorted_array (a),
361 [u, map (lambda ([u1], find_index_last (a, u1, lambda ([x,y], not ordergreatp(x, y)))
362 - find_index_first (a, u1, lambda([x,y], not orderlessp(x, y)))
366 /* UNIVARIATE DESCRIPTIVE STATISTICS */
369 /* Arithmetic mean */
370 mean(x) := block([listarith:true],
371 if listofexpr(x) or matrixp(x)
372 then apply("+", args(x)) / length(x)
373 else error("Input to 'mean' must be a list of expressions or a matrix"))$
379 then (t:transpose(x),
380 makelist(lmin(t[i]),i,1,length(t)))
381 else if listofexpr(x)
383 else error("Input to 'smin' must be a list of expressions or a matrix"))$
389 then (t:transpose(x),
390 makelist(lmax(t[i]),i,1,length(t)))
391 else if listofexpr(x)
393 else error("Input to 'smax' must be a list of expressions or a matrix"))$
396 /* mini and maxi are maintained for backward compatibility,
397 but removed from documentation. */
404 then (t:transpose(x),
405 makelist(range(t[i]),i,1,length(t)))
406 else if listofexpr(x)
407 then lmax(x) - lmin(x)
408 else error("Input to 'range' must be a list of expressions or a matrix"))$
411 /* Non central moment of order m */
412 noncentral_moment(x,m) := block([listarith:true],
413 if listofexpr(x) or matrixp(x)
414 then apply("+", args(x)^m) / length(x)
415 else error("Input to 'noncentral_moment' must be a list of expressions or a matrix"))$
418 /* Central moment of order m */
419 central_moment(x,m):=block([n:length(x),listarith:true,me:mean(x)],
421 then apply("+",makelist((x[i]-me)^m,i,1,n)) / n
422 else if listofexpr(x)
423 then apply("+",(x-me)^m) / n
424 else error("Input to 'central_moment' must be a list of expressions or a matrix"))$
427 /* Maximum likelihood estimator of variance */
428 var(x):=central_moment(x,2)$
431 /* Standard deviation as the root square of var */
432 std(x):=block([listarith:true],sqrt(var(x)))$
435 /* Unbiased estimator of variance (divided by n-1) */
436 var1(x):=block([n:length(x),listarith:true],var(x)*n/(n-1))$
439 /* Standard deviation as the root square of var1 */
440 std1(x):=block([listarith:true],sqrt(var1(x)))$
444 median(x):=block([n,s,t],
450 else (s[n/2] + s[n/2 + 1]) / 2 )
452 then (t:transpose(x),
453 makelist(median(t[i]),i,1,length(t)))
454 else error("Input to 'median' must be a list of expressions or a matrix"))$
457 /* p-quantile, with 0<=p<=1. Linear interpolation */
458 quantile(x,p):=block([n,s,pos,int,dif,t],
459 if numberp(p) and p>=0 and p<=1
460 then if listofexpr(x)
468 else (1-dif)*s[int]+dif*s[int+1])
470 then (t:transpose(x),
471 makelist(quantile(t[i],p),i,1,length(t)))
472 else error("First argument of 'quantile' must be a list of expressions or a matrix")
473 else error("Second argument of 'quantile' must be a probability") )$
476 /* Interquartilic range */
477 qrange(x):=block([t],
479 then (t:transpose(x),
480 makelist(qrange(t[i]),i,1,length(t)))
481 else if listofexpr(x)
482 then quantile(x,3/4)-quantile(x,1/4)
483 else error("Input to 'qrange' must be a list of expressions or a matrix"))$
486 /* Skewness coefficient */
487 skewness(x):=block([listarith:true],central_moment(x,3)/std(x)^3)$
490 /* Kurtosis coefficient, sometimes called kurtosis excess (see the -3) */
491 kurtosis(x):=block([listarith:true],central_moment(x,4)/var(x)^2 - 3)$
495 harmonic_mean(x):=block([listarith:true],
496 if listofexpr(x) or matrixp(x)
497 then length(x) / apply("+", 1/args(x))
498 else error("Input to 'harmonic_mean' must be a list of expressions or a matrix"))$
502 geometric_mean(x):=block([listarith:true],
503 if listofexpr(x) or matrixp(x)
504 then apply("*", args(x))^(1/length(x))
505 else error("Input to 'geometric_mean' must be a list of expressions or a matrix"))$
508 /* Variation coefficient */
509 cv(x):=block([listarith:true], std(x) / mean(x))$
513 mean_deviation(x):=block([t,listarith:true],
515 then (t:transpose(x),
516 makelist(mean(abs(t[i]-mean(t[i]))),i,1,length(t)))
517 else if listofexpr(x)
518 then mean(abs(x-mean(x)))
519 else error("Input to 'mean_deviation' must be a list of expressions or a matrix"))$
522 /* Median deviation */
523 median_deviation(x):=block([t,listarith:true],
525 then (t:transpose(x),
526 makelist(median(abs(t[i]-median(t[i]))),i,1,length(t)))
527 else if listofexpr(x)
528 then median(abs(x-median(x)))
529 else error("Input to 'median_deviation' must be a list of expressions or a matrix"))$
532 /* Pearson's skewness */
533 pearson_skewness(x):=block([t,listarith:true],
535 then (t:transpose(x),
536 3*makelist((mean(t[i])-median(t[i]))/std1(t[i]),i,1,length(t)))
537 else if listofexpr(x)
538 then 3*(mean(x)-median(x))/std1(x)
539 else error("Input to 'pearson_skewness' must be a list of expressions or a matrix"))$
542 /* Quartile skewness */
543 quartile_skewness(x):=block([q1,q2,q3,t],
545 then (t:transpose(x),
546 makelist(quartile_skewness(t[i]),i,1,length(t)))
547 else if listofexpr(x)
548 then (q1:quantile(x,1/4),
551 (q3-2*q2+q1)/(q3-q1))
552 else error("Input to 'quartile_skewness' must be a list of expressions or a matrix"))$
555 /* Kaplan-Meier estimator of the survival function S(x) = 1-F(x). */
556 /* First argument x is a list of pairs or a two column matrix. The first */
557 /* component is the observed time, and the second component a censoring */
558 /* index (1=non censored, 0=right censored). */
559 /* The second optional argument of function km is the name of the variable, */
560 /* which is x by default. Example calls are: */
561 /* km([[2,1], [3,1], [5,0], [8,1]]); */
562 /* km(matrix([2,1], [3,1], [5,0], [8,1]), 't); */
564 block([s,ss,t,st,n,d,c,i:2,idx:1,hatS,vn],
565 if length(var) > 0 and atom(var[1])
572 t: subsample(s,lambda([z], second(z)=1),1),
574 then /* only censored data */
576 t: append([0],listify(setify(first(transpose(t))))),
583 (n[i]: n[i-1] - d[i-1] - c[i-1],
584 while idx <= ss and s[idx][1] <= t[i] do
587 elseif s[idx][1] < t[i]
594 hatS[k]: hatS[k-1] * hatS[k],
596 sum(charfun(t[i-1] <= vn and vn < t[i]) * hatS[i-1], i, 2, st) +
597 charfun(t[st] <= vn) * hatS[st] ) $
600 /* Empirical distribution function F(x). */
601 /* First argument x is a list of numbers or a one column matrix. */
602 /* The second optional argument is the name of the variable, x by default. */
603 /* Example calls are: */
604 /* cdf_empirical([1,3,3,5,7,7,7,8,9]); */
605 /* cdf_empirical(transpose(matrix([1,3,3,5,7,7,7,8,9])), 'u); */
606 cdf_empirical(x,[var]):=
607 block([vn,tb,hatF:0,actual,i:1],
608 if length(var) > 0 and atom(var[1])
612 then if length(x[1])=1
613 then x: sort(args(transpose(x)))
614 else error("cdf_empirical: input matrix must have only one column")
616 tb: discrete_freq(x),
617 apply("+", map(lambda([z1,z2], z2*charfun(vn >= z1)),first(tb),second(tb)))/length(x)) $
625 /* MULTIVARIATE DESCRIPTIVE STATISTICS */
628 /* Covariance matrix */
629 cov(x):=block([n:length(x),dim,m,xi,sum],
631 then error("cov: the argument is not a matrix")
632 else (m:matrix(mean(x)),
634 sum:zeromatrix(dim,dim),
637 sum:sum+transpose(xi).xi),
638 sum/n - transpose(m).m) )$
641 /* Covariance matrix (divided by n-1). The argument x must be a matrix */
642 cov1(x):=block([n:length(x)], cov(x)*n/(n-1))$
645 /* A list of global variation measures. The argument x must be a matrix */
646 /* 1) total variance */
647 /* 2) mean variance */
648 /* 3) generalized variance */
649 /* 4) generalized standard deviation */
650 /* 5) effective variance */
651 /* 6) effective standard deviation */
652 /* Admits the following options: */
653 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' */
654 /* must be computed; if false, x is the covariance matrix and 'cov1' */
655 /* is not recalculated. */
656 global_variances(x,[select]):=
657 block([s,p,aux,options,defaults,out:[]],
659 /* this is for backcompatibility */
660 if length(select)=1 and member(select[1], [true,false])
661 then select: ['data=select[1]],
663 /* check user options */
667 aux: ?position(lhs(i),options),
668 if numberp(aux) and aux <= length(options) and aux >= 1
669 then defaults[aux]: rhs(i)),
672 then(if defaults[1] /* does the matrix contain sample records? */
675 p:length(s), /* dimension */
677 out:cons(aux,out), /* total variance */
678 out:endcons(aux/p,out), /* mean variance */
680 out:endcons(aux,out), /* generalized variance */
681 out:endcons(sqrt(aux),out), /* generalized standard deviation */
683 out:endcons(aux,out), /* effective variance */
684 out:endcons(sqrt(aux),out), /* effective standard deviation */
686 else error("global_variances: the argument is not a matrix") )$
689 /* Correlation matrix. The argument x must be a matrix. */
690 /* cor(x,false)==> x is the covariance matrix and 'cov1' */
691 /* is not recalculated */
692 /* Admits the following options: */
693 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' */
694 /* must be computed; if false, x is the covariance matrix and 'cov1' */
695 /* is not recalculated. */
697 block([m,s,d,defaults,options],
699 /* this is for backcompatibility */
700 if length(select)=1 and member(select[1], [true,false])
701 then select: ['data=select[1]],
703 /* check user options */
707 aux: ?position(lhs(i),options),
708 if numberp(aux) and aux <= length(options) and aux >= 1
709 then defaults[aux]: rhs(i)),
712 then(if defaults[1] /* does the matrix contain sample records? */
716 d:sqrt(sum(ematrix(m,m,1/s[i,i],i,i),i,1,m)),
718 else error("cor: the argument is not a matrix") )$
721 /* Returns a list of dependence measures. The argument x must be a matrix. */
722 /* 1) precision matrix */
723 /* 2) multiple correlation */
724 /* 3) partial correlation */
725 /* Admits the following options: */
726 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' */
727 /* must be computed; if false, x is the covariance matrix and 'cov1' */
728 /* is not recalculated. */
729 list_correlations(x,[select]):=
730 block([s,p,s1,d,options,defaults,out:[],listarith:true],
732 /* this is for backcompatibility */
733 if length(select)=1 and member(select[1], [true,false])
734 then select: ['data=select[1]],
736 /* check user options */
740 aux: ?position(lhs(i),options),
741 if numberp(aux) and aux <= length(options) and aux >= 1
742 then defaults[aux]: rhs(i)),
745 then(if defaults[1] /* does the matrix contain sample records? */
748 p:length(s), /* dimension */
751 for i:1 thru p do d[i,i]:1/sqrt(s1[i,i]),
752 [s1, /* precision matrix */
753 1-1/makelist(s[i,i]*s1[i,i],i,1,p), /* mult. corr. */
754 -d.s1.d] /* part. corr. */ )
755 else error("list_correlations: the argument is not a matrix"))$
758 /* Principal components analysis for multivariate data. */
759 /* The argument x must be a matrix. This function returns a list containing: */
760 /* 1) Variances of the principal components in descending order */
761 /* 2) Proportions (%) of total variance explained by principal components */
762 /* 3) Matrix of coefficients for principal components */
763 /* Admits the following options: */
764 /* 'data='true: x stores sampled data and the covariance matrix 'cov1' must be */
765 /* computed; if false, x is the covariance matrix and 'cov1' is not */
767 principal_components(x,[select]):=
768 block([options,defaults,aux,s,p,val,vec,ord,percents,listarith:true],
770 /* check user options */
774 aux: ?position(lhs(i),options),
775 if numberp(aux) and aux <= length(options) and aux >= 1
776 then defaults[aux]: rhs(i)),
778 /* go ahead with calculations */
780 then(if defaults[1] /* does the matrix contain sample records? */
783 p:length(s), /* sample dimension */
784 [val,vec]: eigens_by_jacobi(s),
785 ord: sort(makelist([val[k], col(vec,k)], k, p), lambda([u,v], first(u)>=first(v))),
786 val: map(first, ord),
787 vec: apply(addcol, map(second, ord)),
788 percents: 100 * val / apply("+", val),
789 [val, percents, vec])
790 else error("principal_components: the argument is not a matrix"))$
797 /* PLOTTING FUNCTIONS */
801 block([obase : 16, col : "#"],
804 block([sz : concat(random(16))],
805 substring(sz, slength(sz)))),
809 extract_options(s,[mo]):=
810 block([ss:[],mmo:[]],
812 if member(lhs(k), mo)
813 then mmo: endcons(k,mmo)
814 else ss : endcons(k,ss),
818 /* Plots scatter diagrams. */
819 scatterplot_description(m,[select]):=
821 [select, localopts]: extract_options(select,'nclasses,'htics,'frequency),
822 if listofnumbersp(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
823 then (/* m is a list of numbers or a column or a row matrix */
827 else m: transpose(m)[1],
828 gr2d(select, points(makelist([x,0],x,m))))
829 elseif listp(m) and every('identity,map(lambda([z],listofnumbersp(z) and length(z)=2),m)) or
830 matrixp(m) and length(m[1])=2 then
831 /* m is a two-dimensional sample */
832 gr2d(select, points(args(m)))
833 elseif matrixp(m) and length(m[1])>2 then
834 /* m is an d-dimensional (d>2) sample */
835 block([n: length(m[1]), gr],
841 then gr2d(apply('histogram_description,
842 append([col(m,i)],select,localopts)))
843 else apply('scatterplot_description,
844 append([subsample(m,lambda([v],true),i,j)],select)) ,
847 else error("sorry, can't plot the scatter diagram for these data")) $
848 scatterplot([desc]) := draw(apply('scatterplot_description, desc)) $
849 wxscatterplot([desc]) := wxdraw(apply('scatterplot_description, desc)) $
853 /* Histograms. Argument 'm' must be a list, a one column matrix or a */
854 /* one row matrix. */
856 /* Specific options (defaults in parentheses): */
857 /* - nclasses (10): number of classes, a positive integer, or */
858 /* a set of bin numbers. Also, it can be given */
859 /* the name of one of the three optimal algo- */
860 /* rithms available: 'fd, 'scott, or 'sturges. */
861 /* - frequency (absolute): 'absolute', 'relative', 'density' or */
863 /* - htics (auto): 'auto, 'endpoints, 'intervals, list of labels */
865 /* Draw options affecting this object: */
867 /* - color (used for labels) */
871 /* histogram modifies the following options: */
875 histogram_description(m,[select]):=
876 block([fr,amp,scen,before,localopts,num_classes,v,lbels,
877 /* specific options */
878 my_nclasses:10, my_frequency:'absolute, my_htics:'auto],
879 [before, localopts]: extract_options(select,'nclasses,'frequency,'htics),
881 for k in localopts do
882 if lhs(k) = 'nclasses
883 then my_nclasses: rhs(k)
884 elseif lhs(k) = 'frequency
885 then my_frequency: rhs(k)
886 elseif lhs(k) = 'htics
887 then my_htics: rhs(k),
889 if listp(my_nclasses)
890 then ( v: length(my_nclasses),
894 then num_classes: third(my_nclasses)
895 else error("histogram: the second argument is not a list of two or three elements") )
896 elseif setp(my_nclasses)
897 then ( num_classes: length(my_nclasses)-1,
899 then error("histogram: at least two bin numbers are needed"))
900 elseif integerp(my_nclasses) and my_nclasses < 1
901 then error("histogram: number of classes must be an integer greater than two")
902 elseif integerp(my_nclasses) and my_nclasses > 0
903 then num_classes: my_nclasses
904 elseif not member(my_nclasses, ['fd, 'scott, 'sturges])
905 /* for optimal cases, computation of num_classes is delayed */
906 then error("histogram: incorrect format for option nclasses"),
907 if not member(my_frequency, ['absolute, 'relative, 'percent, 'density])
908 then error("histogram: frenquency must be either absolute, relative, density or percent"),
909 if not member(my_htics, ['auto, 'endpoints, 'intervals]) and
911 then error("histogram: incorrect format in option htics"),
913 /* if m is a list of numbers or a column or a row matrix then */
914 /* plots a simple histogram. */
915 if listofnumbersp(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
916 then (/* transform input data into a list */
920 else m: transpose(m)[1],
922 /* frequency table */
923 /* first, calculate num_clases for the optimal cases */
924 if my_nclasses = 'fd /* Robust Freedman - Diaconis method */
927 then h: 2 * median(abs(m-median(m))), /* twice median absolute deviation */
929 then my_nclasses: num_classes: ceiling(range(m) / (2 * h * length(m)^(-1/3)))
930 else my_nclasses: num_classes: 1 ),
931 if my_nclasses = 'scott /* Scott's method to be applied under Gaussian assumptions */
932 then ( if length(m) = 1
933 then error("histogram: with Scott's method, sample size must be greater than 1"),
934 h: 3.5 * std1(m) * length(m)^(-1/3),
936 then my_nclasses: num_classes: ceiling(range(m) / h)
937 else my_nclasses: num_classes: 1 ),
938 if my_nclasses = 'sturges /* Sturges' method to be applied under Gaussian assumptions and n < 200 */
939 then my_nclasses: num_classes:
940 ceiling(1 + log(length(m)) / 0.6931471805599453), /* <-- log(2) */
941 fr: float(continuous_freq(m,my_nclasses)),
942 if member(my_frequency, ['relative, 'percent])
943 then fr: [first(fr), second(fr) / apply("+", second(fr))],
944 if my_frequency = 'percent
945 then fr[2]: fr[2] * 100.0,
946 amp: makelist(fr[1][i+1]-fr[1][i], i, 1, length(fr[1])-1),
947 if my_frequency = 'density
948 then fr: [first(fr), second(fr) / apply("+", second(fr) * amp)],
953 elseif my_htics = 'endpoints
954 then my_htics: xtics = setify(fr[1])
955 elseif my_htics = 'intervals
956 then ( lbels: [concat("[",fr[1][1],",",fr[1][2],"]")],
957 lbels: append(lbels, makelist(concat("(",fr[1][k],",",fr[1][k+1],"]"),k,2,num_classes)),
958 lbels: makelist([lbels[k],fr[1][k]+amp[k]/2],k,1,num_classes),
959 my_htics: xtics = makeset([a,b],[a,b],lbels) )
960 elseif listp(my_htics)
961 then ( if length(my_htics) < num_classes
962 then my_htics: append(my_htics, makelist("",k,length(my_htics)+1,num_classes)),
963 lbels: makelist([my_htics[k],fr[1][k]+amp[k]/2],k,1,num_classes),
964 my_htics: xtics = makeset([a,b],[a,b],lbels) ),
967 'xrange = [first(fr[1]), last(fr[1])] + [-1,+1] * mean(amp)/2,
968 'yrange = lmax(fr[2])*[-0.05, 1.05],
971 makelist([(fr[1][k]+fr[1][k+1])/2, fr[2][k], amp[k]],k,1,length(fr[1])-1))])
972 else error("histogram: can't plot the histogram for these data") )$
973 histogram([desc]) := draw2d(apply(histogram_description, desc)) $
974 wxhistogram([desc]) := wxdraw2d(apply(histogram_description, desc)) $
978 /* Plots bar charts for discrete or categorical data, both for one or more */
979 /* samples. This function admits an arbitrary number of arguments. */
980 /* The first arguments are lists or matrices with sample data. The rest of */
981 /* arguments are specific options in equation format (option = value). */
983 /* Specific options: */
984 /* - box_width (3/4): a number between zero and 1. */
985 /* - grouping (clustered): or 'stacked'. */
986 /* - groups_gap (1): distance between clusters, must be a positive */
988 /* - bars_colors ([]): list of colors for bars. If the list is shorter */
989 /* than the number of samples, colors are generated randomly. */
990 /* - frequency (absolute): 'absolute', 'relative' or 'percent'. */
991 /* - ordering (orderlessp): 'orderlessp' or 'ordergreatp'. */
992 /* - sample_keys ([]): entries for legend. */
993 /* - start_at (0): starting point on the x axis. */
995 /* Draw options affecting this object: */
997 /* - color (used for labels) */
1001 /* barsplot modifies the following options: */
1003 barsplot_description([args]):=
1004 block([lastsample: 0, nargs: length(args), freqs: [], samplespace,
1005 sspacesize, nsamples, totalgap, expr, before, localopts,
1006 /* specific options */
1007 my_box_width: 3/4, my_groups_gap: 1, my_frequency: 'absolute,
1008 my_ordering: 'orderlessp, my_bars_colors: [], my_sample_keys: [],
1009 my_grouping: 'clustered, my_start_at: 0 ],
1011 /* looks for data */
1012 for i:1 thru nargs while (listp(args[i]) or matrixp(args[i])) do
1013 lastsample: lastsample + 1,
1015 [before, localopts]:
1017 makelist(args[k],k,lastsample+1,length(args)),
1018 'box_width,'grouping,'groups_gap,'bars_colors,'frequency,'ordering,'sample_keys,'start_at),
1020 for k in localopts do
1021 if lhs(k) = 'box_width
1022 then my_box_width: float(rhs(k))
1023 elseif lhs(k) = 'grouping
1024 then my_grouping: rhs(k)
1025 elseif lhs(k) = 'groups_gap
1026 then my_groups_gap: rhs(k)
1027 elseif lhs(k) = 'bars_colors
1028 then my_bars_colors: rhs(k)
1029 elseif lhs(k) = 'frequency
1030 then my_frequency: rhs(k)
1031 elseif lhs(k) = 'ordering
1032 then my_ordering: rhs(k)
1033 elseif lhs(k) = 'sample_keys
1034 then my_sample_keys: rhs(k)
1035 elseif lhs(k) = 'start_at
1036 then my_start_at: float(rhs(k)),
1038 if my_box_width > 1 or my_box_width < 0
1039 then error("barsplot: illegal value for box_width"),
1040 if not member(my_grouping, ['clustered, 'stacked])
1041 then error("barsplot: unrecognized grouping style"),
1042 if not integerp(my_groups_gap) or my_groups_gap < 1
1043 then error("barsplot: illegal value for groups_gap"),
1044 if not listp(my_bars_colors)
1045 then error("barsplot: illegal value for bars_colors"),
1046 if not member(my_frequency, ['absolute, 'relative, 'percent])
1047 then error("barsplot: frenquency must be either absolute, relative or percent"),
1048 if not member(my_ordering, [orderlessp, ordergreatp])
1049 then error("barsplot: illegal value for ordering"),
1050 if not (my_sample_keys = []
1051 or listp(my_sample_keys) and every('stringp, my_sample_keys))
1052 then error("barsplot: illegal value for sample_keys"),
1053 if not numberp(my_start_at)
1054 then error("barsplot: non numeric value for start_at option"),
1056 /* get absolute frequencies */
1057 for k: 1 thru lastsample do (
1059 then freqs: endcons(discrete_freq(args[k]), freqs)
1060 elseif matrixp(args[k])
1061 then for c in args(transpose(args[k])) do
1062 freqs: endcons(discrete_freq(c), freqs)
1063 else error("barsplot: unknown data format")),
1065 /* transform freqs into a more suitable form */
1066 samplespace: sort(listify(setify(apply('append,map('first,freqs)))), my_ordering),
1067 sspacesize: length(samplespace),
1068 nsamples: length(freqs),
1069 if my_sample_keys = []
1070 then my_sample_keys : makelist("",k,1,nsamples),
1071 if nsamples # length(my_sample_keys)
1072 then error("barsplot: incorrect number of elements in sample_keys"),
1075 block([pos: ?position(k,first(i))],
1078 else second(i)[pos]),
1082 /* transform to relative frequencies, if necessary */
1083 if member(my_frequency, ['relative, 'percent])
1084 then block([samplesizes: apply("+",freqs)],
1085 freqs: map(lambda([z], z/samplesizes), freqs)),
1086 if my_frequency = 'percent
1087 then freqs: 100.0 * freqs,
1089 /* complete my_bars_colors with random colors, if necessary */
1090 if nsamples > length(my_bars_colors)
1091 then my_bars_colors: append(my_bars_colors,
1092 makelist(random_color(),k,length(my_bars_colors)+1,nsamples)),
1093 if my_grouping = 'clustered
1094 then ( /* clustered bars */
1095 totalgap: nsamples + my_groups_gap,
1098 [points([[my_start_at,0]]), xtics = 'none],
1100 ['fill_color = my_bars_colors[m],
1101 'key = my_sample_keys[m],
1104 [my_start_at+(k-1)*totalgap + m, freqs[k][m], my_box_width],
1110 [string(samplespace[k]),
1111 my_start_at+(k-1)*totalgap + (nsamples+1)/2,
1113 k, 1, sspacesize))],
1115 else ( /* stacked bars */
1116 totalgap: my_groups_gap,
1117 freqs: map(lambda([z], reverse(makelist(block([s:0], for i:1 thru k do s: s+z[i], s),k,1,length(z)))), freqs),
1120 [points([[my_start_at,0]]), xtics = 'none],
1122 ['fill_color = my_bars_colors[m],
1123 'key = my_sample_keys[m],
1126 [my_start_at+(k-1)*totalgap, freqs[k][m], my_box_width],
1132 [string(samplespace[k]),
1133 my_start_at+(k-1)*totalgap,
1135 k, 1, sspacesize))],
1137 barsplot([desc]) := draw2d(apply(barsplot_description, desc)) $
1138 wxbarsplot([desc]) := wxdraw2d(apply(barsplot_description, desc)) $
1142 /* Plots pie charts for discrete or categorical data. Argument 'm' must be a */
1143 /* list, a one column matrix or a one row matrix. The rest of arguments are */
1144 /* specific options in equation format (option = value). */
1146 /* Specific options: */
1147 /* - sector_colors ([]): list of colors for sectors. If the list is */
1148 /* shorter than the number of sectors, colors are generated randomly. */
1149 /* - pie_center ([0,0]): a pair of numbers */
1150 /* - pie_radius (1): a positive number. */
1152 /* Draw options affecting this object: */
1154 /* - color (used for labels) */
1155 /* - fill_density */
1157 /* This object modifies the following options: */
1159 piechart_description(m,[select]):=
1160 block([fr,tot,degrees,ini,end:0,alpha,hexcolor,conver:float(%pi/180),
1161 sectors,before,localopts,
1162 /* specific options */
1163 my_sector_colors:[],my_pie_center:[0,0],my_pie_radius:1],
1164 [before, localopts]: extract_options(select,'sector_colors,'pie_center,'pie_radius),
1166 for k in localopts do
1167 if lhs(k) = 'sector_colors
1168 then my_sector_colors: rhs(k)
1169 elseif lhs(k) = 'pie_center
1170 then my_pie_center: float(rhs(k))
1171 elseif lhs(k) = 'pie_radius
1172 then my_pie_radius: float(rhs(k)),
1174 if not listp(my_sector_colors)
1175 then error("piechart: illegal value for sector_colors"),
1176 if not numberp(my_pie_radius) or my_pie_radius <= 0
1177 then error("piechart: radius must be greater than zero"),
1178 if not listp(my_pie_center) or length(my_pie_center) # 2 or
1179 not every(numberp, my_pie_center)
1180 then error("piechart: center must be a list of numbers"),
1182 if listofexpr(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
1183 then (/* transform input data into a list */
1187 else m: transpose(m)[1],
1189 /* frequency table */
1190 fr: discrete_freq(m),
1192 degrees: 360.0 * fr[2] / apply("+", fr[2]),
1194 /* complete my_sector_colors with random colors, if necessary */
1195 if tot > length(my_sector_colors)
1196 then my_sector_colors:
1197 append(my_sector_colors,
1198 makelist(random_color(),k,length(my_sector_colors)+1,tot)),
1200 /* build the object */
1203 end: ini + degrees[i],
1204 alpha: ini+degrees[i]/2.0,
1205 hexcolor: random_color(),
1206 [ 'color = my_sector_colors[i],
1207 'fill_color = my_sector_colors[i],
1208 'key = string(fr[1][i]),
1209 ellipse(my_pie_center[1],my_pie_center[2],
1210 my_pie_radius,my_pie_radius,
1213 else error("piechart: can't plot the piechart for these data") )$
1214 piechart([desc]) := draw2d(apply(piechart_description, desc)) $
1215 wxpiechart([desc]) := wxdraw2d(apply(piechart_description, desc)) $
1219 /* Plots box-whisker diagrams. Argument 'm' must be a list of numbers or a matrix. */
1220 /* The second and consecutive arguments are specific options. */
1222 /* Specific options: */
1223 /* - box_width (3/4): widths for boxes */
1224 /* - box_orientation (vertical): 'vertical' or 'horizontal' */
1225 /* - range (inf): sets outliers boundaries */
1226 /* - outliers_size (1): outliers size */
1228 /* Draw options affecting this object: */
1232 /* This object modifies the following options: */
1233 /* - points_joined */
1240 boxplot_description(m,[select]):=
1241 block([fr,tot,top:0,bot:inf,before,localopts,p,out:[],
1242 /* specific options*/
1243 my_box_width:3/4, my_box_orientation:'vertical, my_range:'inf, my_outliers_size:1],
1244 [before, localopts]: extract_options(select,'box_width,'box_orientation,'range,'outliers_size),
1246 for k in localopts do
1247 if lhs(k) = 'box_width
1248 then my_box_width: float(rhs(k))
1249 elseif lhs(k) = 'box_orientation
1250 then my_box_orientation: rhs(k)
1251 elseif lhs(k) = 'range
1252 then my_range: float(rhs(k))
1253 elseif lhs(k) = 'outliers_size
1254 then my_outliers_size: float(rhs(k)),
1256 if not floatp(my_box_width) or my_box_width < 0 or my_box_width > 1
1257 then error("boxplot: illegal value for option box_width"),
1258 if not member(my_box_orientation, ['vertical, 'horizontal])
1259 then error("boxplot: illegal value for option box_orientation"),
1260 if my_range # 'inf and (not floatp(my_range) or my_range < 0)
1261 then error("boxplot: illegal value for option range"),
1262 if not floatp(my_outliers_size) or my_outliers_size < 0
1263 then error("boxplot: illegal outliers point size"),
1265 /* if m is not a row matrix, transpose it */
1266 if matrixp(m) and length(m)>1
1267 then m: transpose(m),
1268 /* if m is a list of numbers, transform it */
1269 /* to the form [[n1,n2,....]] */
1270 if listofnumbersp(m) then m: [m],
1273 if listoflistsp(m) or matrixp(m)
1277 ['points_joined = true,
1279 'point_type = 'dot],
1285 q1: float(quantile(m[x],0.25)),
1286 q2: float(quantile(m[x],0.5)),
1287 q3: float(quantile(m[x],0.75)),
1288 w: float(boxplot_width),
1291 A,B,C,D,E,F,G,H,I,J,K,L,M,N],
1292 top: max(top, maxi),
1293 bot: min(bot, mini),
1295 then /* ignore possible outliers */
1296 [mi, ma]: float([mini, maxi])
1297 else /* calculate whisker positions */
1299 lowlim: q1-my_range*(q3-q1),
1300 upplim: q3+my_range*(q3-q1)],
1302 if k < lowlim or k > upplim
1303 then /* accumulate new outliers to global variable out */
1304 out: cons([x,k], out)
1305 else in: cons(k, in),
1308 A: [x-w2,q1], B: [x-w2,q3], C: [x+w2,q3],
1309 D: [x+w2,q1], E: [x-w2,q2], F: [x+w2,q2],
1310 G: [x,q3], H: [x,ma], I: [x-w4,ma],
1311 J: [x+w4,ma], K: [x,q1], L: [x,mi],
1312 M: [x-w4,mi], N: [x+w4,mi],
1313 p: [A,B,C,D,E,F,G,H,I,J,K,L,M,N],
1314 if my_box_orientation = 'horizontal
1315 then p: map(reverse,p),
1316 [ points([p[1],p[2],p[3],p[4],p[1]]),
1317 points([p[5],p[6]]),
1318 points([p[7],p[8]]),
1319 points([p[9],p[10]]),
1320 points([p[11],p[12]]),
1321 points([p[13],p[14]]) ]),
1324 /* plot all outliers as isolated points */
1326 then (if my_box_orientation = 'horizontal
1327 then out: map(reverse,out),
1328 ['points_joined = false,
1329 'point_size = my_outliers_size,
1330 'point_type = 'circle,
1334 if my_box_orientation = 'vertical
1335 then [ 'xtics = setify(makelist(k,k,1,tot)),
1336 'xrange = (tot-1)*0.05*[-1,+1.5]+[0.5,tot+0.5],
1337 'yrange = (top-bot)*0.05*[-1,+1]+[bot,top] ]
1338 else [ 'ytics = setify(makelist(k,k,1,tot)),
1339 'yrange = (tot-1)*0.05*[-1,+1.5]+[0.5,tot+0.5],
1340 'xrange = (top-bot)*0.05*[-1,+1]+[bot,top] ] ])
1341 else error("sorry, can't plot the box-whisker plot for these data"))$
1343 boxplot([desc]) := draw2d(apply(boxplot_description, desc)) $
1344 wxboxplot([desc]) := wxdraw2d(apply(boxplot_description, desc)) $
1348 /* Plots stem and leaf diagrams. Argument 'm' must be a list, a one */
1349 /* column matrix or a one row matrix. */
1351 /* Specific options: */
1352 /* - leaf_unit (1): indicates the unit of the leaves; must be a */
1354 stemplot(m,[select]):=
1355 block([localopts,d,s,si,l,lf,index,n,key,offset,
1356 /* specific options*/
1358 [select, localopts]: extract_options(select,'leaf_unit),
1360 for k in localopts do
1361 if lhs(k) = 'leaf_unit then my_leaf_unit: rhs(k),
1363 if numberp(my_leaf_unit) and my_leaf_unit > 0
1364 then my_leaf_unit: 10^round(log(my_leaf_unit)/log(10))
1365 else error("stemplot: illegal value for option leaf_unit"),
1367 if listofexpr(m) or matrixp(m) and (length(m)=1 or length(m[1])=1)
1368 then (/* transform input data into a list */
1372 else m: transpose(m)[1],
1374 d: my_leaf_unit*map('round,m/my_leaf_unit),
1375 s: map('floor,d/(10*my_leaf_unit)),
1376 l: map('floor,d/my_leaf_unit),
1378 si: listify(setify(s)),
1379 lf: makelist([],i,1,length(si)),
1380 for k:1 thru length(s) do (
1382 while s[k]>si[index] do index:index+1,
1383 lf[index]:append(lf[index],[l[k]]) ),
1385 offset: slength(concat("",si[length(si)])),
1386 for k:1 thru length(si) do (
1388 for n:1 thru length(lf[k]) do lv:concat(lv,lf[k][n]),
1389 printf(true,concat("~",offset,"d|",lv,"~%"),si[k]) ),
1390 key: 63*my_leaf_unit,
1391 print("key: 6|3 = ",if my_leaf_unit<1 then float(key) else key),
1393 else error("stemplot: can't plot the stemplot for these data") )$
1397 /* Plots star charts for discrete or categorical data, both for one or more*/
1398 /* samples. This function admits an arbitrary number of arguments. */
1399 /* The first arguments are lists or matrices with sample data. The rest of */
1400 /* arguments are specific options in equation format (option = value). */
1402 /* Specific options: */
1403 /* - stars_colors ([]): list of colors for stars. If the list is shorter*/
1404 /* than the number of samples, colors are generated randomly. */
1405 /* - frequency (absolute): 'absolute' or 'relative'. */
1406 /* - ordering (orderlessp): 'orderlessp' or 'ordergreatp'. */
1407 /* - sample_keys ([]): entries for legend. */
1408 /* - star_center ([0,0]): a pair of numbers */
1409 /* - star_radius (1): a positive number. */
1410 /* Draw options affecting this object: */
1414 /* starplot modifies the following options: */
1416 starplot_description([args]):=
1417 block([lastsample: 0, nargs: length(args), freqs: [], samplespace,
1418 sspacesize, nsamples, before, localopts, maxfreq, angle, cpnts,
1419 /* specific options */
1420 my_stars_colors: [], my_frequency: 'absolute, my_ordering: 'orderlessp,
1421 my_sample_keys: [], my_star_center: [0,0], my_star_radius: 1],
1423 /* looks for data */
1424 for i:1 thru nargs while (listp(args[i]) or matrixp(args[i])) do
1425 lastsample: lastsample + 1,
1427 [before, localopts]:
1429 makelist(args[k],k,lastsample+1,length(args)),
1430 'stars_colors,'frequency,'ordering,'sample_keys,'star_center,'star_radius),
1432 for k in localopts do
1433 if lhs(k) = 'stars_colors
1434 then my_stars_colors: rhs(k)
1435 elseif lhs(k) = 'frequency
1436 then my_frequency: rhs(k)
1437 elseif lhs(k) = 'ordering
1438 then my_ordering: rhs(k)
1439 elseif lhs(k) = 'sample_keys
1440 then my_sample_keys: rhs(k)
1441 elseif lhs(k) = 'star_center
1442 then my_star_center: float(rhs(k))
1443 elseif lhs(k) = 'star_radius
1444 then my_star_radius: float(rhs(k)),
1446 if not listp(my_stars_colors)
1447 then error("starplot: illegal value for stars_colors"),
1448 if not member(my_frequency, ['absolute, 'relative])
1449 then error("starplot: frenquency must be either absolute, relative or percent"),
1450 if not member(my_ordering, [orderlessp, ordergreatp])
1451 then error("starplot: illegal value for ordering"),
1452 if not (my_sample_keys = []
1453 or listp(my_sample_keys) and every('stringp, my_sample_keys))
1454 then error("starplot: illegal value for sample_keys"),
1455 if not numberp(my_star_radius) or my_star_radius <= 0
1456 then error("starplot: radius must be greater than zero"),
1457 if not listp(my_star_center) or length(my_star_center) # 2 or
1458 not every(numberp, my_star_center)
1459 then error("starplot: center must be a list of numbers"),
1461 /* get absolute frequencies */
1462 for k: 1 thru lastsample do (
1464 then freqs: endcons(discrete_freq(args[k]), freqs)
1465 elseif matrixp(args[k])
1466 then for c in args(transpose(args[k])) do
1467 freqs: endcons(discrete_freq(c), freqs)
1468 else error("starplot: unknown data format")),
1469 samplespace: sort(listify(setify(apply('append,map('first,freqs)))), my_ordering),
1470 sspacesize: length(samplespace),
1471 nsamples: length(freqs),
1472 angle: float(2*%pi/sspacesize),
1473 if my_sample_keys = []
1474 then my_sample_keys : makelist("",k,1,nsamples),
1475 if nsamples # length(my_sample_keys)
1476 then error("starplot: incorrect number of elements in sample_keys"),
1478 /* transform to relative frequencies, if necessary */
1479 if member(my_frequency, ['relative, 'percent])
1480 then freqs: makelist(block([ssinv: float(1 / apply("+", second(k)))],
1481 if my_frequency = 'percent then ssinv: 100*ssinv,
1482 [first(k), ssinv*second(k)]),
1484 maxfreq: lmax(flatten(map(second, freqs))),
1486 /* complete my_stars_colors with random colors, if necessary */
1487 if nsamples > length(my_stars_colors)
1488 then my_stars_colors: append(my_stars_colors,
1489 makelist(random_color(),k,length(my_stars_colors)+1,nsamples)),
1491 /* calculate circle points */
1494 [this_ang: k*angle],
1495 [cos(this_ang), sin(this_ang)]),
1498 /* return draw object */
1500 /* draw the radii and the circular grid */
1501 ['points_joined = true, 'point_type = 'dot, 'color = 'black],
1503 map(lambda([z], points([my_star_center, z+my_star_center])),
1504 my_star_radius*cpnts),
1506 [apply(label, maplist(cons,
1507 maplist(string, samplespace),
1508 map(lambda([z], z+my_star_center), 1.05*my_star_radius*cpnts)))],
1511 /* draw the stars */
1513 [ 'color = my_stars_colors[s],
1514 'key = my_sample_keys[s],
1518 pos: ?position(samplespace[k], freqs[s][1]),
1521 else my_star_radius * freqs[s][2][pos] /
1522 maxfreq * cpnts[k] + my_star_center),
1524 points(cons(last(pnts), pnts)) ) ],
1527 starplot([desc]) := draw2d(apply(starplot_description, desc)) $
1528 wxstarplot([desc]) := wxdraw2d(apply(starplot_description, desc)) $
1530 /* find_runs -- find consecutive identical values in an array or list.
1531 * Returns a structure runs(...) with fields lengths and values.
1534 defstruct (runs (lengths, values));
1537 if x = [] then runs ([], [])
1538 else block ([dx : map (lambda ([a, b], is(a # b)), rest(x), rest(x, -1)), ii0, ii],
1539 ii0 : sublist_indices (dx, identity),
1540 ii : append ([0], ii0, [length (x)]),
1541 dii : rest(ii) - rest(ii, -1),
1542 runs (dii, makelist (x[i], i, rest (ii))));
1544 find_runs_inverse (r) :=
1545 block ([v: r@values, n: r@lengths],
1546 apply (append, makelist (makelist (v[i], n[i]), i, 1, length(v))));