1 ;; We use cl-ppcre for text munging
5 (defparameter *HELV
* (pdf:get-font
"Helvetica"))
6 (defparameter *HELV-SIZE
* 12.0)
7 (defparameter *CLUST-HEIGHT
* 20)
9 ;; Dummy function used in delete-if
10 (defun truep (x) (declare (ignore x
)) t
)
12 (defun readfile (filename)
13 (with-open-file (stream filename
)
14 (let* ((whitespace '(#\Space
#\Tab
#\Newline
#\Linefeed
#\Return
))
15 (rownames (make-array 0 :fill-pointer
0 :adjustable t
))
16 (data (make-array 0 :fill-pointer
0 :adjustable t
))
17 (colnames (map 'vector
#'(lambda (x) x
) (cdr (cl-ppcre:split
#\Tab
(string-trim whitespace
(read-line stream nil nil
))))))
19 (loop for line
= (read-line stream nil nil
)
21 (setf p
(cl-ppcre:split
#\Tab
(string-trim whitespace line
)))
22 (vector-push-extend (car p
) rownames
)
23 (vector-push-extend (map 'vector
#'(lambda (x) (float (parse-integer x
))) (cdr p
)) data
)))
24 ;; subseq's lock down adjustable vectors into simple vectors
25 (list (subseq rownames
0) colnames
(subseq data
0)))))
27 (defun pearson-distance (v1 v2
)
28 (let* ((n (length v1
))
29 (sum1 (reduce #'+ v1
))
30 (sum2 (reduce #'+ v2
))
31 (sum1-sq (reduce #'+ (map 'vector
#'(lambda (x) (* x x
)) v1
)))
32 (sum2-sq (reduce #'+ (map 'vector
#'(lambda (x) (* x x
)) v2
)))
33 (psum (reduce #'+ (map 'vector
#'* v1 v2
)))
34 (num (- psum
(/ (* sum1 sum2
) n
)))
35 (den (sqrt (* (- sum1-sq
(/ (* sum1 sum1
) n
)) (- sum2-sq
(/ (* sum2 sum2
) n
))))))
36 (if (zerop den
) 0 (- 1.0 (/ num den
)))))
38 (defclass bicluster
()
39 ((vec :initarg
:vec
:accessor vec
:initform nil
)
40 (left :initarg
:left
:accessor left
:initform nil
)
41 (right :initarg
:right
:accessor right
:initform nil
)
42 (id :initarg
:id
:accessor id
:initform nil
)
43 (distance :initarg
:distance
:accessor distance
:initform
0.0)))
46 (defun hcluster (rows &optional
(distance 'pearson-distance
))
47 (let* ((distances (make-hash-table :test
'equalp
))
49 (rowlen (length rows
))
50 (clust (make-array rowlen
:fill-pointer rowlen
52 :initial-contents
(loop for i upto
(1- rowlen
) collect
(make-instance 'bicluster
:vec
(aref rows i
) :id i
)))))
53 (do* (lowestpair closest d mergevec newcluster
)
54 ((<= (length clust
) 1) (aref clust
0))
55 (setf lowestpair
(cons 0 1))
56 (setf closest
(funcall distance
(vec (aref clust
0)) (vec (aref clust
1))))
57 (dotimes (i (length clust
))
58 (do ((j (1+ i
) (1+ j
)))
59 ((>= j
(length clust
)))
60 (let* ((iid (id (aref clust i
)))
61 (jid (id (aref clust j
)))
63 (unless (gethash key distances
)
64 (setf (gethash key distances
) (funcall distance
(vec (aref clust i
)) (vec (aref clust j
)))))
65 (setf d
(gethash key distances
))
68 (setf lowestpair
(cons i j
))))))
69 (setf mergevec
(map 'vector
#'(lambda (x y
) (/ (+ x y
) 2.0))
70 (vec (aref clust
(car lowestpair
)))
71 (vec (aref clust
(cdr lowestpair
)))))
72 (setf newcluster
(make-instance 'bicluster
:vec mergevec
73 :left
(aref clust
(car lowestpair
))
74 :right
(aref clust
(cdr lowestpair
))
76 :id current-clust-id
))
77 (decf current-clust-id
)
78 (setf clust
(delete-if #'truep clust
:start
(cdr lowestpair
) :end
(1+ (cdr lowestpair
))))
79 (setf clust
(delete-if #'truep clust
:start
(car lowestpair
) :end
(1+ (car lowestpair
))))
80 (vector-push-extend newcluster clust
))))
83 (defun printclust (clust &optional labels
(n 0))
84 (let ((cid (id clust
)))
85 (format t
"~va" n
#\Space
)
89 (format t
"~a~%" (aref labels cid
))
90 (format t
"~a~%" cid
)))
92 (printclust (left clust
) labels
(1+ n
)))
94 (printclust (right clust
) labels
(1+ n
)))))
96 (defun getheight (clust)
97 (if (and (left clust
) (right clust
))
98 (+ (getheight (left clust
)) (getheight (right clust
)))
101 (defun getdepth (clust)
102 (if (and (left clust
) (right clust
))
103 (max (getdepth (left clust
)) (+ (getdepth (right clust
)) (distance clust
)))
106 (defun draw-line (x1 y1 x2 y2
)
111 (defun drawdendogram (clust labels file
)
112 (let* ((h (* (getheight clust
) *CLUST-HEIGHT
*))
114 (depth (getdepth clust
))
115 (scaling (float (/ (- w
150) depth
)))
116 ; Add to w for long blog names
117 (bounds (make-array 4 :initial-contents
(list 0 0 (+ w
200) h
))))
118 (pdf:with-document
()
119 (pdf:with-page
(:bounds bounds
)
120 (pdf:set-line-width
0.1)
122 (draw-line 0 (/ h
2) 10 (/ h
2))
124 (drawnode clust
10 (/ h
2) scaling labels
))
125 (with-open-file (s file
:direction
:output
:if-exists
:supersede
126 ; :element-type '(unsigned-byte 8)
127 :element-type
:default
128 :external-format
:latin-1
)
129 (pdf:write-document s
)))))
131 (defun drawnode (clust x y scaling labels
)
133 (let* ((h1 (* (getheight (left clust
)) *CLUST-HEIGHT
*))
134 (h2 (* (getheight (right clust
)) *CLUST-HEIGHT
*))
135 (avg (/ (+ h1 h2
) 2))
138 (ll (* (distance clust
) scaling
)))
140 ; Vertical line form this cluster to children
141 (draw-line x
(+ top
(/ h1
2)) x
(- bottom
(/ h2
2)))
142 ; horizontal line to left item
143 (draw-line x
(+ top
(/ h1
2)) (+ x ll
) (+ top
(/ h1
2)))
144 ; horizontal line to right item
145 (draw-line x
(- bottom
(/ h2
2)) (+ x ll
) (- bottom
(/ h2
2)))
148 (drawnode (left clust
) (+ x ll
) (+ top
(/ h1
2)) scaling labels
)
149 (drawnode (right clust
) (+ x ll
) (- bottom
(/ h2
2)) scaling labels
))
152 (pdf:set-font
*HELV
* *HELV-SIZE
*)
153 (pdf:move-text
(+ x
5) (- y
7))
154 (pdf:draw-text
(aref labels
(id clust
)))))))