changed reading hint
[gromacs/adressmacs.git] / src / tools / g_cluster.c
bloba1985409a1d1b4ecb32f6e5a59dc85aeb7991345
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 2.0
12 * Copyright (c) 1991-1999
13 * BIOSON Research Institute, Dept. of Biophysical Chemistry
14 * University of Groningen, The Netherlands
16 * Please refer to:
17 * GROMACS: A message-passing parallel molecular dynamics implementation
18 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
19 * Comp. Phys. Comm. 91, 43-56 (1995)
21 * Also check out our WWW page:
22 * http://md.chem.rug.nl/~gmx
23 * or e-mail to:
24 * gromacs@chem.rug.nl
26 * And Hey:
27 * GRowing Old MAkes el Chrono Sweat
29 static char *SRCID_g_cluster_c = "$Id$";
31 #include <math.h>
32 #include <string.h>
33 #include "macros.h"
34 #include "assert.h"
35 #include "smalloc.h"
36 #include "typedefs.h"
37 #include "copyrite.h"
38 #include "statutil.h"
39 #include "tpxio.h"
40 #include "string2.h"
41 #include "vec.h"
42 #include "macros.h"
43 #include "rdgroup.h"
44 #include "random.h"
45 #include "pbc.h"
46 #include "xvgr.h"
47 #include "futil.h"
48 #include "matio.h"
49 #include "ql77.h"
50 #include "cmat.h"
51 #include "do_fit.h"
52 #include "trnio.h"
54 typedef struct {
55 int ncl;
56 int *cl;
57 } t_clusters;
59 void pr_energy(FILE *fp,real e)
61 fprintf(fp,"Energy: %8.4f\n",e);
64 void cp_index(int nn,int from[],int to[])
66 int i;
68 for(i=0; (i<nn); i++)
69 to[i]=from[i];
72 void mc_optimize(FILE *log,t_mat *m,int maxiter,int *seed,real kT)
74 real e[2],ei,ej,efac;
75 int *low_index;
76 int cur=0;
77 #define next (1-cur)
78 int i,isw,jsw,iisw,jjsw,nn;
80 fprintf(stderr,"\nDoing Monte Carlo clustering\n");
81 nn = m->nn;
82 snew(low_index,nn);
83 cp_index(nn,m->m_ind,low_index);
84 if (getenv("TESTMC")) {
85 e[cur] = mat_energy(m);
86 pr_energy(log,e[cur]);
87 fprintf(log,"Doing 1000 random swaps\n");
88 for(i=0; (i<1000); i++) {
89 do {
90 isw = nn*rando(seed);
91 jsw = nn*rando(seed);
92 } while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
93 iisw = m->m_ind[isw];
94 jjsw = m->m_ind[jsw];
95 m->m_ind[isw] = jjsw;
96 m->m_ind[jsw] = iisw;
99 e[cur] = mat_energy(m);
100 pr_energy(log,e[cur]);
101 for(i=0; (i<maxiter); i++) {
102 do {
103 isw = nn*rando(seed);
104 jsw = nn*rando(seed);
105 } while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
107 iisw = m->m_ind[isw];
108 jjsw = m->m_ind[jsw];
109 ei = row_energy(nn,iisw,m->mat[jsw],m->m_ind);
110 ej = row_energy(nn,jjsw,m->mat[isw],m->m_ind);
112 e[next] = e[cur] + (ei+ej-EROW(m,isw)-EROW(m,jsw))/nn;
114 efac = kT ? exp((e[next]-e[cur])/kT) : -1;
115 if ((e[next] > e[cur]) || (efac > rando(seed))) {
117 if (e[next] > e[cur])
118 cp_index(nn,m->m_ind,low_index);
119 else
120 fprintf(log,"Taking uphill step\n");
122 /* Now swapping rows */
123 m->m_ind[isw] = jjsw;
124 m->m_ind[jsw] = iisw;
125 EROW(m,isw) = ei;
126 EROW(m,jsw) = ej;
127 cur = next;
128 fprintf(log,"Iter: %d Swapped %4d and %4d (now %g)",
129 i,isw,jsw,mat_energy(m));
130 pr_energy(log,e[cur]);
133 /* Now restore the highest energy index */
134 cp_index(nn,low_index,m->m_ind);
137 static void calc_dist(int nind,rvec x[],real **d)
139 int i,j;
140 real *xi;
141 rvec dx;
143 for(i=0; (i<nind-1); i++) {
144 xi=x[i];
145 for(j=i+1; (j<nind); j++) {
146 pbc_dx(xi,x[j],dx);
147 d[i][j]=norm(dx);
152 static real rms_dist(int isize,real **d,real **d_r)
154 int i,j;
155 real r,r2;
157 r2=0.0;
158 for(i=0; (i<isize-1); i++)
159 for(j=i+1; (j<isize); j++) {
160 r=d[i][j]-d_r[i][j];
161 r2+=r*r;
163 r2/=(isize*(isize-1))/2;
165 return sqrt(r2);
168 static real **mcpy=NULL;
169 static int nnn=0;
171 int rcomp(const void *a,const void *b)
173 real *ra,*rb;
175 ra = (real *)a;
176 rb = (real *)b;
178 if ((*ra) < (*rb))
179 return -1;
180 else if ((*ra) > (*rb))
181 return 1;
183 return 0;
186 static int icomp(const void *a,const void *b)
188 int ia,ib;
190 ia = *(int *)a;
191 ib = *(int *)b;
193 if (mcpy[ia][nnn] < mcpy[ib][nnn])
194 return -1;
195 else if (mcpy[ia][nnn] > mcpy[ib][nnn])
196 return 1;
197 else
198 return 0;
201 void sort_matrix(int n1,real **mat)
203 int i,j,*index;
205 snew(index,n1);
206 snew(mcpy,n1);
207 for(i=0; (i<n1); i++) {
208 index[i] = i;
209 snew(mcpy[i],n1);
210 for(j=0; (j<n1); j++)
211 mcpy[i][j] = mat[i][j];
213 fprintf(stderr,"Going to sort the RMSD matrix.\n");
214 qsort(index,n1,sizeof(*index),icomp);
215 /* Copy the matrix back */
216 for(i=0; (i<n1); i++)
217 for(j=0; (j<n1); j++)
218 mat[i][j] = mcpy[index[i]][index[j]];
219 /* done_mat(n1,mcpy);
220 sfree(mcpy);*/
221 sfree(index);
224 static int dcomp(const void *a,const void *b)
226 t_dist *da,*db;
228 da = (t_dist *)a;
229 db = (t_dist *)b;
231 if (da->dist < db->dist)
232 return -1;
233 else if (da->dist > db->dist)
234 return 1;
235 return 0;
238 static int ccomp(const void *a,const void *b)
240 t_clustid *da,*db;
242 da = (t_clustid *)a;
243 db = (t_clustid *)b;
245 return da->clust - db->clust;
248 void gather(t_mat *m,real cutoff,t_clusters *clust)
250 t_clustid *c;
251 t_dist *d;
252 int i,j,k,nn,cid,n1,diff;
253 bool bChange;
255 /* First we sort the entries in the RMSD matrix */
256 n1 = m->nn;
257 nn = ((n1-1)*n1)/2;
258 snew(d,nn);
259 for(i=k=0; (i<n1); i++)
260 for(j=i+1; (j<n1); j++,k++) {
261 d[k].i = i;
262 d[k].j = j;
263 d[k].dist = m->mat[i][j];
265 assert(k == nn);
266 qsort(d,nn,sizeof(d[0]),dcomp);
268 /* Now we make a cluster index for all of the conformations */
269 c = new_clustid(n1);
271 /* Now we check the closest structures, and equalize their cluster
272 * numbers
274 fprintf(stderr,"Linking structures ");
275 do {
276 fprintf(stderr,"*");
277 bChange=FALSE;
278 for(k=0; (k<nn) && (d[k].dist < cutoff); k++) {
279 diff = c[d[k].j].clust - c[d[k].i].clust;
280 if (diff) {
281 bChange = TRUE;
282 if (diff > 0)
283 c[d[k].j].clust = c[d[k].i].clust;
284 else
285 c[d[k].i].clust = c[d[k].j].clust;
288 } while (bChange);
289 fprintf(stderr,"\nSorting and renumbering clusters\n");
290 /* Sort on cluster number */
291 qsort(c,n1,sizeof(c[0]),ccomp);
293 /* Renumber clusters */
294 cid = 1;
295 for(k=1; k<n1; k++) {
296 if (c[k].clust != c[k-1].clust) {
297 c[k-1].clust = cid;
298 cid ++;
299 } else
300 c[k-1].clust = cid;
302 c[k-1].clust = cid;
303 if (debug)
304 for(k=0; (k<n1); k++)
305 fprintf(debug,"Cluster index for conformation %d: %d\n",
306 c[k].conf,c[k].clust);
307 clust->ncl = cid;
308 for(k=0; k<n1; k++)
309 clust->cl[c[k].conf] = c[k].clust;
311 mcpy = mk_matrix(n1,FALSE);
312 for(i=0; (i<n1); i++) {
313 for(j=0; (j<n1); j++)
314 mcpy[c[i].conf][c[j].conf] = m->mat[i][j];
316 for(i=0; (i<n1); i++) {
317 for(j=0; (j<n1); j++)
318 m->mat[i][j] = mcpy[i][j];
320 done_matrix(n1,&mcpy);
322 sfree(c);
323 sfree(d);
326 bool jp_same(int **nnb,int i,int j,int P)
328 bool bIn;
329 int k,ii,jj,pp;
331 bIn = FALSE;
332 for(k=0; nnb[i][k]>=0; k++)
333 bIn = bIn || (nnb[i][k] == j);
334 if (!bIn)
335 return FALSE;
337 bIn = FALSE;
338 for(k=0; nnb[j][k]>=0; k++)
339 bIn = bIn || (nnb[j][k] == i);
340 if (!bIn)
341 return FALSE;
343 pp=0;
344 for(ii=0; nnb[i][ii]>=0; ii++)
345 for(jj=0; nnb[j][jj]>=0; jj++)
346 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
347 pp++;
349 return (pp >= P);
352 static void jarvis_patrick(int n1,real **mat,int M,int P,
353 real rmsdcut,t_clusters *clust)
355 t_dist *tmp;
356 t_clustid *c;
357 int **nnb;
358 int i,j,k,cid,diff,max;
359 bool bChange;
361 if (rmsdcut < 0)
362 rmsdcut = 10000;
364 /* First we sort the entries in the RMSD matrix row by row.
365 * This gives us the nearest neighbor list.
367 snew(nnb,n1);
368 snew(tmp,n1);
369 for(i=0; (i<n1); i++) {
370 for(j=0; (j<n1); j++) {
371 tmp[j].j = j;
372 tmp[j].dist = mat[i][j];
374 qsort(tmp,n1,sizeof(tmp[0]),dcomp);
375 if (M>0) {
376 /* Put the M nearest neighbors in the list */
377 snew(nnb[i],M+1);
378 for(j=k=0; (k<M) && (j<n1) && (mat[i][tmp[j].j] < rmsdcut); j++)
379 if (tmp[j].j != i) {
380 nnb[i][k] = tmp[j].j;
381 k++;
383 nnb[i][k] = -1;
384 } else {
385 /* Put all neighbors nearer than rmsdcut in the list */
386 max=0;
387 k=0;
388 for(j=0; (j<n1) && (mat[i][tmp[j].j] < rmsdcut); j++)
389 if (tmp[j].j != i) {
390 if (k >= max) {
391 max += 10;
392 srenew(nnb[i],max);
394 nnb[i][k] = tmp[j].j;
395 k++;
397 if (k == max)
398 srenew(nnb[i],max+1);
399 nnb[i][k] = -1;
402 sfree(tmp);
403 if (debug) {
404 fprintf(debug,"Nearest neighborlist. M = %d, P = %d\n",M,P);
405 for(i=0; (i<n1); i++) {
406 fprintf(debug,"i:%5d nbs:",i);
407 for(j=0; nnb[i][j]>=0; j++)
408 fprintf(debug,"%5d[%5.3f]",nnb[i][j],mat[i][nnb[i][j]]);
409 fprintf(debug,"\n");
413 c = new_clustid(n1);
414 fprintf(stderr,"Linking structures ");
415 /* Use mcpy for temporary storage of booleans */
416 mcpy = mk_matrix(n1,FALSE);
417 for(i=0; i<n1; i++)
418 for(j=i+1; j<n1; j++)
419 mcpy[i][j] = jp_same(nnb,i,j,P);
420 do {
421 fprintf(stderr,"*");
422 bChange=FALSE;
423 for(i=0; i<n1; i++) {
424 for(j=i+1; j<n1; j++)
425 if (mcpy[i][j]) {
426 diff = c[j].clust - c[i].clust;
427 if (diff) {
428 bChange = TRUE;
429 if (diff > 0)
430 c[j].clust = c[i].clust;
431 else
432 c[i].clust = c[j].clust;
436 } while (bChange);
438 fprintf(stderr,"\nSorting and renumbering clusters\n");
439 /* Sort on cluster number */
440 qsort(c,n1,sizeof(c[0]),ccomp);
442 /* Renumber clusters */
443 cid = 1;
444 for(k=1; k<n1; k++) {
445 if (c[k].clust != c[k-1].clust) {
446 c[k-1].clust = cid;
447 cid ++;
448 } else
449 c[k-1].clust = cid;
451 c[k-1].clust = cid;
452 clust->ncl = cid;
453 for(k=0; k<n1; k++)
454 clust->cl[c[k].conf] = c[k].clust;
455 if (debug)
456 for(k=0; (k<n1); k++)
457 fprintf(debug,"Cluster index for conformation %d: %d\n",
458 c[k].conf,c[k].clust);
460 for(i=0; (i<n1); i++) {
461 for(j=0; (j<n1); j++)
462 mcpy[c[i].conf][c[j].conf] = mat[i][j];
464 for(i=0; (i<n1); i++) {
465 for(j=0; (j<n1); j++)
466 mat[i][j] = mcpy[i][j];
469 sfree(c);
470 for(i=0; (i<n1); i++)
471 sfree(nnb[i]);
472 sfree(nnb);
475 rvec **read_whole_trj(char *fn,int isize,atom_id index[],int skip,int *nframe,
476 int *natom, real **time)
478 rvec **xx,*x;
479 matrix box;
480 real t;
481 int i,i0,j,max_nf;
482 int nbytes,status;
484 max_nf = 0;
485 xx = NULL;
486 *time = NULL;
487 *natom = read_first_x(&status,fn,&t,&x,box);
488 i = 0;
489 i0 = 0;
490 do {
491 if (i0 >= max_nf) {
492 max_nf += 10;
493 srenew(xx,max_nf);
494 srenew(*time,max_nf);
496 if ((i % skip) == 0) {
497 snew(xx[i0],isize);
498 /* Store only the interesting atoms */
499 for(j=0; (j<isize); j++)
500 copy_rvec(x[index[j]],xx[i0][j]);
501 (*time)[i0] = t;
502 i0 ++;
504 i++;
505 } while (read_next_x(status,&t,*natom,x,box));
506 fprintf(stderr,"Allocated %lu bytes for frames\n",
507 (unsigned long) (max_nf*isize*sizeof(**xx)));
508 fprintf(stderr,"Read %d frames from trajectory %s\n",i0,fn);
509 *nframe = i0;
510 sfree(x);
512 return xx;
515 static void plot_clusters(int nf,real **mat,real val,t_clusters *clust)
517 int i,j;
519 for(i=0; i<nf; i++)
520 for(j=0; j<i; j++)
521 if (clust->cl[i] == clust->cl[j])
522 mat[i][j] = val;
523 else
524 mat[i][j] = 0;
527 static void analyze_clusters(int nf,t_clusters *clust,real **rmsd,
528 t_atoms *atoms,int natom,
529 int isize,atom_id *index,atom_id *alli,
530 rvec **xx,real *time,real *mass,
531 rvec *xtps,char *trxfn,bool bAv,FILE *log)
533 FILE *fp;
534 char buf[STRLEN],buf1[20],buf2[20];
535 int trxout=0;
536 int i,i1,i2,cl,nstr,*structure,first=0,midstr;
537 real r,clrmsd,midrmsd;
538 rvec *xtpsi=NULL,*xav=NULL,*xnatom=NULL;
539 matrix zerobox;
541 clear_mat(zerobox);
543 sprintf(buf,"\nFound %d clusters\n",clust->ncl);
544 fprintf(stderr,buf);
545 fprintf(log,buf);
546 if (trxfn) {
547 sprintf(buf,"\nWriting %s structure for each cluster to %s\n",
548 bAv ? "average" : "middle",trxfn);
549 fprintf(stderr,buf);
550 fprintf(log,buf);
552 /* Prepare a reference structure for the orientation of the clusters */
553 snew(xtpsi,isize);
554 for(i=0; i<isize; i++)
555 copy_rvec(xtps[index[i]],xtpsi[i]);
556 reset_x(isize,alli,isize,alli,xtpsi,mass);
557 trxout = open_trx(trxfn,"w");
558 /* Calculate the average structure in each cluster, *
559 * all structures are fitted to the first struture of the cluster */
560 snew(xav,isize);
561 snew(xnatom,natom);
563 snew(structure,nf);
564 sprintf(buf,"\n%3s | %3s %4s | %6s %4s | cluster members\n",
565 "cl.","#st","rmsd","middle","rmsd");
566 fprintf(stderr,buf);
567 fprintf(log,buf);
568 for(cl=1; cl<=clust->ncl; cl++) {
569 for(i=0; i<natom;i++)
570 clear_rvec(xav[i]);
571 nstr=0;
572 for(i1=0; i1<nf; i1++)
573 if (clust->cl[i1] == cl) {
574 structure[nstr] = i1;
575 nstr++;
576 if (trxfn && bAv) {
577 reset_x(isize,alli,isize,alli,xx[i1],mass);
578 if (nstr == 1)
579 first = i1;
580 else
581 do_fit(isize,mass,xx[first],xx[i1]);
582 for(i=0; i<isize; i++)
583 rvec_inc(xav[i],xx[i1][i]);
586 clrmsd = 0;
587 midstr = 0;
588 midrmsd = 10000;
589 for(i1=0; i1<nstr; i1++) {
590 r = 0;
591 if (nstr > 1) {
592 for(i=0; i<nstr; i++)
593 r += rmsd[structure[i1]][structure[i]];
594 r /= (nstr - 1);
596 if (r < midrmsd) {
597 midstr = structure[i1];
598 midrmsd = r;
600 clrmsd += r;
602 clrmsd /= nstr;
603 if (nstr > 1) {
604 sprintf(buf1,"%5.3f",clrmsd);
605 if (buf1[0] == '0')
606 buf1[0] = ' ';
607 sprintf(buf2,"%5.3f",midrmsd);
608 if (buf2[0] == '0')
609 buf2[0] = ' ';
610 } else {
611 sprintf(buf1,"%5s","");
612 sprintf(buf2,"%5s","");
614 sprintf(buf,"%3d | %3d%s | %6g%s |",
615 cl,nstr,buf1,time[midstr],buf2);
616 fprintf(stderr,buf);
617 fprintf(log,buf);
618 for(i=0; i<nstr; i++) {
619 if ((i % 7 == 0) && i)
620 sprintf(buf,"\n%3s | %3s %4s | %6s %4s |","","","","","");
621 else
622 buf[0] = '\0';
623 i1 = structure[i];
624 fprintf(stderr,"%s %6g",buf,time[i1]);
625 fprintf(log,"%s %6g",buf,time[i1]);
627 fprintf(stderr,"\n");
628 fprintf(log,"\n");
630 if (trxfn) {
631 /* Dump the average structure for this cluster */
632 if (bAv) {
633 r = 1.0/nstr;
634 for(i=0; i<isize; i++)
635 svmul(r,xav[i],xav[i]);
636 } else {
637 for(i=0; i<isize; i++)
638 copy_rvec(xx[midstr][i],xav[i]);
639 reset_x(isize,alli,isize,alli,xav,mass);
641 do_fit(isize,mass,xtpsi,xav);
642 for(i=0; i<isize; i++)
643 copy_rvec(xav[i],xnatom[index[i]]);
644 r = cl;
645 write_trx(trxout,isize,index,atoms,0,r,zerobox,xnatom,NULL);
648 if (trxfn) {
649 close_trx(trxout);
650 sfree(xtpsi);
651 sfree(xav);
652 sfree(xnatom);
654 sfree(structure);
657 static void convert_mat(t_matrix *mat,t_mat *rms)
659 int n,i,j;
660 real **tmp;
662 n = mat->nx;
663 rms->n1 = n;
664 matrix2real(mat,rms->mat);
666 for(i=0; i<n; i++)
667 for(j=i; j<n; j++) {
668 rms->sumrms += rms->mat[i][j];
669 if (rms->mat[i][j] > rms->maxrms)
670 rms->maxrms = rms->mat[i][j];
672 rms->nn = n;
675 int main(int argc,char *argv[])
677 static char *desc[] = {
678 "g_cluster can cluster structures with several different methods.",
679 "Distances between structures can be determined from a trajectory",
680 "or read from an XPM matrix file with the [TT]-dm[tt] option.",
681 "RMS deviation after fitting or RMS deviation of atom-pair distances",
682 "can be used to define the distance between structures.[PAR]",
683 "full linkage: add a structure to a cluster when its distance to any",
684 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
685 "Jarvis Patrick: add a structure to a cluster when this structure",
686 "and a structure in the cluster have each other as neighbors and",
687 "they have a least [TT]P[tt] neighbors in common. The neighbors",
688 "of a structure are the M closest structures or all structures within",
689 "[TT]cutoff[tt].[PAR]",
690 "Monte Carlo: reorder the RMSD matrix using Monte Carlo.[PAR]",
691 "diagonalization: diagonalize the RMSD matrix.[PAR]"
692 "When unique cluster assignments can be determined (full linkage and",
693 "Jarvis Patrick) and a trajectory file is supplied, the structure with",
694 "the smallest average distance to the others or the average structure",
695 "for each cluster will be written to a trajectory file.[PAR]",
698 FILE *fp,*log;
699 int natom,i1,i2,i1s,i2s,i,teller,nf,nrms;
700 real t,t1,t2;
702 matrix box;
703 rvec *xtps,*x1,*x2,**xx=NULL;
704 char *fn;
705 t_clusters clust;
706 t_mat *rms;
707 real **rmsmat;
708 real *resnr,*eigval;
709 t_topology top;
710 bool bSameF;
711 t_rgb rlo,rhi;
712 t_matrix *readmat;
714 int status1,status2,isize,nbytes;
715 atom_id *index,*alli=NULL;
716 char *grpname;
717 real rmsd,**d1,**d2,*time,*mass=NULL;
718 char buf[STRLEN];
719 bool bAnalyze,bJP_RMSD=FALSE,bReadMat,bReadTraj;
721 static char *method[] = { NULL, "linkage", "jarvis-patrick","monte-carlo",
722 "diagonalization", NULL };
723 static int nlevels=40,skip=1;
724 static real scalemax=-1.0,rmsdcut=0.1;
725 static bool bRMSdist=FALSE,bBinary=FALSE,bAv=FALSE;
726 static int niter=10000,seed=1993;
727 static real kT=1e-3;
728 static int M=10,P=3;
729 t_pargs pa[] = {
730 { "-dista",FALSE, etBOOL, {&bRMSdist},
731 "Use RMSD of distances instead of RMS deviation" },
732 { "-nlevels", FALSE, etINT, {&nlevels},
733 "Discretize RMSD matrix in # levels" },
734 { "-cutoff", FALSE, etREAL, {&rmsdcut},
735 "RMSD cut-off (nm) for two structures to be similar" },
736 { "-max", FALSE, etREAL, {&scalemax},
737 "Maximum level in RMSD matrix" },
738 { "-skip", FALSE, etINT, {&skip},
739 "Only analyze every nr-th frame" },
740 { "-av", FALSE, etBOOL, {&bAv},
741 "Write average iso middle structure for each cluster" },
742 { "-method",FALSE, etENUM, {method},
743 "Method for cluster determination" },
744 { "-binary",FALSE, etBOOL, {&bBinary},
745 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off is given by -cutoff" },
746 { "-M", FALSE, etINT, {&M},
747 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, 0 is use cutoff" },
748 { "-P", FALSE, etINT, {&P},
749 "Number of identical nearest neighbors required to form a cluster" },
750 { "-seed", FALSE, etINT, {&seed},
751 "Random number seed for Monte Carlo clustering algorithm" },
752 { "-niter", FALSE, etINT, {&niter},
753 "Number of iterations for MC" },
754 { "-kT", FALSE, etREAL, {&kT},
755 "Boltzmann weighting factor for Monte Carlo optimization (zero turns off uphill steps)" }
757 t_filenm fnm[] = {
758 { efTRX, "-f", NULL, ffOPTRD },
759 { efTPS, "-s", NULL, ffOPTRD },
760 { efNDX, NULL, NULL, ffOPTRD },
761 { efXPM, "-dm", "rmsd", ffOPTRD },
762 { efXPM, "-o", "rmsd-clust", ffWRITE },
763 { efLOG, "-g", "cluster", ffWRITE },
764 { efXVG, "-dist", "rmsd-dist", ffWRITE },
765 { efXVG, "-ev", "rmsd-eig", ffOPTWR },
766 { efTRX, "-cl", "clusters.pdb", ffOPTWR }
768 #define NFILE asize(fnm)
770 #define m_linkage (method[0][0] == 'l')
771 #define m_jarvis_patrick (method[0][0] == 'j')
772 #define m_monte_carlo (method[0][0] == 'm')
773 #define m_diagonalize (method[0][0] == 'd')
775 CopyRight(stderr,argv[0]);
776 parse_common_args(&argc,argv,PCA_CAN_VIEW | PCA_CAN_TIME,TRUE,
777 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL);
779 bReadMat = opt2bSet("-dm",NFILE,fnm);
780 bReadTraj = opt2bSet("-f",NFILE,fnm) || !bReadMat;
782 /* Open log file */
783 log = ftp2FILE(efLOG,NFILE,fnm,"w");
785 fprintf(stderr,"Using %s for clustering\n",method[0]);
786 fprintf(log,"Using %s for clustering\n",method[0]);
788 if (m_jarvis_patrick) {
789 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff",asize(pa),pa);
790 if ((M<0) || (M == 1))
791 fatal_error(0,"M (%d) must be 0 or larger than 1",M);
792 if (M < 2)
793 sprintf(buf,"Will use P=%d and RMSD cutoff (%g)",P,rmsdcut);
794 else {
795 if (P >= M)
796 fatal_error(0,"Number of neighbors required (P) must be less than M");
797 if (bJP_RMSD)
798 sprintf(buf,"Will use P=%d, M=%d and RMSD cutoff (%g)",P,M,rmsdcut);
799 else
800 sprintf(buf,"Will use P=%d, M=%d",P,M);
802 fprintf(stderr,"%s for determining the neighbors\n\n",buf);
803 fprintf(log,"%s for determining the neighbors\n\n",buf);
806 if (skip < 1)
807 fatal_error(0,"skip (%d) should be >= 1",skip);
809 bAnalyze = (m_linkage || m_jarvis_patrick);
811 if (bReadTraj) {
812 /* don't read mass-database as masses (and top) are not used */
813 read_tps_conf(ftp2fn(efTPS,NFILE,fnm),buf,&top,&xtps,NULL,box,bAnalyze);
815 fprintf(stderr,"\nSelect group for RMSD calculation:\n");
816 get_index(&(top.atoms),ftp2fn_null(efNDX,NFILE,fnm),
817 1,&isize,&index,&grpname);
819 /* Initiate arrays */
820 snew(d1,isize);
821 snew(d2,isize);
822 for(i=0; (i<isize); i++) {
823 snew(d1[i],isize);
824 snew(d2[i],isize);
827 if (bReadTraj) {
828 /*set box type*/
829 init_pbc(box,FALSE);
831 /* Loop over first coordinate file */
832 fn = opt2fn("-f",NFILE,fnm);
834 xx = read_whole_trj(fn,isize,index,skip,&nf,&natom,&time);
835 if (!bRMSdist || bAnalyze) {
836 /* Center all frames on zero */
837 snew(alli,isize);
838 for(i=0; i<isize; i++)
839 alli[i] = i;
840 snew(mass,isize);
841 for(i=0; i<isize; i++)
842 mass[i] = top.atoms.atom[index[i]].m;
843 for(i=0; i<nf; i++)
844 reset_x(isize,alli,isize,alli,xx[i],mass);
847 if (bReadMat) {
848 read_xpm_matrix(opt2fn("-dm",NFILE,fnm),&readmat);
849 if (readmat[0].nx != readmat[0].ny)
850 fatal_error(0,"Matrix (%dx%d) is not square",
851 readmat[0].nx,readmat[0].ny);
852 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
853 fatal_error(0,"Matrix size (%dx%d) does not match the number of frames (%d)",
854 readmat[0].nx,readmat[0].ny,nf);
856 nf = readmat[0].nx;
857 sfree(time);
858 time = readmat[0].axis_x;
860 rms = init_mat(readmat[0].nx,m_diagonalize);
861 convert_mat(&(readmat[0]),rms);
862 } else {
863 rms = init_mat(nf,m_diagonalize);
864 nrms = (nf*(nf-1))/2;
865 if (!bRMSdist) {
866 fprintf(stderr,"Computing %dx%d RMS deviation matrix\n",nf,nf);
867 snew(x1,isize);
868 for(i1=0; (i1<nf); i1++) {
869 for(i2=i1+1; (i2<nf); i2++) {
870 for(i=0; i<isize; i++)
871 copy_rvec(xx[i1][i],x1[i]);
872 do_fit(isize,mass,xx[i2],x1);
873 rmsd = rmsdev(isize,mass,xx[i2],x1);
874 set_mat_entry(rms,i1,i2,rmsd);
876 nrms -= (nf-i1-1);
877 fprintf(stderr,"\r# RMSD calculations left: %d ",nrms);
879 } else {
880 fprintf(stderr,"Computing %dx%d RMS distance deviation matrix\n",nf,nf);
881 for(i1=0; (i1<nf); i1++) {
882 calc_dist(isize,xx[i1],d1);
883 for(i2=i1+1; (i2<nf); i2++) {
884 calc_dist(isize,xx[i2],d2);
885 set_mat_entry(rms,i1,i2,rms_dist(isize,d1,d2));
887 nrms -= (nf-i1-1);
888 fprintf(stderr,"\r# RMSD calculations left: %d ",nrms);
891 fprintf(stderr,"\n\n");
893 sprintf(buf,"The maximum RMSD is %g nm\n"
894 "Average RMSD is %g\n"
895 "Number of structures for matrix %d\n"
896 "Energy of the matrix is %g nm\n",
897 rms->maxrms,2*rms->sumrms/(nf*(nf-1)),nf,mat_energy(rms));
898 fprintf(stderr,buf);
899 fprintf(log,buf);
901 /* Plot the rmsd distribution */
902 rmsd_distribution(opt2fn("-dist",NFILE,fnm),rms);
904 snew(rmsmat,nf);
905 for(i1=0; (i1 < nf); i1++) {
906 snew(rmsmat[i1],nf);
907 for(i2=0; (i2 < nf); i2++)
908 rmsmat[i1][i2] = rms->mat[i1][i2];
911 if (bBinary) {
912 for(i1=0; (i1 < nf); i1++)
913 for(i2=0; (i2 < nf); i2++)
914 if (rms->mat[i1][i2] < rmsdcut)
915 rms->mat[i1][i2] = 0;
916 else
917 rms->mat[i1][i2] = 1;
920 snew(clust.cl,nf);
921 if (m_linkage)
922 /* Now sort the matrix and write it out again */
923 gather(rms,rmsdcut,&clust);
924 else if (m_diagonalize) {
925 /* Do a diagonalization */
926 snew(eigval,nf);
927 /*for(i1=0; (i1<nf); i1++)
928 for(i2=0; (i2<nf); i2++)
929 rms[i1][i2] = maxrms - rms[i1][i2];*/
930 ql77(nf,rms->mat[0],eigval);
931 fp = xvgropen(opt2fn("-ev",NFILE,fnm),"Eigenvalues","index","value");
932 for(i=0; (i<nf); i++)
933 fprintf(fp,"%10d %10g\n",i,eigval[i]);
934 ffclose(fp);
935 xvgr_file(opt2fn("-ev",NFILE,fnm),NULL);
937 else if (m_monte_carlo)
938 mc_optimize(log,rms,niter,&seed,kT);
939 else if (m_jarvis_patrick)
940 jarvis_patrick(rms->nn,rms->mat,M,P,bJP_RMSD ? rmsdcut : -1,&clust);
941 else
942 fatal_error(0,"unknown method \"%s\"",method[0]);
945 if (m_monte_carlo || m_diagonalize)
946 fprintf(stderr,"Energy of the matrix after clustering is %g nm\n",
947 mat_energy(rms));
948 swap_mat(rms);
949 reset_index(rms);
951 /* Write the rmsd-matrix to the upper-left part of the matrix */
952 for(i1=0; (i1 < nf); i1++)
953 for(i2=i1; (i2 < nf); i2++)
954 rms->mat[i1][i2] = rmsmat[i1][i2];
956 if (bAnalyze) {
957 plot_clusters(nf,rms->mat,rms->maxrms,&clust);
958 analyze_clusters(nf,&clust,rmsmat,&(top.atoms),natom,isize,index,alli,
959 xx,time,mass,xtps,
960 bReadTraj ? opt2fn("-cl",NFILE,fnm) : NULL,bAv,log);
962 ffclose(log);
964 if (bBinary && !bAnalyze)
965 /* Make the clustering visible */
966 for(i2=0; (i2 < nf); i2++)
967 for(i1=i2+1; (i1 < nf); i1++)
968 if (rms->mat[i1][i2])
969 rms->mat[i1][i2] = rms->maxrms;
971 /* Set colors for plotting: white = zero RMS, black = maximum */
972 rlo.r=1.0, rlo.g=1.0, rlo.b=1.0;
973 rhi.r=0.0, rhi.g=0.0, rhi.b=0.0;
975 fp = opt2FILE("-o",NFILE,fnm,"w");
976 if (bReadMat) {
977 write_xpm(fp,readmat[0].title,readmat[0].legend,readmat[0].label_x,
978 readmat[0].label_y,nf,nf,readmat[0].axis_x,readmat[0].axis_y,
979 rms->mat,0.0,rms->maxrms,rlo,rhi,&nlevels);
980 } else
981 write_xpm(fp,bRMSdist ? "RMS Distance Deviation" : "RMS Deviation",
982 "RMSD (nm)","Time (ps)","Time (ps)",
983 nf,nf,time,time,rms->mat,0.0,rms->maxrms,rlo,rhi,&nlevels);
984 ffclose(fp);
985 xv_file(opt2fn("-o",NFILE,fnm),NULL);
987 /* Thank the user for her patience */
988 thanx(stdout);
990 return 0;