3 * This source code is part of
7 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Green Red Orange Magenta Azure Cyan Skyblue
56 #include "eigensolver.h"
63 /* macro's to print to two file pointers at once (i.e. stderr and log) */
64 #define lo_ffprintf(fp1,fp2,buf) \
65 fprintf(fp1,"%s",buf);\
66 fprintf(fp2,"%s",buf);
67 /* just print a prepared buffer to fp1 and fp2 */
68 #define ffprintf(fp1,fp2,buf) { lo_ffprintf(fp1,fp2,buf) }
69 /* prepare buffer with one argument, then print to fp1 and fp2 */
70 #define ffprintf1(fp1,fp2,buf,fmt,arg) {\
71 sprintf(buf,fmt,arg);\
72 lo_ffprintf(fp1,fp2,buf)\
74 /* prepare buffer with two arguments, then print to fp1 and fp2 */
75 #define ffprintf2(fp1,fp2,buf,fmt,arg1,arg2) {\
76 sprintf(buf,fmt,arg1,arg2);\
77 lo_ffprintf(fp1,fp2,buf)\
90 void pr_energy(FILE *fp
,real e
)
92 fprintf(fp
,"Energy: %8.4f\n",e
);
95 void cp_index(int nn
,int from
[],int to
[])
103 void mc_optimize(FILE *log
,t_mat
*m
,int maxiter
,int *seed
,real kT
)
105 real e
[2],ei
,ej
,efac
;
109 int i
,isw
,jsw
,iisw
,jjsw
,nn
;
111 fprintf(stderr
,"\nDoing Monte Carlo clustering\n");
114 cp_index(nn
,m
->m_ind
,low_index
);
115 if (getenv("TESTMC")) {
116 e
[cur
] = mat_energy(m
);
117 pr_energy(log
,e
[cur
]);
118 fprintf(log
,"Doing 1000 random swaps\n");
119 for(i
=0; (i
<1000); i
++) {
121 isw
= nn
*rando(seed
);
122 jsw
= nn
*rando(seed
);
123 } while ((isw
== jsw
) || (isw
>= nn
) || (jsw
>= nn
));
124 iisw
= m
->m_ind
[isw
];
125 jjsw
= m
->m_ind
[jsw
];
126 m
->m_ind
[isw
] = jjsw
;
127 m
->m_ind
[jsw
] = iisw
;
130 e
[cur
] = mat_energy(m
);
131 pr_energy(log
,e
[cur
]);
132 for(i
=0; (i
<maxiter
); i
++) {
134 isw
= nn
*rando(seed
);
135 jsw
= nn
*rando(seed
);
136 } while ((isw
== jsw
) || (isw
>= nn
) || (jsw
>= nn
));
138 iisw
= m
->m_ind
[isw
];
139 jjsw
= m
->m_ind
[jsw
];
140 ei
= row_energy(nn
,iisw
,m
->mat
[jsw
]);
141 ej
= row_energy(nn
,jjsw
,m
->mat
[isw
]);
143 e
[next
] = e
[cur
] + (ei
+ej
-EROW(m
,isw
)-EROW(m
,jsw
))/nn
;
145 efac
= kT
? exp((e
[next
]-e
[cur
])/kT
) : -1;
146 if ((e
[next
] > e
[cur
]) || (efac
> rando(seed
))) {
148 if (e
[next
] > e
[cur
])
149 cp_index(nn
,m
->m_ind
,low_index
);
151 fprintf(log
,"Taking uphill step\n");
153 /* Now swapping rows */
154 m
->m_ind
[isw
] = jjsw
;
155 m
->m_ind
[jsw
] = iisw
;
159 fprintf(log
,"Iter: %d Swapped %4d and %4d (now %g)",
160 i
,isw
,jsw
,mat_energy(m
));
161 pr_energy(log
,e
[cur
]);
164 /* Now restore the highest energy index */
165 cp_index(nn
,low_index
,m
->m_ind
);
168 static void calc_dist(int nind
,rvec x
[],real
**d
)
174 for(i
=0; (i
<nind
-1); i
++) {
176 for(j
=i
+1; (j
<nind
); j
++) {
177 /* Should use pbc_dx when analysing multiple molecueles,
178 * but the box is not stored for every frame.
180 rvec_sub(xi
,x
[j
],dx
);
186 static real
rms_dist(int isize
,real
**d
,real
**d_r
)
192 for(i
=0; (i
<isize
-1); i
++)
193 for(j
=i
+1; (j
<isize
); j
++) {
197 r2
/=(isize
*(isize
-1))/2;
202 static int rms_dist_comp(const void *a
,const void *b
)
209 if (da
->dist
- db
->dist
< 0)
211 else if (da
->dist
- db
->dist
> 0)
216 static int clust_id_comp(const void *a
,const void *b
)
223 return da
->clust
- db
->clust
;
226 static int nrnb_comp(const void *a
, const void *b
)
233 /* return the b-a, we want highest first */
234 return db
->nr
- da
->nr
;
237 void gather(t_mat
*m
,real cutoff
,t_clusters
*clust
)
241 int i
,j
,k
,nn
,cid
,n1
,diff
;
244 /* First we sort the entries in the RMSD matrix */
248 for(i
=k
=0; (i
<n1
); i
++)
249 for(j
=i
+1; (j
<n1
); j
++,k
++) {
252 d
[k
].dist
= m
->mat
[i
][j
];
255 gmx_incons("gather algortihm");
256 qsort(d
,nn
,sizeof(d
[0]),rms_dist_comp
);
258 /* Now we make a cluster index for all of the conformations */
261 /* Now we check the closest structures, and equalize their cluster numbers */
262 fprintf(stderr
,"Linking structures ");
266 for(k
=0; (k
<nn
) && (d
[k
].dist
< cutoff
); k
++) {
267 diff
= c
[d
[k
].j
].clust
- c
[d
[k
].i
].clust
;
271 c
[d
[k
].j
].clust
= c
[d
[k
].i
].clust
;
273 c
[d
[k
].i
].clust
= c
[d
[k
].j
].clust
;
277 fprintf(stderr
,"\nSorting and renumbering clusters\n");
278 /* Sort on cluster number */
279 qsort(c
,n1
,sizeof(c
[0]),clust_id_comp
);
281 /* Renumber clusters */
283 for(k
=1; k
<n1
; k
++) {
284 if (c
[k
].clust
!= c
[k
-1].clust
) {
292 for(k
=0; (k
<n1
); k
++)
293 fprintf(debug
,"Cluster index for conformation %d: %d\n",
294 c
[k
].conf
,c
[k
].clust
);
297 clust
->cl
[c
[k
].conf
] = c
[k
].clust
;
303 bool jp_same(int **nnb
,int i
,int j
,int P
)
309 for(k
=0; nnb
[i
][k
]>=0; k
++)
310 bIn
= bIn
|| (nnb
[i
][k
] == j
);
315 for(k
=0; nnb
[j
][k
]>=0; k
++)
316 bIn
= bIn
|| (nnb
[j
][k
] == i
);
321 for(ii
=0; nnb
[i
][ii
]>=0; ii
++)
322 for(jj
=0; nnb
[j
][jj
]>=0; jj
++)
323 if ((nnb
[i
][ii
] == nnb
[j
][jj
]) && (nnb
[i
][ii
] != -1))
329 static void jarvis_patrick(int n1
,real
**mat
,int M
,int P
,
330 real rmsdcut
,t_clusters
*clust
)
335 int i
,j
,k
,cid
,diff
,max
;
342 /* First we sort the entries in the RMSD matrix row by row.
343 * This gives us the nearest neighbor list.
347 for(i
=0; (i
<n1
); i
++) {
348 for(j
=0; (j
<n1
); j
++) {
350 row
[j
].dist
= mat
[i
][j
];
352 qsort(row
,n1
,sizeof(row
[0]),rms_dist_comp
);
354 /* Put the M nearest neighbors in the list */
356 for(j
=k
=0; (k
<M
) && (j
<n1
) && (mat
[i
][row
[j
].j
] < rmsdcut
); j
++)
358 nnb
[i
][k
] = row
[j
].j
;
363 /* Put all neighbors nearer than rmsdcut in the list */
366 for(j
=0; (j
<n1
) && (mat
[i
][row
[j
].j
] < rmsdcut
); j
++)
372 nnb
[i
][k
] = row
[j
].j
;
376 srenew(nnb
[i
],max
+1);
382 fprintf(debug
,"Nearest neighborlist. M = %d, P = %d\n",M
,P
);
383 for(i
=0; (i
<n1
); i
++) {
384 fprintf(debug
,"i:%5d nbs:",i
);
385 for(j
=0; nnb
[i
][j
]>=0; j
++)
386 fprintf(debug
,"%5d[%5.3f]",nnb
[i
][j
],mat
[i
][nnb
[i
][j
]]);
392 fprintf(stderr
,"Linking structures ");
393 /* Use mcpy for temporary storage of booleans */
394 mcpy
= mk_matrix(n1
,n1
,FALSE
);
396 for(j
=i
+1; j
<n1
; j
++)
397 mcpy
[i
][j
] = jp_same(nnb
,i
,j
,P
);
401 for(i
=0; i
<n1
; i
++) {
402 for(j
=i
+1; j
<n1
; j
++)
404 diff
= c
[j
].clust
- c
[i
].clust
;
408 c
[j
].clust
= c
[i
].clust
;
410 c
[i
].clust
= c
[j
].clust
;
416 fprintf(stderr
,"\nSorting and renumbering clusters\n");
417 /* Sort on cluster number */
418 qsort(c
,n1
,sizeof(c
[0]),clust_id_comp
);
420 /* Renumber clusters */
422 for(k
=1; k
<n1
; k
++) {
423 if (c
[k
].clust
!= c
[k
-1].clust
) {
432 clust
->cl
[c
[k
].conf
] = c
[k
].clust
;
434 for(k
=0; (k
<n1
); k
++)
435 fprintf(debug
,"Cluster index for conformation %d: %d\n",
436 c
[k
].conf
,c
[k
].clust
);
438 /* Again, I don't see the point in this... (AF) */
439 /* for(i=0; (i<n1); i++) { */
440 /* for(j=0; (j<n1); j++) */
441 /* mcpy[c[i].conf][c[j].conf] = mat[i][j]; */
443 /* for(i=0; (i<n1); i++) { */
444 /* for(j=0; (j<n1); j++) */
445 /* mat[i][j] = mcpy[i][j]; */
447 done_matrix(n1
,&mcpy
);
450 for(i
=0; (i
<n1
); i
++)
455 static void dump_nnb (FILE *fp
, const char *title
, int n1
, t_nnb
*nnb
)
459 /* dump neighbor list */
460 fprintf(fp
,"%s",title
);
461 for(i
=0; (i
<n1
); i
++) {
462 fprintf(fp
,"i:%5d #:%5d nbs:",i
,nnb
[i
].nr
);
463 for(j
=0; j
<nnb
[i
].nr
; j
++)
464 fprintf(fp
,"%5d",nnb
[i
].nb
[j
]);
469 static void gromos(int n1
, real
**mat
, real rmsdcut
, t_clusters
*clust
)
475 /* Put all neighbors nearer than rmsdcut in the list */
476 fprintf(stderr
,"Making list of neighbors within cutoff ");
479 for(i
=0; (i
<n1
); i
++) {
482 /* put all neighbors within cut-off in list */
484 if (mat
[i
][j
] < rmsdcut
) {
487 srenew(nnb
[i
].nb
,max
);
492 /* store nr of neighbors, we'll need that */
494 if (i
%(1+n1
/100)==0) fprintf(stderr
,"%3d%%\b\b\b\b",(i
*100+1)/n1
);
496 fprintf(stderr
,"%3d%%\n",100);
499 /* sort neighbor list on number of neighbors, largest first */
500 qsort(nnb
,n1
,sizeof(nnb
[0]),nrnb_comp
);
502 if (debug
) dump_nnb(debug
, "Nearest neighborlist after sort.\n", n1
, nnb
);
504 /* turn first structure with all its neighbors (largest) into cluster
505 remove them from pool of structures and repeat for all remaining */
506 fprintf(stderr
,"Finding clusters %4d", 0);
507 /* cluster id's start at 1: */
510 /* set cluster id (k) for first item in neighborlist */
511 for (j
=0; j
<nnb
[0].nr
; j
++)
512 clust
->cl
[nnb
[0].nb
[j
]] = k
;
517 /* adjust number of neighbors for others, taking removals into account: */
518 for(i
=1; i
<n1
&& nnb
[i
].nr
; i
++) {
520 for(j
=0; j
<nnb
[i
].nr
; j
++)
521 /* if this neighbor wasn't removed */
522 if ( clust
->cl
[nnb
[i
].nb
[j
]] == 0 ) {
523 /* shift the rest (j1<=j) */
524 nnb
[i
].nb
[j1
]=nnb
[i
].nb
[j
];
528 /* now j1 is the new number of neighbors */
531 /* sort again on nnb[].nr, because we have new # neighbors: */
532 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
533 qsort(nnb
,i
,sizeof(nnb
[0]),nrnb_comp
);
535 fprintf(stderr
,"\b\b\b\b%4d",k
);
539 fprintf(stderr
,"\n");
542 fprintf(debug
,"Clusters (%d):\n", k
);
544 fprintf(debug
," %3d", clust
->cl
[i
]);
551 rvec
**read_whole_trj(const char *fn
,int isize
,atom_id index
[],int skip
,
552 int *nframe
, real
**time
,const output_env_t oenv
)
563 natom
= read_first_x(oenv
,&status
,fn
,&t
,&x
,box
);
570 srenew(*time
,max_nf
);
572 if ((i
% skip
) == 0) {
574 /* Store only the interesting atoms */
575 for(j
=0; (j
<isize
); j
++)
576 copy_rvec(x
[index
[j
]],xx
[i0
][j
]);
581 } while (read_next_x(oenv
,status
,&t
,natom
,x
,box
));
582 fprintf(stderr
,"Allocated %lu bytes for frames\n",
583 (unsigned long) (max_nf
*isize
*sizeof(**xx
)));
584 fprintf(stderr
,"Read %d frames from trajectory %s\n",i0
,fn
);
591 static int plot_clusters(int nf
, real
**mat
, t_clusters
*clust
,
592 int nlevels
, int minstruct
)
595 int *cl_id
,*nstruct
,*strind
;
600 for(i
=0; i
<nf
; i
++) {
602 cl_id
[i
] = clust
->cl
[i
];
606 for(i
=0; i
<nf
; i
++) {
607 if (nstruct
[i
] >= minstruct
) {
609 for(j
=0; (j
<nf
); j
++)
611 strind
[j
] = ncluster
;
615 fprintf(stderr
,"There are %d clusters with at least %d conformations\n",
618 for(i
=0; (i
<nf
); i
++) {
621 if ((ci
== cl_id
[j
]) && (nstruct
[ci
] >= minstruct
)) {
622 /* color different clusters with different colors, as long as
623 we don't run out of colors */
624 mat
[i
][j
] = strind
[i
];
636 static void mark_clusters(int nf
, real
**mat
, real val
, t_clusters
*clust
)
642 if (clust
->cl
[i
] == clust
->cl
[j
])
648 static char *parse_filename(const char *fn
, int maxnr
)
656 gmx_fatal(FARGS
,"will not number filename %s containing '%c'",fn
,'%');
657 /* number of digits needed in numbering */
658 i
= (int)(log(maxnr
)/log(10)) + 1;
659 /* split fn and ext */
660 ext
= strrchr(fn
, '.');
662 gmx_fatal(FARGS
,"cannot separate extension in filename %s",fn
);
664 /* insert e.g. '%03d' between fn and ext */
665 sprintf(buf
,"%s%%0%dd.%s",fn
,i
,ext
);
666 snew(fnout
,strlen(buf
)+1);
672 static void ana_trans(t_clusters
*clust
, int nf
,
673 const char *transfn
, const char *ntransfn
, FILE *log
,
674 t_rgb rlo
,t_rgb rhi
,const output_env_t oenv
)
679 int i
,ntranst
,maxtrans
;
682 snew(ntrans
,clust
->ncl
);
683 snew(trans
,clust
->ncl
);
684 snew(axis
,clust
->ncl
);
685 for(i
=0; i
<clust
->ncl
; i
++) {
687 snew(trans
[i
],clust
->ncl
);
692 if(clust
->cl
[i
] != clust
->cl
[i
-1]) {
694 ntrans
[clust
->cl
[i
-1]-1]++;
695 ntrans
[clust
->cl
[i
]-1]++;
696 trans
[clust
->cl
[i
-1]-1][clust
->cl
[i
]-1]++;
697 maxtrans
= max(maxtrans
, trans
[clust
->cl
[i
]-1][clust
->cl
[i
-1]-1]);
699 ffprintf2(stderr
,log
,buf
,"Counted %d transitions in total, "
700 "max %d between two specific clusters\n",ntranst
,maxtrans
);
702 fp
=ffopen(transfn
,"w");
703 i
= min(maxtrans
+1, 80);
704 write_xpm(fp
,0,"Cluster Transitions","# transitions",
705 "from cluster","to cluster",
706 clust
->ncl
, clust
->ncl
, axis
, axis
, trans
,
707 0, maxtrans
, rlo
, rhi
, &i
);
711 fp
=xvgropen(ntransfn
,"Cluster Transitions","Cluster #","# transitions",
713 for(i
=0; i
<clust
->ncl
; i
++)
714 fprintf(fp
,"%5d %5d\n",i
+1,ntrans
[i
]);
718 for(i
=0; i
<clust
->ncl
; i
++)
724 static void analyze_clusters(int nf
, t_clusters
*clust
, real
**rmsd
,
725 int natom
, t_atoms
*atoms
, rvec
*xtps
,
726 real
*mass
, rvec
**xx
, real
*time
,
727 int ifsize
, atom_id
*fitidx
,
728 int iosize
, atom_id
*outidx
,
729 const char *trxfn
, const char *sizefn
,
730 const char *transfn
, const char *ntransfn
,
731 const char *clustidfn
, bool bAverage
,
732 int write_ncl
, int write_nst
, real rmsmin
,
733 bool bFit
, FILE *log
,t_rgb rlo
,t_rgb rhi
,
734 const output_env_t oenv
)
737 char buf
[STRLEN
],buf1
[40],buf2
[40],buf3
[40],*trxsfn
;
738 int trxout
=0,trxsout
=0;
739 int i
,i1
,cl
,nstr
,*structure
,first
=0,midstr
;
741 real r
,clrmsd
,midrmsd
;
747 ffprintf1(stderr
,log
,buf
,"\nFound %d clusters\n\n",clust
->ncl
);
750 /* do we write all structures? */
752 trxsfn
= parse_filename(trxfn
, max(write_ncl
,clust
->ncl
));
755 ffprintf2(stderr
,log
,buf
,"Writing %s structure for each cluster to %s\n",
756 bAverage
? "average" : "middle", trxfn
);
758 /* find out what we want to tell the user:
759 Writing [all structures|structures with rmsd > %g] for
760 {all|first %d} clusters {with more than %d structures} to %s */
762 sprintf(buf1
,"structures with rmsd > %g",rmsmin
);
764 sprintf(buf1
,"all structures");
765 buf2
[0]=buf3
[0]='\0';
766 if (write_ncl
>=clust
->ncl
) {
768 sprintf(buf2
,"all ");
770 sprintf(buf2
,"the first %d ",write_ncl
);
772 sprintf(buf3
," with more than %d structures",write_nst
);
773 sprintf(buf
,"Writing %s for %sclusters%s to %s\n",buf1
,buf2
,buf3
,trxsfn
);
774 ffprintf(stderr
,log
,buf
);
777 /* Prepare a reference structure for the orientation of the clusters */
779 reset_x(ifsize
,fitidx
,natom
,NULL
,xtps
,mass
);
780 trxout
= open_trx(trxfn
,"w");
781 /* Calculate the average structure in each cluster, *
782 * all structures are fitted to the first struture of the cluster */
786 if (transfn
|| ntransfn
)
787 ana_trans(clust
, nf
, transfn
, ntransfn
, log
,rlo
,rhi
,oenv
);
790 fp
=xvgropen(clustidfn
,"Clusters",output_env_get_xvgr_tlabel(oenv
),"Cluster #",oenv
);
791 fprintf(fp
,"@ s0 symbol 2\n");
792 fprintf(fp
,"@ s0 symbol size 0.2\n");
793 fprintf(fp
,"@ s0 linestyle 0\n");
795 fprintf(fp
,"%8g %8d\n",time
[i
],clust
->cl
[i
]);
799 fp
=xvgropen(sizefn
,"Cluster Sizes","Cluster #","# Structures",oenv
);
800 fprintf(fp
,"@g%d type %s\n",0,"bar");
803 fprintf(log
,"\n%3s | %3s %4s | %6s %4s | cluster members\n",
804 "cl.","#st","rmsd","middle","rmsd");
805 for(cl
=1; cl
<=clust
->ncl
; cl
++) {
806 /* prepare structures (fit, middle, average) */
808 for(i
=0; i
<natom
;i
++)
811 for(i1
=0; i1
<nf
; i1
++)
812 if (clust
->cl
[i1
] == cl
) {
813 structure
[nstr
] = i1
;
815 if (trxfn
&& (bAverage
|| write_ncl
) ) {
817 reset_x(ifsize
,fitidx
,natom
,NULL
,xx
[i1
],mass
);
821 do_fit(natom
,mass
,xx
[first
],xx
[i1
]);
823 for(i
=0; i
<natom
; i
++)
824 rvec_inc(xav
[i
],xx
[i1
][i
]);
828 fprintf(fp
,"%8d %8d\n",cl
,nstr
);
832 for(i1
=0; i1
<nstr
; i1
++) {
835 for(i
=0; i
<nstr
; i
++)
837 r
+= rmsd
[structure
[i
]][structure
[i1
]];
839 r
+= rmsd
[structure
[i1
]][structure
[i
]];
843 midstr
= structure
[i1
];
850 /* dump cluster info to logfile */
852 sprintf(buf1
,"%6.3f",clrmsd
);
855 sprintf(buf2
,"%5.3f",midrmsd
);
859 sprintf(buf1
,"%5s","");
860 sprintf(buf2
,"%5s","");
862 fprintf(log
,"%3d | %3d %s | %6g%s |",cl
,nstr
,buf1
,time
[midstr
],buf2
);
863 for(i
=0; i
<nstr
; i
++) {
864 if ((i
% 7 == 0) && i
)
865 sprintf(buf
,"\n%3s | %3s %4s | %6s %4s |","","","","","");
869 fprintf(log
,"%s %6g",buf
,time
[i1
]);
873 /* write structures to trajectory file(s) */
876 for(i
=0; i
<nstr
; i
++)
878 if ( cl
< write_ncl
+1 && nstr
> write_nst
) {
879 /* Dump all structures for this cluster */
880 /* generate numbered filename (there is a %d in trxfn!) */
881 sprintf(buf
,trxsfn
,cl
);
882 trxsout
= open_trx(buf
,"w");
883 for(i
=0; i
<nstr
; i
++) {
886 for(i1
=0; i1
<i
&& bWrite
[i
]; i1
++)
888 bWrite
[i
] = rmsd
[structure
[i1
]][structure
[i
]] > rmsmin
;
890 write_trx(trxsout
,iosize
,outidx
,atoms
,i
,time
[structure
[i
]],zerobox
,
891 xx
[structure
[i
]],NULL
,NULL
);
895 /* Dump the average structure for this cluster */
897 for(i
=0; i
<natom
; i
++)
898 svmul(1.0/nstr
,xav
[i
],xav
[i
]);
900 for(i
=0; i
<natom
; i
++)
901 copy_rvec(xx
[midstr
][i
],xav
[i
]);
903 reset_x(ifsize
,fitidx
,natom
,NULL
,xav
,mass
);
906 do_fit(natom
,mass
,xtps
,xav
);
908 write_trx(trxout
,iosize
,outidx
,atoms
,cl
,time
[midstr
],zerobox
,xav
,NULL
,NULL
);
923 static void convert_mat(t_matrix
*mat
,t_mat
*rms
)
928 matrix2real(mat
,rms
->mat
);
929 /* free input xpm matrix data */
930 for(i
=0; i
<mat
->nx
; i
++)
931 sfree(mat
->matrix
[i
]);
934 for(i
=0; i
<mat
->nx
; i
++)
935 for(j
=i
; j
<mat
->nx
; j
++) {
936 rms
->sumrms
+= rms
->mat
[i
][j
];
937 rms
->maxrms
= max(rms
->maxrms
, rms
->mat
[i
][j
]);
939 rms
->minrms
= min(rms
->minrms
, rms
->mat
[i
][j
]);
944 int gmx_cluster(int argc
,char *argv
[])
946 const char *desc
[] = {
947 "g_cluster can cluster structures with several different methods.",
948 "Distances between structures can be determined from a trajectory",
949 "or read from an XPM matrix file with the [TT]-dm[tt] option.",
950 "RMS deviation after fitting or RMS deviation of atom-pair distances",
951 "can be used to define the distance between structures.[PAR]",
953 "single linkage: add a structure to a cluster when its distance to any",
954 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
956 "Jarvis Patrick: add a structure to a cluster when this structure",
957 "and a structure in the cluster have each other as neighbors and",
958 "they have a least [TT]P[tt] neighbors in common. The neighbors",
959 "of a structure are the M closest structures or all structures within",
960 "[TT]cutoff[tt].[PAR]",
962 "Monte Carlo: reorder the RMSD matrix using Monte Carlo.[PAR]",
964 "diagonalization: diagonalize the RMSD matrix.[PAR]"
966 "gromos: use algorithm as described in Daura [IT]et al.[it]",
967 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
968 "Count number of neighbors using cut-off, take structure with",
969 "largest number of neighbors with all its neighbors as cluster",
970 "and eleminate it from the pool of clusters. Repeat for remaining",
971 "structures in pool.[PAR]",
973 "When the clustering algorithm assigns each structure to exactly one",
974 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
975 "file is supplied, the structure with",
976 "the smallest average distance to the others or the average structure",
977 "or all structures for each cluster will be written to a trajectory",
978 "file. When writing all structures, separate numbered files are made",
979 "for each cluster.[PAR]"
981 "Two output files are always written:[BR]",
982 "[TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
983 "and a graphical depiction of the clusters in the lower right half",
984 "When [TT]-minstruct[tt] = 1 the graphical depiction is black",
985 "when two structures are in the same cluster.",
986 "When [TT]-minstruct[tt] > 1 different colors will be used for each",
988 "[TT]-g[tt] writes information on the options used and a detailed list",
989 "of all clusters and their members.[PAR]",
991 "Additionally, a number of optional output files can be written:[BR]",
992 "[TT]-dist[tt] writes the RMSD distribution.[BR]",
993 "[TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
994 "diagonalization.[BR]",
995 "[TT]-sz[tt] writes the cluster sizes.[BR]",
996 "[TT]-tr[tt] writes a matrix of the number transitions between",
997 "cluster pairs.[BR]",
998 "[TT]-ntr[tt] writes the total number of transitions to or from",
1000 "[TT]-clid[tt] writes the cluster number as a function of time.[BR]",
1001 "[TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1002 "structure of each cluster or writes numbered files with cluster members",
1003 "for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1004 "[TT]-nst[tt] and [TT]-rmsmin[tt]).[BR]",
1008 int i
,i1
,i2
,j
,nf
,nrms
;
1011 rvec
*xtps
,*usextps
,*x1
,**xx
=NULL
;
1012 const char *fn
,*trx_out_fn
;
1019 t_matrix
*readmat
=NULL
;
1022 int isize
=0,ifsize
=0,iosize
=0;
1023 atom_id
*index
=NULL
, *fitidx
, *outidx
;
1025 real rmsd
,**d1
,**d2
,*time
=NULL
,time_invfac
,*mass
=NULL
;
1026 char buf
[STRLEN
],buf1
[80],title
[STRLEN
];
1027 bool bAnalyze
,bUseRmsdCut
,bJP_RMSD
=FALSE
,bReadMat
,bReadTraj
;
1029 int method
,ncluster
=0;
1030 static const char *methodname
[] = {
1031 NULL
, "linkage", "jarvis-patrick","monte-carlo",
1032 "diagonalization", "gromos", NULL
1034 enum { m_null
, m_linkage
, m_jarvis_patrick
,
1035 m_monte_carlo
, m_diagonalize
, m_gromos
, m_nr
};
1036 /* Set colors for plotting: white = zero RMS, black = maximum */
1037 static t_rgb rlo_top
= { 1.0, 1.0, 1.0 };
1038 static t_rgb rhi_top
= { 0.0, 0.0, 0.0 };
1039 static t_rgb rlo_bot
= { 1.0, 1.0, 1.0 };
1040 static t_rgb rhi_bot
= { 0.0, 0.0, 1.0 };
1041 static int nlevels
=40,skip
=1;
1042 static real scalemax
=-1.0,rmsdcut
=0.1,rmsmin
=0.0;
1043 static bool bRMSdist
=FALSE
,bBinary
=FALSE
,bAverage
=FALSE
,bFit
=TRUE
;
1044 static int niter
=10000,seed
=1993,write_ncl
=0,write_nst
=1,minstruct
=1;
1045 static real kT
=1e-3;
1046 static int M
=10,P
=3;
1049 { "-dista", FALSE
, etBOOL
, {&bRMSdist
},
1050 "Use RMSD of distances instead of RMS deviation" },
1051 { "-nlevels",FALSE
,etINT
, {&nlevels
},
1052 "Discretize RMSD matrix in # levels" },
1053 { "-cutoff",FALSE
, etREAL
, {&rmsdcut
},
1054 "RMSD cut-off (nm) for two structures to be neighbor" },
1055 { "-fit", FALSE
, etBOOL
, {&bFit
},
1056 "Use least squares fitting before RMSD calculation" },
1057 { "-max", FALSE
, etREAL
, {&scalemax
},
1058 "Maximum level in RMSD matrix" },
1059 { "-skip", FALSE
, etINT
, {&skip
},
1060 "Only analyze every nr-th frame" },
1061 { "-av", FALSE
, etBOOL
, {&bAverage
},
1062 "Write average iso middle structure for each cluster" },
1063 { "-wcl", FALSE
, etINT
, {&write_ncl
},
1064 "Write all structures for first # clusters to numbered files" },
1065 { "-nst", FALSE
, etINT
, {&write_nst
},
1066 "Only write all structures if more than # per cluster" },
1067 { "-rmsmin",FALSE
, etREAL
, {&rmsmin
},
1068 "minimum rms difference with rest of cluster for writing structures" },
1069 { "-method",FALSE
, etENUM
, {methodname
},
1070 "Method for cluster determination" },
1071 { "-minstruct", FALSE
, etINT
, {&minstruct
},
1072 "Minimum number of structures in cluster for coloring in the xpm file" },
1073 { "-binary",FALSE
, etBOOL
, {&bBinary
},
1074 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1075 "is given by -cutoff" },
1076 { "-M", FALSE
, etINT
, {&M
},
1077 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1078 "0 is use cutoff" },
1079 { "-P", FALSE
, etINT
, {&P
},
1080 "Number of identical nearest neighbors required to form a cluster" },
1081 { "-seed", FALSE
, etINT
, {&seed
},
1082 "Random number seed for Monte Carlo clustering algorithm" },
1083 { "-niter", FALSE
, etINT
, {&niter
},
1084 "Number of iterations for MC" },
1085 { "-kT", FALSE
, etREAL
, {&kT
},
1086 "Boltzmann weighting factor for Monte Carlo optimization "
1087 "(zero turns off uphill steps)" }
1090 { efTRX
, "-f", NULL
, ffOPTRD
},
1091 { efTPS
, "-s", NULL
, ffOPTRD
},
1092 { efNDX
, NULL
, NULL
, ffOPTRD
},
1093 { efXPM
, "-dm", "rmsd", ffOPTRD
},
1094 { efXPM
, "-o", "rmsd-clust", ffWRITE
},
1095 { efLOG
, "-g", "cluster", ffWRITE
},
1096 { efXVG
, "-dist", "rmsd-dist", ffOPTWR
},
1097 { efXVG
, "-ev", "rmsd-eig", ffOPTWR
},
1098 { efXVG
, "-sz", "clust-size", ffOPTWR
},
1099 { efXPM
, "-tr", "clust-trans",ffOPTWR
},
1100 { efXVG
, "-ntr", "clust-trans",ffOPTWR
},
1101 { efXVG
, "-clid", "clust-id.xvg",ffOPTWR
},
1102 { efTRX
, "-cl", "clusters.pdb", ffOPTWR
}
1104 #define NFILE asize(fnm)
1106 CopyRight(stderr
,argv
[0]);
1107 parse_common_args(&argc
,argv
,
1108 PCA_CAN_VIEW
| PCA_CAN_TIME
| PCA_TIME_UNIT
| PCA_BE_NICE
,
1109 NFILE
,fnm
,asize(pa
),pa
,asize(desc
),desc
,0,NULL
,
1113 bReadMat
= opt2bSet("-dm",NFILE
,fnm
);
1114 bReadTraj
= opt2bSet("-f",NFILE
,fnm
) || !bReadMat
;
1115 if ( opt2parg_bSet("-av",asize(pa
),pa
) ||
1116 opt2parg_bSet("-wcl",asize(pa
),pa
) ||
1117 opt2parg_bSet("-nst",asize(pa
),pa
) ||
1118 opt2parg_bSet("-rmsmin",asize(pa
),pa
) ||
1119 opt2bSet("-cl",NFILE
,fnm
) )
1120 trx_out_fn
= opt2fn("-cl",NFILE
,fnm
);
1123 if (bReadMat
&& output_env_get_time_factor(oenv
)!=1) {
1125 "\nWarning: assuming the time unit in %s is %s\n",
1126 opt2fn("-dm",NFILE
,fnm
),output_env_get_time_unit(oenv
));
1128 if (trx_out_fn
&& !bReadTraj
)
1129 fprintf(stderr
,"\nWarning: "
1130 "cannot write cluster structures without reading trajectory\n"
1131 " ignoring option -cl %s\n", trx_out_fn
);
1134 while ( method
< m_nr
&& strcasecmp(methodname
[0], methodname
[method
])!=0 )
1137 gmx_fatal(FARGS
,"Invalid method");
1139 bAnalyze
= (method
== m_linkage
|| method
== m_jarvis_patrick
||
1140 method
== m_gromos
);
1143 log
= ftp2FILE(efLOG
,NFILE
,fnm
,"w");
1145 fprintf(stderr
,"Using %s method for clustering\n",methodname
[0]);
1146 fprintf(log
,"Using %s method for clustering\n",methodname
[0]);
1148 /* check input and write parameters to log file */
1149 bUseRmsdCut
= FALSE
;
1150 if (method
== m_jarvis_patrick
) {
1151 bJP_RMSD
= (M
== 0) || opt2parg_bSet("-cutoff",asize(pa
),pa
);
1152 if ((M
<0) || (M
== 1))
1153 gmx_fatal(FARGS
,"M (%d) must be 0 or larger than 1",M
);
1155 sprintf(buf1
,"Will use P=%d and RMSD cutoff (%g)",P
,rmsdcut
);
1159 gmx_fatal(FARGS
,"Number of neighbors required (P) must be less than M");
1161 sprintf(buf1
,"Will use P=%d, M=%d and RMSD cutoff (%g)",P
,M
,rmsdcut
);
1164 sprintf(buf1
,"Will use P=%d, M=%d",P
,M
);
1166 ffprintf1(stderr
,log
,buf
,"%s for determining the neighbors\n\n",buf1
);
1167 } else /* method != m_jarvis */
1168 bUseRmsdCut
= ( bBinary
|| method
== m_linkage
|| method
== m_gromos
);
1169 if (bUseRmsdCut
&& method
!= m_jarvis_patrick
)
1170 fprintf(log
,"Using RMSD cutoff %g nm\n",rmsdcut
);
1171 if ( method
==m_monte_carlo
)
1172 fprintf(log
,"Using %d iterations\n",niter
);
1175 gmx_fatal(FARGS
,"skip (%d) should be >= 1",skip
);
1179 /* don't read mass-database as masses (and top) are not used */
1180 read_tps_conf(ftp2fn(efTPS
,NFILE
,fnm
),buf
,&top
,&ePBC
,&xtps
,NULL
,box
,
1183 fprintf(stderr
,"\nSelect group for least squares fit%s:\n",
1184 bReadMat
?"":" and RMSD calculation");
1185 get_index(&(top
.atoms
),ftp2fn_null(efNDX
,NFILE
,fnm
),
1186 1,&ifsize
,&fitidx
,&grpname
);
1188 fprintf(stderr
,"\nSelect group for output:\n");
1189 get_index(&(top
.atoms
),ftp2fn_null(efNDX
,NFILE
,fnm
),
1190 1,&iosize
,&outidx
,&grpname
);
1191 /* merge and convert both index groups: */
1192 /* first copy outidx to index. let outidx refer to elements in index */
1195 for(i
=0; i
<iosize
; i
++) {
1199 /* now lookup elements from fitidx in index, add them if necessary
1200 and also let fitidx refer to elements in index */
1201 for(i
=0; i
<ifsize
; i
++) {
1203 while (j
<isize
&& index
[j
]!=fitidx
[i
])
1206 /* slow this way, but doesn't matter much */
1208 srenew(index
,isize
);
1213 } else { /* !trx_out_fn */
1216 for(i
=0; i
<ifsize
; i
++) {
1222 /* Initiate arrays */
1225 for(i
=0; (i
<isize
); i
++) {
1231 /* Loop over first coordinate file */
1232 fn
= opt2fn("-f",NFILE
,fnm
);
1234 xx
= read_whole_trj(fn
,isize
,index
,skip
,&nf
,&time
,oenv
);
1235 output_env_conv_times(oenv
, nf
, time
);
1236 if (!bRMSdist
|| bAnalyze
) {
1237 /* Center all frames on zero */
1239 for(i
=0; i
<ifsize
; i
++)
1240 mass
[fitidx
[i
]] = top
.atoms
.atom
[index
[fitidx
[i
]]].m
;
1243 reset_x(ifsize
,fitidx
,isize
,NULL
,xx
[i
],mass
);
1247 fprintf(stderr
,"Reading rms distance matrix ");
1248 read_xpm_matrix(opt2fn("-dm",NFILE
,fnm
),&readmat
);
1249 fprintf(stderr
,"\n");
1250 if (readmat
[0].nx
!= readmat
[0].ny
)
1251 gmx_fatal(FARGS
,"Matrix (%dx%d) is not square",
1252 readmat
[0].nx
,readmat
[0].ny
);
1253 if (bReadTraj
&& bAnalyze
&& (readmat
[0].nx
!= nf
))
1254 gmx_fatal(FARGS
,"Matrix size (%dx%d) does not match the number of "
1255 "frames (%d)",readmat
[0].nx
,readmat
[0].ny
,nf
);
1259 time
= readmat
[0].axis_x
;
1260 time_invfac
= output_env_get_time_invfactor(oenv
);
1262 time
[i
] *= time_invfac
;
1264 rms
= init_mat(readmat
[0].nx
,method
== m_diagonalize
);
1265 convert_mat(&(readmat
[0]),rms
);
1267 nlevels
= readmat
[0].nmap
;
1268 } else { /* !bReadMat */
1269 rms
= init_mat(nf
,method
== m_diagonalize
);
1270 nrms
= (nf
*(nf
-1))/2;
1272 fprintf(stderr
,"Computing %dx%d RMS deviation matrix\n",nf
,nf
);
1274 for(i1
=0; (i1
<nf
); i1
++) {
1275 for(i2
=i1
+1; (i2
<nf
); i2
++) {
1276 for(i
=0; i
<isize
; i
++)
1277 copy_rvec(xx
[i1
][i
],x1
[i
]);
1279 do_fit(isize
,mass
,xx
[i2
],x1
);
1280 rmsd
= rmsdev(isize
,mass
,xx
[i2
],x1
);
1281 set_mat_entry(rms
,i1
,i2
,rmsd
);
1284 fprintf(stderr
,"\r# RMSD calculations left: %d ",nrms
);
1286 } else { /* bRMSdist */
1287 fprintf(stderr
,"Computing %dx%d RMS distance deviation matrix\n",nf
,nf
);
1288 for(i1
=0; (i1
<nf
); i1
++) {
1289 calc_dist(isize
,xx
[i1
],d1
);
1290 for(i2
=i1
+1; (i2
<nf
); i2
++) {
1291 calc_dist(isize
,xx
[i2
],d2
);
1292 set_mat_entry(rms
,i1
,i2
,rms_dist(isize
,d1
,d2
));
1295 fprintf(stderr
,"\r# RMSD calculations left: %d ",nrms
);
1298 fprintf(stderr
,"\n\n");
1300 ffprintf2(stderr
,log
,buf
,"The RMSD ranges from %g to %g nm\n",
1301 rms
->minrms
,rms
->maxrms
);
1302 ffprintf1(stderr
,log
,buf
,"Average RMSD is %g\n",2*rms
->sumrms
/(nf
*(nf
-1)));
1303 ffprintf1(stderr
,log
,buf
,"Number of structures for matrix %d\n",nf
);
1304 ffprintf1(stderr
,log
,buf
,"Energy of the matrix is %g nm\n",mat_energy(rms
));
1305 if (bUseRmsdCut
&& (rmsdcut
< rms
->minrms
|| rmsdcut
> rms
->maxrms
) )
1306 fprintf(stderr
,"WARNING: rmsd cutoff %g is outside range of rmsd values "
1307 "%g to %g\n",rmsdcut
,rms
->minrms
,rms
->maxrms
);
1308 if (bAnalyze
&& (rmsmin
< rms
->minrms
) )
1309 fprintf(stderr
,"WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1310 rmsmin
,rms
->minrms
);
1311 if (bAnalyze
&& (rmsmin
> rmsdcut
) )
1312 fprintf(stderr
,"WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1315 /* Plot the rmsd distribution */
1316 rmsd_distribution(opt2fn("-dist",NFILE
,fnm
),rms
,oenv
);
1319 for(i1
=0; (i1
< nf
); i1
++)
1320 for(i2
=0; (i2
< nf
); i2
++)
1321 if (rms
->mat
[i1
][i2
] < rmsdcut
)
1322 rms
->mat
[i1
][i2
] = 0;
1324 rms
->mat
[i1
][i2
] = 1;
1330 /* Now sort the matrix and write it out again */
1331 gather(rms
,rmsdcut
,&clust
);
1334 /* Do a diagonalization */
1337 memcpy(tmp
,rms
->mat
[0],nf
*nf
*sizeof(real
));
1338 eigensolver(tmp
,nf
,0,nf
,eigval
,rms
->mat
[0]);
1341 fp
= xvgropen(opt2fn("-ev",NFILE
,fnm
),"RMSD matrix Eigenvalues",
1342 "Eigenvector index","Eigenvalues (nm\\S2\\N)",oenv
);
1343 for(i
=0; (i
<nf
); i
++)
1344 fprintf(fp
,"%10d %10g\n",i
,eigval
[i
]);
1348 mc_optimize(log
,rms
,niter
,&seed
,kT
);
1352 case m_jarvis_patrick
:
1353 jarvis_patrick(rms
->nn
,rms
->mat
,M
,P
,bJP_RMSD
? rmsdcut
: -1,&clust
);
1356 gromos(rms
->nn
,rms
->mat
,rmsdcut
,&clust
);
1359 gmx_fatal(FARGS
,"DEATH HORROR unknown method \"%s\"",methodname
[0]);
1362 if (method
== m_monte_carlo
|| method
== m_diagonalize
)
1363 fprintf(stderr
,"Energy of the matrix after clustering is %g nm\n",
1367 if (minstruct
> 1) {
1368 ncluster
= plot_clusters(nf
,rms
->mat
,&clust
,nlevels
,minstruct
);
1370 mark_clusters(nf
,rms
->mat
,rms
->maxrms
,&clust
);
1372 init_t_atoms(&useatoms
,isize
,FALSE
);
1373 snew(usextps
, isize
);
1374 useatoms
.resinfo
= top
.atoms
.resinfo
;
1375 for(i
=0; i
<isize
; i
++) {
1376 useatoms
.atomname
[i
]=top
.atoms
.atomname
[index
[i
]];
1377 useatoms
.atom
[i
].resind
= top
.atoms
.atom
[index
[i
]].resind
;
1378 useatoms
.nres
= max(useatoms
.nres
,useatoms
.atom
[i
].resind
+1);
1379 copy_rvec(xtps
[index
[i
]],usextps
[i
]);
1382 analyze_clusters(nf
,&clust
,rms
->mat
,isize
,&useatoms
,usextps
,mass
,xx
,time
,
1383 ifsize
,fitidx
,iosize
,outidx
,
1384 bReadTraj
?trx_out_fn
:NULL
,
1385 opt2fn_null("-sz",NFILE
,fnm
),
1386 opt2fn_null("-tr",NFILE
,fnm
),
1387 opt2fn_null("-ntr",NFILE
,fnm
),
1388 opt2fn_null("-clid",NFILE
,fnm
),
1389 bAverage
, write_ncl
, write_nst
, rmsmin
, bFit
, log
,
1390 rlo_bot
,rhi_bot
,oenv
);
1394 if (bBinary
&& !bAnalyze
)
1395 /* Make the clustering visible */
1396 for(i2
=0; (i2
< nf
); i2
++)
1397 for(i1
=i2
+1; (i1
< nf
); i1
++)
1398 if (rms
->mat
[i1
][i2
])
1399 rms
->mat
[i1
][i2
] = rms
->maxrms
;
1401 fp
= opt2FILE("-o",NFILE
,fnm
,"w");
1402 fprintf(stderr
,"Writing rms distance/clustering matrix ");
1404 write_xpm(fp
,0,readmat
[0].title
,readmat
[0].legend
,readmat
[0].label_x
,
1405 readmat
[0].label_y
,nf
,nf
,readmat
[0].axis_x
,readmat
[0].axis_y
,
1406 rms
->mat
,0.0,rms
->maxrms
,rlo_top
,rhi_top
,&nlevels
);
1409 sprintf(buf
,"Time (%s)",output_env_get_time_unit(oenv
));
1410 sprintf(title
,"RMS%sDeviation / Cluster Index",
1411 bRMSdist
? " Distance " : " ");
1412 if (minstruct
> 1) {
1413 write_xpm_split(fp
,0,title
,"RMSD (nm)",buf
,buf
,
1414 nf
,nf
,time
,time
,rms
->mat
,0.0,rms
->maxrms
,&nlevels
,
1415 rlo_top
,rhi_top
,0.0,(real
) ncluster
,
1416 &ncluster
,TRUE
,rlo_bot
,rhi_bot
);
1418 write_xpm(fp
,0,title
,"RMSD (nm)",buf
,buf
,
1419 nf
,nf
,time
,time
,rms
->mat
,0.0,rms
->maxrms
,
1420 rlo_top
,rhi_top
,&nlevels
);
1423 fprintf(stderr
,"\n");
1426 /* now show what we've done */
1427 do_view(oenv
,opt2fn("-o",NFILE
,fnm
),"-nxy");
1428 do_view(oenv
,opt2fn_null("-sz",NFILE
,fnm
),"-nxy");
1429 if (method
== m_diagonalize
)
1430 do_view(oenv
,opt2fn_null("-ev",NFILE
,fnm
),"-nxy");
1431 do_view(oenv
,opt2fn("-dist",NFILE
,fnm
),"-nxy");
1433 do_view(oenv
,opt2fn_null("-tr",NFILE
,fnm
),"-nxy");
1434 do_view(oenv
,opt2fn_null("-ntr",NFILE
,fnm
),"-nxy");
1435 do_view(oenv
,opt2fn_null("-clid",NFILE
,fnm
),"-nxy");
1438 /* Thank the user for her patience */