minor fixes in ditribution files
[gromacs/qmmm-gamess-us.git] / src / tools / anadock.c
blobdf9a1c7703eabbb817c51b2fa4a77018af0a4c18
1 /*
2 *
3 * This source code is part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * Green Red Orange Magenta Azure Cyan Skyblue
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
39 #include "confio.h"
40 #include "copyrite.h"
41 #include "futil.h"
42 #include "gmx_fatal.h"
43 #include "smalloc.h"
44 #include "string2.h"
45 #include "vec.h"
46 #include "gmx_statistics.h"
47 #include "statutil.h"
48 #include "typedefs.h"
49 #include "xvgr.h"
51 static const char *etitles[] = { "E-docked", "Free Energy" };
53 typedef struct {
54 real edocked,efree;
55 int index,cluster_id;
56 t_atoms atoms;
57 rvec *x;
58 int ePBC;
59 matrix box;
60 } t_pdbfile;
62 static t_pdbfile *read_pdbf(const char *fn)
64 t_pdbfile *pdbf;
65 double e;
66 char buf[256],*ptr;
67 int natoms;
68 FILE *fp;
70 snew(pdbf,1);
71 get_stx_coordnum (fn,&natoms);
72 init_t_atoms(&(pdbf->atoms),natoms,FALSE);
73 snew(pdbf->x,natoms);
74 read_stx_conf(fn,buf,&pdbf->atoms,pdbf->x,NULL,&pdbf->ePBC,pdbf->box);
75 fp = ffopen(fn,"r");
76 do {
77 ptr = fgets2(buf,255,fp);
78 if (ptr) {
79 if (strstr(buf,"Intermolecular") != NULL) {
80 ptr = strchr(buf,'=');
81 sscanf(ptr+1,"%lf",&e);
82 pdbf->edocked = e;
84 else if (strstr(buf,"Estimated Free") != NULL) {
85 ptr = strchr(buf,'=');
86 sscanf(ptr+1,"%lf",&e);
87 pdbf->efree = e;
90 } while (ptr != NULL);
91 ffclose(fp);
93 return pdbf;
96 static t_pdbfile **read_em_all(const char *fn,int *npdbf)
98 t_pdbfile **pdbf=0;
99 int i,maxpdbf;
100 char buf[256],name[256];
101 bool bExist;
103 strcpy(buf,fn);
104 buf[strlen(buf)-4] = '\0';
105 maxpdbf = 100;
106 snew(pdbf,maxpdbf);
107 i=0;
108 do {
109 sprintf(name,"%s_%d.pdb",buf,i+1);
110 if ((bExist = gmx_fexist(name)) == TRUE) {
111 pdbf[i] = read_pdbf(name);
112 pdbf[i]->index = i+1;
113 i++;
114 if (i >= maxpdbf) {
115 maxpdbf += 100;
116 srenew(pdbf,maxpdbf);
119 } while (bExist);
121 *npdbf = i;
123 printf("Found %d pdb files\n",i);
125 return pdbf;
128 static bool bFreeSort=FALSE;
130 static int pdbf_comp(const void *a,const void *b)
132 t_pdbfile *pa,*pb;
133 real x;
134 int dc;
136 pa = *(t_pdbfile **)a;
137 pb = *(t_pdbfile **)b;
139 dc = pa->cluster_id - pb->cluster_id;
141 if (dc == 0) {
142 if (bFreeSort)
143 x = pa->efree - pb->efree;
144 else
145 x = pa->edocked - pb->edocked;
147 if (x < 0)
148 return -1;
149 else if (x > 0)
150 return 1;
151 else
152 return 0;
154 else
155 return dc;
158 static void analyse_em_all(int npdb,t_pdbfile *pdbf[], const char *edocked,
159 const char *efree, const output_env_t oenv)
161 FILE *fp;
162 int i;
164 for(bFreeSort = FALSE; (bFreeSort <= TRUE); bFreeSort++) {
165 qsort(pdbf,npdb,sizeof(pdbf[0]),pdbf_comp);
166 fp = xvgropen(bFreeSort ? efree : edocked,
167 etitles[bFreeSort],"()","E (kJ/mol)",oenv);
168 for(i=0; (i<npdb); i++)
169 fprintf(fp,"%12lf\n",bFreeSort ? pdbf[i]->efree : pdbf[i]->edocked);
170 ffclose(fp);
174 static void clust_stat(FILE *fp,int start,int end,t_pdbfile *pdbf[])
176 int i;
177 gmx_stats_t ed,ef;
178 real aver,sigma;
180 ed = gmx_stats_init();
181 ef = gmx_stats_init();
182 for(i=start; (i<end); i++) {
183 gmx_stats_add_point(ed,i-start,pdbf[i]->edocked,0,0);
184 gmx_stats_add_point(ef,i-start,pdbf[i]->efree,0,0);
186 gmx_stats_get_ase(ed,&aver,&sigma,NULL);
187 fprintf(fp," <%12s> = %8.3f (+/- %6.3f)\n",etitles[FALSE],aver,sigma);
188 gmx_stats_get_ase(ef,&aver,&sigma,NULL);
189 fprintf(fp," <%12s> = %8.3f (+/- %6.3f)\n",etitles[TRUE],aver,sigma);
190 gmx_stats_done(ed);
191 gmx_stats_done(ef);
192 sfree(ed);
193 sfree(ef);
196 static real rmsd_dist(t_pdbfile *pa,t_pdbfile *pb,bool bRMSD)
198 int i;
199 real rmsd,dist;
200 rvec acm,bcm,dx;
202 if (bRMSD) {
203 rmsd = 0;
204 for(i=0; (i<pa->atoms.nr); i++) {
205 rvec_sub(pa->x[i],pb->x[i],dx);
206 rmsd += iprod(dx,dx);
208 rmsd = sqrt(rmsd/pa->atoms.nr);
210 else {
211 dist = 0;
212 clear_rvec(acm);
213 clear_rvec(bcm);
214 for(i=0; (i<pa->atoms.nr); i++) {
215 rvec_inc(acm,pa->x[i]);
216 rvec_inc(bcm,pb->x[i]);
218 rvec_sub(acm,bcm,dx);
219 for(i=0; (i<DIM); i++)
220 dx[i] /= pa->atoms.nr;
221 rmsd = norm(dx);
223 return rmsd;
226 static void line(FILE *fp)
228 fprintf(fp," - - - - - - - - - -\n");
231 static void cluster_em_all(FILE *fp,int npdb,t_pdbfile *pdbf[],
232 const char *pdbout,bool bFree,bool bRMSD,real cutoff)
234 int i,j,k;
235 int *cndx,ncluster;
236 real rmsd;
238 bFreeSort = bFree;
239 qsort(pdbf,npdb,sizeof(pdbf[0]),pdbf_comp);
241 fprintf(fp,"Statistics over all structures:\n");
242 clust_stat(fp,0,npdb,pdbf);
243 line(fp);
245 /* Index to first structure in a cluster */
246 snew(cndx,npdb);
247 ncluster=1;
249 for(i=1; (i<npdb); i++) {
250 for(j=0; (j<ncluster); j++) {
251 rmsd = rmsd_dist(pdbf[cndx[j]],pdbf[i],bRMSD);
252 if (rmsd <= cutoff) {
253 /* Structure i is in cluster j */
254 pdbf[i]->cluster_id = pdbf[cndx[j]]->cluster_id;
255 break;
258 if (j == ncluster) {
259 /* New cluster! Cool! */
260 cndx[ncluster] = i;
261 pdbf[i]->cluster_id = ncluster;
262 ncluster++;
265 cndx[ncluster] = npdb;
266 qsort(pdbf,npdb,sizeof(pdbf[0]),pdbf_comp);
268 j=0;
269 cndx[j++] = 0;
270 for(i=1; (i<npdb); i++) {
271 if (pdbf[i]->cluster_id != pdbf[i-1]->cluster_id) {
272 cndx[j++] = i;
275 cndx[j] = npdb;
276 if (j != ncluster)
277 gmx_fatal(FARGS,"Consistency error: j = %d, ncluster = %d",j,ncluster);
279 fprintf(fp,"I found %d clusters based on %s and %s with a %.3f nm cut-off\n",
280 ncluster,etitles[bFree],bRMSD ? "RMSD" : "distance",cutoff);
281 line(fp);
282 for(j=0; (j<ncluster); j++) {
283 fprintf(fp,"Cluster: %3d %s: %10.5f kJ/mol %3d elements\n",
284 j,etitles[bFree],
285 bFree ? pdbf[cndx[j]]->efree : pdbf[cndx[j]]->edocked,
286 cndx[j+1]-cndx[j]);
287 clust_stat(fp,cndx[j],cndx[j+1],pdbf);
288 for(k=cndx[j]; (k<cndx[j+1]); k++)
289 fprintf(fp," %3d",pdbf[k]->index);
290 fprintf(fp,"\n");
291 line(fp);
293 sfree(cndx);
296 int main(int argc,char *argv[])
298 const char *desc[] = {
299 "anadock analyses the results of an Autodock run and clusters the",
300 "structures together, based on distance or RMSD. The docked energy",
301 "and free energy estimates are analysed, and for each cluster the",
302 "energy statistics are printed.[PAR]",
303 "An alternative approach to this is to cluster the structures first",
304 "(using [TT]g_cluster[tt] and then sort the clusters on either lowest",
305 "energy or average energy."
307 t_filenm fnm[] = {
308 { efPDB, "-f", NULL, ffREAD },
309 { efPDB, "-ox", "cluster", ffWRITE },
310 { efXVG, "-od", "edocked", ffWRITE },
311 { efXVG, "-of", "efree", ffWRITE },
312 { efLOG, "-g", "anadock", ffWRITE }
314 output_env_t oenv;
315 #define NFILE asize(fnm)
316 static bool bFree=FALSE,bRMS=TRUE;
317 static real cutoff = 0.2;
318 t_pargs pa[] = {
319 { "-free", FALSE, etBOOL, {&bFree},
320 "Use Free energy estimate from autodock for sorting the classes" },
321 { "-rms", FALSE, etBOOL, {&bRMS},
322 "Cluster on RMS or distance" },
323 { "-cutoff", FALSE, etREAL, {&cutoff},
324 "Maximum RMSD/distance for belonging to the same cluster" }
326 #define NPA asize(pa)
328 FILE *fp;
329 t_pdbfile **pdbf=NULL;
330 int npdbf;
332 CopyRight(stderr,argv[0]);
333 parse_common_args(&argc,argv,0,NFILE,fnm,NPA,pa, asize(desc),desc,0,
334 NULL,&oenv);
336 fp = ffopen(opt2fn("-g",NFILE,fnm),"w");
337 please_cite(stdout,"Hetenyi2002b");
338 please_cite(fp,"Hetenyi2002b");
340 pdbf = read_em_all(opt2fn("-f",NFILE,fnm),&npdbf);
342 analyse_em_all(npdbf,pdbf,opt2fn("-od",NFILE,fnm),opt2fn("-of",NFILE,fnm),
343 oenv);
345 cluster_em_all(fp,npdbf,pdbf,opt2fn("-ox",NFILE,fnm),
346 bFree,bRMS,cutoff);
348 thanx(fp);
349 ffclose(fp);
351 thanx(stdout);
353 return 0;