changed reading hint
[gromacs/adressmacs.git] / src / tools / do_dssp.c
bloba4268039df68ad30c80148eedba7713dc6374082
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 2.0
12 * Copyright (c) 1991-1999
13 * BIOSON Research Institute, Dept. of Biophysical Chemistry
14 * University of Groningen, The Netherlands
16 * Please refer to:
17 * GROMACS: A message-passing parallel molecular dynamics implementation
18 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
19 * Comp. Phys. Comm. 91, 43-56 (1995)
21 * Also check out our WWW page:
22 * http://md.chem.rug.nl/~gmx
23 * or e-mail to:
24 * gromacs@chem.rug.nl
26 * And Hey:
27 * GRowing Old MAkes el Chrono Sweat
29 static char *SRCID_do_dssp_c = "$Id$";
31 #include "sysstuff.h"
32 #include "typedefs.h"
33 #include "string2.h"
34 #include "strdb.h"
35 #include "macros.h"
36 #include "smalloc.h"
37 #include "mshift.h"
38 #include "statutil.h"
39 #include "copyrite.h"
40 #include "pdbio.h"
41 #include "fatal.h"
42 #include "xvgr.h"
43 #include "matio.h"
44 #include "rdgroup.h"
45 #include "gstat.h"
46 #include "tpxio.h"
48 #ifdef MY_DSSP
49 extern void dssp_main(bool bDoAcc, bool bVerbose);
50 extern FILE *tapein, *tapeout;
51 #endif
53 static void strip_dssp(char *dsspfile,int nres,
54 bool bPhobres[],real t,real dt,
55 real *acc,FILE *fTArea,
56 t_matrix *mat,int average_area[])
58 static bool bFirst=TRUE;
59 static char *ssbuf;
60 #ifndef MY_DSSP
61 FILE *tapeout;
62 #endif
63 static int xsize,frame;
64 char buf[STRLEN+1];
65 char SSTP;
66 int i,nr,iacc;
67 real iaccf,iaccb;
68 t_xpmelmt c;
70 #ifndef MY_DSSP
71 tapeout=ffopen(dsspfile,"r");
72 #endif
74 /* Skip header */
75 do {
76 fgets2(buf,STRLEN,tapeout);
77 } while (strstr(buf,"KAPPA") == NULL);
78 if (bFirst) {
79 snew(ssbuf,nres+10);
82 iaccb=iaccf=0;
83 for(nr=0; (fgets2(buf,STRLEN,tapeout) != NULL); nr++) {
84 SSTP=buf[16]==' ' ? '~' : buf[16];
85 ssbuf[nr]=SSTP;
87 buf[39]='\0';
88 sscanf(&(buf[34]),"%d",&iacc);
89 acc[nr]=iacc;
90 average_area[nr]+=iacc;
91 if (bPhobres[nr])
92 iaccb+=iacc;
93 else
94 iaccf+=iacc;
96 ssbuf[nr]='\0';
98 if (bFirst) {
99 sprintf(mat->title,"Secondary structure");
100 mat->legend[0]=0;
101 sprintf(mat->label_x,"Time (ps)");
102 sprintf(mat->label_y,"Residue");
103 mat->bDiscrete=TRUE;
104 mat->ny=nr;
105 snew(mat->axis_y,nr);
106 for(i=0; i<nr; i++)
107 mat->axis_y[i]=i+1;
108 mat->axis_x=NULL;
109 mat->matrix=NULL;
110 xsize=0;
111 frame=0;
112 bFirst=FALSE;
114 if (frame>=xsize) {
115 xsize+=10;
116 srenew(mat->axis_x,xsize);
117 srenew(mat->matrix,xsize);
119 mat->axis_x[frame]=t;
120 snew(mat->matrix[frame],nr);
121 c.c2=0;
122 for(i=0; i<nr; i++) {
123 c.c1=ssbuf[i];
124 mat->matrix[frame][i]=searchcmap(mat->nmap,mat->map,c);
126 frame++;
127 mat->nx=frame;
129 if (fTArea)
130 fprintf(fTArea,"%10g %10g %10g\n",t,0.01*iaccb,0.01*iaccf);
131 #ifndef MY_DSSP
132 fclose(tapeout);
133 #endif
136 bool *bPhobics(t_atoms *atoms)
138 int i,nb;
139 char **cb;
140 bool *bb;
142 nb=get_strings("phbres.dat",&cb);
143 snew(bb,atoms->nres);
145 for(i=0; (i<atoms->nres); i++) {
146 if (search_str(nb,cb,*atoms->resname[i]) != -1)
147 bb[i]=TRUE;
149 return bb;
152 static void check_oo(t_atoms *atoms)
154 static char *OOO="O";
155 int i;
157 for(i=0; (i<atoms->nr); i++) {
158 if (strcmp(*(atoms->atomname[i]),"OXT")==0)
159 atoms->atomname[i]=&OOO;
160 else if (strcmp(*(atoms->atomname[i]),"O1")==0)
161 atoms->atomname[i]=&OOO;
165 static void norm_acc(t_atoms *atoms, int nres,
166 real av_area[], real norm_av_area[])
168 int i,n,n_surf;
170 char surffn[]="surface.dat";
171 char **surf_res, **surf_lines;
172 double *surf;
174 n_surf = get_lines(surffn, &surf_lines);
175 snew(surf, n_surf);
176 snew(surf_res, n_surf);
177 for(i=0; (i<n_surf); i++) {
178 snew(surf_res[i], 5);
179 sscanf(surf_lines[i],"%s %lf",surf_res[i],&surf[i]);
182 for(i=0; (i<nres); i++) {
183 n = search_str(n_surf,surf_res,*atoms->resname[i]);
184 if ( n != -1)
185 norm_av_area[i] = av_area[i] / surf[n];
186 else
187 fprintf(stderr,"Residue %s not found in surface database (%s)\n",
188 *atoms->resname[i],surffn);
192 void prune_ss_legend(t_matrix *mat)
194 bool *present;
195 int *newnum;
196 int i,r,f,newnmap;
197 t_mapping *newmap;
199 snew(present,mat->nmap);
200 snew(newnum,mat->nmap);
202 for(f=0; f<mat->nx; f++)
203 for(r=0; r<mat->ny; r++)
204 present[mat->matrix[f][r]]=TRUE;
206 newnmap=0;
207 newmap=NULL;
208 for (i=0; i<mat->nmap; i++) {
209 newnum[i]=-1;
210 if (present[i]) {
211 newnum[i]=newnmap;
212 newnmap++;
213 srenew(newmap,newnmap);
214 newmap[newnmap-1]=mat->map[i];
217 if (newnmap!=mat->nmap) {
218 mat->nmap=newnmap;
219 mat->map=newmap;
220 for(f=0; f<mat->nx; f++)
221 for(r=0; r<mat->ny; r++)
222 mat->matrix[f][r]=newnum[mat->matrix[f][r]];
226 void write_sas_mat(char *fn,real **accr,int nframe,int nres,t_matrix *mat)
228 real lo,hi;
229 int i,j,nlev;
230 t_rgb rlo={1,1,1}, rhi={0,0,0};
231 FILE *fp;
233 if(fn) {
234 hi=lo=accr[0][0];
235 for(i=0; i<nframe; i++)
236 for(j=0; j<nres; j++) {
237 lo=min(lo,accr[i][j]);
238 hi=max(hi,accr[i][j]);
240 fp=ffopen(fn,"w");
241 nlev=hi-lo+1;
242 write_xpm(fp,"Solvent Accessible Surface","Surface (A^2)",
243 "Time","Residue Index",nframe,nres,
244 mat->axis_x,mat->axis_y,accr,lo,hi,rlo,rhi,&nlev);
245 ffclose(fp);
249 void analyse_ss(char *outfile, t_matrix *mat, char *ss_string)
251 FILE *fp;
252 t_mapping *map;
253 int s,f,r,*count,ss_count;
254 char **leg;
256 map=mat->map;
257 snew(count,mat->nmap);
258 snew(leg,mat->nmap+1);
259 leg[0]="Structure";
260 for(s=0; s<mat->nmap; s++)
261 leg[s+1]=map[s].desc;
263 fp=xvgropen(outfile,"Secondary Structure",
264 mat->label_x,"Number of Residues");
265 fprintf(fp,"@ subtitle \"Structure = ");
266 for(s=0; s<strlen(ss_string); s++) {
267 if (s>0)
268 fprintf(fp," + ");
269 for(f=0; f<mat->nmap; f++)
270 if (ss_string[s]==map[f].code.c1)
271 fprintf(fp,map[f].desc);
273 fprintf(fp,"\"\n");
274 xvgr_legend(fp,mat->nmap+1,leg);
276 for(f=0; f<mat->nx; f++) {
277 ss_count=0;
278 for(s=0; s<mat->nmap; s++)
279 count[s]=0;
280 for(r=0; r<mat->ny; r++)
281 count[mat->matrix[f][r]]++;
282 for(s=0; s<mat->nmap; s++) {
283 if (strchr(ss_string,map[s].code.c1))
284 ss_count+=count[s];
286 fprintf(fp,"%8g %5d",mat->axis_x[f],ss_count);
287 for(s=0; s<mat->nmap; s++)
288 fprintf(fp," %5d",count[s]);
289 fprintf(fp,"\n");
292 fclose(fp);
293 sfree(leg);
294 sfree(count);
297 int main(int argc,char *argv[])
299 static char *desc[] = {
300 #ifdef MY_DSSP
301 "my_dssp ",
302 #else
303 "do_dssp ",
304 #endif
305 "reads a trajectory file and computes the secondary structure for",
306 "each time frame (or every [TT]-dt[tt] ps) by",
307 #ifdef MY_DSSP
308 "using the dssp program.[PAR]",
309 #else
310 "calling the dssp program. If you do not have the dssp program,",
311 "get it. do_dssp assumes that the dssp executable is in",
312 "/home/mdgroup/dssp/dssp. If that is not the case, then you should",
313 "set an environment variable [BB]DSSP[bb] pointing to the dssp",
314 "executable as in: [PAR]",
315 "[TT]setenv DSSP /usr/local/bin/dssp[tt][PAR]",
316 #endif
317 "The structure assignment for each residue and time is written to an",
318 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
319 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
320 "The number of residues with each secondary structure type and the",
321 "total secondary structure ([TT]-sss[tt]) count as a function of",
322 "time are also written to file ([TT]-sc[tt]).[PAR]",
323 "Solvent accessible surface per residue can be calculated, both in",
324 "absolute values (A^2) and in fractions of the maximal accessible",
325 "surface of a residue. The maximal accessible surface is defined as",
326 "the accessible surface of a residue in a chain of glycines.",
328 static real dt=0.0;
329 static bool bVerbose;
330 static char *ss_string="HEBT";
331 t_pargs pa[] = {
332 { "-v", FALSE, etBOOL, {&bVerbose},
333 "HIDDENGenerate miles of useless information" },
334 { "-dt", FALSE, etREAL, {&dt},
335 "Only analyze a frame each dt picoseconds" },
336 { "-sss", FALSE, etSTR, {&ss_string},
337 "Secondary structures for structure count"}
340 #ifndef MY_DSSP
341 static char *bugs[] = {
342 "The program is very slow"
344 #endif
346 int status;
347 #ifndef MY_DSSP
348 FILE *tapein;
349 #endif
350 FILE *ss,*acc,*fTArea;
351 char *fnSCount,*fnArea,*fnTArea,*fnAArea;
352 char *leg[] = { "Phobic", "Phylic" };
353 t_topology top;
354 t_atoms *atoms;
355 t_matrix mat;
356 int nres,nr0,naccr;
357 bool *bPhbres,bDoAccSurf;
358 real t,nt;
359 int i,j,natoms,nframe=0;
360 matrix box;
361 int gnx;
362 char *grpnm;
363 atom_id *index;
364 rvec *xp,*x;
365 int *average_area;
366 real **accr,*av_area, *norm_av_area;
367 char pdbfile[L_tmpnam],tmpfile[L_tmpnam],title[256];
368 #ifdef MY_DSSP
369 #define MAXBUF 1000000
370 char inbuf[MAXBUF],outbuf[MAXBUF];
371 #else
372 char dssp[256],*dptr;
373 #endif
375 t_filenm fnm[] = {
376 { efTRX, "-f", NULL, ffREAD },
377 { efTPS, NULL, NULL, ffREAD },
378 { efNDX, NULL, NULL, ffOPTRD },
379 { efMAP, "-map", "ss", ffLIBRD },
380 { efXPM, "-o", "ss", ffWRITE },
381 { efXVG, "-sc", "scount", ffWRITE },
382 { efXPM, "-a", "area", ffOPTWR },
383 { efXVG, "-ta", "totarea", ffOPTWR },
384 { efXVG, "-aa", "averarea",ffOPTWR }
386 #define NFILE asize(fnm)
388 CopyRight(stdout,argv[0]);
389 parse_common_args(&argc,argv,PCA_CAN_TIME | PCA_CAN_VIEW,TRUE,
390 NFILE,fnm,asize(pa),pa,asize(desc),desc,
391 #ifdef MY_DSSP
392 0,NULL
393 #else
394 asize(bugs),bugs
395 #endif
397 fnSCount= opt2fn("-sc",NFILE,fnm);
398 fnArea = opt2fn_null("-a", NFILE,fnm);
399 fnTArea = opt2fn_null("-ta",NFILE,fnm);
400 fnAArea = opt2fn_null("-aa",NFILE,fnm);
401 bDoAccSurf=(fnArea || fnTArea || fnAArea);
403 read_tps_conf(ftp2fn(efTPS,NFILE,fnm),title,&top,&xp,NULL,box,FALSE);
404 atoms=&(top.atoms);
405 check_oo(atoms);
406 bPhbres=bPhobics(atoms);
408 get_index(atoms,ftp2fn_null(efNDX,NFILE,fnm),1,&gnx,&index,&grpnm);
409 nres=0;
410 nr0=-1;
411 for(i=0; (i<gnx); i++) {
412 if (atoms->atom[index[i]].resnr != nr0) {
413 nr0=atoms->atom[index[i]].resnr;
414 nres++;
417 fprintf(stderr,"There are %d residues in your selected group\n",nres);
419 (void) tmpnam(pdbfile);
420 (void) tmpnam(tmpfile);
421 #ifdef MY_DSSP
422 /* Open all files read-write */
423 tapein=ffopen(pdbfile,"w+");
424 setvbuf(tapein,inbuf,_IOFBF,MAXBUF);
425 tapeout=ffopen(tmpfile,"w+");
426 setvbuf(tapeout,outbuf,_IOFBF,MAXBUF);
427 #else
428 if ((dptr=getenv("DSSP")) == NULL)
429 dptr="/home/mdgroup/dssp/dssp";
430 if (!fexist(dptr))
431 fatal_error(0,"DSSP executable (%s) does not exist (use setenv DSSP)",
432 dptr);
433 sprintf(dssp,"%s %s %s %s > /dev/null %s",
434 dptr,bDoAccSurf?"":"-na",pdbfile,tmpfile,bVerbose?"":"2> /dev/null");
435 if (bVerbose)
436 fprintf(stderr,"dssp cmd='%s'\n",dssp);
437 #endif
439 if (fnTArea) {
440 fTArea=xvgropen(fnTArea,"Solvent Accessible Surface Area",
441 "Time (ps)","Area (nm\\S2\\N)");
442 xvgr_legend(fTArea,2,leg);
443 } else
444 fTArea=NULL;
446 mat.map=NULL;
447 mat.nmap=getcmap(libopen(opt2fn("-map",NFILE,fnm)),
448 opt2fn("-map",NFILE,fnm),&(mat.map));
450 natoms=read_first_x(&status,ftp2fn(efTRX,NFILE,fnm),&t,&x,box);
451 if (natoms > atoms->nr)
452 fatal_error(0,"\nTrajectory does not match topology!");
453 if (gnx > natoms)
454 fatal_error(0,"\nTrajectory does not match selected group!");
456 snew(average_area,atoms->nres+10);
457 snew(av_area,atoms->nres+10);
458 snew(norm_av_area,atoms->nres+10);
459 accr=NULL;
460 naccr=0;
461 nt=t;
462 do {
463 if (t >= nt) {
464 if (nframe>=naccr) {
465 naccr+=10;
466 srenew(accr,naccr);
467 for(i=naccr-10; i<naccr; i++)
468 snew(accr[i],atoms->nres);
470 rm_pbc(&(top.idef),natoms,box,x,x);
471 #ifndef MY_DSSP
472 tapein=ffopen(pdbfile,"w");
473 #endif
474 hwrite_pdb_conf_indexed(tapein,NULL,atoms,x,box,gnx,index);
475 #ifdef MY_DSSP
476 rewind(tapein);
477 dssp_main(bDoAccSurf,bVerbose);
478 rewind(tapein);
479 rewind(tapeout);
480 #else
481 fclose(tapein);
482 system(dssp);
483 #endif
484 strip_dssp(tmpfile,nres,bPhbres,t,dt,
485 accr[nframe],fTArea,&mat,average_area);
486 #ifdef MY_DSSP
487 rewind(tapeout);
488 #else
489 remove(tmpfile);
490 remove(pdbfile);
491 #endif
492 nt+=dt;
493 nframe++;
495 } while(read_next_x(status,&t,natoms,x,box));
496 fprintf(stderr,"\n");
497 close_trj(status);
498 if (fTArea)
499 ffclose(fTArea);
501 prune_ss_legend(&mat);
503 ss=opt2FILE("-o",NFILE,fnm,"w");
504 write_xpm_m(ss,mat);
505 ffclose(ss);
507 analyse_ss(fnSCount,&mat,ss_string);
509 if (bDoAccSurf) {
510 write_sas_mat(fnArea,accr,nframe,nres,&mat);
512 for(i=0; i<atoms->nres; i++)
513 av_area[i] = (average_area[i] / (real)nframe);
515 norm_acc(atoms, nres, av_area, norm_av_area);
517 if (fnAArea) {
518 acc=xvgropen(fnAArea,"Average Accessible Area",
519 "Residue","A\\S2");
520 for(i=0; (i<nres); i++)
521 fprintf(acc,"%5d %10g %10g\n",i+1,av_area[i], norm_av_area[i]);
522 ffclose(acc);
526 if (fnTArea) xvgr_file(fnTArea ,NULL);
527 if (fnAArea) xvgr_file(fnAArea ,NULL);
528 if (fnSCount) xvgr_file(fnSCount,NULL);
530 thanx(stdout);
532 return 0;