Merge branch 'master' of git://git.gromacs.org/gromacs
[gromacs/adressmacs.git] / src / tools / do_dssp.c
blob368a8fcfcd568c2f252e2f6bfeec3bcf9ae3cd5b
1 /*
2 *
3 * This source code is part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * Green Red Orange Magenta Azure Cyan Skyblue
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
39 #include "sysstuff.h"
40 #include "typedefs.h"
41 #include "string2.h"
42 #include "strdb.h"
43 #include "macros.h"
44 #include "smalloc.h"
45 #include "mshift.h"
46 #include "statutil.h"
47 #include "copyrite.h"
48 #include "pdbio.h"
49 #include "gmx_fatal.h"
50 #include "xvgr.h"
51 #include "matio.h"
52 #include "index.h"
53 #include "gstat.h"
54 #include "tpxio.h"
55 #include "viewit.h"
57 static int strip_dssp(char *dsspfile,int nres,
58 gmx_bool bPhobres[],real t,
59 real *acc,FILE *fTArea,
60 t_matrix *mat,int average_area[],
61 const output_env_t oenv)
63 static gmx_bool bFirst=TRUE;
64 static char *ssbuf;
65 FILE *tapeout;
66 static int xsize,frame;
67 char buf[STRLEN+1];
68 char SSTP;
69 int i,nr,iacc,nresidues;
70 int naccf,naccb; /* Count hydrophobic and hydrophilic residues */
71 real iaccf,iaccb;
72 t_xpmelmt c;
74 tapeout=ffopen(dsspfile,"r");
76 /* Skip header */
77 do {
78 fgets2(buf,STRLEN,tapeout);
79 } while (strstr(buf,"KAPPA") == NULL);
80 if (bFirst) {
81 /* Since we also have empty lines in the dssp output (temp) file,
82 * and some line content is saved to the ssbuf variable,
83 * we need more memory than just nres elements. To be shure,
84 * we allocate 2*nres-1, since for each chain there is a
85 * separating line in the temp file. (At most each residue
86 * could have been defined as a separate chain.) */
87 snew(ssbuf,2*nres-1);
90 iaccb=iaccf=0;
91 nresidues = 0;
92 naccf = 0;
93 naccb = 0;
94 for(nr=0; (fgets2(buf,STRLEN,tapeout) != NULL); nr++) {
95 if (buf[13] == '!') /* Chain separator line has '!' at pos. 13 */
96 SSTP='='; /* Chain separator sign '=' */
97 else
98 SSTP=buf[16]==' ' ? '~' : buf[16];
99 ssbuf[nr]=SSTP;
101 buf[39]='\0';
103 /* Only calculate solvent accessible area if needed */
104 if ((NULL != acc) && (buf[13] != '!'))
106 sscanf(&(buf[34]),"%d",&iacc);
107 acc[nr]=iacc;
108 /* average_area and bPhobres are counted from 0...nres-1 */
109 average_area[nresidues]+=iacc;
110 if (bPhobres[nresidues])
112 naccb++;
113 iaccb+=iacc;
115 else
117 naccf++;
118 iaccf+=iacc;
120 /* Keep track of the residue number (do not count chain separator lines '!') */
121 nresidues++;
124 ssbuf[nr]='\0';
126 if (bFirst) {
127 if (0 != acc)
128 fprintf(stderr, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb, naccf);
130 sprintf(mat->title,"Secondary structure");
131 mat->legend[0]=0;
132 sprintf(mat->label_x,"%s",output_env_get_time_label(oenv));
133 sprintf(mat->label_y,"Residue");
134 mat->bDiscrete=TRUE;
135 mat->ny=nr;
136 snew(mat->axis_y,nr);
137 for(i=0; i<nr; i++)
138 mat->axis_y[i]=i+1;
139 mat->axis_x=NULL;
140 mat->matrix=NULL;
141 xsize=0;
142 frame=0;
143 bFirst=FALSE;
145 if (frame>=xsize) {
146 xsize+=10;
147 srenew(mat->axis_x,xsize);
148 srenew(mat->matrix,xsize);
150 mat->axis_x[frame]=t;
151 snew(mat->matrix[frame],nr);
152 c.c2=0;
153 for(i=0; i<nr; i++) {
154 c.c1=ssbuf[i];
155 mat->matrix[frame][i] = max(0,searchcmap(mat->nmap,mat->map,c));
157 frame++;
158 mat->nx=frame;
160 if (fTArea)
161 fprintf(fTArea,"%10g %10g %10g\n",t,0.01*iaccb,0.01*iaccf);
162 ffclose(tapeout);
164 /* Return the number of lines found in the dssp file (i.e. number
165 * of redidues plus chain separator lines).
166 * This is the number of y elements needed for the area xpm file */
167 return nr;
170 gmx_bool *bPhobics(t_atoms *atoms)
172 int i,nb;
173 char **cb;
174 gmx_bool *bb;
176 nb=get_strings("phbres.dat",&cb);
177 snew(bb,atoms->nres);
179 for(i=0; (i<atoms->nres); i++) {
180 if (search_str(nb,cb,*atoms->resinfo[i].name) != -1)
181 bb[i]=TRUE;
183 return bb;
186 static void check_oo(t_atoms *atoms)
188 char *OOO;
190 int i;
192 OOO=strdup("O");
194 for(i=0; (i<atoms->nr); i++) {
195 if (strcmp(*(atoms->atomname[i]),"OXT")==0)
196 *atoms->atomname[i]=OOO;
197 else if (strcmp(*(atoms->atomname[i]),"O1")==0)
198 *atoms->atomname[i]=OOO;
199 else if (strcmp(*(atoms->atomname[i]),"OC1")==0)
200 *atoms->atomname[i]=OOO;
204 static void norm_acc(t_atoms *atoms, int nres,
205 real av_area[], real norm_av_area[])
207 int i,n,n_surf;
209 char surffn[]="surface.dat";
210 char **surf_res, **surf_lines;
211 double *surf;
213 n_surf = get_lines(surffn, &surf_lines);
214 snew(surf, n_surf);
215 snew(surf_res, n_surf);
216 for(i=0; (i<n_surf); i++) {
217 snew(surf_res[i], 5);
218 sscanf(surf_lines[i],"%s %lf",surf_res[i],&surf[i]);
221 for(i=0; (i<nres); i++) {
222 n = search_str(n_surf,surf_res,*atoms->resinfo[i].name);
223 if ( n != -1)
224 norm_av_area[i] = av_area[i] / surf[n];
225 else
226 fprintf(stderr,"Residue %s not found in surface database (%s)\n",
227 *atoms->resinfo[i].name,surffn);
231 void prune_ss_legend(t_matrix *mat)
233 gmx_bool *present;
234 int *newnum;
235 int i,r,f,newnmap;
236 t_mapping *newmap;
238 snew(present,mat->nmap);
239 snew(newnum,mat->nmap);
241 for(f=0; f<mat->nx; f++)
242 for(r=0; r<mat->ny; r++)
243 present[mat->matrix[f][r]]=TRUE;
245 newnmap=0;
246 newmap=NULL;
247 for (i=0; i<mat->nmap; i++) {
248 newnum[i]=-1;
249 if (present[i]) {
250 newnum[i]=newnmap;
251 newnmap++;
252 srenew(newmap,newnmap);
253 newmap[newnmap-1]=mat->map[i];
256 if (newnmap!=mat->nmap) {
257 mat->nmap=newnmap;
258 mat->map=newmap;
259 for(f=0; f<mat->nx; f++)
260 for(r=0; r<mat->ny; r++)
261 mat->matrix[f][r]=newnum[mat->matrix[f][r]];
265 void write_sas_mat(const char *fn,real **accr,int nframe,int nres,t_matrix *mat)
267 real lo,hi;
268 int i,j,nlev;
269 t_rgb rlo={1,1,1}, rhi={0,0,0};
270 FILE *fp;
272 if(fn) {
273 hi=lo=accr[0][0];
274 for(i=0; i<nframe; i++)
275 for(j=0; j<nres; j++) {
276 lo=min(lo,accr[i][j]);
277 hi=max(hi,accr[i][j]);
279 fp=ffopen(fn,"w");
280 nlev=hi-lo+1;
281 write_xpm(fp,0,"Solvent Accessible Surface","Surface (A^2)",
282 "Time","Residue Index",nframe,nres,
283 mat->axis_x,mat->axis_y,accr,lo,hi,rlo,rhi,&nlev);
284 ffclose(fp);
288 void analyse_ss(const char *outfile, t_matrix *mat, const char *ss_string,
289 const output_env_t oenv)
291 FILE *fp;
292 t_mapping *map;
293 int f,r,*count,ss_count;
294 size_t s;
295 const char** leg;
297 map=mat->map;
298 snew(count,mat->nmap);
299 snew(leg,mat->nmap+1);
300 leg[0]="Structure";
301 for(s=0; s<mat->nmap; s++)
302 leg[s+1]=strdup(map[s].desc);
304 fp=xvgropen(outfile,"Secondary Structure",
305 output_env_get_xvgr_tlabel(oenv),"Number of Residues",oenv);
306 if (output_env_get_print_xvgr_codes(oenv))
307 fprintf(fp,"@ subtitle \"Structure = ");
308 for(s=0; s<strlen(ss_string); s++) {
309 if (s>0)
310 fprintf(fp," + ");
311 for(f=0; f<mat->nmap; f++)
312 if (ss_string[s]==map[f].code.c1)
313 fprintf(fp,"%s",map[f].desc);
315 fprintf(fp,"\"\n");
316 xvgr_legend(fp,mat->nmap+1,leg,oenv);
318 for(f=0; f<mat->nx; f++) {
319 ss_count=0;
320 for(s=0; s<mat->nmap; s++)
321 count[s]=0;
322 for(r=0; r<mat->ny; r++)
323 count[mat->matrix[f][r]]++;
324 for(s=0; s<mat->nmap; s++) {
325 if (strchr(ss_string,map[s].code.c1))
326 ss_count+=count[s];
328 fprintf(fp,"%8g %5d",mat->axis_x[f],ss_count);
329 for(s=0; s<mat->nmap; s++)
330 fprintf(fp," %5d",count[s]);
331 fprintf(fp,"\n");
334 ffclose(fp);
335 sfree(leg);
336 sfree(count);
339 int main(int argc,char *argv[])
341 const char *desc[] = {
342 "do_dssp ",
343 "reads a trajectory file and computes the secondary structure for",
344 "each time frame ",
345 "calling the dssp program. If you do not have the dssp program,",
346 "get it. do_dssp assumes that the dssp executable is",
347 "/usr/local/bin/dssp. If this is not the case, then you should",
348 "set an environment variable [BB]DSSP[bb] pointing to the dssp",
349 "executable, e.g.: [PAR]",
350 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
351 "The structure assignment for each residue and time is written to an",
352 "[TT].xpm[tt] matrix file. This file can be visualized with for instance",
353 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
354 "Individual chains are separated by light grey lines in the xpm and",
355 "postscript files.",
356 "The number of residues with each secondary structure type and the",
357 "total secondary structure ([TT]-sss[tt]) count as a function of",
358 "time are also written to file ([TT]-sc[tt]).[PAR]",
359 "Solvent accessible surface (SAS) per residue can be calculated, both in",
360 "absolute values (A^2) and in fractions of the maximal accessible",
361 "surface of a residue. The maximal accessible surface is defined as",
362 "the accessible surface of a residue in a chain of glycines.",
363 "[BB]Note[bb] that the program [TT]g_sas[tt] can also compute SAS",
364 "and that is more efficient.[PAR]",
365 "Finally, this program can dump the secondary structure in a special file",
366 "[TT]ssdump.dat[tt] for usage in the program [TT]g_chi[tt]. Together",
367 "these two programs can be used to analyze dihedral properties as a",
368 "function of secondary structure type."
370 static gmx_bool bVerbose;
371 static const char *ss_string="HEBT";
372 t_pargs pa[] = {
373 { "-v", FALSE, etBOOL, {&bVerbose},
374 "HIDDENGenerate miles of useless information" },
375 { "-sss", FALSE, etSTR, {&ss_string},
376 "Secondary structures for structure count"}
379 t_trxstatus *status;
380 FILE *tapein;
381 FILE *ss,*acc,*fTArea,*tmpf;
382 const char *fnSCount,*fnArea,*fnTArea,*fnAArea;
383 const char *leg[] = { "Phobic", "Phylic" };
384 t_topology top;
385 int ePBC;
386 t_atoms *atoms;
387 t_matrix mat;
388 int nres,nr0,naccr,nres_plus_separators;
389 gmx_bool *bPhbres,bDoAccSurf;
390 real t;
391 int i,natoms,nframe=0;
392 matrix box;
393 int gnx;
394 char *grpnm,*ss_str;
395 atom_id *index;
396 rvec *xp,*x;
397 int *average_area;
398 real **accr,*accr_ptr=NULL,*av_area, *norm_av_area;
399 char pdbfile[32],tmpfile[32],title[256];
400 char dssp[256];
401 const char *dptr;
402 output_env_t oenv;
403 gmx_rmpbc_t gpbc=NULL;
405 t_filenm fnm[] = {
406 { efTRX, "-f", NULL, ffREAD },
407 { efTPS, NULL, NULL, ffREAD },
408 { efNDX, NULL, NULL, ffOPTRD },
409 { efDAT, "-ssdump", "ssdump", ffOPTWR },
410 { efMAP, "-map", "ss", ffLIBRD },
411 { efXPM, "-o", "ss", ffWRITE },
412 { efXVG, "-sc", "scount", ffWRITE },
413 { efXPM, "-a", "area", ffOPTWR },
414 { efXVG, "-ta", "totarea", ffOPTWR },
415 { efXVG, "-aa", "averarea",ffOPTWR }
417 #define NFILE asize(fnm)
419 CopyRight(stderr,argv[0]);
420 parse_common_args(&argc,argv,
421 PCA_CAN_TIME | PCA_CAN_VIEW | PCA_TIME_UNIT | PCA_BE_NICE ,
422 NFILE,fnm, asize(pa),pa, asize(desc),desc,0,NULL,&oenv);
423 fnSCount= opt2fn("-sc",NFILE,fnm);
424 fnArea = opt2fn_null("-a", NFILE,fnm);
425 fnTArea = opt2fn_null("-ta",NFILE,fnm);
426 fnAArea = opt2fn_null("-aa",NFILE,fnm);
427 bDoAccSurf=(fnArea || fnTArea || fnAArea);
429 read_tps_conf(ftp2fn(efTPS,NFILE,fnm),title,&top,&ePBC,&xp,NULL,box,FALSE);
430 atoms=&(top.atoms);
431 check_oo(atoms);
432 bPhbres=bPhobics(atoms);
434 get_index(atoms,ftp2fn_null(efNDX,NFILE,fnm),1,&gnx,&index,&grpnm);
435 nres=0;
436 nr0=-1;
437 for(i=0; (i<gnx); i++) {
438 if (atoms->atom[index[i]].resind != nr0) {
439 nr0=atoms->atom[index[i]].resind;
440 nres++;
443 fprintf(stderr,"There are %d residues in your selected group\n",nres);
445 strcpy(pdbfile,"ddXXXXXX");
446 gmx_tmpnam(pdbfile);
447 if ((tmpf = fopen(pdbfile,"w")) == NULL) {
448 sprintf(pdbfile,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR,DIR_SEPARATOR);
449 gmx_tmpnam(pdbfile);
450 if ((tmpf = fopen(pdbfile,"w")) == NULL)
451 gmx_fatal(FARGS,"Can not open tmp file %s",pdbfile);
453 else
454 fclose(tmpf);
456 strcpy(tmpfile,"ddXXXXXX");
457 gmx_tmpnam(tmpfile);
458 if ((tmpf = fopen(tmpfile,"w")) == NULL) {
459 sprintf(tmpfile,"%ctmp%cfilterXXXXXX",DIR_SEPARATOR,DIR_SEPARATOR);
460 gmx_tmpnam(tmpfile);
461 if ((tmpf = fopen(tmpfile,"w")) == NULL)
462 gmx_fatal(FARGS,"Can not open tmp file %s",tmpfile);
464 else
465 fclose(tmpf);
467 if ((dptr=getenv("DSSP")) == NULL)
468 dptr="/usr/local/bin/dssp";
469 if (!gmx_fexist(dptr))
470 gmx_fatal(FARGS,"DSSP executable (%s) does not exist (use setenv DSSP)",
471 dptr);
472 sprintf(dssp,"%s %s %s %s > /dev/null %s",
473 dptr,bDoAccSurf?"":"-na",pdbfile,tmpfile,bVerbose?"":"2> /dev/null");
474 if (bVerbose)
475 fprintf(stderr,"dssp cmd='%s'\n",dssp);
477 if (fnTArea) {
478 fTArea=xvgropen(fnTArea,"Solvent Accessible Surface Area",
479 output_env_get_xvgr_tlabel(oenv),"Area (nm\\S2\\N)",oenv);
480 xvgr_legend(fTArea,2,leg,oenv);
481 } else
482 fTArea=NULL;
484 mat.map=NULL;
485 mat.nmap=getcmap(libopen(opt2fn("-map",NFILE,fnm)),
486 opt2fn("-map",NFILE,fnm),&(mat.map));
488 natoms=read_first_x(oenv,&status,ftp2fn(efTRX,NFILE,fnm),&t,&x,box);
489 if (natoms > atoms->nr)
490 gmx_fatal(FARGS,"\nTrajectory does not match topology!");
491 if (gnx > natoms)
492 gmx_fatal(FARGS,"\nTrajectory does not match selected group!");
494 snew(average_area, atoms->nres);
495 snew(av_area , atoms->nres);
496 snew(norm_av_area, atoms->nres);
497 accr=NULL;
498 naccr=0;
500 gpbc = gmx_rmpbc_init(&top.idef,ePBC,natoms,box);
501 do {
502 t = output_env_conv_time(oenv,t);
503 if (bDoAccSurf && nframe>=naccr) {
504 naccr+=10;
505 srenew(accr,naccr);
506 for(i=naccr-10; i<naccr; i++)
507 snew(accr[i],2*atoms->nres-1);
509 gmx_rmpbc(gpbc,natoms,box,x);
510 tapein=ffopen(pdbfile,"w");
511 write_pdbfile_indexed(tapein,NULL,atoms,x,ePBC,box,' ',-1,gnx,index,NULL,TRUE);
512 ffclose(tapein);
514 #ifdef GMX_NO_SYSTEM
515 printf("Warning-- No calls to system(3) supported on this platform.");
516 printf("Warning-- Skipping execution of 'system(\"%s\")'.", dssp);
517 exit(1);
518 #else
519 if(0 != system(dssp))
521 gmx_fatal(FARGS,"Failed to execute command: %s",dssp);
523 #endif
525 /* strip_dssp returns the number of lines found in the dssp file, i.e.
526 * the number of residues plus the separator lines */
528 if (bDoAccSurf)
529 accr_ptr = accr[nframe];
531 nres_plus_separators = strip_dssp(tmpfile,nres,bPhbres,t,
532 accr_ptr,fTArea,&mat,average_area,oenv);
533 remove(tmpfile);
534 remove(pdbfile);
535 nframe++;
536 } while(read_next_x(oenv,status,&t,natoms,x,box));
537 fprintf(stderr,"\n");
538 close_trj(status);
539 if (fTArea)
540 ffclose(fTArea);
541 gmx_rmpbc_done(gpbc);
543 prune_ss_legend(&mat);
545 ss=opt2FILE("-o",NFILE,fnm,"w");
546 mat.flags = 0;
547 write_xpm_m(ss,mat);
548 ffclose(ss);
550 if (opt2bSet("-ssdump",NFILE,fnm)) {
551 snew(ss_str,nres+1);
552 for(i=0; (i<nres); i++)
553 ss_str[i] = mat.map[mat.matrix[0][i]].code.c1;
554 ss_str[i] = '\0';
555 ss = opt2FILE("-ssdump",NFILE,fnm,"w");
556 fprintf(ss,"%d\n%s\n",nres,ss_str);
557 ffclose(ss);
558 sfree(ss_str);
560 analyse_ss(fnSCount,&mat,ss_string,oenv);
562 if (bDoAccSurf) {
563 write_sas_mat(fnArea,accr,nframe,nres_plus_separators,&mat);
565 for(i=0; i<atoms->nres; i++)
566 av_area[i] = (average_area[i] / (real)nframe);
568 norm_acc(atoms, nres, av_area, norm_av_area);
570 if (fnAArea) {
571 acc=xvgropen(fnAArea,"Average Accessible Area",
572 "Residue","A\\S2",oenv);
573 for(i=0; (i<nres); i++)
574 fprintf(acc,"%5d %10g %10g\n",i+1,av_area[i], norm_av_area[i]);
575 ffclose(acc);
579 view_all(oenv, NFILE, fnm);
581 thanx(stderr);
583 return 0;