2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
44 #include "gromacs/commandline/pargs.h"
45 #include "gromacs/commandline/viewit.h"
46 #include "gromacs/fileio/confio.h"
47 #include "gromacs/fileio/matio.h"
48 #include "gromacs/fileio/pdbio.h"
49 #include "gromacs/fileio/trxio.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxana/gmx_ana.h"
52 #include "gromacs/gmxana/gstat.h"
53 #include "gromacs/pbcutil/rmpbc.h"
54 #include "gromacs/topology/index.h"
55 #include "gromacs/topology/topology.h"
56 #include "gromacs/utility/arraysize.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/dir_separator.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/futil.h"
61 #include "gromacs/utility/smalloc.h"
62 #include "gromacs/utility/strdb.h"
64 static int strip_dssp(char *dsspfile
, int nres
,
65 const gmx_bool bPhobres
[], real t
,
66 real
*acc
, FILE *fTArea
,
67 t_matrix
*mat
, int average_area
[],
68 const gmx_output_env_t
*oenv
)
70 static gmx_bool bFirst
= TRUE
;
73 static int xsize
, frame
;
76 int i
, nr
, iacc
, nresidues
;
77 int naccf
, naccb
; /* Count hydrophobic and hydrophilic residues */
81 tapeout
= gmx_ffopen(dsspfile
, "r");
86 fgets2(buf
, STRLEN
, tapeout
);
88 while (std::strstr(buf
, "KAPPA") == nullptr);
91 /* Since we also have empty lines in the dssp output (temp) file,
92 * and some line content is saved to the ssbuf variable,
93 * we need more memory than just nres elements. To be shure,
94 * we allocate 2*nres-1, since for each chain there is a
95 * separating line in the temp file. (At most each residue
96 * could have been defined as a separate chain.) */
97 snew(ssbuf
, 2*nres
-1);
104 for (nr
= 0; (fgets2(buf
, STRLEN
, tapeout
) != nullptr); nr
++)
106 if (buf
[13] == '!') /* Chain separator line has '!' at pos. 13 */
108 SSTP
= '='; /* Chain separator sign '=' */
112 SSTP
= buf
[16] == ' ' ? '~' : buf
[16];
118 /* Only calculate solvent accessible area if needed */
119 if ((nullptr != acc
) && (buf
[13] != '!'))
121 sscanf(&(buf
[34]), "%d", &iacc
);
123 /* average_area and bPhobres are counted from 0...nres-1 */
124 average_area
[nresidues
] += iacc
;
125 if (bPhobres
[nresidues
])
135 /* Keep track of the residue number (do not count chain separator lines '!') */
145 fprintf(stderr
, "%d residues were classified as hydrophobic and %d as hydrophilic.\n", naccb
, naccf
);
148 sprintf(mat
->title
, "Secondary structure");
150 sprintf(mat
->label_x
, "%s", output_env_get_time_label(oenv
).c_str());
151 sprintf(mat
->label_y
, "Residue");
152 mat
->bDiscrete
= TRUE
;
154 snew(mat
->axis_y
, nr
);
155 for (i
= 0; i
< nr
; i
++)
157 mat
->axis_y
[i
] = i
+1;
159 mat
->axis_x
= nullptr;
160 mat
->matrix
= nullptr;
168 srenew(mat
->axis_x
, xsize
);
169 srenew(mat
->matrix
, xsize
);
171 mat
->axis_x
[frame
] = t
;
172 snew(mat
->matrix
[frame
], nr
);
174 for (i
= 0; i
< nr
; i
++)
177 mat
->matrix
[frame
][i
] = std::max(static_cast<t_matelmt
>(0), searchcmap(mat
->nmap
, mat
->map
, c
));
184 fprintf(fTArea
, "%10g %10g %10g\n", t
, 0.01*iaccb
, 0.01*iaccf
);
186 gmx_ffclose(tapeout
);
188 /* Return the number of lines found in the dssp file (i.e. number
189 * of redidues plus chain separator lines).
190 * This is the number of y elements needed for the area xpm file */
194 static gmx_bool
*bPhobics(t_atoms
*atoms
)
201 nb
= get_lines("phbres.dat", &cb
);
202 snew(bb
, atoms
->nres
);
204 for (i
= 0; (i
< atoms
->nres
); i
++)
206 if (-1 != search_str(nb
, cb
, *atoms
->resinfo
[i
].name
) )
214 static void check_oo(t_atoms
*atoms
)
220 OOO
= gmx_strdup("O");
222 for (i
= 0; (i
< atoms
->nr
); i
++)
224 if (std::strcmp(*(atoms
->atomname
[i
]), "OXT") == 0)
226 *atoms
->atomname
[i
] = OOO
;
228 else if (std::strcmp(*(atoms
->atomname
[i
]), "O1") == 0)
230 *atoms
->atomname
[i
] = OOO
;
232 else if (std::strcmp(*(atoms
->atomname
[i
]), "OC1") == 0)
234 *atoms
->atomname
[i
] = OOO
;
239 static void norm_acc(t_atoms
*atoms
, int nres
,
240 const real av_area
[], real norm_av_area
[])
244 char surffn
[] = "surface.dat";
245 char **surf_res
, **surf_lines
;
248 n_surf
= get_lines(surffn
, &surf_lines
);
250 snew(surf_res
, n_surf
);
251 for (i
= 0; (i
< n_surf
); i
++)
253 snew(surf_res
[i
], 5);
254 sscanf(surf_lines
[i
], "%s %lf", surf_res
[i
], &surf
[i
]);
257 for (i
= 0; (i
< nres
); i
++)
259 n
= search_str(n_surf
, surf_res
, *atoms
->resinfo
[i
].name
);
262 norm_av_area
[i
] = av_area
[i
] / surf
[n
];
266 fprintf(stderr
, "Residue %s not found in surface database (%s)\n",
267 *atoms
->resinfo
[i
].name
, surffn
);
272 static void prune_ss_legend(t_matrix
*mat
)
276 int i
, r
, f
, newnmap
;
279 snew(present
, mat
->nmap
);
280 snew(newnum
, mat
->nmap
);
282 for (f
= 0; f
< mat
->nx
; f
++)
284 for (r
= 0; r
< mat
->ny
; r
++)
286 present
[mat
->matrix
[f
][r
]] = TRUE
;
292 for (i
= 0; i
< mat
->nmap
; i
++)
299 srenew(newmap
, newnmap
);
300 newmap
[newnmap
-1] = mat
->map
[i
];
303 if (newnmap
!= mat
->nmap
)
307 for (f
= 0; f
< mat
->nx
; f
++)
309 for (r
= 0; r
< mat
->ny
; r
++)
311 mat
->matrix
[f
][r
] = newnum
[mat
->matrix
[f
][r
]];
317 static void write_sas_mat(const char *fn
, real
**accr
, int nframe
, int nres
, t_matrix
*mat
)
321 t_rgb rlo
= {1, 1, 1}, rhi
= {0, 0, 0};
326 hi
= lo
= accr
[0][0];
327 for (i
= 0; i
< nframe
; i
++)
329 for (j
= 0; j
< nres
; j
++)
331 lo
= std::min(lo
, accr
[i
][j
]);
332 hi
= std::max(hi
, accr
[i
][j
]);
335 fp
= gmx_ffopen(fn
, "w");
336 nlev
= static_cast<int>(hi
-lo
+1);
337 write_xpm(fp
, 0, "Solvent Accessible Surface", "Surface (A^2)",
338 "Time", "Residue Index", nframe
, nres
,
339 mat
->axis_x
, mat
->axis_y
, accr
, lo
, hi
, rlo
, rhi
, &nlev
);
344 static void analyse_ss(const char *outfile
, t_matrix
*mat
, const char *ss_string
,
345 const gmx_output_env_t
*oenv
)
349 int f
, r
, *count
, *total
, ss_count
, total_count
;
354 snew(count
, mat
->nmap
);
355 snew(total
, mat
->nmap
);
356 snew(leg
, mat
->nmap
+1);
357 leg
[0] = "Structure";
358 for (s
= 0; s
< static_cast<size_t>(mat
->nmap
); s
++)
360 leg
[s
+1] = gmx_strdup(map
[s
].desc
);
363 fp
= xvgropen(outfile
, "Secondary Structure",
364 output_env_get_xvgr_tlabel(oenv
), "Number of Residues", oenv
);
365 if (output_env_get_print_xvgr_codes(oenv
))
367 fprintf(fp
, "@ subtitle \"Structure = ");
369 for (s
= 0; s
< std::strlen(ss_string
); s
++)
375 for (f
= 0; f
< mat
->nmap
; f
++)
377 if (ss_string
[s
] == map
[f
].code
.c1
)
379 fprintf(fp
, "%s", map
[f
].desc
);
384 xvgr_legend(fp
, mat
->nmap
+1, leg
, oenv
);
387 for (s
= 0; s
< static_cast<size_t>(mat
->nmap
); s
++)
391 for (f
= 0; f
< mat
->nx
; f
++)
394 for (s
= 0; s
< static_cast<size_t>(mat
->nmap
); s
++)
398 for (r
= 0; r
< mat
->ny
; r
++)
400 count
[mat
->matrix
[f
][r
]]++;
401 total
[mat
->matrix
[f
][r
]]++;
403 for (s
= 0; s
< static_cast<size_t>(mat
->nmap
); s
++)
405 if (std::strchr(ss_string
, map
[s
].code
.c1
))
407 ss_count
+= count
[s
];
408 total_count
+= count
[s
];
411 fprintf(fp
, "%8g %5d", mat
->axis_x
[f
], ss_count
);
412 for (s
= 0; s
< static_cast<size_t>(mat
->nmap
); s
++)
414 fprintf(fp
, " %5d", count
[s
]);
418 /* now print column totals */
419 fprintf(fp
, "%-8s %5d", "# Totals", total_count
);
420 for (s
= 0; s
< static_cast<size_t>(mat
->nmap
); s
++)
422 fprintf(fp
, " %5d", total
[s
]);
426 /* now print probabilities */
427 fprintf(fp
, "%-8s %5.2f", "# SS pr.", total_count
/ static_cast<real
>(mat
->nx
* mat
->ny
));
428 for (s
= 0; s
< static_cast<size_t>(mat
->nmap
); s
++)
430 fprintf(fp
, " %5.2f", total
[s
] / static_cast<real
>(mat
->nx
* mat
->ny
));
439 int gmx_do_dssp(int argc
, char *argv
[])
441 const char *desc
[] = {
443 "reads a trajectory file and computes the secondary structure for",
445 "calling the dssp program. If you do not have the dssp program,",
446 "get it from http://swift.cmbi.ru.nl/gv/dssp. [THISMODULE] assumes ",
447 "that the dssp executable is located in ",
448 "[TT]/usr/local/bin/dssp[tt]. If this is not the case, then you should",
449 "set an environment variable [TT]DSSP[tt] pointing to the dssp",
450 "executable, e.g.: [PAR]",
451 "[TT]setenv DSSP /opt/dssp/bin/dssp[tt][PAR]",
452 "Since version 2.0.0, dssp is invoked with a syntax that differs",
453 "from earlier versions. If you have an older version of dssp,",
454 "use the [TT]-ver[tt] option to direct do_dssp to use the older syntax.",
455 "By default, do_dssp uses the syntax introduced with version 2.0.0.",
456 "Even newer versions (which at the time of writing are not yet released)",
457 "are assumed to have the same syntax as 2.0.0.[PAR]",
458 "The structure assignment for each residue and time is written to an",
459 "[REF].xpm[ref] matrix file. This file can be visualized with for instance",
460 "[TT]xv[tt] and can be converted to postscript with [TT]xpm2ps[tt].",
461 "Individual chains are separated by light grey lines in the [REF].xpm[ref] and",
463 "The number of residues with each secondary structure type and the",
464 "total secondary structure ([TT]-sss[tt]) count as a function of",
465 "time are also written to file ([TT]-sc[tt]).[PAR]",
466 "Solvent accessible surface (SAS) per residue can be calculated, both in",
467 "absolute values (A^2) and in fractions of the maximal accessible",
468 "surface of a residue. The maximal accessible surface is defined as",
469 "the accessible surface of a residue in a chain of glycines.",
470 "[BB]Note[bb] that the program [gmx-sas] can also compute SAS",
471 "and that is more efficient.[PAR]",
472 "Finally, this program can dump the secondary structure in a special file",
473 "[TT]ssdump.dat[tt] for usage in the program [gmx-chi]. Together",
474 "these two programs can be used to analyze dihedral properties as a",
475 "function of secondary structure type."
477 static gmx_bool bVerbose
;
478 static const char *ss_string
= "HEBT";
479 static int dsspVersion
= 2;
481 { "-v", FALSE
, etBOOL
, {&bVerbose
},
482 "HIDDENGenerate miles of useless information" },
483 { "-sss", FALSE
, etSTR
, {&ss_string
},
484 "Secondary structures for structure count"},
485 { "-ver", FALSE
, etINT
, {&dsspVersion
},
486 "DSSP major version. Syntax changed with version 2"}
491 FILE *ss
, *acc
, *fTArea
, *tmpf
;
492 const char *fnSCount
, *fnArea
, *fnTArea
, *fnAArea
;
493 const char *leg
[] = { "Phobic", "Phylic" };
498 int nres
, nr0
, naccr
, nres_plus_separators
;
499 gmx_bool
*bPhbres
, bDoAccSurf
;
501 int i
, j
, natoms
, nframe
= 0;
504 char *grpnm
, *ss_str
;
508 real
**accr
, *accr_ptr
= nullptr, *av_area
, *norm_av_area
;
509 char pdbfile
[32], tmpfile
[32];
512 gmx_output_env_t
*oenv
;
513 gmx_rmpbc_t gpbc
= nullptr;
516 { efTRX
, "-f", nullptr, ffREAD
},
517 { efTPS
, nullptr, nullptr, ffREAD
},
518 { efNDX
, nullptr, nullptr, ffOPTRD
},
519 { efDAT
, "-ssdump", "ssdump", ffOPTWR
},
520 { efMAP
, "-map", "ss", ffLIBRD
},
521 { efXPM
, "-o", "ss", ffWRITE
},
522 { efXVG
, "-sc", "scount", ffWRITE
},
523 { efXPM
, "-a", "area", ffOPTWR
},
524 { efXVG
, "-ta", "totarea", ffOPTWR
},
525 { efXVG
, "-aa", "averarea", ffOPTWR
}
527 #define NFILE asize(fnm)
529 if (!parse_common_args(&argc
, argv
,
530 PCA_CAN_TIME
| PCA_CAN_VIEW
| PCA_TIME_UNIT
,
531 NFILE
, fnm
, asize(pa
), pa
, asize(desc
), desc
, 0, nullptr, &oenv
))
535 fnSCount
= opt2fn("-sc", NFILE
, fnm
);
536 fnArea
= opt2fn_null("-a", NFILE
, fnm
);
537 fnTArea
= opt2fn_null("-ta", NFILE
, fnm
);
538 fnAArea
= opt2fn_null("-aa", NFILE
, fnm
);
539 bDoAccSurf
= ((fnArea
!= nullptr) || (fnTArea
!= nullptr) || (fnAArea
!= nullptr));
541 read_tps_conf(ftp2fn(efTPS
, NFILE
, fnm
), &top
, &ePBC
, &xp
, nullptr, box
, FALSE
);
542 atoms
= &(top
.atoms
);
544 bPhbres
= bPhobics(atoms
);
546 get_index(atoms
, ftp2fn_null(efNDX
, NFILE
, fnm
), 1, &gnx
, &index
, &grpnm
);
549 for (i
= 0; (i
< gnx
); i
++)
551 if (atoms
->atom
[index
[i
]].resind
!= nr0
)
553 nr0
= atoms
->atom
[index
[i
]].resind
;
557 fprintf(stderr
, "There are %d residues in your selected group\n", nres
);
559 std::strcpy(pdbfile
, "ddXXXXXX");
561 if ((tmpf
= fopen(pdbfile
, "w")) == nullptr)
563 sprintf(pdbfile
, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR
, DIR_SEPARATOR
);
565 if ((tmpf
= fopen(pdbfile
, "w")) == nullptr)
567 gmx_fatal(FARGS
, "Can not open tmp file %s", pdbfile
);
575 std::strcpy(tmpfile
, "ddXXXXXX");
577 if ((tmpf
= fopen(tmpfile
, "w")) == nullptr)
579 sprintf(tmpfile
, "%ctmp%cfilterXXXXXX", DIR_SEPARATOR
, DIR_SEPARATOR
);
581 if ((tmpf
= fopen(tmpfile
, "w")) == nullptr)
583 gmx_fatal(FARGS
, "Can not open tmp file %s", tmpfile
);
591 if ((dptr
= getenv("DSSP")) == nullptr)
593 dptr
= "/usr/local/bin/dssp";
595 if (!gmx_fexist(dptr
))
597 gmx_fatal(FARGS
, "DSSP executable (%s) does not exist (use setenv DSSP)",
600 if (dsspVersion
>= 2)
604 printf("\nWARNING: You use DSSP version %d, which is not explicitly\nsupported by do_dssp. Assuming version 2 syntax.\n\n", dsspVersion
);
607 sprintf(dssp
, "%s -i %s -o %s > /dev/null %s",
608 dptr
, pdbfile
, tmpfile
, bVerbose
? "" : "2> /dev/null");
612 sprintf(dssp
, "%s %s %s %s > /dev/null %s",
613 dptr
, bDoAccSurf
? "" : "-na", pdbfile
, tmpfile
, bVerbose
? "" : "2> /dev/null");
616 fprintf(stderr
, "dssp cmd='%s'\n", dssp
);
620 fTArea
= xvgropen(fnTArea
, "Solvent Accessible Surface Area",
621 output_env_get_xvgr_tlabel(oenv
), "Area (nm\\S2\\N)", oenv
);
622 xvgr_legend(fTArea
, 2, leg
, oenv
);
630 mat
.nmap
= readcmap(opt2fn("-map", NFILE
, fnm
), &(mat
.map
));
632 natoms
= read_first_x(oenv
, &status
, ftp2fn(efTRX
, NFILE
, fnm
), &t
, &x
, box
);
633 if (natoms
> atoms
->nr
)
635 gmx_fatal(FARGS
, "\nTrajectory does not match topology!");
639 gmx_fatal(FARGS
, "\nTrajectory does not match selected group!");
642 snew(average_area
, atoms
->nres
);
643 snew(av_area
, atoms
->nres
);
644 snew(norm_av_area
, atoms
->nres
);
648 gpbc
= gmx_rmpbc_init(&top
.idef
, ePBC
, natoms
);
651 t
= output_env_conv_time(oenv
, t
);
652 if (bDoAccSurf
&& nframe
>= naccr
)
656 for (i
= naccr
-10; i
< naccr
; i
++)
658 snew(accr
[i
], 2*atoms
->nres
-1);
661 gmx_rmpbc(gpbc
, natoms
, box
, x
);
662 tapein
= gmx_ffopen(pdbfile
, "w");
663 write_pdbfile_indexed(tapein
, nullptr, atoms
, x
, ePBC
, box
, ' ', -1, gnx
, index
, nullptr, TRUE
, FALSE
);
666 if (0 != system(dssp
))
668 gmx_fatal(FARGS
, "Failed to execute command: %s\n"
669 "Try specifying your dssp version with the -ver option.", dssp
);
672 /* strip_dssp returns the number of lines found in the dssp file, i.e.
673 * the number of residues plus the separator lines */
677 accr_ptr
= accr
[nframe
];
680 nres_plus_separators
= strip_dssp(tmpfile
, nres
, bPhbres
, t
,
681 accr_ptr
, fTArea
, &mat
, average_area
, oenv
);
686 while (read_next_x(oenv
, status
, &t
, x
, box
));
687 fprintf(stderr
, "\n");
693 gmx_rmpbc_done(gpbc
);
695 prune_ss_legend(&mat
);
697 ss
= opt2FILE("-o", NFILE
, fnm
, "w");
699 write_xpm_m(ss
, mat
);
702 if (opt2bSet("-ssdump", NFILE
, fnm
))
704 ss
= opt2FILE("-ssdump", NFILE
, fnm
, "w");
705 snew(ss_str
, nres
+1);
706 fprintf(ss
, "%d\n", nres
);
707 for (j
= 0; j
< mat
.nx
; j
++)
709 for (i
= 0; (i
< mat
.ny
); i
++)
711 ss_str
[i
] = mat
.map
[mat
.matrix
[j
][i
]].code
.c1
;
714 fprintf(ss
, "%s\n", ss_str
);
719 analyse_ss(fnSCount
, &mat
, ss_string
, oenv
);
723 write_sas_mat(fnArea
, accr
, nframe
, nres_plus_separators
, &mat
);
725 for (i
= 0; i
< atoms
->nres
; i
++)
727 av_area
[i
] = (average_area
[i
] / static_cast<real
>(nframe
));
730 norm_acc(atoms
, nres
, av_area
, norm_av_area
);
734 acc
= xvgropen(fnAArea
, "Average Accessible Area",
735 "Residue", "A\\S2", oenv
);
736 for (i
= 0; (i
< nres
); i
++)
738 fprintf(acc
, "%5d %10g %10g\n", i
+1, av_area
[i
], norm_av_area
[i
]);
744 view_all(oenv
, NFILE
, fnm
);