Remove unused function generate_excls and make clean_excls static
[gromacs.git] / src / gromacs / gmxpreprocess / pdb2gmx.cpp
blob3c040aab0c617d349437db8ff80daf416bf4d0a2
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "pdb2gmx.h"
41 #include <cctype>
42 #include <cstdlib>
43 #include <cstring>
44 #include <ctime>
46 #include <algorithm>
47 #include <memory>
48 #include <string>
49 #include <vector>
51 #include "gromacs/commandline/cmdlineoptionsmodule.h"
52 #include "gromacs/fileio/confio.h"
53 #include "gromacs/fileio/filetypes.h"
54 #include "gromacs/fileio/gmxfio.h"
55 #include "gromacs/fileio/pdbio.h"
56 #include "gromacs/gmxlib/conformation_utilities.h"
57 #include "gromacs/gmxpreprocess/fflibutil.h"
58 #include "gromacs/gmxpreprocess/genhydro.h"
59 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
60 #include "gromacs/gmxpreprocess/grompp_impl.h"
61 #include "gromacs/gmxpreprocess/h_db.h"
62 #include "gromacs/gmxpreprocess/hizzie.h"
63 #include "gromacs/gmxpreprocess/pdb2top.h"
64 #include "gromacs/gmxpreprocess/pgutil.h"
65 #include "gromacs/gmxpreprocess/specbond.h"
66 #include "gromacs/gmxpreprocess/ter_db.h"
67 #include "gromacs/gmxpreprocess/toputil.h"
68 #include "gromacs/gmxpreprocess/xlate.h"
69 #include "gromacs/math/vec.h"
70 #include "gromacs/options/basicoptions.h"
71 #include "gromacs/options/filenameoption.h"
72 #include "gromacs/options/ioptionscontainer.h"
73 #include "gromacs/topology/atomprop.h"
74 #include "gromacs/topology/block.h"
75 #include "gromacs/topology/index.h"
76 #include "gromacs/topology/residuetypes.h"
77 #include "gromacs/topology/symtab.h"
78 #include "gromacs/topology/topology.h"
79 #include "gromacs/utility/dir_separator.h"
80 #include "gromacs/utility/exceptions.h"
81 #include "gromacs/utility/fatalerror.h"
82 #include "gromacs/utility/path.h"
83 #include "gromacs/utility/smalloc.h"
84 #include "gromacs/utility/strdb.h"
85 #include "gromacs/utility/stringutil.h"
87 #include "hackblock.h"
88 #include "resall.h"
90 struct RtpRename{
91 RtpRename(const char *newGmx, const char *newMain,
92 const char *newNter, const char *newCter,
93 const char *newBter) :
94 gmx(newGmx), main(newMain), nter(newNter), cter(newCter), bter(newBter)
96 std::string gmx;
97 std::string main;
98 std::string nter;
99 std::string cter;
100 std::string bter;
103 static const char *res2bb_notermini(const std::string &name,
104 gmx::ArrayRef<const RtpRename> rr)
106 /* NOTE: This function returns the main building block name,
107 * it does not take terminal renaming into account.
109 auto found = std::find_if(rr.begin(), rr.end(), [&name](const auto &rename)
110 { return gmx::equalCaseInsensitive(name, rename.gmx); });
111 return found != rr.end() ? found->main.c_str() : name.c_str();
114 static const char *select_res(int nr, int resnr,
115 const char *name[], const char *expl[],
116 const char *title,
117 gmx::ArrayRef<const RtpRename> rr)
119 printf("Which %s type do you want for residue %d\n", title, resnr+1);
120 for (int sel = 0; (sel < nr); sel++)
122 printf("%d. %s (%s)\n",
123 sel, expl[sel], res2bb_notermini(name[sel], rr));
125 printf("\nType a number:"); fflush(stdout);
127 int userSelection;
128 if (scanf("%d", &userSelection) != 1)
130 gmx_fatal(FARGS, "Answer me for res %s %d!", title, resnr+1);
133 return name[userSelection];
136 static const char *get_asptp(int resnr, gmx::ArrayRef<const RtpRename> rr)
138 enum {
139 easp, easpH, easpNR
141 const char *lh[easpNR] = { "ASP", "ASPH" };
142 const char *expl[easpNR] = {
143 "Not protonated (charge -1)",
144 "Protonated (charge 0)"
147 return select_res(easpNR, resnr, lh, expl, "ASPARTIC ACID", rr);
150 static const char *get_glutp(int resnr, gmx::ArrayRef<const RtpRename> rr)
152 enum {
153 eglu, egluH, egluNR
155 const char *lh[egluNR] = { "GLU", "GLUH" };
156 const char *expl[egluNR] = {
157 "Not protonated (charge -1)",
158 "Protonated (charge 0)"
161 return select_res(egluNR, resnr, lh, expl, "GLUTAMIC ACID", rr);
164 static const char *get_glntp(int resnr, gmx::ArrayRef<const RtpRename> rr)
166 enum {
167 egln, eglnH, eglnNR
169 const char *lh[eglnNR] = { "GLN", "QLN" };
170 const char *expl[eglnNR] = {
171 "Not protonated (charge 0)",
172 "Protonated (charge +1)"
175 return select_res(eglnNR, resnr, lh, expl, "GLUTAMINE", rr);
178 static const char *get_lystp(int resnr, gmx::ArrayRef<const RtpRename> rr)
180 enum {
181 elys, elysH, elysNR
183 const char *lh[elysNR] = { "LYSN", "LYS" };
184 const char *expl[elysNR] = {
185 "Not protonated (charge 0)",
186 "Protonated (charge +1)"
189 return select_res(elysNR, resnr, lh, expl, "LYSINE", rr);
192 static const char *get_argtp(int resnr, gmx::ArrayRef<const RtpRename> rr)
194 enum {
195 earg, eargH, eargNR
197 const char *lh[eargNR] = { "ARGN", "ARG" };
198 const char *expl[eargNR] = {
199 "Not protonated (charge 0)",
200 "Protonated (charge +1)"
203 return select_res(eargNR, resnr, lh, expl, "ARGININE", rr);
206 static const char *get_histp(int resnr, gmx::ArrayRef<const RtpRename> rr)
208 const char *expl[ehisNR] = {
209 "H on ND1 only",
210 "H on NE2 only",
211 "H on ND1 and NE2",
212 "Coupled to Heme"
215 return select_res(ehisNR, resnr, hh, expl, "HISTIDINE", rr);
218 static void read_rtprename(const char *fname, FILE *fp,
219 std::vector<RtpRename> *rtprename)
221 char line[STRLEN], buf[STRLEN];
223 int ncol = 0;
224 while (get_a_line(fp, line, STRLEN))
226 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
227 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
228 * Note that the buffer length has been increased to 7 to allow this,
229 * so we just need to make sure the strings have zero-length initially.
231 char gmx[STRLEN];
232 char main[STRLEN];
233 char nter[STRLEN];
234 char cter[STRLEN];
235 char bter[STRLEN];
236 gmx[0] = '\0'; main[0] = '\0'; nter[0] = '\0'; cter[0] = '\0'; bter[0] = '\0';
237 int nc = sscanf(line, "%6s %6s %6s %6s %6s %s",
238 gmx, main, nter,
239 cter, bter, buf);
240 RtpRename newEntry(gmx, main, nter, cter, bter);
241 if (ncol == 0)
243 if (nc != 2 && nc != 5)
245 gmx_fatal(FARGS, "Residue renaming database '%s' has %d columns instead of %d or %d", fname, ncol, 2, 5);
247 ncol = nc;
249 else if (nc != ncol)
251 gmx_fatal(FARGS, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname, nc, ncol);
254 if (nc == 2)
256 /* This file does not have special termini names, copy them from main */
257 newEntry.nter = newEntry.main;
258 newEntry.cter = newEntry.main;
259 newEntry.bter = newEntry.main;
261 rtprename->push_back(newEntry);
265 static std::string search_resrename(gmx::ArrayRef<const RtpRename> rr,
266 const char *name,
267 bool bStart, bool bEnd,
268 bool bCompareFFRTPname)
270 auto found = std::find_if(rr.begin(), rr.end(), [&name, &bCompareFFRTPname](const auto &rename)
271 { return ((!bCompareFFRTPname && (name == rename.gmx)) ||
272 (bCompareFFRTPname && (name == rename.main))); });
274 std::string newName;
275 /* If found in the database, rename this residue's rtp building block,
276 * otherwise keep the old name.
278 if (found != rr.end())
280 if (bStart && bEnd)
282 newName = found->bter;
284 else if (bStart)
286 newName = found->nter;
288 else if (bEnd)
290 newName = found->cter;
292 else
294 newName = found->main;
297 if (newName[0] == '-')
299 gmx_fatal(FARGS, "In the chosen force field there is no residue type for '%s'%s", name, bStart ? ( bEnd ? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd ? " as an ending terminus" : ""));
303 return newName;
306 static void rename_resrtp(t_atoms *pdba,
307 int nterpairs,
308 gmx::ArrayRef<const int> r_start,
309 gmx::ArrayRef<const int> r_end,
310 gmx::ArrayRef<const RtpRename> rr,
311 t_symtab *symtab,
312 bool bVerbose)
314 bool bFFRTPTERRNM = (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
316 for (int r = 0; r < pdba->nres; r++)
318 bool bStart = false;
319 bool bEnd = false;
320 for (int j = 0; j < nterpairs; j++)
322 if (r == r_start[j])
324 bStart = true;
327 for (int j = 0; j < nterpairs; j++)
329 if (r == r_end[j])
331 bEnd = true;
335 std::string newName = search_resrename(rr, *pdba->resinfo[r].rtp, bStart, bEnd, false);
337 if (bFFRTPTERRNM && newName.empty() && (bStart || bEnd))
339 /* This is a terminal residue, but the residue name,
340 * currently stored in .rtp, is not a standard residue name,
341 * but probably a force field specific rtp name.
342 * Check if we need to rename it because it is terminal.
344 newName = search_resrename(rr,
345 *pdba->resinfo[r].rtp, bStart, bEnd, true);
348 if (!newName.empty() && newName != *pdba->resinfo[r].rtp)
350 if (bVerbose)
352 printf("Changing rtp entry of residue %d %s to '%s'\n",
353 pdba->resinfo[r].nr, *pdba->resinfo[r].name, newName.c_str());
355 pdba->resinfo[r].rtp = put_symtab(symtab, newName.c_str());
360 static void pdbres_to_gmxrtp(t_atoms *pdba)
362 int i;
364 for (i = 0; (i < pdba->nres); i++)
366 if (pdba->resinfo[i].rtp == nullptr)
368 pdba->resinfo[i].rtp = pdba->resinfo[i].name;
373 static void rename_pdbres(t_atoms *pdba, const char *oldnm, const char *newnm,
374 bool bFullCompare, t_symtab *symtab)
376 char *resnm;
377 int i;
379 for (i = 0; (i < pdba->nres); i++)
381 resnm = *pdba->resinfo[i].name;
382 if ((bFullCompare && (gmx::equalCaseInsensitive(resnm, oldnm))) ||
383 (!bFullCompare && strstr(resnm, oldnm) != nullptr))
385 /* Rename the residue name (not the rtp name) */
386 pdba->resinfo[i].name = put_symtab(symtab, newnm);
391 static void rename_bb(t_atoms *pdba, const char *oldnm, const char *newnm,
392 bool bFullCompare, t_symtab *symtab)
394 char *bbnm;
395 int i;
397 for (i = 0; (i < pdba->nres); i++)
399 /* We have not set the rtp name yes, use the residue name */
400 bbnm = *pdba->resinfo[i].name;
401 if ((bFullCompare && (gmx::equalCaseInsensitive(bbnm, oldnm))) ||
402 (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
404 /* Change the rtp builing block name */
405 pdba->resinfo[i].rtp = put_symtab(symtab, newnm);
410 static void rename_bbint(t_atoms *pdba, const char *oldnm,
411 const char *gettp(int, gmx::ArrayRef<const RtpRename>),
412 bool bFullCompare,
413 t_symtab *symtab,
414 gmx::ArrayRef<const RtpRename> rr)
416 int i;
417 const char *ptr;
418 char *bbnm;
420 for (i = 0; i < pdba->nres; i++)
422 /* We have not set the rtp name yet, use the residue name */
423 bbnm = *pdba->resinfo[i].name;
424 if ((bFullCompare && (strcmp(bbnm, oldnm) == 0)) ||
425 (!bFullCompare && strstr(bbnm, oldnm) != nullptr))
427 ptr = gettp(i, rr);
428 pdba->resinfo[i].rtp = put_symtab(symtab, ptr);
433 static void check_occupancy(t_atoms *atoms, const char *filename, bool bVerbose)
435 int i, ftp;
436 int nzero = 0;
437 int nnotone = 0;
439 ftp = fn2ftp(filename);
440 if (!atoms->pdbinfo || ((ftp != efPDB) && (ftp != efBRK) && (ftp != efENT)))
442 fprintf(stderr, "No occupancies in %s\n", filename);
444 else
446 for (i = 0; (i < atoms->nr); i++)
448 if (atoms->pdbinfo[i].occup != 1)
450 if (bVerbose)
452 fprintf(stderr, "Occupancy for atom %s%d-%s is %f rather than 1\n",
453 *atoms->resinfo[atoms->atom[i].resind].name,
454 atoms->resinfo[ atoms->atom[i].resind].nr,
455 *atoms->atomname[i],
456 atoms->pdbinfo[i].occup);
458 if (atoms->pdbinfo[i].occup == 0)
460 nzero++;
462 else
464 nnotone++;
468 if (nzero == atoms->nr)
470 fprintf(stderr, "All occupancy fields zero. This is probably not an X-Ray structure\n");
472 else if ((nzero > 0) || (nnotone > 0))
474 fprintf(stderr,
475 "\n"
476 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
477 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
478 "\n",
479 nzero, nnotone, atoms->nr);
481 else
483 fprintf(stderr, "All occupancies are one\n");
488 static void write_posres(const char *fn, t_atoms *pdba, real fc)
490 FILE *fp;
491 int i;
493 fp = gmx_fio_fopen(fn, "w");
494 fprintf(fp,
495 "; In this topology include file, you will find position restraint\n"
496 "; entries for all the heavy atoms in your original pdb file.\n"
497 "; This means that all the protons which were added by pdb2gmx are\n"
498 "; not restrained.\n"
499 "\n"
500 "[ position_restraints ]\n"
501 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
503 for (i = 0; (i < pdba->nr); i++)
505 if (!is_hydrogen(*pdba->atomname[i]) && !is_dummymass(*pdba->atomname[i]))
507 fprintf(fp, "%6d%6d %g %g %g\n", i+1, 1, fc, fc, fc);
510 gmx_fio_fclose(fp);
513 static int read_pdball(const char *inf, bool bOutput, const char *outf, char **title,
514 t_atoms *atoms, rvec **x,
515 int *ePBC, matrix box, bool bRemoveH,
516 t_symtab *symtab, ResidueType *rt, const char *watres,
517 AtomProperties *aps, bool bVerbose)
518 /* Read a pdb file. (containing proteins) */
520 int natom, new_natom, i;
522 /* READ IT */
523 printf("Reading %s...\n", inf);
524 readConfAndAtoms(inf, symtab, title, atoms, ePBC, x, nullptr, box);
525 natom = atoms->nr;
526 if (atoms->pdbinfo == nullptr)
528 snew(atoms->pdbinfo, atoms->nr);
530 if (fn2ftp(inf) == efPDB)
532 get_pdb_atomnumber(atoms, aps);
534 if (bRemoveH)
536 new_natom = 0;
537 for (i = 0; i < atoms->nr; i++)
539 if (!is_hydrogen(*atoms->atomname[i]))
541 atoms->atom[new_natom] = atoms->atom[i];
542 atoms->atomname[new_natom] = atoms->atomname[i];
543 atoms->pdbinfo[new_natom] = atoms->pdbinfo[i];
544 copy_rvec((*x)[i], (*x)[new_natom]);
545 new_natom++;
548 atoms->nr = new_natom;
549 natom = new_natom;
552 printf("Read");
553 if (title[0])
555 printf(" '%s',", *title);
557 printf(" %d atoms\n", natom);
559 /* Rename residues */
560 rename_pdbres(atoms, "HOH", watres, false, symtab);
561 rename_pdbres(atoms, "SOL", watres, false, symtab);
562 rename_pdbres(atoms, "WAT", watres, false, symtab);
564 rename_atoms("xlateat.dat", nullptr,
565 atoms, symtab, {}, true,
566 rt, true, bVerbose);
568 if (natom == 0)
570 return 0;
572 if (bOutput)
574 write_sto_conf(outf, *title, atoms, *x, nullptr, *ePBC, box);
577 return natom;
580 static void process_chain(t_atoms *pdba, gmx::ArrayRef<gmx::RVec> x,
581 bool bTrpU, bool bPheU, bool bTyrU,
582 bool bLysMan, bool bAspMan, bool bGluMan,
583 bool bHisMan, bool bArgMan, bool bGlnMan,
584 real angle, real distance, t_symtab *symtab,
585 gmx::ArrayRef<const RtpRename> rr)
587 /* Rename aromatics, lys, asp and histidine */
588 if (bTyrU)
590 rename_bb(pdba, "TYR", "TYRU", false, symtab);
592 if (bTrpU)
594 rename_bb(pdba, "TRP", "TRPU", false, symtab);
596 if (bPheU)
598 rename_bb(pdba, "PHE", "PHEU", false, symtab);
600 if (bLysMan)
602 rename_bbint(pdba, "LYS", get_lystp, false, symtab, rr);
604 if (bArgMan)
606 rename_bbint(pdba, "ARG", get_argtp, false, symtab, rr);
608 if (bGlnMan)
610 rename_bbint(pdba, "GLN", get_glntp, false, symtab, rr);
612 if (bAspMan)
614 rename_bbint(pdba, "ASP", get_asptp, false, symtab, rr);
616 else
618 rename_bb(pdba, "ASPH", "ASP", false, symtab);
620 if (bGluMan)
622 rename_bbint(pdba, "GLU", get_glutp, false, symtab, rr);
624 else
626 rename_bb(pdba, "GLUH", "GLU", false, symtab);
629 if (!bHisMan)
631 set_histp(pdba, gmx::as_rvec_array(x.data()), symtab, angle, distance);
633 else
635 rename_bbint(pdba, "HIS", get_histp, true, symtab, rr);
638 /* Initialize the rtp builing block names with the residue names
639 * for the residues that have not been processed above.
641 pdbres_to_gmxrtp(pdba);
643 /* Now we have all rtp names set.
644 * The rtp names will conform to Gromacs naming,
645 * unless the input pdb file contained one or more force field specific
646 * rtp names as residue names.
650 /* struct for sorting the atoms from the pdb file */
651 typedef struct {
652 int resnr; /* residue number */
653 int j; /* database order index */
654 int index; /* original atom number */
655 char anm1; /* second letter of atom name */
656 char altloc; /* alternate location indicator */
657 } t_pdbindex;
659 static bool pdbicomp(const t_pdbindex &a, const t_pdbindex &b)
661 int d = (a.resnr - b.resnr);
662 if (d == 0)
664 d = (a.j - b.j);
665 if (d == 0)
667 d = (a.anm1 - b.anm1);
668 if (d == 0)
670 d = (a.altloc - b.altloc);
674 return d < 0;
677 static void sort_pdbatoms(gmx::ArrayRef<const PreprocessResidue> restp_chain,
678 int natoms,
679 t_atoms **pdbaptr,
680 t_atoms **newPdbAtoms,
681 std::vector<gmx::RVec> *x,
682 t_blocka *block, char ***gnames)
684 t_atoms *pdba = *pdbaptr;
685 std::vector<gmx::RVec> xnew;
686 t_pdbindex *pdbi;
687 char *atomnm;
689 natoms = pdba->nr;
690 snew(pdbi, natoms);
692 for (int i = 0; i < natoms; i++)
694 atomnm = *pdba->atomname[i];
695 const PreprocessResidue *localPpResidue = &restp_chain[pdba->atom[i].resind];
696 auto found = std::find_if(localPpResidue->atomname.begin(), localPpResidue->atomname.end(),
697 [&atomnm](char** it){return gmx::equalCaseInsensitive(atomnm, *it); });
698 if (found == localPpResidue->atomname.end())
700 char buf[STRLEN];
702 sprintf(buf,
703 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
704 "while sorting atoms.\n%s", atomnm,
705 *pdba->resinfo[pdba->atom[i].resind].name,
706 pdba->resinfo[pdba->atom[i].resind].nr,
707 localPpResidue->resname.c_str(),
708 localPpResidue->natom(),
709 is_hydrogen(atomnm) ?
710 "\nFor a hydrogen, this can be a different protonation state, or it\n"
711 "might have had a different number in the PDB file and was rebuilt\n"
712 "(it might for instance have been H3, and we only expected H1 & H2).\n"
713 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
714 "Remove this hydrogen or choose a different protonation state to solve it.\n"
715 "Option -ignh will ignore all hydrogens in the input." : ".");
716 gmx_fatal(FARGS, "%s", buf);
718 /* make shadow array to be sorted into indexgroup */
719 pdbi[i].resnr = pdba->atom[i].resind;
720 pdbi[i].j = std::distance(localPpResidue->atomname.begin(), found);
721 pdbi[i].index = i;
722 pdbi[i].anm1 = atomnm[1];
723 pdbi[i].altloc = pdba->pdbinfo[i].altloc;
725 std::sort(pdbi, pdbi+natoms, pdbicomp);
727 /* pdba is sorted in pdbnew using the pdbi index */
728 std::vector<int> a(natoms);
729 srenew(*newPdbAtoms, 1);
730 init_t_atoms((*newPdbAtoms), natoms, true);
731 (*newPdbAtoms)->nr = pdba->nr;
732 (*newPdbAtoms)->nres = pdba->nres;
733 srenew((*newPdbAtoms)->resinfo, pdba->nres);
734 std::copy(pdba->resinfo, pdba->resinfo + pdba->nres, (*newPdbAtoms)->resinfo);
735 for (int i = 0; i < natoms; i++)
737 (*newPdbAtoms)->atom[i] = pdba->atom[pdbi[i].index];
738 (*newPdbAtoms)->atomname[i] = pdba->atomname[pdbi[i].index];
739 (*newPdbAtoms)->pdbinfo[i] = pdba->pdbinfo[pdbi[i].index];
740 xnew.push_back(x->at(pdbi[i].index));
741 /* make indexgroup in block */
742 a[i] = pdbi[i].index;
744 /* clean up */
745 done_atom(pdba);
746 sfree(pdba);
747 /* copy the sorted pdbnew back to pdba */
748 *pdbaptr = *newPdbAtoms;
749 *x = xnew;
750 add_grp(block, gnames, a, "prot_sort");
751 sfree(pdbi);
754 static int remove_duplicate_atoms(t_atoms *pdba, gmx::ArrayRef<gmx::RVec> x, bool bVerbose)
756 int i, j, oldnatoms, ndel;
757 t_resinfo *ri;
759 printf("Checking for duplicate atoms....\n");
760 oldnatoms = pdba->nr;
761 ndel = 0;
762 /* NOTE: pdba->nr is modified inside the loop */
763 for (i = 1; (i < pdba->nr); i++)
765 /* compare 'i' and 'i-1', throw away 'i' if they are identical
766 this is a 'while' because multiple alternate locations can be present */
767 while ( (i < pdba->nr) &&
768 (pdba->atom[i-1].resind == pdba->atom[i].resind) &&
769 (strcmp(*pdba->atomname[i-1], *pdba->atomname[i]) == 0) )
771 ndel++;
772 if (bVerbose)
774 ri = &pdba->resinfo[pdba->atom[i].resind];
775 printf("deleting duplicate atom %4s %s%4d%c",
776 *pdba->atomname[i], *ri->name, ri->nr, ri->ic);
777 if (ri->chainid && (ri->chainid != ' '))
779 printf(" ch %c", ri->chainid);
781 if (pdba->pdbinfo)
783 if (pdba->pdbinfo[i].atomnr)
785 printf(" pdb nr %4d", pdba->pdbinfo[i].atomnr);
787 if (pdba->pdbinfo[i].altloc && (pdba->pdbinfo[i].altloc != ' '))
789 printf(" altloc %c", pdba->pdbinfo[i].altloc);
792 printf("\n");
794 pdba->nr--;
795 /* We can not free, since it might be in the symtab */
796 /* sfree(pdba->atomname[i]); */
797 for (j = i; j < pdba->nr; j++)
799 pdba->atom[j] = pdba->atom[j+1];
800 pdba->atomname[j] = pdba->atomname[j+1];
801 if (pdba->pdbinfo)
803 pdba->pdbinfo[j] = pdba->pdbinfo[j+1];
805 copy_rvec(x[j+1], x[j]);
807 srenew(pdba->atom, pdba->nr);
808 /* srenew(pdba->atomname, pdba->nr); */
809 srenew(pdba->pdbinfo, pdba->nr);
812 if (pdba->nr != oldnatoms)
814 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba->nr, ndel);
817 return pdba->nr;
820 static void
821 checkResidueTypeSanity(t_atoms *pdba,
822 int r0,
823 int r1,
824 ResidueType *rt)
826 std::string startResidueString = gmx::formatString("%s%d", *pdba->resinfo[r0].name, pdba->resinfo[r0].nr);
827 std::string endResidueString = gmx::formatString("%s%d", *pdba->resinfo[r1-1].name, pdba->resinfo[r1-1].nr);
829 // Check whether all residues in chain have the same chain ID.
830 bool allResiduesHaveSameChainID = true;
831 char chainID0 = pdba->resinfo[r0].chainid;
832 char chainID;
833 std::string residueString;
835 for (int i = r0 + 1; i < r1; i++)
837 chainID = pdba->resinfo[i].chainid;
838 if (chainID != chainID0)
840 allResiduesHaveSameChainID = false;
841 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
842 break;
846 if (!allResiduesHaveSameChainID)
848 gmx_fatal(FARGS,
849 "The chain covering the range %s--%s does not have a consistent chain ID. "
850 "The first residue has ID '%c', while residue %s has ID '%c'.",
851 startResidueString.c_str(), endResidueString.c_str(),
852 chainID0, residueString.c_str(), chainID);
855 // At this point all residues have the same ID. If they are also non-blank
856 // we can be a bit more aggressive and require the types match too.
857 if (chainID0 != ' ')
859 bool allResiduesHaveSameType = true;
860 std::string restype;
861 std::string restype0 = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[r0].name);
863 for (int i = r0 + 1; i < r1; i++)
865 restype = rt->typeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
866 if (!gmx::equalCaseInsensitive(restype, restype0))
868 allResiduesHaveSameType = false;
869 residueString = gmx::formatString("%s%d", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
870 break;
874 if (!allResiduesHaveSameType)
876 gmx_fatal(FARGS,
877 "The residues in the chain %s--%s do not have a consistent type. "
878 "The first residue has type '%s', while residue %s is of type '%s'. "
879 "Either there is a mistake in your chain, or it includes nonstandard "
880 "residue names that have not yet been added to the residuetypes.dat "
881 "file in the GROMACS library directory. If there are other molecules "
882 "such as ligands, they should not have the same chain ID as the "
883 "adjacent protein chain since it's a separate molecule.",
884 startResidueString.c_str(), endResidueString.c_str(),
885 restype0.c_str(), residueString.c_str(), restype.c_str());
890 static void find_nc_ter(t_atoms *pdba, int r0, int r1, int *r_start, int *r_end,
891 ResidueType *rt)
893 int i;
894 gmx::compat::optional<std::string> startrestype;
896 *r_start = -1;
897 *r_end = -1;
899 int startWarnings = 0;
900 int endWarnings = 0;
901 int ionNotes = 0;
903 // Check that all residues have the same chain identifier, and if it is
904 // non-blank we also require the residue types to match.
905 checkResidueTypeSanity(pdba, r0, r1, rt);
907 // If we return correctly from checkResidueTypeSanity(), the only
908 // remaining cases where we can have non-matching residue types is if
909 // the chain ID was blank, which could be the case e.g. for a structure
910 // read from a GRO file or other file types without chain information.
911 // In that case we need to be a bit more liberal and detect chains based
912 // on the residue type.
914 // If we get here, the chain ID must be identical for all residues
915 char chainID = pdba->resinfo[r0].chainid;
917 /* Find the starting terminus (typially N or 5') */
918 for (i = r0; i < r1 && *r_start == -1; i++)
920 startrestype = rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
921 if (!startrestype)
923 continue;
925 if (gmx::equalCaseInsensitive(*startrestype, "Protein") ||
926 gmx::equalCaseInsensitive(*startrestype, "DNA") ||
927 gmx::equalCaseInsensitive(*startrestype, "RNA") )
929 printf("Identified residue %s%d as a starting terminus.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
930 *r_start = i;
932 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
934 if (ionNotes < 5)
936 printf("Residue %s%d has type 'Ion', assuming it is not linked into a chain.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
938 if (ionNotes == 4)
940 printf("Disabling further notes about ions.\n");
942 ionNotes++;
944 else
946 // Either no known residue type, or one not needing special handling
947 if (startWarnings < 5)
949 if (chainID == ' ')
951 printf("\nWarning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n"
952 "This chain lacks identifiers, which makes it impossible to do strict\n"
953 "classification of the start/end residues. Here we need to guess this residue\n"
954 "should not be part of the chain and instead introduce a break, but that will\n"
955 "be catastrophic if they should in fact be linked. Please check your structure,\n"
956 "and add %s to residuetypes.dat if this was not correct.\n\n",
957 *pdba->resinfo[i].name, pdba->resinfo[i].nr, *pdba->resinfo[i].name);
959 else
961 printf("\nWarning: No residues in chain starting at %s%d identified as Protein/RNA/DNA.\n"
962 "This makes it impossible to link them into a molecule, which could either be\n"
963 "correct or a catastrophic error. Please check your structure, and add all\n"
964 "necessary residue names to residuetypes.dat if this was not correct.\n\n",
965 *pdba->resinfo[i].name, pdba->resinfo[i].nr);
968 if (startWarnings == 4)
970 printf("Disabling further warnings about unidentified residues at start of chain.\n");
972 startWarnings++;
976 if (*r_start >= 0)
978 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
979 for (int i = *r_start; i < r1; i++)
981 gmx::compat::optional<std::string> restype =
982 rt->optionalTypeOfNamedDatabaseResidue(*pdba->resinfo[i].name);
983 if (!restype)
985 continue;
987 if (gmx::equalCaseInsensitive(*restype, *startrestype) && endWarnings == 0)
989 *r_end = i;
991 else if (gmx::equalCaseInsensitive(*startrestype, "Ion"))
993 if (ionNotes < 5)
995 printf("Residue %s%d has type 'Ion', assuming it is not linked into a chain.\n", *pdba->resinfo[i].name, pdba->resinfo[i].nr);
997 if (ionNotes == 4)
999 printf("Disabling further notes about ions.\n");
1001 ionNotes++;
1003 else
1005 // Either no known residue type, or one not needing special handling.
1006 GMX_RELEASE_ASSERT(chainID == ' ', "Chain ID must be blank");
1007 // Otherwise the call to checkResidueTypeSanity() will
1008 // have caught the problem.
1009 if (endWarnings < 5)
1011 printf("\nWarning: Residue %s%d in chain has different type ('%s') from\n"
1012 "residue %s%d ('%s'). This chain lacks identifiers, which makes\n"
1013 "it impossible to do strict classification of the start/end residues. Here we\n"
1014 "need to guess this residue should not be part of the chain and instead\n"
1015 "introduce a break, but that will be catastrophic if they should in fact be\n"
1016 "linked. Please check your structure, and add %s to residuetypes.dat\n"
1017 "if this was not correct.\n\n",
1018 *pdba->resinfo[i].name, pdba->resinfo[i].nr, restype->c_str(),
1019 *pdba->resinfo[*r_start].name, pdba->resinfo[*r_start].nr, startrestype->c_str(), *pdba->resinfo[i].name);
1021 if (endWarnings == 4)
1023 printf("Disabling further warnings about unidentified residues at end of chain.\n");
1025 endWarnings++;
1030 if (*r_end >= 0)
1032 printf("Identified residue %s%d as a ending terminus.\n", *pdba->resinfo[*r_end].name, pdba->resinfo[*r_end].nr);
1036 /* enum for chain separation */
1037 enum ChainSepType {
1038 enChainSep_id_or_ter, enChainSep_id_and_ter, enChainSep_ter,
1039 enChainSep_id, enChainSep_interactive
1041 static const char *ChainSepEnum[] = {"id_or_ter", "id_and_ter", "ter", "id", "interactive"};
1042 static const char *ChainSepInfoString[] =
1044 "Splitting chemical chains based on TER records or chain id changing.\n",
1045 "Splitting chemical chains based on TER records and chain id changing.\n",
1046 "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1047 "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1048 "Splitting chemical chains interactively.\n"
1051 static void
1052 modify_chain_numbers(t_atoms * pdba,
1053 ChainSepType enumChainSep)
1055 int i;
1056 char old_prev_chainid;
1057 char old_this_chainid;
1058 int old_prev_chainnum;
1059 int old_this_chainnum;
1060 t_resinfo *ri;
1061 char select[STRLEN];
1062 int new_chainnum;
1063 int this_atomnum;
1064 int prev_atomnum;
1065 const char * prev_atomname;
1066 const char * this_atomname;
1067 const char * prev_resname;
1068 const char * this_resname;
1069 int prev_resnum;
1070 int this_resnum;
1071 char prev_chainid;
1072 char this_chainid;
1074 /* The default chain enumeration is based on TER records only */
1075 printf("%s", ChainSepInfoString[enumChainSep]);
1077 old_prev_chainid = '?';
1078 old_prev_chainnum = -1;
1079 new_chainnum = -1;
1081 this_atomname = nullptr;
1082 this_atomnum = -1;
1083 this_resname = nullptr;
1084 this_resnum = -1;
1085 this_chainid = '?';
1087 for (i = 0; i < pdba->nres; i++)
1089 ri = &pdba->resinfo[i];
1090 old_this_chainid = ri->chainid;
1091 old_this_chainnum = ri->chainnum;
1093 prev_atomname = this_atomname;
1094 prev_atomnum = this_atomnum;
1095 prev_resname = this_resname;
1096 prev_resnum = this_resnum;
1097 prev_chainid = this_chainid;
1099 this_atomname = *(pdba->atomname[i]);
1100 this_atomnum = (pdba->pdbinfo != nullptr) ? pdba->pdbinfo[i].atomnr : i+1;
1101 this_resname = *ri->name;
1102 this_resnum = ri->nr;
1103 this_chainid = ri->chainid;
1105 switch (enumChainSep)
1107 case enChainSep_id_or_ter:
1108 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1110 new_chainnum++;
1112 break;
1114 case enChainSep_id_and_ter:
1115 if (old_this_chainid != old_prev_chainid && old_this_chainnum != old_prev_chainnum)
1117 new_chainnum++;
1119 break;
1121 case enChainSep_id:
1122 if (old_this_chainid != old_prev_chainid)
1124 new_chainnum++;
1126 break;
1128 case enChainSep_ter:
1129 if (old_this_chainnum != old_prev_chainnum)
1131 new_chainnum++;
1133 break;
1134 case enChainSep_interactive:
1135 if (old_this_chainid != old_prev_chainid || old_this_chainnum != old_prev_chainnum)
1137 if (i > 0)
1139 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1141 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1142 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1143 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1145 if (nullptr == fgets(select, STRLEN-1, stdin))
1147 gmx_fatal(FARGS, "Error reading from stdin");
1150 if (i == 0 || select[0] == 'y')
1152 new_chainnum++;
1155 break;
1156 default:
1157 gmx_fatal(FARGS, "Internal inconsistency - this shouldn't happen...");
1159 old_prev_chainid = old_this_chainid;
1160 old_prev_chainnum = old_this_chainnum;
1162 ri->chainnum = new_chainnum;
1166 struct t_pdbchain {
1167 char chainid = ' ';
1168 char chainnum = ' ';
1169 int start = -1;
1170 int natom = -1;
1171 bool bAllWat = false;
1172 int nterpairs = -1;
1173 std::vector<int> chainstart;
1176 struct t_chain {
1177 char chainid = ' ';
1178 int chainnum = ' ';
1179 bool bAllWat = false;
1180 int nterpairs = -1;
1181 std::vector<int> chainstart;
1182 std::vector<MoleculePatchDatabase *> ntdb;
1183 std::vector<MoleculePatchDatabase *> ctdb;
1184 std::vector<int> r_start;
1185 std::vector<int> r_end;
1186 t_atoms *pdba;
1187 std::vector<gmx::RVec> x;
1190 // TODO make all enums into scoped enums
1191 /* enum for vsites */
1192 enum VSitesType {
1193 enVSites_none, enVSites_hydrogens, enVSites_aromatics
1195 static const char *VSitesEnum[] = {"none", "hydrogens", "aromatics"};
1197 /* enum for water model */
1198 enum WaterType {
1199 enWater_select, enWater_none, enWater_spc, enWater_spce,
1200 enWater_tip3p, enWater_tip4p, enWater_tip5p, enWater_tips3p
1202 static const char *WaterEnum[] = {
1203 "select", "none", "spc", "spce",
1204 "tip3p", "tip4p", "tip5p", "tips3p"
1207 /* enum for merge */
1208 enum MergeType {
1209 enMerge_no, enMerge_all, enMerge_interactive
1211 static const char *MergeEnum[] = {"no", "all", "interactive"};
1213 namespace gmx
1216 namespace
1219 class pdb2gmx : public ICommandLineOptionsModule
1221 public:
1222 pdb2gmx() :
1223 bVsites_(FALSE), bPrevWat_(FALSE), bVsiteAromatics_(FALSE),
1224 enumChainSep_(enChainSep_id_or_ter),
1225 enumVSites_(enVSites_none),
1226 enumWater_(enWater_select),
1227 enumMerge_(enMerge_no),
1228 itp_file_(nullptr),
1229 mHmult_(0)
1233 // From ICommandLineOptionsModule
1234 void init(CommandLineModuleSettings * /*settings*/) override
1238 void initOptions(IOptionsContainer *options,
1239 ICommandLineOptionsModuleSettings *settings) override;
1241 void optionsFinished() override;
1243 int run() override;
1245 private:
1246 bool bNewRTP_;
1247 bool bInter_;
1248 bool bCysMan_;
1249 bool bLysMan_;
1250 bool bAspMan_;
1251 bool bGluMan_;
1252 bool bHisMan_;
1253 bool bGlnMan_;
1254 bool bArgMan_;
1255 bool bTerMan_;
1256 bool bUnA_;
1257 bool bHeavyH_;
1258 bool bSort_;
1259 bool bAllowMissing_;
1260 bool bRemoveH_;
1261 bool bDeuterate_;
1262 bool bVerbose_;
1263 bool bChargeGroups_;
1264 bool bCmap_;
1265 bool bRenumRes_;
1266 bool bRTPresname_;
1267 bool bIndexSet_;
1268 bool bOutputSet_;
1269 bool bVsites_;
1270 bool bWat_;
1271 bool bPrevWat_;
1272 bool bITP_;
1273 bool bVsiteAromatics_;
1274 real angle_;
1275 real distance_;
1276 real posre_fc_;
1277 real long_bond_dist_;
1278 real short_bond_dist_;
1280 std::string indexOutputFile_;
1281 std::string outputFile_;
1282 std::string topologyFile_;
1283 std::string includeTopologyFile_;
1284 std::string outputConfFile_;
1285 std::string inputConfFile_;
1286 std::string outFile_;
1287 std::string ff_;
1289 ChainSepType enumChainSep_;
1290 VSitesType enumVSites_;
1291 WaterType enumWater_;
1292 MergeType enumMerge_;
1294 FILE *itp_file_;
1295 char forcefield_[STRLEN];
1296 char ffdir_[STRLEN];
1297 char *ffname_;
1298 char *watermodel_;
1299 std::vector<std::string> incls_;
1300 std::vector<t_mols> mols_;
1301 real mHmult_;
1304 void pdb2gmx::initOptions(IOptionsContainer *options,
1305 ICommandLineOptionsModuleSettings *settings)
1307 const char *desc[] = {
1308 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1309 "some database files, adds hydrogens to the molecules and generates",
1310 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1311 "and a topology in GROMACS format.",
1312 "These files can subsequently be processed to generate a run input file.",
1313 "[PAR]",
1314 "[THISMODULE] will search for force fields by looking for",
1315 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1316 "of the current working directory and of the GROMACS library directory",
1317 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1318 "variable.",
1319 "By default the forcefield selection is interactive,",
1320 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1321 "in the list on the command line instead. In that case [THISMODULE] just looks",
1322 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1323 "[PAR]",
1324 "After choosing a force field, all files will be read only from",
1325 "the corresponding force field directory.",
1326 "If you want to modify or add a residue types, you can copy the force",
1327 "field directory from the GROMACS library directory to your current",
1328 "working directory. If you want to add new protein residue types,",
1329 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1330 "or copy the whole library directory to a local directory and set",
1331 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1332 "Check Chapter 5 of the manual for more information about file formats.",
1333 "[PAR]",
1335 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1336 "need not necessarily contain a protein structure. Every kind of",
1337 "molecule for which there is support in the database can be converted.",
1338 "If there is no support in the database, you can add it yourself.[PAR]",
1340 "The program has limited intelligence, it reads a number of database",
1341 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1342 "if necessary this can be done manually. The program can prompt the",
1343 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1344 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1345 "protonated (three protons, default), for Asp and Glu unprotonated",
1346 "(default) or protonated, for His the proton can be either on ND1,",
1347 "on NE2 or on both. By default these selections are done automatically.",
1348 "For His, this is based on an optimal hydrogen bonding",
1349 "conformation. Hydrogen bonds are defined based on a simple geometric",
1350 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1351 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1352 "[TT]-dist[tt] respectively.[PAR]",
1354 "The protonation state of N- and C-termini can be chosen interactively",
1355 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1356 "respectively. Some force fields support zwitterionic forms for chains of",
1357 "one residue, but for polypeptides these options should NOT be selected.",
1358 "The AMBER force fields have unique forms for the terminal residues,",
1359 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1360 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1361 "respectively to use these forms, making sure you preserve the format",
1362 "of the coordinate file. Alternatively, use named terminating residues",
1363 "(e.g. ACE, NME).[PAR]",
1365 "The separation of chains is not entirely trivial since the markup",
1366 "in user-generated PDB files frequently varies and sometimes it",
1367 "is desirable to merge entries across a TER record, for instance",
1368 "if you want a disulfide bridge or distance restraints between",
1369 "two protein chains or if you have a HEME group bound to a protein.",
1370 "In such cases multiple chains should be contained in a single",
1371 "[TT]moleculetype[tt] definition.",
1372 "To handle this, [THISMODULE] uses two separate options.",
1373 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1374 "start, and termini added when applicable. This can be done based on the",
1375 "existence of TER records, when the chain id changes, or combinations of either",
1376 "or both of these. You can also do the selection fully interactively.",
1377 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1378 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1379 "This can be turned off (no merging), all non-water chains can be merged into a",
1380 "single molecule, or the selection can be done interactively.[PAR]",
1382 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1383 "If any of the occupancies are not one, indicating that the atom is",
1384 "not resolved well in the structure, a warning message is issued.",
1385 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1386 "all occupancy fields may be zero. Either way, it is up to the user",
1387 "to verify the correctness of the input data (read the article!).[PAR]",
1389 "During processing the atoms will be reordered according to GROMACS",
1390 "conventions. With [TT]-n[tt] an index file can be generated that",
1391 "contains one group reordered in the same way. This allows you to",
1392 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1393 "one limitation: reordering is done after the hydrogens are stripped",
1394 "from the input and before new hydrogens are added. This means that",
1395 "you should not use [TT]-ignh[tt].[PAR]",
1397 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1398 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1399 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1400 "[PAR]",
1402 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1403 "motions. Angular and out-of-plane motions can be removed by changing",
1404 "hydrogens into virtual sites and fixing angles, which fixes their",
1405 "position relative to neighboring atoms. Additionally, all atoms in the",
1406 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1407 "can be converted into virtual sites, eliminating the fast improper dihedral",
1408 "fluctuations in these rings (but this feature is deprecated).",
1409 "[BB]Note[bb] that in this case all other hydrogen",
1410 "atoms are also converted to virtual sites. The mass of all atoms that are",
1411 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1412 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1413 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1414 "done for water hydrogens to slow down the rotational motion of water.",
1415 "The increase in mass of the hydrogens is subtracted from the bonded",
1416 "(heavy) atom so that the total mass of the system remains the same."
1419 settings->setHelpText(desc);
1421 options->addOption(BooleanOption("newrtp")
1422 .store(&bNewRTP_).defaultValue(false).hidden()
1423 .description("Write the residue database in new format to [TT]new.rtp[tt]"));
1424 options->addOption(RealOption("lb")
1425 .store(&long_bond_dist_).defaultValue(0.25).hidden()
1426 .description("Long bond warning distance"));
1427 options->addOption(RealOption("sb")
1428 .store(&short_bond_dist_).defaultValue(0.05).hidden()
1429 .description("Short bond warning distance"));
1430 options->addOption(EnumOption<ChainSepType>("chainsep").enumValue(ChainSepEnum)
1431 .store(&enumChainSep_)
1432 .description("Condition in PDB files when a new chain should be started (adding termini)"));
1433 options->addOption(EnumOption<MergeType>("merge").enumValue(MergeEnum)
1434 .store(&enumMerge_)
1435 .description("Merge multiple chains into a single [moleculetype]"));
1436 options->addOption(StringOption("ff")
1437 .store(&ff_).defaultValue("select")
1438 .description("Force field, interactive by default. Use [TT]-h[tt] for information."));
1439 options->addOption(EnumOption<WaterType>("water")
1440 .store(&enumWater_).enumValue(WaterEnum)
1441 .description("Water model to use"));
1442 options->addOption(BooleanOption("inter")
1443 .store(&bInter_).defaultValue(false)
1444 .description("Set the next 8 options to interactive"));
1445 options->addOption(BooleanOption("ss")
1446 .store(&bCysMan_).defaultValue(false)
1447 .description("Interactive SS bridge selection"));
1448 options->addOption(BooleanOption("ter")
1449 .store(&bTerMan_).defaultValue(false)
1450 .description("Interactive termini selection, instead of charged (default)"));
1451 options->addOption(BooleanOption("lys")
1452 .store(&bLysMan_).defaultValue(false)
1453 .description("Interactive lysine selection, instead of charged"));
1454 options->addOption(BooleanOption("arg")
1455 .store(&bArgMan_).defaultValue(false)
1456 .description("Interactive arginine selection, instead of charged"));
1457 options->addOption(BooleanOption("asp")
1458 .store(&bAspMan_).defaultValue(false)
1459 .description("Interactive aspartic acid selection, instead of charged"));
1460 options->addOption(BooleanOption("glu")
1461 .store(&bGluMan_).defaultValue(false)
1462 .description("Interactive glutamic acid selection, instead of charged"));
1463 options->addOption(BooleanOption("gln")
1464 .store(&bGlnMan_).defaultValue(false)
1465 .description("Interactive glutamine selection, instead of charged"));
1466 options->addOption(BooleanOption("his")
1467 .store(&bHisMan_).defaultValue(false)
1468 .description("Interactive histidine selection, instead of checking H-bonds"));
1469 options->addOption(RealOption("angle")
1470 .store(&angle_).defaultValue(135.0)
1471 .description("Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1472 options->addOption(RealOption("dist")
1473 .store(&distance_).defaultValue(0.3)
1474 .description("Maximum donor-acceptor distance for a H-bond (nm)"));
1475 options->addOption(BooleanOption("una")
1476 .store(&bUnA_).defaultValue(false)
1477 .description("Select aromatic rings with united CH atoms on phenylalanine, tryptophane and tyrosine"));
1478 options->addOption(BooleanOption("sort")
1479 .store(&bSort_).defaultValue(true).hidden()
1480 .description("Sort the residues according to database, turning this off is dangerous as charge groups might be broken in parts"));
1481 options->addOption(BooleanOption("ignh")
1482 .store(&bRemoveH_).defaultValue(false)
1483 .description("Ignore hydrogen atoms that are in the coordinate file"));
1484 options->addOption(BooleanOption("missing")
1485 .store(&bAllowMissing_).defaultValue(false)
1486 .description("Continue when atoms are missing and bonds cannot be made, dangerous"));
1487 options->addOption(BooleanOption("v")
1488 .store(&bVerbose_).defaultValue(false)
1489 .description("Be slightly more verbose in messages"));
1490 options->addOption(RealOption("posrefc")
1491 .store(&posre_fc_).defaultValue(1000)
1492 .description("Force constant for position restraints"));
1493 options->addOption(EnumOption<VSitesType>("vsite")
1494 .store(&enumVSites_).enumValue(VSitesEnum)
1495 .description("Convert atoms to virtual sites"));
1496 options->addOption(BooleanOption("heavyh")
1497 .store(&bHeavyH_).defaultValue(false)
1498 .description("Make hydrogen atoms heavy"));
1499 options->addOption(BooleanOption("deuterate")
1500 .store(&bDeuterate_).defaultValue(false)
1501 .description("Change the mass of hydrogens to 2 amu"));
1502 options->addOption(BooleanOption("chargegrp")
1503 .store(&bChargeGroups_).defaultValue(true)
1504 .description("Use charge groups in the [REF].rtp[ref] file"));
1505 options->addOption(BooleanOption("cmap")
1506 .store(&bCmap_).defaultValue(true)
1507 .description("Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1508 options->addOption(BooleanOption("renum")
1509 .store(&bRenumRes_).defaultValue(false)
1510 .description("Renumber the residues consecutively in the output"));
1511 options->addOption(BooleanOption("rtpres")
1512 .store(&bRTPresname_).defaultValue(false)
1513 .description("Use [REF].rtp[ref] entry names as residue names"));
1514 options->addOption(FileNameOption("f")
1515 .legacyType(efSTX).inputFile()
1516 .store(&inputConfFile_).required()
1517 .defaultBasename("protein").defaultType(efPDB)
1518 .description("Structure file"));
1519 options->addOption(FileNameOption("o")
1520 .legacyType(efSTO).outputFile()
1521 .store(&outputConfFile_).required()
1522 .defaultBasename("conf")
1523 .description("Structure file"));
1524 options->addOption(FileNameOption("p")
1525 .legacyType(efTOP).outputFile()
1526 .store(&topologyFile_).required()
1527 .defaultBasename("topol")
1528 .description("Topology file"));
1529 options->addOption(FileNameOption("i")
1530 .legacyType(efITP).outputFile()
1531 .store(&includeTopologyFile_).required()
1532 .defaultBasename("posre")
1533 .description("Include file for topology"));
1534 options->addOption(FileNameOption("n")
1535 .legacyType(efNDX).outputFile()
1536 .store(&indexOutputFile_).storeIsSet(&bIndexSet_)
1537 .defaultBasename("index")
1538 .description("Index file"));
1539 options->addOption(FileNameOption("q")
1540 .legacyType(efSTO).outputFile()
1541 .store(&outFile_).storeIsSet(&bOutputSet_)
1542 .defaultBasename("clean").defaultType(efPDB)
1543 .description("Structure file"));
1546 void pdb2gmx::optionsFinished()
1548 if (inputConfFile_.empty())
1550 GMX_THROW(InconsistentInputError("You must supply an input file"));
1552 if (bInter_)
1554 /* if anything changes here, also change description of -inter */
1555 bCysMan_ = true;
1556 bTerMan_ = true;
1557 bLysMan_ = true;
1558 bArgMan_ = true;
1559 bAspMan_ = true;
1560 bGluMan_ = true;
1561 bGlnMan_ = true;
1562 bHisMan_ = true;
1565 if (bHeavyH_)
1567 mHmult_ = 4.0;
1569 else if (bDeuterate_)
1571 mHmult_ = 2.0;
1573 else
1575 mHmult_ = 1.0;
1578 /* Force field selection, interactive or direct */
1579 choose_ff(strcmp(ff_.c_str(), "select") == 0 ? nullptr : ff_.c_str(),
1580 forcefield_, sizeof(forcefield_),
1581 ffdir_, sizeof(ffdir_));
1583 if (strlen(forcefield_) > 0)
1585 ffname_ = forcefield_;
1586 ffname_[0] = std::toupper(ffname_[0]);
1588 else
1590 gmx_fatal(FARGS, "Empty forcefield string");
1594 int pdb2gmx::run()
1596 char select[STRLEN];
1597 std::vector<DisulfideBond> ssbonds;
1599 int this_atomnum;
1600 int prev_atomnum;
1601 const char *prev_atomname;
1602 const char *this_atomname;
1603 const char *prev_resname;
1604 const char *this_resname;
1605 int prev_resnum;
1606 int this_resnum;
1607 char prev_chainid;
1608 char this_chainid;
1609 int prev_chainnumber;
1610 int this_chainnumber;
1611 int this_chainstart;
1612 int prev_chainstart;
1614 printf("\nUsing the %s force field in directory %s\n\n",
1615 ffname_, ffdir_);
1617 choose_watermodel(WaterEnum[enumWater_], ffdir_, &watermodel_);
1619 switch (enumVSites_)
1621 case enVSites_none:
1622 bVsites_ = false;
1623 bVsiteAromatics_ = false;
1624 break;
1625 case enVSites_hydrogens:
1626 bVsites_ = true;
1627 bVsiteAromatics_ = false;
1628 break;
1629 case enVSites_aromatics:
1630 bVsites_ = true;
1631 bVsiteAromatics_ = true;
1632 break;
1633 default:
1634 gmx_fatal(FARGS, "Internal inconsistency: VSitesEnum='%s'", VSitesEnum[enumVSites_]);
1635 } /* end switch */
1637 /* Open the symbol table */
1638 t_symtab symtab;
1639 open_symtab(&symtab);
1641 /* Residue type database */
1642 ResidueType rt;
1644 /* Read residue renaming database(s), if present */
1645 std::vector<std::string> rrn = fflib_search_file_end(ffdir_, ".r2b", FALSE);
1647 std::vector<RtpRename> rtprename;
1648 for (const auto &filename : rrn)
1650 printf("going to rename %s\n", filename.c_str());
1651 FILE *fp = fflib_open(filename);
1652 read_rtprename(filename.c_str(), fp, &rtprename);
1653 gmx_ffclose(fp);
1656 /* Add all alternative names from the residue renaming database to the list
1657 of recognized amino/nucleic acids. */
1658 for (const auto &rename : rtprename)
1660 /* Only add names if the 'standard' gromacs/iupac base name was found */
1661 if (auto restype = rt.optionalTypeOfNamedDatabaseResidue(rename.gmx))
1663 rt.addResidue(rename.main, *restype);
1664 rt.addResidue(rename.nter, *restype);
1665 rt.addResidue(rename.cter, *restype);
1666 rt.addResidue(rename.bter, *restype);
1670 matrix box;
1671 const char *watres;
1672 clear_mat(box);
1673 if (watermodel_ != nullptr && (strstr(watermodel_, "4p") ||
1674 strstr(watermodel_, "4P")))
1676 watres = "HO4";
1678 else if (watermodel_ != nullptr && (strstr(watermodel_, "5p") ||
1679 strstr(watermodel_, "5P")))
1681 watres = "HO5";
1683 else
1685 watres = "HOH";
1688 AtomProperties aps;
1689 char *title = nullptr;
1690 int ePBC;
1691 t_atoms pdba_all;
1692 rvec *pdbx;
1693 int natom = read_pdball(inputConfFile_.c_str(), bOutputSet_, outFile_.c_str(),
1694 &title, &pdba_all, &pdbx, &ePBC, box, bRemoveH_,
1695 &symtab, &rt, watres, &aps, bVerbose_);
1697 if (natom == 0)
1699 std::string message = formatString("No atoms found in pdb file %s\n", inputConfFile_.c_str());
1700 GMX_THROW(InconsistentInputError(message));
1703 printf("Analyzing pdb file\n");
1704 int nwaterchain = 0;
1706 modify_chain_numbers(&pdba_all, enumChainSep_);
1708 int nchainmerges = 0;
1710 this_atomname = nullptr;
1711 this_atomnum = -1;
1712 this_resname = nullptr;
1713 this_resnum = -1;
1714 this_chainid = '?';
1715 this_chainnumber = -1;
1716 this_chainstart = 0;
1717 /* Keep the compiler happy */
1718 prev_chainstart = 0;
1720 int numChains = 0;
1721 std::vector<t_pdbchain> pdb_ch;
1723 t_resinfo *ri;
1724 bool bMerged = false;
1725 for (int i = 0; (i < natom); i++)
1727 ri = &pdba_all.resinfo[pdba_all.atom[i].resind];
1729 /* TODO this should live in a helper object, and consolidate
1730 that with code in modify_chain_numbers */
1731 prev_atomname = this_atomname;
1732 prev_atomnum = this_atomnum;
1733 prev_resname = this_resname;
1734 prev_resnum = this_resnum;
1735 prev_chainid = this_chainid;
1736 prev_chainnumber = this_chainnumber;
1737 if (!bMerged)
1739 prev_chainstart = this_chainstart;
1742 this_atomname = *pdba_all.atomname[i];
1743 this_atomnum = (pdba_all.pdbinfo != nullptr) ? pdba_all.pdbinfo[i].atomnr : i+1;
1744 this_resname = *ri->name;
1745 this_resnum = ri->nr;
1746 this_chainid = ri->chainid;
1747 this_chainnumber = ri->chainnum;
1749 bWat_ = gmx::equalCaseInsensitive(*ri->name, watres);
1751 if ((i == 0) || (this_chainnumber != prev_chainnumber) || (bWat_ != bPrevWat_))
1753 GMX_RELEASE_ASSERT(pdba_all.pdbinfo, "Must have pdbinfo from reading a PDB file if chain number is changing");
1754 this_chainstart = pdba_all.atom[i].resind;
1755 bMerged = false;
1756 if (i > 0 && !bWat_)
1758 if (!strncmp(MergeEnum[enumMerge_], "int", 3))
1760 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1761 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1762 prev_resname, prev_resnum, prev_chainid, prev_atomnum, prev_atomname,
1763 this_resname, this_resnum, this_chainid, this_atomnum, this_atomname);
1765 if (nullptr == fgets(select, STRLEN-1, stdin))
1767 gmx_fatal(FARGS, "Error reading from stdin");
1769 bMerged = (select[0] == 'y');
1771 else if (!strncmp(MergeEnum[enumMerge_], "all", 3))
1773 bMerged = true;
1777 if (bMerged)
1779 pdb_ch[numChains-1].chainstart[pdb_ch[numChains-1].nterpairs] =
1780 pdba_all.atom[i].resind - prev_chainstart;
1781 pdb_ch[numChains-1].nterpairs++;
1782 pdb_ch[numChains-1].chainstart.resize(pdb_ch[numChains-1].nterpairs+1);
1783 nchainmerges++;
1785 else
1787 /* set natom for previous chain */
1788 if (numChains > 0)
1790 pdb_ch[numChains-1].natom = i-pdb_ch[numChains-1].start;
1792 if (bWat_)
1794 nwaterchain++;
1795 ri->chainid = ' ';
1797 /* check if chain identifier was used before */
1798 for (int j = 0; (j < numChains); j++)
1800 if (pdb_ch[j].chainid != ' ' && pdb_ch[j].chainid == ri->chainid)
1802 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1803 "They will be treated as separate chains unless you reorder your file.\n",
1804 ri->chainid);
1807 t_pdbchain newChain;
1808 newChain.chainid = ri->chainid;
1809 newChain.chainnum = ri->chainnum;
1810 newChain.start = i;
1811 newChain.bAllWat = bWat_;
1812 if (bWat_)
1814 newChain.nterpairs = 0;
1816 else
1818 newChain.nterpairs = 1;
1820 newChain.chainstart.resize(newChain.nterpairs+1);
1821 /* modified [numChains] to [0] below */
1822 newChain.chainstart[0] = 0;
1823 pdb_ch.push_back(newChain);
1824 numChains++;
1827 bPrevWat_ = bWat_;
1829 pdb_ch.back().natom = natom-pdb_ch.back().start;
1831 /* set all the water blocks at the end of the chain */
1832 std::vector<int> swap_index(numChains);
1833 int j = 0;
1834 for (int i = 0; i < numChains; i++)
1836 if (!pdb_ch[i].bAllWat)
1838 swap_index[j] = i;
1839 j++;
1842 for (int i = 0; i < numChains; i++)
1844 if (pdb_ch[i].bAllWat)
1846 swap_index[j] = i;
1847 j++;
1850 if (nwaterchain > 1)
1852 printf("Moved all the water blocks to the end\n");
1855 t_atoms *pdba;
1856 std::vector<t_chain> chains(numChains);
1857 /* copy pdb data and x for all chains */
1858 for (int i = 0; (i < numChains); i++)
1860 int si = swap_index[i];
1861 chains[i].chainid = pdb_ch[si].chainid;
1862 chains[i].chainnum = pdb_ch[si].chainnum;
1863 chains[i].bAllWat = pdb_ch[si].bAllWat;
1864 chains[i].nterpairs = pdb_ch[si].nterpairs;
1865 chains[i].chainstart = pdb_ch[si].chainstart;
1866 chains[i].ntdb.clear();
1867 chains[i].ctdb.clear();
1868 chains[i].r_start.resize(pdb_ch[si].nterpairs);
1869 chains[i].r_end.resize(pdb_ch[si].nterpairs);
1871 snew(chains[i].pdba, 1);
1872 init_t_atoms(chains[i].pdba, pdb_ch[si].natom, true);
1873 for (j = 0; j < chains[i].pdba->nr; j++)
1875 chains[i].pdba->atom[j] = pdba_all.atom[pdb_ch[si].start+j];
1876 chains[i].pdba->atomname[j] =
1877 put_symtab(&symtab, *pdba_all.atomname[pdb_ch[si].start+j]);
1878 chains[i].pdba->pdbinfo[j] = pdba_all.pdbinfo[pdb_ch[si].start+j];
1879 chains[i].x.emplace_back(pdbx[pdb_ch[si].start+j]);
1881 /* Re-index the residues assuming that the indices are continuous */
1882 int k = chains[i].pdba->atom[0].resind;
1883 int nres = chains[i].pdba->atom[chains[i].pdba->nr-1].resind - k + 1;
1884 chains[i].pdba->nres = nres;
1885 for (int j = 0; j < chains[i].pdba->nr; j++)
1887 chains[i].pdba->atom[j].resind -= k;
1889 srenew(chains[i].pdba->resinfo, nres);
1890 for (int j = 0; j < nres; j++)
1892 chains[i].pdba->resinfo[j] = pdba_all.resinfo[k+j];
1893 chains[i].pdba->resinfo[j].name = put_symtab(&symtab, *pdba_all.resinfo[k+j].name);
1894 /* make all chain identifiers equal to that of the chain */
1895 chains[i].pdba->resinfo[j].chainid = pdb_ch[si].chainid;
1899 if (nchainmerges > 0)
1901 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1902 nchainmerges);
1905 printf("There are %d chains and %d blocks of water and "
1906 "%d residues with %d atoms\n",
1907 numChains-nwaterchain, nwaterchain,
1908 pdba_all.nres, natom);
1910 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1911 for (int i = 0; (i < numChains); i++)
1913 printf(" %d '%c' %5d %6d %s\n",
1914 i+1, chains[i].chainid ? chains[i].chainid : '-',
1915 chains[i].pdba->nres, chains[i].pdba->nr,
1916 chains[i].bAllWat ? "(only water)" : "");
1918 printf("\n");
1920 check_occupancy(&pdba_all, inputConfFile_.c_str(), bVerbose_);
1922 /* Read atomtypes... */
1923 PreprocessingAtomTypes atype = read_atype(ffdir_, &symtab);
1925 /* read residue database */
1926 printf("Reading residue database... (%s)\n", forcefield_);
1927 std::vector<std::string> rtpf = fflib_search_file_end(ffdir_, ".rtp", true);
1928 std::vector<PreprocessResidue> rtpFFDB;
1929 for (const auto &filename : rtpf)
1931 readResidueDatabase(filename, &rtpFFDB, &atype, &symtab, false);
1933 if (bNewRTP_)
1935 /* Not correct with multiple rtp input files with different bonded types */
1936 FILE *fp = gmx_fio_fopen("new.rtp", "w");
1937 print_resall(fp, rtpFFDB, atype);
1938 gmx_fio_fclose(fp);
1941 /* read hydrogen database */
1942 std::vector<MoleculePatchDatabase> ah;
1943 read_h_db(ffdir_, &ah);
1945 /* Read Termini database... */
1946 std::vector<MoleculePatchDatabase> ntdb;
1947 std::vector<MoleculePatchDatabase> ctdb;
1948 std::vector<MoleculePatchDatabase *> tdblist;
1949 int nNtdb = read_ter_db(ffdir_, 'n', &ntdb, &atype);
1950 int nCtdb = read_ter_db(ffdir_, 'c', &ctdb, &atype);
1952 FILE *top_file = gmx_fio_fopen(topologyFile_.c_str(), "w");
1954 print_top_header(top_file, topologyFile_.c_str(), FALSE, ffdir_, mHmult_);
1956 t_chain *cc;
1957 std::vector<gmx::RVec> x;
1958 /* new pdb datastructure for sorting. */
1959 t_atoms **sortAtoms = nullptr;
1960 t_atoms **localAtoms = nullptr;
1961 snew(sortAtoms, numChains);
1962 snew(localAtoms, numChains);
1963 for (int chain = 0; (chain < numChains); chain++)
1965 cc = &(chains[chain]);
1967 /* set pdba, natom and nres to the current chain */
1968 pdba = cc->pdba;
1969 x = cc->x;
1970 natom = cc->pdba->nr;
1971 int nres = cc->pdba->nres;
1973 if (cc->chainid && ( cc->chainid != ' ' ) )
1975 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1976 chain+1, cc->chainid, natom, nres);
1978 else
1980 printf("Processing chain %d (%d atoms, %d residues)\n",
1981 chain+1, natom, nres);
1984 process_chain(pdba, x, bUnA_, bUnA_, bUnA_, bLysMan_, bAspMan_, bGluMan_,
1985 bHisMan_, bArgMan_, bGlnMan_, angle_, distance_, &symtab,
1986 rtprename);
1988 cc->chainstart[cc->nterpairs] = pdba->nres;
1989 j = 0;
1990 for (int i = 0; i < cc->nterpairs; i++)
1992 find_nc_ter(pdba, cc->chainstart[i], cc->chainstart[i+1],
1993 &(cc->r_start[j]), &(cc->r_end[j]), &rt);
1995 if (cc->r_start[j] >= 0 && cc->r_end[j] >= 0)
1997 j++;
2000 cc->nterpairs = j;
2001 if (cc->nterpairs == 0)
2003 printf("Problem with chain definition, or missing terminal residues.\n"
2004 "This chain does not appear to contain a recognized chain molecule.\n"
2005 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
2008 /* Check for disulfides and other special bonds */
2009 ssbonds = makeDisulfideBonds(pdba, gmx::as_rvec_array(x.data()), bCysMan_, bVerbose_);
2011 if (!rtprename.empty())
2013 rename_resrtp(pdba, cc->nterpairs, cc->r_start, cc->r_end, rtprename,
2014 &symtab, bVerbose_);
2017 for (int i = 0; i < cc->nterpairs; i++)
2019 /* Set termini.
2020 * We first apply a filter so we only have the
2021 * termini that can be applied to the residue in question
2022 * (or a generic terminus if no-residue specific is available).
2024 /* First the N terminus */
2025 if (nNtdb > 0)
2027 tdblist = filter_ter(ntdb,
2028 *pdba->resinfo[cc->r_start[i]].name);
2029 if (tdblist.empty())
2031 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
2032 "is already in a terminus-specific form and skipping terminus selection.\n");
2033 cc->ntdb.push_back(nullptr);
2035 else
2037 if (bTerMan_ && !tdblist.empty())
2039 sprintf(select, "Select start terminus type for %s-%d",
2040 *pdba->resinfo[cc->r_start[i]].name,
2041 pdba->resinfo[cc->r_start[i]].nr);
2042 cc->ntdb.push_back(choose_ter(tdblist, select));
2044 else
2046 cc->ntdb.push_back(tdblist[0]);
2049 printf("Start terminus %s-%d: %s\n",
2050 *pdba->resinfo[cc->r_start[i]].name,
2051 pdba->resinfo[cc->r_start[i]].nr,
2052 (cc->ntdb[i])->name.c_str());
2053 tdblist.clear();
2056 else
2058 cc->ntdb.push_back(nullptr);
2061 /* And the C terminus */
2062 if (nCtdb > 0)
2064 tdblist = filter_ter(ctdb,
2065 *pdba->resinfo[cc->r_end[i]].name);
2066 if (tdblist.empty())
2068 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
2069 "is already in a terminus-specific form and skipping terminus selection.\n");
2070 cc->ctdb.push_back(nullptr);
2072 else
2074 if (bTerMan_ && !tdblist.empty())
2076 sprintf(select, "Select end terminus type for %s-%d",
2077 *pdba->resinfo[cc->r_end[i]].name,
2078 pdba->resinfo[cc->r_end[i]].nr);
2079 cc->ctdb.push_back(choose_ter(tdblist, select));
2081 else
2083 cc->ctdb.push_back(tdblist[0]);
2085 printf("End terminus %s-%d: %s\n",
2086 *pdba->resinfo[cc->r_end[i]].name,
2087 pdba->resinfo[cc->r_end[i]].nr,
2088 (cc->ctdb[i])->name.c_str());
2089 tdblist.clear();
2092 else
2094 cc->ctdb.push_back(nullptr);
2097 std::vector<MoleculePatchDatabase> hb_chain;
2098 /* lookup hackblocks and rtp for all residues */
2099 std::vector<PreprocessResidue> restp_chain;
2100 get_hackblocks_rtp(&hb_chain, &restp_chain,
2101 rtpFFDB, pdba->nres, pdba->resinfo,
2102 cc->nterpairs, &symtab, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end,
2103 bAllowMissing_);
2104 /* ideally, now we would not need the rtp itself anymore, but do
2105 everything using the hb and restp arrays. Unfortunately, that
2106 requires some re-thinking of code in gen_vsite.c, which I won't
2107 do now :( AF 26-7-99 */
2109 rename_atoms(nullptr, ffdir_,
2110 pdba, &symtab, restp_chain, false, &rt, false, bVerbose_);
2112 match_atomnames_with_rtp(restp_chain, hb_chain, pdba, &symtab, x, bVerbose_);
2114 if (bSort_)
2116 char **gnames;
2117 t_blocka *block = new_blocka();
2118 snew(gnames, 1);
2119 sort_pdbatoms(restp_chain, natom, &pdba, &sortAtoms[chain], &x, block, &gnames);
2120 remove_duplicate_atoms(pdba, x, bVerbose_);
2121 if (bIndexSet_)
2123 if (bRemoveH_)
2125 fprintf(stderr, "WARNING: with the -remh option the generated "
2126 "index file (%s) might be useless\n"
2127 "(the index file is generated before hydrogens are added)",
2128 indexOutputFile_.c_str());
2130 write_index(indexOutputFile_.c_str(), block, gnames, false, 0);
2132 for (int i = 0; i < block->nr; i++)
2134 sfree(gnames[i]);
2136 sfree(gnames);
2137 done_blocka(block);
2138 sfree(block);
2140 else
2142 fprintf(stderr, "WARNING: "
2143 "without sorting no check for duplicate atoms can be done\n");
2146 /* Generate Hydrogen atoms (and termini) in the sequence */
2147 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
2148 add_h(&pdba, &localAtoms[chain], &x, ah, &symtab,
2149 cc->nterpairs, cc->ntdb, cc->ctdb, cc->r_start, cc->r_end, bAllowMissing_);
2150 printf("Now there are %d residues with %d atoms\n",
2151 pdba->nres, pdba->nr);
2153 /* make up molecule name(s) */
2155 int k = (cc->nterpairs > 0 && cc->r_start[0] >= 0) ? cc->r_start[0] : 0;
2157 std::string restype = rt.typeOfNamedDatabaseResidue(*pdba->resinfo[k].name);
2159 std::string molname;
2160 std::string suffix;
2161 if (cc->bAllWat)
2163 molname = "Water";
2165 else
2167 this_chainid = cc->chainid;
2169 /* Add the chain id if we have one */
2170 if (this_chainid != ' ')
2172 suffix.append(formatString("_chain_%c", this_chainid));
2175 /* Check if there have been previous chains with the same id */
2176 int nid_used = 0;
2177 for (int k = 0; k < chain; k++)
2179 if (cc->chainid == chains[k].chainid)
2181 nid_used++;
2184 /* Add the number for this chain identifier if there are multiple copies */
2185 if (nid_used > 0)
2187 suffix.append(formatString("%d", nid_used+1));
2190 if (suffix.length() > 0)
2192 molname.append(restype);
2193 molname.append(suffix);
2195 else
2197 molname = restype;
2200 std::string itp_fn = topologyFile_;;
2201 std::string posre_fn = includeTopologyFile_;
2202 if ((numChains-nwaterchain > 1) && !cc->bAllWat)
2204 bITP_ = true;
2205 printf("Chain time...\n");
2206 //construct the itp file name
2207 itp_fn = stripSuffixIfPresent(itp_fn, ".top");
2208 itp_fn.append("_");
2209 itp_fn.append(molname);
2210 itp_fn.append(".itp");
2211 //now do the same for posre
2212 posre_fn = stripSuffixIfPresent(posre_fn, ".itp");
2213 posre_fn.append("_");
2214 posre_fn.append(molname);
2215 posre_fn.append(".itp");
2216 if (posre_fn == itp_fn)
2218 posre_fn = Path::concatenateBeforeExtension(posre_fn, "_pr");
2220 incls_.emplace_back();
2221 incls_.back() = itp_fn;
2222 itp_file_ = gmx_fio_fopen(itp_fn.c_str(), "w");
2224 else
2226 bITP_ = false;
2229 mols_.emplace_back();
2230 if (cc->bAllWat)
2232 mols_.back().name = "SOL";
2233 mols_.back().nr = pdba->nres;
2235 else
2237 mols_.back().name = molname;
2238 mols_.back().nr = 1;
2241 if (bITP_)
2243 print_top_comment(itp_file_, itp_fn.c_str(), ffdir_, true);
2246 FILE *top_file2;
2247 if (cc->bAllWat)
2249 top_file2 = nullptr;
2251 else if (bITP_)
2253 top_file2 = itp_file_;
2255 else
2257 top_file2 = top_file;
2260 pdb2top(top_file2, posre_fn.c_str(), molname.c_str(), pdba, &x, &atype, &symtab,
2261 rtpFFDB,
2262 restp_chain, hb_chain,
2263 bAllowMissing_,
2264 bVsites_, bVsiteAromatics_, ffdir_,
2265 mHmult_, ssbonds,
2266 long_bond_dist_, short_bond_dist_, bDeuterate_, bChargeGroups_, bCmap_,
2267 bRenumRes_, bRTPresname_);
2269 if (!cc->bAllWat)
2271 write_posres(posre_fn.c_str(), pdba, posre_fc_);
2274 if (bITP_)
2276 gmx_fio_fclose(itp_file_);
2279 /* pdba and natom have been reassigned somewhere so: */
2280 cc->pdba = pdba;
2281 cc->x = x;
2284 if (watermodel_ == nullptr)
2286 for (int chain = 0; chain < numChains; chain++)
2288 if (chains[chain].bAllWat)
2290 auto message = formatString("You have chosen not to include a water model, "
2291 "but there is water in the input file. Select a "
2292 "water model or remove the water from your input file.");
2293 GMX_THROW(InconsistentInputError(message));
2297 else
2299 std::string waterFile = formatString("%s%c%s.itp", ffdir_, DIR_SEPARATOR, watermodel_);
2300 if (!fflib_fexist(waterFile))
2302 auto message = formatString("The topology file '%s' for the selected water "
2303 "model '%s' can not be found in the force field "
2304 "directory. Select a different water model.",
2305 waterFile.c_str(), watermodel_);
2306 GMX_THROW(InconsistentInputError(message));
2310 print_top_mols(top_file, title, ffdir_, watermodel_, incls_, mols_);
2311 gmx_fio_fclose(top_file);
2313 /* now merge all chains back together */
2314 natom = 0;
2315 int nres = 0;
2316 for (int i = 0; (i < numChains); i++)
2318 natom += chains[i].pdba->nr;
2319 nres += chains[i].pdba->nres;
2321 t_atoms *atoms;
2322 snew(atoms, 1);
2323 init_t_atoms(atoms, natom, false);
2324 for (int i = 0; i < atoms->nres; i++)
2326 sfree(atoms->resinfo[i].name);
2328 atoms->nres = nres;
2329 srenew(atoms->resinfo, nres);
2330 x.clear();
2331 int k = 0;
2332 int l = 0;
2333 for (int i = 0; (i < numChains); i++)
2335 if (numChains > 1)
2337 printf("Including chain %d in system: %d atoms %d residues\n",
2338 i+1, chains[i].pdba->nr, chains[i].pdba->nres);
2340 for (int j = 0; (j < chains[i].pdba->nr); j++)
2342 atoms->atom[k] = chains[i].pdba->atom[j];
2343 atoms->atom[k].resind += l; /* l is processed nr of residues */
2344 atoms->atomname[k] = chains[i].pdba->atomname[j];
2345 atoms->resinfo[atoms->atom[k].resind].chainid = chains[i].chainid;
2346 x.push_back(chains[i].x[j]);
2347 k++;
2349 for (int j = 0; (j < chains[i].pdba->nres); j++)
2351 atoms->resinfo[l] = chains[i].pdba->resinfo[j];
2352 if (bRTPresname_)
2354 atoms->resinfo[l].name = atoms->resinfo[l].rtp;
2356 l++;
2358 done_atom(chains[i].pdba);
2361 if (numChains > 1)
2363 fprintf(stderr, "Now there are %d atoms and %d residues\n", k, l);
2364 print_sums(atoms, true);
2367 rvec box_space;
2368 fprintf(stderr, "\nWriting coordinate file...\n");
2369 clear_rvec(box_space);
2370 if (box[0][0] == 0)
2372 make_new_box(atoms->nr, gmx::as_rvec_array(x.data()), box, box_space, false);
2374 write_sto_conf(outputConfFile_.c_str(), title, atoms, gmx::as_rvec_array(x.data()), nullptr, ePBC, box);
2376 done_symtab(&symtab);
2377 done_atom(&pdba_all);
2378 done_atom(atoms);
2379 for (int chain = 0; chain < numChains; chain++)
2381 sfree(sortAtoms[chain]);
2382 sfree(localAtoms[chain]);
2384 sfree(sortAtoms);
2385 sfree(localAtoms);
2386 sfree(atoms);
2387 sfree(title);
2388 sfree(pdbx);
2389 printf("\t\t--------- PLEASE NOTE ------------\n");
2390 printf("You have successfully generated a topology from: %s.\n",
2391 inputConfFile_.c_str());
2392 if (watermodel_ != nullptr)
2394 printf("The %s force field and the %s water model are used.\n",
2395 ffname_, watermodel_);
2396 sfree(watermodel_);
2398 else
2400 printf("The %s force field is used.\n",
2401 ffname_);
2403 printf("\t\t--------- ETON ESAELP ------------\n");
2405 return 0;
2408 } // namespace
2410 const char pdb2gmxInfo::name[] = "pdb2gmx";
2411 const char pdb2gmxInfo::shortDescription[] =
2412 "Convert coordinate files to topology and FF-compliant coordinate files";
2413 ICommandLineOptionsModulePointer pdb2gmxInfo::create()
2415 return std::make_unique<pdb2gmx>();
2418 } // namespace gmx