2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
51 #include "gromacs/commandline/cmdlineoptionsmodule.h"
52 #include "gromacs/fileio/confio.h"
53 #include "gromacs/fileio/filetypes.h"
54 #include "gromacs/fileio/gmxfio.h"
55 #include "gromacs/fileio/pdbio.h"
56 #include "gromacs/gmxlib/conformation_utilities.h"
57 #include "gromacs/gmxpreprocess/fflibutil.h"
58 #include "gromacs/gmxpreprocess/genhydro.h"
59 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
60 #include "gromacs/gmxpreprocess/grompp_impl.h"
61 #include "gromacs/gmxpreprocess/h_db.h"
62 #include "gromacs/gmxpreprocess/hizzie.h"
63 #include "gromacs/gmxpreprocess/pdb2top.h"
64 #include "gromacs/gmxpreprocess/pgutil.h"
65 #include "gromacs/gmxpreprocess/specbond.h"
66 #include "gromacs/gmxpreprocess/ter_db.h"
67 #include "gromacs/gmxpreprocess/toputil.h"
68 #include "gromacs/gmxpreprocess/xlate.h"
69 #include "gromacs/math/vec.h"
70 #include "gromacs/options/basicoptions.h"
71 #include "gromacs/options/filenameoption.h"
72 #include "gromacs/options/ioptionscontainer.h"
73 #include "gromacs/topology/atomprop.h"
74 #include "gromacs/topology/block.h"
75 #include "gromacs/topology/index.h"
76 #include "gromacs/topology/residuetypes.h"
77 #include "gromacs/topology/symtab.h"
78 #include "gromacs/topology/topology.h"
79 #include "gromacs/utility/dir_separator.h"
80 #include "gromacs/utility/exceptions.h"
81 #include "gromacs/utility/fatalerror.h"
82 #include "gromacs/utility/path.h"
83 #include "gromacs/utility/smalloc.h"
84 #include "gromacs/utility/strdb.h"
85 #include "gromacs/utility/stringutil.h"
87 #include "hackblock.h"
91 RtpRename(const char *newGmx
, const char *newMain
,
92 const char *newNter
, const char *newCter
,
93 const char *newBter
) :
94 gmx(newGmx
), main(newMain
), nter(newNter
), cter(newCter
), bter(newBter
)
103 static const char *res2bb_notermini(const std::string
&name
,
104 gmx::ArrayRef
<const RtpRename
> rr
)
106 /* NOTE: This function returns the main building block name,
107 * it does not take terminal renaming into account.
109 auto found
= std::find_if(rr
.begin(), rr
.end(), [&name
](const auto &rename
)
110 { return gmx::equalCaseInsensitive(name
, rename
.gmx
); });
111 return found
!= rr
.end() ? found
->main
.c_str() : name
.c_str();
114 static const char *select_res(int nr
, int resnr
,
115 const char *name
[], const char *expl
[],
117 gmx::ArrayRef
<const RtpRename
> rr
)
119 printf("Which %s type do you want for residue %d\n", title
, resnr
+1);
120 for (int sel
= 0; (sel
< nr
); sel
++)
122 printf("%d. %s (%s)\n",
123 sel
, expl
[sel
], res2bb_notermini(name
[sel
], rr
));
125 printf("\nType a number:"); fflush(stdout
);
128 if (scanf("%d", &userSelection
) != 1)
130 gmx_fatal(FARGS
, "Answer me for res %s %d!", title
, resnr
+1);
133 return name
[userSelection
];
136 static const char *get_asptp(int resnr
, gmx::ArrayRef
<const RtpRename
> rr
)
141 const char *lh
[easpNR
] = { "ASP", "ASPH" };
142 const char *expl
[easpNR
] = {
143 "Not protonated (charge -1)",
144 "Protonated (charge 0)"
147 return select_res(easpNR
, resnr
, lh
, expl
, "ASPARTIC ACID", rr
);
150 static const char *get_glutp(int resnr
, gmx::ArrayRef
<const RtpRename
> rr
)
155 const char *lh
[egluNR
] = { "GLU", "GLUH" };
156 const char *expl
[egluNR
] = {
157 "Not protonated (charge -1)",
158 "Protonated (charge 0)"
161 return select_res(egluNR
, resnr
, lh
, expl
, "GLUTAMIC ACID", rr
);
164 static const char *get_glntp(int resnr
, gmx::ArrayRef
<const RtpRename
> rr
)
169 const char *lh
[eglnNR
] = { "GLN", "QLN" };
170 const char *expl
[eglnNR
] = {
171 "Not protonated (charge 0)",
172 "Protonated (charge +1)"
175 return select_res(eglnNR
, resnr
, lh
, expl
, "GLUTAMINE", rr
);
178 static const char *get_lystp(int resnr
, gmx::ArrayRef
<const RtpRename
> rr
)
183 const char *lh
[elysNR
] = { "LYSN", "LYS" };
184 const char *expl
[elysNR
] = {
185 "Not protonated (charge 0)",
186 "Protonated (charge +1)"
189 return select_res(elysNR
, resnr
, lh
, expl
, "LYSINE", rr
);
192 static const char *get_argtp(int resnr
, gmx::ArrayRef
<const RtpRename
> rr
)
197 const char *lh
[eargNR
] = { "ARGN", "ARG" };
198 const char *expl
[eargNR
] = {
199 "Not protonated (charge 0)",
200 "Protonated (charge +1)"
203 return select_res(eargNR
, resnr
, lh
, expl
, "ARGININE", rr
);
206 static const char *get_histp(int resnr
, gmx::ArrayRef
<const RtpRename
> rr
)
208 const char *expl
[ehisNR
] = {
215 return select_res(ehisNR
, resnr
, hh
, expl
, "HISTIDINE", rr
);
218 static void read_rtprename(const char *fname
, FILE *fp
,
219 std::vector
<RtpRename
> *rtprename
)
221 char line
[STRLEN
], buf
[STRLEN
];
224 while (get_a_line(fp
, line
, STRLEN
))
226 /* line is NULL-terminated and length<STRLEN, so final arg cannot overflow.
227 * For other args, we read up to 6 chars (so we can detect if the length is > 5).
228 * Note that the buffer length has been increased to 7 to allow this,
229 * so we just need to make sure the strings have zero-length initially.
236 gmx
[0] = '\0'; main
[0] = '\0'; nter
[0] = '\0'; cter
[0] = '\0'; bter
[0] = '\0';
237 int nc
= sscanf(line
, "%6s %6s %6s %6s %6s %s",
240 RtpRename
newEntry(gmx
, main
, nter
, cter
, bter
);
243 if (nc
!= 2 && nc
!= 5)
245 gmx_fatal(FARGS
, "Residue renaming database '%s' has %d columns instead of %d or %d", fname
, ncol
, 2, 5);
251 gmx_fatal(FARGS
, "A line in residue renaming database '%s' has %d columns, while previous lines have %d columns", fname
, nc
, ncol
);
256 /* This file does not have special termini names, copy them from main */
257 newEntry
.nter
= newEntry
.main
;
258 newEntry
.cter
= newEntry
.main
;
259 newEntry
.bter
= newEntry
.main
;
261 rtprename
->push_back(newEntry
);
265 static std::string
search_resrename(gmx::ArrayRef
<const RtpRename
> rr
,
267 bool bStart
, bool bEnd
,
268 bool bCompareFFRTPname
)
270 auto found
= std::find_if(rr
.begin(), rr
.end(), [&name
, &bCompareFFRTPname
](const auto &rename
)
271 { return ((!bCompareFFRTPname
&& (name
== rename
.gmx
)) ||
272 (bCompareFFRTPname
&& (name
== rename
.main
))); });
275 /* If found in the database, rename this residue's rtp building block,
276 * otherwise keep the old name.
278 if (found
!= rr
.end())
282 newName
= found
->bter
;
286 newName
= found
->nter
;
290 newName
= found
->cter
;
294 newName
= found
->main
;
297 if (newName
[0] == '-')
299 gmx_fatal(FARGS
, "In the chosen force field there is no residue type for '%s'%s", name
, bStart
? ( bEnd
? " as a standalone (starting & ending) residue" : " as a starting terminus") : (bEnd
? " as an ending terminus" : ""));
306 static void rename_resrtp(t_atoms
*pdba
,
308 gmx::ArrayRef
<const int> r_start
,
309 gmx::ArrayRef
<const int> r_end
,
310 gmx::ArrayRef
<const RtpRename
> rr
,
314 bool bFFRTPTERRNM
= (getenv("GMX_NO_FFRTP_TER_RENAME") == nullptr);
316 for (int r
= 0; r
< pdba
->nres
; r
++)
320 for (int j
= 0; j
< nterpairs
; j
++)
327 for (int j
= 0; j
< nterpairs
; j
++)
335 std::string newName
= search_resrename(rr
, *pdba
->resinfo
[r
].rtp
, bStart
, bEnd
, false);
337 if (bFFRTPTERRNM
&& newName
.empty() && (bStart
|| bEnd
))
339 /* This is a terminal residue, but the residue name,
340 * currently stored in .rtp, is not a standard residue name,
341 * but probably a force field specific rtp name.
342 * Check if we need to rename it because it is terminal.
344 newName
= search_resrename(rr
,
345 *pdba
->resinfo
[r
].rtp
, bStart
, bEnd
, true);
348 if (!newName
.empty() && newName
!= *pdba
->resinfo
[r
].rtp
)
352 printf("Changing rtp entry of residue %d %s to '%s'\n",
353 pdba
->resinfo
[r
].nr
, *pdba
->resinfo
[r
].name
, newName
.c_str());
355 pdba
->resinfo
[r
].rtp
= put_symtab(symtab
, newName
.c_str());
360 static void pdbres_to_gmxrtp(t_atoms
*pdba
)
364 for (i
= 0; (i
< pdba
->nres
); i
++)
366 if (pdba
->resinfo
[i
].rtp
== nullptr)
368 pdba
->resinfo
[i
].rtp
= pdba
->resinfo
[i
].name
;
373 static void rename_pdbres(t_atoms
*pdba
, const char *oldnm
, const char *newnm
,
374 bool bFullCompare
, t_symtab
*symtab
)
379 for (i
= 0; (i
< pdba
->nres
); i
++)
381 resnm
= *pdba
->resinfo
[i
].name
;
382 if ((bFullCompare
&& (gmx::equalCaseInsensitive(resnm
, oldnm
))) ||
383 (!bFullCompare
&& strstr(resnm
, oldnm
) != nullptr))
385 /* Rename the residue name (not the rtp name) */
386 pdba
->resinfo
[i
].name
= put_symtab(symtab
, newnm
);
391 static void rename_bb(t_atoms
*pdba
, const char *oldnm
, const char *newnm
,
392 bool bFullCompare
, t_symtab
*symtab
)
397 for (i
= 0; (i
< pdba
->nres
); i
++)
399 /* We have not set the rtp name yes, use the residue name */
400 bbnm
= *pdba
->resinfo
[i
].name
;
401 if ((bFullCompare
&& (gmx::equalCaseInsensitive(bbnm
, oldnm
))) ||
402 (!bFullCompare
&& strstr(bbnm
, oldnm
) != nullptr))
404 /* Change the rtp builing block name */
405 pdba
->resinfo
[i
].rtp
= put_symtab(symtab
, newnm
);
410 static void rename_bbint(t_atoms
*pdba
, const char *oldnm
,
411 const char *gettp(int, gmx::ArrayRef
<const RtpRename
>),
414 gmx::ArrayRef
<const RtpRename
> rr
)
420 for (i
= 0; i
< pdba
->nres
; i
++)
422 /* We have not set the rtp name yet, use the residue name */
423 bbnm
= *pdba
->resinfo
[i
].name
;
424 if ((bFullCompare
&& (strcmp(bbnm
, oldnm
) == 0)) ||
425 (!bFullCompare
&& strstr(bbnm
, oldnm
) != nullptr))
428 pdba
->resinfo
[i
].rtp
= put_symtab(symtab
, ptr
);
433 static void check_occupancy(t_atoms
*atoms
, const char *filename
, bool bVerbose
)
439 ftp
= fn2ftp(filename
);
440 if (!atoms
->pdbinfo
|| ((ftp
!= efPDB
) && (ftp
!= efBRK
) && (ftp
!= efENT
)))
442 fprintf(stderr
, "No occupancies in %s\n", filename
);
446 for (i
= 0; (i
< atoms
->nr
); i
++)
448 if (atoms
->pdbinfo
[i
].occup
!= 1)
452 fprintf(stderr
, "Occupancy for atom %s%d-%s is %f rather than 1\n",
453 *atoms
->resinfo
[atoms
->atom
[i
].resind
].name
,
454 atoms
->resinfo
[ atoms
->atom
[i
].resind
].nr
,
456 atoms
->pdbinfo
[i
].occup
);
458 if (atoms
->pdbinfo
[i
].occup
== 0)
468 if (nzero
== atoms
->nr
)
470 fprintf(stderr
, "All occupancy fields zero. This is probably not an X-Ray structure\n");
472 else if ((nzero
> 0) || (nnotone
> 0))
476 "WARNING: there were %d atoms with zero occupancy and %d atoms with\n"
477 " occupancy unequal to one (out of %d atoms). Check your pdb file.\n"
479 nzero
, nnotone
, atoms
->nr
);
483 fprintf(stderr
, "All occupancies are one\n");
488 static void write_posres(const char *fn
, t_atoms
*pdba
, real fc
)
493 fp
= gmx_fio_fopen(fn
, "w");
495 "; In this topology include file, you will find position restraint\n"
496 "; entries for all the heavy atoms in your original pdb file.\n"
497 "; This means that all the protons which were added by pdb2gmx are\n"
498 "; not restrained.\n"
500 "[ position_restraints ]\n"
501 "; %4s%6s%8s%8s%8s\n", "atom", "type", "fx", "fy", "fz"
503 for (i
= 0; (i
< pdba
->nr
); i
++)
505 if (!is_hydrogen(*pdba
->atomname
[i
]) && !is_dummymass(*pdba
->atomname
[i
]))
507 fprintf(fp
, "%6d%6d %g %g %g\n", i
+1, 1, fc
, fc
, fc
);
513 static int read_pdball(const char *inf
, bool bOutput
, const char *outf
, char **title
,
514 t_atoms
*atoms
, rvec
**x
,
515 int *ePBC
, matrix box
, bool bRemoveH
,
516 t_symtab
*symtab
, ResidueType
*rt
, const char *watres
,
517 AtomProperties
*aps
, bool bVerbose
)
518 /* Read a pdb file. (containing proteins) */
520 int natom
, new_natom
, i
;
523 printf("Reading %s...\n", inf
);
524 readConfAndAtoms(inf
, symtab
, title
, atoms
, ePBC
, x
, nullptr, box
);
526 if (atoms
->pdbinfo
== nullptr)
528 snew(atoms
->pdbinfo
, atoms
->nr
);
530 if (fn2ftp(inf
) == efPDB
)
532 get_pdb_atomnumber(atoms
, aps
);
537 for (i
= 0; i
< atoms
->nr
; i
++)
539 if (!is_hydrogen(*atoms
->atomname
[i
]))
541 atoms
->atom
[new_natom
] = atoms
->atom
[i
];
542 atoms
->atomname
[new_natom
] = atoms
->atomname
[i
];
543 atoms
->pdbinfo
[new_natom
] = atoms
->pdbinfo
[i
];
544 copy_rvec((*x
)[i
], (*x
)[new_natom
]);
548 atoms
->nr
= new_natom
;
555 printf(" '%s',", *title
);
557 printf(" %d atoms\n", natom
);
559 /* Rename residues */
560 rename_pdbres(atoms
, "HOH", watres
, false, symtab
);
561 rename_pdbres(atoms
, "SOL", watres
, false, symtab
);
562 rename_pdbres(atoms
, "WAT", watres
, false, symtab
);
564 rename_atoms("xlateat.dat", nullptr,
565 atoms
, symtab
, {}, true,
574 write_sto_conf(outf
, *title
, atoms
, *x
, nullptr, *ePBC
, box
);
580 static void process_chain(t_atoms
*pdba
, gmx::ArrayRef
<gmx::RVec
> x
,
581 bool bTrpU
, bool bPheU
, bool bTyrU
,
582 bool bLysMan
, bool bAspMan
, bool bGluMan
,
583 bool bHisMan
, bool bArgMan
, bool bGlnMan
,
584 real angle
, real distance
, t_symtab
*symtab
,
585 gmx::ArrayRef
<const RtpRename
> rr
)
587 /* Rename aromatics, lys, asp and histidine */
590 rename_bb(pdba
, "TYR", "TYRU", false, symtab
);
594 rename_bb(pdba
, "TRP", "TRPU", false, symtab
);
598 rename_bb(pdba
, "PHE", "PHEU", false, symtab
);
602 rename_bbint(pdba
, "LYS", get_lystp
, false, symtab
, rr
);
606 rename_bbint(pdba
, "ARG", get_argtp
, false, symtab
, rr
);
610 rename_bbint(pdba
, "GLN", get_glntp
, false, symtab
, rr
);
614 rename_bbint(pdba
, "ASP", get_asptp
, false, symtab
, rr
);
618 rename_bb(pdba
, "ASPH", "ASP", false, symtab
);
622 rename_bbint(pdba
, "GLU", get_glutp
, false, symtab
, rr
);
626 rename_bb(pdba
, "GLUH", "GLU", false, symtab
);
631 set_histp(pdba
, gmx::as_rvec_array(x
.data()), symtab
, angle
, distance
);
635 rename_bbint(pdba
, "HIS", get_histp
, true, symtab
, rr
);
638 /* Initialize the rtp builing block names with the residue names
639 * for the residues that have not been processed above.
641 pdbres_to_gmxrtp(pdba
);
643 /* Now we have all rtp names set.
644 * The rtp names will conform to Gromacs naming,
645 * unless the input pdb file contained one or more force field specific
646 * rtp names as residue names.
650 /* struct for sorting the atoms from the pdb file */
652 int resnr
; /* residue number */
653 int j
; /* database order index */
654 int index
; /* original atom number */
655 char anm1
; /* second letter of atom name */
656 char altloc
; /* alternate location indicator */
659 static bool pdbicomp(const t_pdbindex
&a
, const t_pdbindex
&b
)
661 int d
= (a
.resnr
- b
.resnr
);
667 d
= (a
.anm1
- b
.anm1
);
670 d
= (a
.altloc
- b
.altloc
);
677 static void sort_pdbatoms(gmx::ArrayRef
<const PreprocessResidue
> restp_chain
,
680 t_atoms
**newPdbAtoms
,
681 std::vector
<gmx::RVec
> *x
,
682 t_blocka
*block
, char ***gnames
)
684 t_atoms
*pdba
= *pdbaptr
;
685 std::vector
<gmx::RVec
> xnew
;
692 for (int i
= 0; i
< natoms
; i
++)
694 atomnm
= *pdba
->atomname
[i
];
695 const PreprocessResidue
*localPpResidue
= &restp_chain
[pdba
->atom
[i
].resind
];
696 auto found
= std::find_if(localPpResidue
->atomname
.begin(), localPpResidue
->atomname
.end(),
697 [&atomnm
](char** it
){return gmx::equalCaseInsensitive(atomnm
, *it
); });
698 if (found
== localPpResidue
->atomname
.end())
703 "Atom %s in residue %s %d was not found in rtp entry %s with %d atoms\n"
704 "while sorting atoms.\n%s", atomnm
,
705 *pdba
->resinfo
[pdba
->atom
[i
].resind
].name
,
706 pdba
->resinfo
[pdba
->atom
[i
].resind
].nr
,
707 localPpResidue
->resname
.c_str(),
708 localPpResidue
->natom(),
709 is_hydrogen(atomnm
) ?
710 "\nFor a hydrogen, this can be a different protonation state, or it\n"
711 "might have had a different number in the PDB file and was rebuilt\n"
712 "(it might for instance have been H3, and we only expected H1 & H2).\n"
713 "Note that hydrogens might have been added to the entry for the N-terminus.\n"
714 "Remove this hydrogen or choose a different protonation state to solve it.\n"
715 "Option -ignh will ignore all hydrogens in the input." : ".");
716 gmx_fatal(FARGS
, "%s", buf
);
718 /* make shadow array to be sorted into indexgroup */
719 pdbi
[i
].resnr
= pdba
->atom
[i
].resind
;
720 pdbi
[i
].j
= std::distance(localPpResidue
->atomname
.begin(), found
);
722 pdbi
[i
].anm1
= atomnm
[1];
723 pdbi
[i
].altloc
= pdba
->pdbinfo
[i
].altloc
;
725 std::sort(pdbi
, pdbi
+natoms
, pdbicomp
);
727 /* pdba is sorted in pdbnew using the pdbi index */
728 std::vector
<int> a(natoms
);
729 srenew(*newPdbAtoms
, 1);
730 init_t_atoms((*newPdbAtoms
), natoms
, true);
731 (*newPdbAtoms
)->nr
= pdba
->nr
;
732 (*newPdbAtoms
)->nres
= pdba
->nres
;
733 srenew((*newPdbAtoms
)->resinfo
, pdba
->nres
);
734 std::copy(pdba
->resinfo
, pdba
->resinfo
+ pdba
->nres
, (*newPdbAtoms
)->resinfo
);
735 for (int i
= 0; i
< natoms
; i
++)
737 (*newPdbAtoms
)->atom
[i
] = pdba
->atom
[pdbi
[i
].index
];
738 (*newPdbAtoms
)->atomname
[i
] = pdba
->atomname
[pdbi
[i
].index
];
739 (*newPdbAtoms
)->pdbinfo
[i
] = pdba
->pdbinfo
[pdbi
[i
].index
];
740 xnew
.push_back(x
->at(pdbi
[i
].index
));
741 /* make indexgroup in block */
742 a
[i
] = pdbi
[i
].index
;
747 /* copy the sorted pdbnew back to pdba */
748 *pdbaptr
= *newPdbAtoms
;
750 add_grp(block
, gnames
, a
, "prot_sort");
754 static int remove_duplicate_atoms(t_atoms
*pdba
, gmx::ArrayRef
<gmx::RVec
> x
, bool bVerbose
)
756 int i
, j
, oldnatoms
, ndel
;
759 printf("Checking for duplicate atoms....\n");
760 oldnatoms
= pdba
->nr
;
762 /* NOTE: pdba->nr is modified inside the loop */
763 for (i
= 1; (i
< pdba
->nr
); i
++)
765 /* compare 'i' and 'i-1', throw away 'i' if they are identical
766 this is a 'while' because multiple alternate locations can be present */
767 while ( (i
< pdba
->nr
) &&
768 (pdba
->atom
[i
-1].resind
== pdba
->atom
[i
].resind
) &&
769 (strcmp(*pdba
->atomname
[i
-1], *pdba
->atomname
[i
]) == 0) )
774 ri
= &pdba
->resinfo
[pdba
->atom
[i
].resind
];
775 printf("deleting duplicate atom %4s %s%4d%c",
776 *pdba
->atomname
[i
], *ri
->name
, ri
->nr
, ri
->ic
);
777 if (ri
->chainid
&& (ri
->chainid
!= ' '))
779 printf(" ch %c", ri
->chainid
);
783 if (pdba
->pdbinfo
[i
].atomnr
)
785 printf(" pdb nr %4d", pdba
->pdbinfo
[i
].atomnr
);
787 if (pdba
->pdbinfo
[i
].altloc
&& (pdba
->pdbinfo
[i
].altloc
!= ' '))
789 printf(" altloc %c", pdba
->pdbinfo
[i
].altloc
);
795 /* We can not free, since it might be in the symtab */
796 /* sfree(pdba->atomname[i]); */
797 for (j
= i
; j
< pdba
->nr
; j
++)
799 pdba
->atom
[j
] = pdba
->atom
[j
+1];
800 pdba
->atomname
[j
] = pdba
->atomname
[j
+1];
803 pdba
->pdbinfo
[j
] = pdba
->pdbinfo
[j
+1];
805 copy_rvec(x
[j
+1], x
[j
]);
807 srenew(pdba
->atom
, pdba
->nr
);
808 /* srenew(pdba->atomname, pdba->nr); */
809 srenew(pdba
->pdbinfo
, pdba
->nr
);
812 if (pdba
->nr
!= oldnatoms
)
814 printf("Now there are %d atoms. Deleted %d duplicates.\n", pdba
->nr
, ndel
);
821 checkResidueTypeSanity(t_atoms
*pdba
,
826 std::string startResidueString
= gmx::formatString("%s%d", *pdba
->resinfo
[r0
].name
, pdba
->resinfo
[r0
].nr
);
827 std::string endResidueString
= gmx::formatString("%s%d", *pdba
->resinfo
[r1
-1].name
, pdba
->resinfo
[r1
-1].nr
);
829 // Check whether all residues in chain have the same chain ID.
830 bool allResiduesHaveSameChainID
= true;
831 char chainID0
= pdba
->resinfo
[r0
].chainid
;
833 std::string residueString
;
835 for (int i
= r0
+ 1; i
< r1
; i
++)
837 chainID
= pdba
->resinfo
[i
].chainid
;
838 if (chainID
!= chainID0
)
840 allResiduesHaveSameChainID
= false;
841 residueString
= gmx::formatString("%s%d", *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
);
846 if (!allResiduesHaveSameChainID
)
849 "The chain covering the range %s--%s does not have a consistent chain ID. "
850 "The first residue has ID '%c', while residue %s has ID '%c'.",
851 startResidueString
.c_str(), endResidueString
.c_str(),
852 chainID0
, residueString
.c_str(), chainID
);
855 // At this point all residues have the same ID. If they are also non-blank
856 // we can be a bit more aggressive and require the types match too.
859 bool allResiduesHaveSameType
= true;
861 std::string restype0
= rt
->typeOfNamedDatabaseResidue(*pdba
->resinfo
[r0
].name
);
863 for (int i
= r0
+ 1; i
< r1
; i
++)
865 restype
= rt
->typeOfNamedDatabaseResidue(*pdba
->resinfo
[i
].name
);
866 if (!gmx::equalCaseInsensitive(restype
, restype0
))
868 allResiduesHaveSameType
= false;
869 residueString
= gmx::formatString("%s%d", *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
);
874 if (!allResiduesHaveSameType
)
877 "The residues in the chain %s--%s do not have a consistent type. "
878 "The first residue has type '%s', while residue %s is of type '%s'. "
879 "Either there is a mistake in your chain, or it includes nonstandard "
880 "residue names that have not yet been added to the residuetypes.dat "
881 "file in the GROMACS library directory. If there are other molecules "
882 "such as ligands, they should not have the same chain ID as the "
883 "adjacent protein chain since it's a separate molecule.",
884 startResidueString
.c_str(), endResidueString
.c_str(),
885 restype0
.c_str(), residueString
.c_str(), restype
.c_str());
890 static void find_nc_ter(t_atoms
*pdba
, int r0
, int r1
, int *r_start
, int *r_end
,
894 gmx::compat::optional
<std::string
> startrestype
;
899 int startWarnings
= 0;
903 // Check that all residues have the same chain identifier, and if it is
904 // non-blank we also require the residue types to match.
905 checkResidueTypeSanity(pdba
, r0
, r1
, rt
);
907 // If we return correctly from checkResidueTypeSanity(), the only
908 // remaining cases where we can have non-matching residue types is if
909 // the chain ID was blank, which could be the case e.g. for a structure
910 // read from a GRO file or other file types without chain information.
911 // In that case we need to be a bit more liberal and detect chains based
912 // on the residue type.
914 // If we get here, the chain ID must be identical for all residues
915 char chainID
= pdba
->resinfo
[r0
].chainid
;
917 /* Find the starting terminus (typially N or 5') */
918 for (i
= r0
; i
< r1
&& *r_start
== -1; i
++)
920 startrestype
= rt
->optionalTypeOfNamedDatabaseResidue(*pdba
->resinfo
[i
].name
);
925 if (gmx::equalCaseInsensitive(*startrestype
, "Protein") ||
926 gmx::equalCaseInsensitive(*startrestype
, "DNA") ||
927 gmx::equalCaseInsensitive(*startrestype
, "RNA") )
929 printf("Identified residue %s%d as a starting terminus.\n", *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
);
932 else if (gmx::equalCaseInsensitive(*startrestype
, "Ion"))
936 printf("Residue %s%d has type 'Ion', assuming it is not linked into a chain.\n", *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
);
940 printf("Disabling further notes about ions.\n");
946 // Either no known residue type, or one not needing special handling
947 if (startWarnings
< 5)
951 printf("\nWarning: Starting residue %s%d in chain not identified as Protein/RNA/DNA.\n"
952 "This chain lacks identifiers, which makes it impossible to do strict\n"
953 "classification of the start/end residues. Here we need to guess this residue\n"
954 "should not be part of the chain and instead introduce a break, but that will\n"
955 "be catastrophic if they should in fact be linked. Please check your structure,\n"
956 "and add %s to residuetypes.dat if this was not correct.\n\n",
957 *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
, *pdba
->resinfo
[i
].name
);
961 printf("\nWarning: No residues in chain starting at %s%d identified as Protein/RNA/DNA.\n"
962 "This makes it impossible to link them into a molecule, which could either be\n"
963 "correct or a catastrophic error. Please check your structure, and add all\n"
964 "necessary residue names to residuetypes.dat if this was not correct.\n\n",
965 *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
);
968 if (startWarnings
== 4)
970 printf("Disabling further warnings about unidentified residues at start of chain.\n");
978 /* Go through the rest of the residues, check that they are the same class, and identify the ending terminus. */
979 for (int i
= *r_start
; i
< r1
; i
++)
981 gmx::compat::optional
<std::string
> restype
=
982 rt
->optionalTypeOfNamedDatabaseResidue(*pdba
->resinfo
[i
].name
);
987 if (gmx::equalCaseInsensitive(*restype
, *startrestype
) && endWarnings
== 0)
991 else if (gmx::equalCaseInsensitive(*startrestype
, "Ion"))
995 printf("Residue %s%d has type 'Ion', assuming it is not linked into a chain.\n", *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
);
999 printf("Disabling further notes about ions.\n");
1005 // Either no known residue type, or one not needing special handling.
1006 GMX_RELEASE_ASSERT(chainID
== ' ', "Chain ID must be blank");
1007 // Otherwise the call to checkResidueTypeSanity() will
1008 // have caught the problem.
1009 if (endWarnings
< 5)
1011 printf("\nWarning: Residue %s%d in chain has different type ('%s') from\n"
1012 "residue %s%d ('%s'). This chain lacks identifiers, which makes\n"
1013 "it impossible to do strict classification of the start/end residues. Here we\n"
1014 "need to guess this residue should not be part of the chain and instead\n"
1015 "introduce a break, but that will be catastrophic if they should in fact be\n"
1016 "linked. Please check your structure, and add %s to residuetypes.dat\n"
1017 "if this was not correct.\n\n",
1018 *pdba
->resinfo
[i
].name
, pdba
->resinfo
[i
].nr
, restype
->c_str(),
1019 *pdba
->resinfo
[*r_start
].name
, pdba
->resinfo
[*r_start
].nr
, startrestype
->c_str(), *pdba
->resinfo
[i
].name
);
1021 if (endWarnings
== 4)
1023 printf("Disabling further warnings about unidentified residues at end of chain.\n");
1032 printf("Identified residue %s%d as a ending terminus.\n", *pdba
->resinfo
[*r_end
].name
, pdba
->resinfo
[*r_end
].nr
);
1036 /* enum for chain separation */
1038 enChainSep_id_or_ter
, enChainSep_id_and_ter
, enChainSep_ter
,
1039 enChainSep_id
, enChainSep_interactive
1041 static const char *ChainSepEnum
[] = {"id_or_ter", "id_and_ter", "ter", "id", "interactive"};
1042 static const char *ChainSepInfoString
[] =
1044 "Splitting chemical chains based on TER records or chain id changing.\n",
1045 "Splitting chemical chains based on TER records and chain id changing.\n",
1046 "Splitting chemical chains based on TER records only (ignoring chain id).\n",
1047 "Splitting chemical chains based on changing chain id only (ignoring TER records).\n",
1048 "Splitting chemical chains interactively.\n"
1052 modify_chain_numbers(t_atoms
* pdba
,
1053 ChainSepType enumChainSep
)
1056 char old_prev_chainid
;
1057 char old_this_chainid
;
1058 int old_prev_chainnum
;
1059 int old_this_chainnum
;
1061 char select
[STRLEN
];
1065 const char * prev_atomname
;
1066 const char * this_atomname
;
1067 const char * prev_resname
;
1068 const char * this_resname
;
1074 /* The default chain enumeration is based on TER records only */
1075 printf("%s", ChainSepInfoString
[enumChainSep
]);
1077 old_prev_chainid
= '?';
1078 old_prev_chainnum
= -1;
1081 this_atomname
= nullptr;
1083 this_resname
= nullptr;
1087 for (i
= 0; i
< pdba
->nres
; i
++)
1089 ri
= &pdba
->resinfo
[i
];
1090 old_this_chainid
= ri
->chainid
;
1091 old_this_chainnum
= ri
->chainnum
;
1093 prev_atomname
= this_atomname
;
1094 prev_atomnum
= this_atomnum
;
1095 prev_resname
= this_resname
;
1096 prev_resnum
= this_resnum
;
1097 prev_chainid
= this_chainid
;
1099 this_atomname
= *(pdba
->atomname
[i
]);
1100 this_atomnum
= (pdba
->pdbinfo
!= nullptr) ? pdba
->pdbinfo
[i
].atomnr
: i
+1;
1101 this_resname
= *ri
->name
;
1102 this_resnum
= ri
->nr
;
1103 this_chainid
= ri
->chainid
;
1105 switch (enumChainSep
)
1107 case enChainSep_id_or_ter
:
1108 if (old_this_chainid
!= old_prev_chainid
|| old_this_chainnum
!= old_prev_chainnum
)
1114 case enChainSep_id_and_ter
:
1115 if (old_this_chainid
!= old_prev_chainid
&& old_this_chainnum
!= old_prev_chainnum
)
1122 if (old_this_chainid
!= old_prev_chainid
)
1128 case enChainSep_ter
:
1129 if (old_this_chainnum
!= old_prev_chainnum
)
1134 case enChainSep_interactive
:
1135 if (old_this_chainid
!= old_prev_chainid
|| old_this_chainnum
!= old_prev_chainnum
)
1139 printf("Split the chain (and introduce termini) between residue %s%d (chain id '%c', atom %d %s)\
1141 "and residue %s%d (chain id '%c', atom %d %s) ? [n/y]\n",
1142 prev_resname
, prev_resnum
, prev_chainid
, prev_atomnum
, prev_atomname
,
1143 this_resname
, this_resnum
, this_chainid
, this_atomnum
, this_atomname
);
1145 if (nullptr == fgets(select
, STRLEN
-1, stdin
))
1147 gmx_fatal(FARGS
, "Error reading from stdin");
1150 if (i
== 0 || select
[0] == 'y')
1157 gmx_fatal(FARGS
, "Internal inconsistency - this shouldn't happen...");
1159 old_prev_chainid
= old_this_chainid
;
1160 old_prev_chainnum
= old_this_chainnum
;
1162 ri
->chainnum
= new_chainnum
;
1168 char chainnum
= ' ';
1171 bool bAllWat
= false;
1173 std::vector
<int> chainstart
;
1179 bool bAllWat
= false;
1181 std::vector
<int> chainstart
;
1182 std::vector
<MoleculePatchDatabase
*> ntdb
;
1183 std::vector
<MoleculePatchDatabase
*> ctdb
;
1184 std::vector
<int> r_start
;
1185 std::vector
<int> r_end
;
1187 std::vector
<gmx::RVec
> x
;
1190 // TODO make all enums into scoped enums
1191 /* enum for vsites */
1193 enVSites_none
, enVSites_hydrogens
, enVSites_aromatics
1195 static const char *VSitesEnum
[] = {"none", "hydrogens", "aromatics"};
1197 /* enum for water model */
1199 enWater_select
, enWater_none
, enWater_spc
, enWater_spce
,
1200 enWater_tip3p
, enWater_tip4p
, enWater_tip5p
, enWater_tips3p
1202 static const char *WaterEnum
[] = {
1203 "select", "none", "spc", "spce",
1204 "tip3p", "tip4p", "tip5p", "tips3p"
1207 /* enum for merge */
1209 enMerge_no
, enMerge_all
, enMerge_interactive
1211 static const char *MergeEnum
[] = {"no", "all", "interactive"};
1219 class pdb2gmx
: public ICommandLineOptionsModule
1223 bVsites_(FALSE
), bPrevWat_(FALSE
), bVsiteAromatics_(FALSE
),
1224 enumChainSep_(enChainSep_id_or_ter
),
1225 enumVSites_(enVSites_none
),
1226 enumWater_(enWater_select
),
1227 enumMerge_(enMerge_no
),
1233 // From ICommandLineOptionsModule
1234 void init(CommandLineModuleSettings
* /*settings*/) override
1238 void initOptions(IOptionsContainer
*options
,
1239 ICommandLineOptionsModuleSettings
*settings
) override
;
1241 void optionsFinished() override
;
1259 bool bAllowMissing_
;
1263 bool bChargeGroups_
;
1273 bool bVsiteAromatics_
;
1277 real long_bond_dist_
;
1278 real short_bond_dist_
;
1280 std::string indexOutputFile_
;
1281 std::string outputFile_
;
1282 std::string topologyFile_
;
1283 std::string includeTopologyFile_
;
1284 std::string outputConfFile_
;
1285 std::string inputConfFile_
;
1286 std::string outFile_
;
1289 ChainSepType enumChainSep_
;
1290 VSitesType enumVSites_
;
1291 WaterType enumWater_
;
1292 MergeType enumMerge_
;
1295 char forcefield_
[STRLEN
];
1296 char ffdir_
[STRLEN
];
1299 std::vector
<std::string
> incls_
;
1300 std::vector
<t_mols
> mols_
;
1304 void pdb2gmx::initOptions(IOptionsContainer
*options
,
1305 ICommandLineOptionsModuleSettings
*settings
)
1307 const char *desc
[] = {
1308 "[THISMODULE] reads a [REF].pdb[ref] (or [REF].gro[ref]) file, reads",
1309 "some database files, adds hydrogens to the molecules and generates",
1310 "coordinates in GROMACS (GROMOS), or optionally [REF].pdb[ref], format",
1311 "and a topology in GROMACS format.",
1312 "These files can subsequently be processed to generate a run input file.",
1314 "[THISMODULE] will search for force fields by looking for",
1315 "a [TT]forcefield.itp[tt] file in subdirectories [TT]<forcefield>.ff[tt]",
1316 "of the current working directory and of the GROMACS library directory",
1317 "as inferred from the path of the binary or the [TT]GMXLIB[tt] environment",
1319 "By default the forcefield selection is interactive,",
1320 "but you can use the [TT]-ff[tt] option to specify one of the short names",
1321 "in the list on the command line instead. In that case [THISMODULE] just looks",
1322 "for the corresponding [TT]<forcefield>.ff[tt] directory.",
1324 "After choosing a force field, all files will be read only from",
1325 "the corresponding force field directory.",
1326 "If you want to modify or add a residue types, you can copy the force",
1327 "field directory from the GROMACS library directory to your current",
1328 "working directory. If you want to add new protein residue types,",
1329 "you will need to modify [TT]residuetypes.dat[tt] in the library directory",
1330 "or copy the whole library directory to a local directory and set",
1331 "the environment variable [TT]GMXLIB[tt] to the name of that directory.",
1332 "Check Chapter 5 of the manual for more information about file formats.",
1335 "Note that a [REF].pdb[ref] file is nothing more than a file format, and it",
1336 "need not necessarily contain a protein structure. Every kind of",
1337 "molecule for which there is support in the database can be converted.",
1338 "If there is no support in the database, you can add it yourself.[PAR]",
1340 "The program has limited intelligence, it reads a number of database",
1341 "files, that allow it to make special bonds (Cys-Cys, Heme-His, etc.),",
1342 "if necessary this can be done manually. The program can prompt the",
1343 "user to select which kind of LYS, ASP, GLU, CYS or HIS residue is",
1344 "desired. For Lys the choice is between neutral (two protons on NZ) or",
1345 "protonated (three protons, default), for Asp and Glu unprotonated",
1346 "(default) or protonated, for His the proton can be either on ND1,",
1347 "on NE2 or on both. By default these selections are done automatically.",
1348 "For His, this is based on an optimal hydrogen bonding",
1349 "conformation. Hydrogen bonds are defined based on a simple geometric",
1350 "criterion, specified by the maximum hydrogen-donor-acceptor angle",
1351 "and donor-acceptor distance, which are set by [TT]-angle[tt] and",
1352 "[TT]-dist[tt] respectively.[PAR]",
1354 "The protonation state of N- and C-termini can be chosen interactively",
1355 "with the [TT]-ter[tt] flag. Default termini are ionized (NH3+ and COO-),",
1356 "respectively. Some force fields support zwitterionic forms for chains of",
1357 "one residue, but for polypeptides these options should NOT be selected.",
1358 "The AMBER force fields have unique forms for the terminal residues,",
1359 "and these are incompatible with the [TT]-ter[tt] mechanism. You need",
1360 "to prefix your N- or C-terminal residue names with \"N\" or \"C\"",
1361 "respectively to use these forms, making sure you preserve the format",
1362 "of the coordinate file. Alternatively, use named terminating residues",
1363 "(e.g. ACE, NME).[PAR]",
1365 "The separation of chains is not entirely trivial since the markup",
1366 "in user-generated PDB files frequently varies and sometimes it",
1367 "is desirable to merge entries across a TER record, for instance",
1368 "if you want a disulfide bridge or distance restraints between",
1369 "two protein chains or if you have a HEME group bound to a protein.",
1370 "In such cases multiple chains should be contained in a single",
1371 "[TT]moleculetype[tt] definition.",
1372 "To handle this, [THISMODULE] uses two separate options.",
1373 "First, [TT]-chainsep[tt] allows you to choose when a new chemical chain should",
1374 "start, and termini added when applicable. This can be done based on the",
1375 "existence of TER records, when the chain id changes, or combinations of either",
1376 "or both of these. You can also do the selection fully interactively.",
1377 "In addition, there is a [TT]-merge[tt] option that controls how multiple chains",
1378 "are merged into one moleculetype, after adding all the chemical termini (or not).",
1379 "This can be turned off (no merging), all non-water chains can be merged into a",
1380 "single molecule, or the selection can be done interactively.[PAR]",
1382 "[THISMODULE] will also check the occupancy field of the [REF].pdb[ref] file.",
1383 "If any of the occupancies are not one, indicating that the atom is",
1384 "not resolved well in the structure, a warning message is issued.",
1385 "When a [REF].pdb[ref] file does not originate from an X-ray structure determination",
1386 "all occupancy fields may be zero. Either way, it is up to the user",
1387 "to verify the correctness of the input data (read the article!).[PAR]",
1389 "During processing the atoms will be reordered according to GROMACS",
1390 "conventions. With [TT]-n[tt] an index file can be generated that",
1391 "contains one group reordered in the same way. This allows you to",
1392 "convert a GROMOS trajectory and coordinate file to GROMOS. There is",
1393 "one limitation: reordering is done after the hydrogens are stripped",
1394 "from the input and before new hydrogens are added. This means that",
1395 "you should not use [TT]-ignh[tt].[PAR]",
1397 "The [REF].gro[ref] and [TT].g96[tt] file formats do not support chain",
1398 "identifiers. Therefore it is useful to enter a [REF].pdb[ref] file name at",
1399 "the [TT]-o[tt] option when you want to convert a multi-chain [REF].pdb[ref] file.",
1402 "The option [TT]-vsite[tt] removes hydrogen and fast improper dihedral",
1403 "motions. Angular and out-of-plane motions can be removed by changing",
1404 "hydrogens into virtual sites and fixing angles, which fixes their",
1405 "position relative to neighboring atoms. Additionally, all atoms in the",
1406 "aromatic rings of the standard amino acids (i.e. PHE, TRP, TYR and HIS)",
1407 "can be converted into virtual sites, eliminating the fast improper dihedral",
1408 "fluctuations in these rings (but this feature is deprecated).",
1409 "[BB]Note[bb] that in this case all other hydrogen",
1410 "atoms are also converted to virtual sites. The mass of all atoms that are",
1411 "converted into virtual sites, is added to the heavy atoms.[PAR]",
1412 "Also slowing down of dihedral motion can be done with [TT]-heavyh[tt]",
1413 "done by increasing the hydrogen-mass by a factor of 4. This is also",
1414 "done for water hydrogens to slow down the rotational motion of water.",
1415 "The increase in mass of the hydrogens is subtracted from the bonded",
1416 "(heavy) atom so that the total mass of the system remains the same."
1419 settings
->setHelpText(desc
);
1421 options
->addOption(BooleanOption("newrtp")
1422 .store(&bNewRTP_
).defaultValue(false).hidden()
1423 .description("Write the residue database in new format to [TT]new.rtp[tt]"));
1424 options
->addOption(RealOption("lb")
1425 .store(&long_bond_dist_
).defaultValue(0.25).hidden()
1426 .description("Long bond warning distance"));
1427 options
->addOption(RealOption("sb")
1428 .store(&short_bond_dist_
).defaultValue(0.05).hidden()
1429 .description("Short bond warning distance"));
1430 options
->addOption(EnumOption
<ChainSepType
>("chainsep").enumValue(ChainSepEnum
)
1431 .store(&enumChainSep_
)
1432 .description("Condition in PDB files when a new chain should be started (adding termini)"));
1433 options
->addOption(EnumOption
<MergeType
>("merge").enumValue(MergeEnum
)
1435 .description("Merge multiple chains into a single [moleculetype]"));
1436 options
->addOption(StringOption("ff")
1437 .store(&ff_
).defaultValue("select")
1438 .description("Force field, interactive by default. Use [TT]-h[tt] for information."));
1439 options
->addOption(EnumOption
<WaterType
>("water")
1440 .store(&enumWater_
).enumValue(WaterEnum
)
1441 .description("Water model to use"));
1442 options
->addOption(BooleanOption("inter")
1443 .store(&bInter_
).defaultValue(false)
1444 .description("Set the next 8 options to interactive"));
1445 options
->addOption(BooleanOption("ss")
1446 .store(&bCysMan_
).defaultValue(false)
1447 .description("Interactive SS bridge selection"));
1448 options
->addOption(BooleanOption("ter")
1449 .store(&bTerMan_
).defaultValue(false)
1450 .description("Interactive termini selection, instead of charged (default)"));
1451 options
->addOption(BooleanOption("lys")
1452 .store(&bLysMan_
).defaultValue(false)
1453 .description("Interactive lysine selection, instead of charged"));
1454 options
->addOption(BooleanOption("arg")
1455 .store(&bArgMan_
).defaultValue(false)
1456 .description("Interactive arginine selection, instead of charged"));
1457 options
->addOption(BooleanOption("asp")
1458 .store(&bAspMan_
).defaultValue(false)
1459 .description("Interactive aspartic acid selection, instead of charged"));
1460 options
->addOption(BooleanOption("glu")
1461 .store(&bGluMan_
).defaultValue(false)
1462 .description("Interactive glutamic acid selection, instead of charged"));
1463 options
->addOption(BooleanOption("gln")
1464 .store(&bGlnMan_
).defaultValue(false)
1465 .description("Interactive glutamine selection, instead of charged"));
1466 options
->addOption(BooleanOption("his")
1467 .store(&bHisMan_
).defaultValue(false)
1468 .description("Interactive histidine selection, instead of checking H-bonds"));
1469 options
->addOption(RealOption("angle")
1470 .store(&angle_
).defaultValue(135.0)
1471 .description("Minimum hydrogen-donor-acceptor angle for a H-bond (degrees)"));
1472 options
->addOption(RealOption("dist")
1473 .store(&distance_
).defaultValue(0.3)
1474 .description("Maximum donor-acceptor distance for a H-bond (nm)"));
1475 options
->addOption(BooleanOption("una")
1476 .store(&bUnA_
).defaultValue(false)
1477 .description("Select aromatic rings with united CH atoms on phenylalanine, tryptophane and tyrosine"));
1478 options
->addOption(BooleanOption("sort")
1479 .store(&bSort_
).defaultValue(true).hidden()
1480 .description("Sort the residues according to database, turning this off is dangerous as charge groups might be broken in parts"));
1481 options
->addOption(BooleanOption("ignh")
1482 .store(&bRemoveH_
).defaultValue(false)
1483 .description("Ignore hydrogen atoms that are in the coordinate file"));
1484 options
->addOption(BooleanOption("missing")
1485 .store(&bAllowMissing_
).defaultValue(false)
1486 .description("Continue when atoms are missing and bonds cannot be made, dangerous"));
1487 options
->addOption(BooleanOption("v")
1488 .store(&bVerbose_
).defaultValue(false)
1489 .description("Be slightly more verbose in messages"));
1490 options
->addOption(RealOption("posrefc")
1491 .store(&posre_fc_
).defaultValue(1000)
1492 .description("Force constant for position restraints"));
1493 options
->addOption(EnumOption
<VSitesType
>("vsite")
1494 .store(&enumVSites_
).enumValue(VSitesEnum
)
1495 .description("Convert atoms to virtual sites"));
1496 options
->addOption(BooleanOption("heavyh")
1497 .store(&bHeavyH_
).defaultValue(false)
1498 .description("Make hydrogen atoms heavy"));
1499 options
->addOption(BooleanOption("deuterate")
1500 .store(&bDeuterate_
).defaultValue(false)
1501 .description("Change the mass of hydrogens to 2 amu"));
1502 options
->addOption(BooleanOption("chargegrp")
1503 .store(&bChargeGroups_
).defaultValue(true)
1504 .description("Use charge groups in the [REF].rtp[ref] file"));
1505 options
->addOption(BooleanOption("cmap")
1506 .store(&bCmap_
).defaultValue(true)
1507 .description("Use cmap torsions (if enabled in the [REF].rtp[ref] file)"));
1508 options
->addOption(BooleanOption("renum")
1509 .store(&bRenumRes_
).defaultValue(false)
1510 .description("Renumber the residues consecutively in the output"));
1511 options
->addOption(BooleanOption("rtpres")
1512 .store(&bRTPresname_
).defaultValue(false)
1513 .description("Use [REF].rtp[ref] entry names as residue names"));
1514 options
->addOption(FileNameOption("f")
1515 .legacyType(efSTX
).inputFile()
1516 .store(&inputConfFile_
).required()
1517 .defaultBasename("protein").defaultType(efPDB
)
1518 .description("Structure file"));
1519 options
->addOption(FileNameOption("o")
1520 .legacyType(efSTO
).outputFile()
1521 .store(&outputConfFile_
).required()
1522 .defaultBasename("conf")
1523 .description("Structure file"));
1524 options
->addOption(FileNameOption("p")
1525 .legacyType(efTOP
).outputFile()
1526 .store(&topologyFile_
).required()
1527 .defaultBasename("topol")
1528 .description("Topology file"));
1529 options
->addOption(FileNameOption("i")
1530 .legacyType(efITP
).outputFile()
1531 .store(&includeTopologyFile_
).required()
1532 .defaultBasename("posre")
1533 .description("Include file for topology"));
1534 options
->addOption(FileNameOption("n")
1535 .legacyType(efNDX
).outputFile()
1536 .store(&indexOutputFile_
).storeIsSet(&bIndexSet_
)
1537 .defaultBasename("index")
1538 .description("Index file"));
1539 options
->addOption(FileNameOption("q")
1540 .legacyType(efSTO
).outputFile()
1541 .store(&outFile_
).storeIsSet(&bOutputSet_
)
1542 .defaultBasename("clean").defaultType(efPDB
)
1543 .description("Structure file"));
1546 void pdb2gmx::optionsFinished()
1548 if (inputConfFile_
.empty())
1550 GMX_THROW(InconsistentInputError("You must supply an input file"));
1554 /* if anything changes here, also change description of -inter */
1569 else if (bDeuterate_
)
1578 /* Force field selection, interactive or direct */
1579 choose_ff(strcmp(ff_
.c_str(), "select") == 0 ? nullptr : ff_
.c_str(),
1580 forcefield_
, sizeof(forcefield_
),
1581 ffdir_
, sizeof(ffdir_
));
1583 if (strlen(forcefield_
) > 0)
1585 ffname_
= forcefield_
;
1586 ffname_
[0] = std::toupper(ffname_
[0]);
1590 gmx_fatal(FARGS
, "Empty forcefield string");
1596 char select
[STRLEN
];
1597 std::vector
<DisulfideBond
> ssbonds
;
1601 const char *prev_atomname
;
1602 const char *this_atomname
;
1603 const char *prev_resname
;
1604 const char *this_resname
;
1609 int prev_chainnumber
;
1610 int this_chainnumber
;
1611 int this_chainstart
;
1612 int prev_chainstart
;
1614 printf("\nUsing the %s force field in directory %s\n\n",
1617 choose_watermodel(WaterEnum
[enumWater_
], ffdir_
, &watermodel_
);
1619 switch (enumVSites_
)
1623 bVsiteAromatics_
= false;
1625 case enVSites_hydrogens
:
1627 bVsiteAromatics_
= false;
1629 case enVSites_aromatics
:
1631 bVsiteAromatics_
= true;
1634 gmx_fatal(FARGS
, "Internal inconsistency: VSitesEnum='%s'", VSitesEnum
[enumVSites_
]);
1637 /* Open the symbol table */
1639 open_symtab(&symtab
);
1641 /* Residue type database */
1644 /* Read residue renaming database(s), if present */
1645 std::vector
<std::string
> rrn
= fflib_search_file_end(ffdir_
, ".r2b", FALSE
);
1647 std::vector
<RtpRename
> rtprename
;
1648 for (const auto &filename
: rrn
)
1650 printf("going to rename %s\n", filename
.c_str());
1651 FILE *fp
= fflib_open(filename
);
1652 read_rtprename(filename
.c_str(), fp
, &rtprename
);
1656 /* Add all alternative names from the residue renaming database to the list
1657 of recognized amino/nucleic acids. */
1658 for (const auto &rename
: rtprename
)
1660 /* Only add names if the 'standard' gromacs/iupac base name was found */
1661 if (auto restype
= rt
.optionalTypeOfNamedDatabaseResidue(rename
.gmx
))
1663 rt
.addResidue(rename
.main
, *restype
);
1664 rt
.addResidue(rename
.nter
, *restype
);
1665 rt
.addResidue(rename
.cter
, *restype
);
1666 rt
.addResidue(rename
.bter
, *restype
);
1673 if (watermodel_
!= nullptr && (strstr(watermodel_
, "4p") ||
1674 strstr(watermodel_
, "4P")))
1678 else if (watermodel_
!= nullptr && (strstr(watermodel_
, "5p") ||
1679 strstr(watermodel_
, "5P")))
1689 char *title
= nullptr;
1693 int natom
= read_pdball(inputConfFile_
.c_str(), bOutputSet_
, outFile_
.c_str(),
1694 &title
, &pdba_all
, &pdbx
, &ePBC
, box
, bRemoveH_
,
1695 &symtab
, &rt
, watres
, &aps
, bVerbose_
);
1699 std::string message
= formatString("No atoms found in pdb file %s\n", inputConfFile_
.c_str());
1700 GMX_THROW(InconsistentInputError(message
));
1703 printf("Analyzing pdb file\n");
1704 int nwaterchain
= 0;
1706 modify_chain_numbers(&pdba_all
, enumChainSep_
);
1708 int nchainmerges
= 0;
1710 this_atomname
= nullptr;
1712 this_resname
= nullptr;
1715 this_chainnumber
= -1;
1716 this_chainstart
= 0;
1717 /* Keep the compiler happy */
1718 prev_chainstart
= 0;
1721 std::vector
<t_pdbchain
> pdb_ch
;
1724 bool bMerged
= false;
1725 for (int i
= 0; (i
< natom
); i
++)
1727 ri
= &pdba_all
.resinfo
[pdba_all
.atom
[i
].resind
];
1729 /* TODO this should live in a helper object, and consolidate
1730 that with code in modify_chain_numbers */
1731 prev_atomname
= this_atomname
;
1732 prev_atomnum
= this_atomnum
;
1733 prev_resname
= this_resname
;
1734 prev_resnum
= this_resnum
;
1735 prev_chainid
= this_chainid
;
1736 prev_chainnumber
= this_chainnumber
;
1739 prev_chainstart
= this_chainstart
;
1742 this_atomname
= *pdba_all
.atomname
[i
];
1743 this_atomnum
= (pdba_all
.pdbinfo
!= nullptr) ? pdba_all
.pdbinfo
[i
].atomnr
: i
+1;
1744 this_resname
= *ri
->name
;
1745 this_resnum
= ri
->nr
;
1746 this_chainid
= ri
->chainid
;
1747 this_chainnumber
= ri
->chainnum
;
1749 bWat_
= gmx::equalCaseInsensitive(*ri
->name
, watres
);
1751 if ((i
== 0) || (this_chainnumber
!= prev_chainnumber
) || (bWat_
!= bPrevWat_
))
1753 GMX_RELEASE_ASSERT(pdba_all
.pdbinfo
, "Must have pdbinfo from reading a PDB file if chain number is changing");
1754 this_chainstart
= pdba_all
.atom
[i
].resind
;
1756 if (i
> 0 && !bWat_
)
1758 if (!strncmp(MergeEnum
[enumMerge_
], "int", 3))
1760 printf("Merge chain ending with residue %s%d (chain id '%c', atom %d %s) and chain starting with\n"
1761 "residue %s%d (chain id '%c', atom %d %s) into a single moleculetype (keeping termini)? [n/y]\n",
1762 prev_resname
, prev_resnum
, prev_chainid
, prev_atomnum
, prev_atomname
,
1763 this_resname
, this_resnum
, this_chainid
, this_atomnum
, this_atomname
);
1765 if (nullptr == fgets(select
, STRLEN
-1, stdin
))
1767 gmx_fatal(FARGS
, "Error reading from stdin");
1769 bMerged
= (select
[0] == 'y');
1771 else if (!strncmp(MergeEnum
[enumMerge_
], "all", 3))
1779 pdb_ch
[numChains
-1].chainstart
[pdb_ch
[numChains
-1].nterpairs
] =
1780 pdba_all
.atom
[i
].resind
- prev_chainstart
;
1781 pdb_ch
[numChains
-1].nterpairs
++;
1782 pdb_ch
[numChains
-1].chainstart
.resize(pdb_ch
[numChains
-1].nterpairs
+1);
1787 /* set natom for previous chain */
1790 pdb_ch
[numChains
-1].natom
= i
-pdb_ch
[numChains
-1].start
;
1797 /* check if chain identifier was used before */
1798 for (int j
= 0; (j
< numChains
); j
++)
1800 if (pdb_ch
[j
].chainid
!= ' ' && pdb_ch
[j
].chainid
== ri
->chainid
)
1802 printf("WARNING: Chain identifier '%c' is used in two non-sequential blocks.\n"
1803 "They will be treated as separate chains unless you reorder your file.\n",
1807 t_pdbchain newChain
;
1808 newChain
.chainid
= ri
->chainid
;
1809 newChain
.chainnum
= ri
->chainnum
;
1811 newChain
.bAllWat
= bWat_
;
1814 newChain
.nterpairs
= 0;
1818 newChain
.nterpairs
= 1;
1820 newChain
.chainstart
.resize(newChain
.nterpairs
+1);
1821 /* modified [numChains] to [0] below */
1822 newChain
.chainstart
[0] = 0;
1823 pdb_ch
.push_back(newChain
);
1829 pdb_ch
.back().natom
= natom
-pdb_ch
.back().start
;
1831 /* set all the water blocks at the end of the chain */
1832 std::vector
<int> swap_index(numChains
);
1834 for (int i
= 0; i
< numChains
; i
++)
1836 if (!pdb_ch
[i
].bAllWat
)
1842 for (int i
= 0; i
< numChains
; i
++)
1844 if (pdb_ch
[i
].bAllWat
)
1850 if (nwaterchain
> 1)
1852 printf("Moved all the water blocks to the end\n");
1856 std::vector
<t_chain
> chains(numChains
);
1857 /* copy pdb data and x for all chains */
1858 for (int i
= 0; (i
< numChains
); i
++)
1860 int si
= swap_index
[i
];
1861 chains
[i
].chainid
= pdb_ch
[si
].chainid
;
1862 chains
[i
].chainnum
= pdb_ch
[si
].chainnum
;
1863 chains
[i
].bAllWat
= pdb_ch
[si
].bAllWat
;
1864 chains
[i
].nterpairs
= pdb_ch
[si
].nterpairs
;
1865 chains
[i
].chainstart
= pdb_ch
[si
].chainstart
;
1866 chains
[i
].ntdb
.clear();
1867 chains
[i
].ctdb
.clear();
1868 chains
[i
].r_start
.resize(pdb_ch
[si
].nterpairs
);
1869 chains
[i
].r_end
.resize(pdb_ch
[si
].nterpairs
);
1871 snew(chains
[i
].pdba
, 1);
1872 init_t_atoms(chains
[i
].pdba
, pdb_ch
[si
].natom
, true);
1873 for (j
= 0; j
< chains
[i
].pdba
->nr
; j
++)
1875 chains
[i
].pdba
->atom
[j
] = pdba_all
.atom
[pdb_ch
[si
].start
+j
];
1876 chains
[i
].pdba
->atomname
[j
] =
1877 put_symtab(&symtab
, *pdba_all
.atomname
[pdb_ch
[si
].start
+j
]);
1878 chains
[i
].pdba
->pdbinfo
[j
] = pdba_all
.pdbinfo
[pdb_ch
[si
].start
+j
];
1879 chains
[i
].x
.emplace_back(pdbx
[pdb_ch
[si
].start
+j
]);
1881 /* Re-index the residues assuming that the indices are continuous */
1882 int k
= chains
[i
].pdba
->atom
[0].resind
;
1883 int nres
= chains
[i
].pdba
->atom
[chains
[i
].pdba
->nr
-1].resind
- k
+ 1;
1884 chains
[i
].pdba
->nres
= nres
;
1885 for (int j
= 0; j
< chains
[i
].pdba
->nr
; j
++)
1887 chains
[i
].pdba
->atom
[j
].resind
-= k
;
1889 srenew(chains
[i
].pdba
->resinfo
, nres
);
1890 for (int j
= 0; j
< nres
; j
++)
1892 chains
[i
].pdba
->resinfo
[j
] = pdba_all
.resinfo
[k
+j
];
1893 chains
[i
].pdba
->resinfo
[j
].name
= put_symtab(&symtab
, *pdba_all
.resinfo
[k
+j
].name
);
1894 /* make all chain identifiers equal to that of the chain */
1895 chains
[i
].pdba
->resinfo
[j
].chainid
= pdb_ch
[si
].chainid
;
1899 if (nchainmerges
> 0)
1901 printf("\nMerged chains into joint molecule definitions at %d places.\n\n",
1905 printf("There are %d chains and %d blocks of water and "
1906 "%d residues with %d atoms\n",
1907 numChains
-nwaterchain
, nwaterchain
,
1908 pdba_all
.nres
, natom
);
1910 printf("\n %5s %4s %6s\n", "chain", "#res", "#atoms");
1911 for (int i
= 0; (i
< numChains
); i
++)
1913 printf(" %d '%c' %5d %6d %s\n",
1914 i
+1, chains
[i
].chainid
? chains
[i
].chainid
: '-',
1915 chains
[i
].pdba
->nres
, chains
[i
].pdba
->nr
,
1916 chains
[i
].bAllWat
? "(only water)" : "");
1920 check_occupancy(&pdba_all
, inputConfFile_
.c_str(), bVerbose_
);
1922 /* Read atomtypes... */
1923 PreprocessingAtomTypes atype
= read_atype(ffdir_
, &symtab
);
1925 /* read residue database */
1926 printf("Reading residue database... (%s)\n", forcefield_
);
1927 std::vector
<std::string
> rtpf
= fflib_search_file_end(ffdir_
, ".rtp", true);
1928 std::vector
<PreprocessResidue
> rtpFFDB
;
1929 for (const auto &filename
: rtpf
)
1931 readResidueDatabase(filename
, &rtpFFDB
, &atype
, &symtab
, false);
1935 /* Not correct with multiple rtp input files with different bonded types */
1936 FILE *fp
= gmx_fio_fopen("new.rtp", "w");
1937 print_resall(fp
, rtpFFDB
, atype
);
1941 /* read hydrogen database */
1942 std::vector
<MoleculePatchDatabase
> ah
;
1943 read_h_db(ffdir_
, &ah
);
1945 /* Read Termini database... */
1946 std::vector
<MoleculePatchDatabase
> ntdb
;
1947 std::vector
<MoleculePatchDatabase
> ctdb
;
1948 std::vector
<MoleculePatchDatabase
*> tdblist
;
1949 int nNtdb
= read_ter_db(ffdir_
, 'n', &ntdb
, &atype
);
1950 int nCtdb
= read_ter_db(ffdir_
, 'c', &ctdb
, &atype
);
1952 FILE *top_file
= gmx_fio_fopen(topologyFile_
.c_str(), "w");
1954 print_top_header(top_file
, topologyFile_
.c_str(), FALSE
, ffdir_
, mHmult_
);
1957 std::vector
<gmx::RVec
> x
;
1958 /* new pdb datastructure for sorting. */
1959 t_atoms
**sortAtoms
= nullptr;
1960 t_atoms
**localAtoms
= nullptr;
1961 snew(sortAtoms
, numChains
);
1962 snew(localAtoms
, numChains
);
1963 for (int chain
= 0; (chain
< numChains
); chain
++)
1965 cc
= &(chains
[chain
]);
1967 /* set pdba, natom and nres to the current chain */
1970 natom
= cc
->pdba
->nr
;
1971 int nres
= cc
->pdba
->nres
;
1973 if (cc
->chainid
&& ( cc
->chainid
!= ' ' ) )
1975 printf("Processing chain %d '%c' (%d atoms, %d residues)\n",
1976 chain
+1, cc
->chainid
, natom
, nres
);
1980 printf("Processing chain %d (%d atoms, %d residues)\n",
1981 chain
+1, natom
, nres
);
1984 process_chain(pdba
, x
, bUnA_
, bUnA_
, bUnA_
, bLysMan_
, bAspMan_
, bGluMan_
,
1985 bHisMan_
, bArgMan_
, bGlnMan_
, angle_
, distance_
, &symtab
,
1988 cc
->chainstart
[cc
->nterpairs
] = pdba
->nres
;
1990 for (int i
= 0; i
< cc
->nterpairs
; i
++)
1992 find_nc_ter(pdba
, cc
->chainstart
[i
], cc
->chainstart
[i
+1],
1993 &(cc
->r_start
[j
]), &(cc
->r_end
[j
]), &rt
);
1995 if (cc
->r_start
[j
] >= 0 && cc
->r_end
[j
] >= 0)
2001 if (cc
->nterpairs
== 0)
2003 printf("Problem with chain definition, or missing terminal residues.\n"
2004 "This chain does not appear to contain a recognized chain molecule.\n"
2005 "If this is incorrect, you can edit residuetypes.dat to modify the behavior.\n");
2008 /* Check for disulfides and other special bonds */
2009 ssbonds
= makeDisulfideBonds(pdba
, gmx::as_rvec_array(x
.data()), bCysMan_
, bVerbose_
);
2011 if (!rtprename
.empty())
2013 rename_resrtp(pdba
, cc
->nterpairs
, cc
->r_start
, cc
->r_end
, rtprename
,
2014 &symtab
, bVerbose_
);
2017 for (int i
= 0; i
< cc
->nterpairs
; i
++)
2020 * We first apply a filter so we only have the
2021 * termini that can be applied to the residue in question
2022 * (or a generic terminus if no-residue specific is available).
2024 /* First the N terminus */
2027 tdblist
= filter_ter(ntdb
,
2028 *pdba
->resinfo
[cc
->r_start
[i
]].name
);
2029 if (tdblist
.empty())
2031 printf("No suitable end (N or 5') terminus found in database - assuming this residue\n"
2032 "is already in a terminus-specific form and skipping terminus selection.\n");
2033 cc
->ntdb
.push_back(nullptr);
2037 if (bTerMan_
&& !tdblist
.empty())
2039 sprintf(select
, "Select start terminus type for %s-%d",
2040 *pdba
->resinfo
[cc
->r_start
[i
]].name
,
2041 pdba
->resinfo
[cc
->r_start
[i
]].nr
);
2042 cc
->ntdb
.push_back(choose_ter(tdblist
, select
));
2046 cc
->ntdb
.push_back(tdblist
[0]);
2049 printf("Start terminus %s-%d: %s\n",
2050 *pdba
->resinfo
[cc
->r_start
[i
]].name
,
2051 pdba
->resinfo
[cc
->r_start
[i
]].nr
,
2052 (cc
->ntdb
[i
])->name
.c_str());
2058 cc
->ntdb
.push_back(nullptr);
2061 /* And the C terminus */
2064 tdblist
= filter_ter(ctdb
,
2065 *pdba
->resinfo
[cc
->r_end
[i
]].name
);
2066 if (tdblist
.empty())
2068 printf("No suitable end (C or 3') terminus found in database - assuming this residue\n"
2069 "is already in a terminus-specific form and skipping terminus selection.\n");
2070 cc
->ctdb
.push_back(nullptr);
2074 if (bTerMan_
&& !tdblist
.empty())
2076 sprintf(select
, "Select end terminus type for %s-%d",
2077 *pdba
->resinfo
[cc
->r_end
[i
]].name
,
2078 pdba
->resinfo
[cc
->r_end
[i
]].nr
);
2079 cc
->ctdb
.push_back(choose_ter(tdblist
, select
));
2083 cc
->ctdb
.push_back(tdblist
[0]);
2085 printf("End terminus %s-%d: %s\n",
2086 *pdba
->resinfo
[cc
->r_end
[i
]].name
,
2087 pdba
->resinfo
[cc
->r_end
[i
]].nr
,
2088 (cc
->ctdb
[i
])->name
.c_str());
2094 cc
->ctdb
.push_back(nullptr);
2097 std::vector
<MoleculePatchDatabase
> hb_chain
;
2098 /* lookup hackblocks and rtp for all residues */
2099 std::vector
<PreprocessResidue
> restp_chain
;
2100 get_hackblocks_rtp(&hb_chain
, &restp_chain
,
2101 rtpFFDB
, pdba
->nres
, pdba
->resinfo
,
2102 cc
->nterpairs
, &symtab
, cc
->ntdb
, cc
->ctdb
, cc
->r_start
, cc
->r_end
,
2104 /* ideally, now we would not need the rtp itself anymore, but do
2105 everything using the hb and restp arrays. Unfortunately, that
2106 requires some re-thinking of code in gen_vsite.c, which I won't
2107 do now :( AF 26-7-99 */
2109 rename_atoms(nullptr, ffdir_
,
2110 pdba
, &symtab
, restp_chain
, false, &rt
, false, bVerbose_
);
2112 match_atomnames_with_rtp(restp_chain
, hb_chain
, pdba
, &symtab
, x
, bVerbose_
);
2117 t_blocka
*block
= new_blocka();
2119 sort_pdbatoms(restp_chain
, natom
, &pdba
, &sortAtoms
[chain
], &x
, block
, &gnames
);
2120 remove_duplicate_atoms(pdba
, x
, bVerbose_
);
2125 fprintf(stderr
, "WARNING: with the -remh option the generated "
2126 "index file (%s) might be useless\n"
2127 "(the index file is generated before hydrogens are added)",
2128 indexOutputFile_
.c_str());
2130 write_index(indexOutputFile_
.c_str(), block
, gnames
, false, 0);
2132 for (int i
= 0; i
< block
->nr
; i
++)
2142 fprintf(stderr
, "WARNING: "
2143 "without sorting no check for duplicate atoms can be done\n");
2146 /* Generate Hydrogen atoms (and termini) in the sequence */
2147 printf("Generating any missing hydrogen atoms and/or adding termini.\n");
2148 add_h(&pdba
, &localAtoms
[chain
], &x
, ah
, &symtab
,
2149 cc
->nterpairs
, cc
->ntdb
, cc
->ctdb
, cc
->r_start
, cc
->r_end
, bAllowMissing_
);
2150 printf("Now there are %d residues with %d atoms\n",
2151 pdba
->nres
, pdba
->nr
);
2153 /* make up molecule name(s) */
2155 int k
= (cc
->nterpairs
> 0 && cc
->r_start
[0] >= 0) ? cc
->r_start
[0] : 0;
2157 std::string restype
= rt
.typeOfNamedDatabaseResidue(*pdba
->resinfo
[k
].name
);
2159 std::string molname
;
2167 this_chainid
= cc
->chainid
;
2169 /* Add the chain id if we have one */
2170 if (this_chainid
!= ' ')
2172 suffix
.append(formatString("_chain_%c", this_chainid
));
2175 /* Check if there have been previous chains with the same id */
2177 for (int k
= 0; k
< chain
; k
++)
2179 if (cc
->chainid
== chains
[k
].chainid
)
2184 /* Add the number for this chain identifier if there are multiple copies */
2187 suffix
.append(formatString("%d", nid_used
+1));
2190 if (suffix
.length() > 0)
2192 molname
.append(restype
);
2193 molname
.append(suffix
);
2200 std::string itp_fn
= topologyFile_
;;
2201 std::string posre_fn
= includeTopologyFile_
;
2202 if ((numChains
-nwaterchain
> 1) && !cc
->bAllWat
)
2205 printf("Chain time...\n");
2206 //construct the itp file name
2207 itp_fn
= stripSuffixIfPresent(itp_fn
, ".top");
2209 itp_fn
.append(molname
);
2210 itp_fn
.append(".itp");
2211 //now do the same for posre
2212 posre_fn
= stripSuffixIfPresent(posre_fn
, ".itp");
2213 posre_fn
.append("_");
2214 posre_fn
.append(molname
);
2215 posre_fn
.append(".itp");
2216 if (posre_fn
== itp_fn
)
2218 posre_fn
= Path::concatenateBeforeExtension(posre_fn
, "_pr");
2220 incls_
.emplace_back();
2221 incls_
.back() = itp_fn
;
2222 itp_file_
= gmx_fio_fopen(itp_fn
.c_str(), "w");
2229 mols_
.emplace_back();
2232 mols_
.back().name
= "SOL";
2233 mols_
.back().nr
= pdba
->nres
;
2237 mols_
.back().name
= molname
;
2238 mols_
.back().nr
= 1;
2243 print_top_comment(itp_file_
, itp_fn
.c_str(), ffdir_
, true);
2249 top_file2
= nullptr;
2253 top_file2
= itp_file_
;
2257 top_file2
= top_file
;
2260 pdb2top(top_file2
, posre_fn
.c_str(), molname
.c_str(), pdba
, &x
, &atype
, &symtab
,
2262 restp_chain
, hb_chain
,
2264 bVsites_
, bVsiteAromatics_
, ffdir_
,
2266 long_bond_dist_
, short_bond_dist_
, bDeuterate_
, bChargeGroups_
, bCmap_
,
2267 bRenumRes_
, bRTPresname_
);
2271 write_posres(posre_fn
.c_str(), pdba
, posre_fc_
);
2276 gmx_fio_fclose(itp_file_
);
2279 /* pdba and natom have been reassigned somewhere so: */
2284 if (watermodel_
== nullptr)
2286 for (int chain
= 0; chain
< numChains
; chain
++)
2288 if (chains
[chain
].bAllWat
)
2290 auto message
= formatString("You have chosen not to include a water model, "
2291 "but there is water in the input file. Select a "
2292 "water model or remove the water from your input file.");
2293 GMX_THROW(InconsistentInputError(message
));
2299 std::string waterFile
= formatString("%s%c%s.itp", ffdir_
, DIR_SEPARATOR
, watermodel_
);
2300 if (!fflib_fexist(waterFile
))
2302 auto message
= formatString("The topology file '%s' for the selected water "
2303 "model '%s' can not be found in the force field "
2304 "directory. Select a different water model.",
2305 waterFile
.c_str(), watermodel_
);
2306 GMX_THROW(InconsistentInputError(message
));
2310 print_top_mols(top_file
, title
, ffdir_
, watermodel_
, incls_
, mols_
);
2311 gmx_fio_fclose(top_file
);
2313 /* now merge all chains back together */
2316 for (int i
= 0; (i
< numChains
); i
++)
2318 natom
+= chains
[i
].pdba
->nr
;
2319 nres
+= chains
[i
].pdba
->nres
;
2323 init_t_atoms(atoms
, natom
, false);
2324 for (int i
= 0; i
< atoms
->nres
; i
++)
2326 sfree(atoms
->resinfo
[i
].name
);
2329 srenew(atoms
->resinfo
, nres
);
2333 for (int i
= 0; (i
< numChains
); i
++)
2337 printf("Including chain %d in system: %d atoms %d residues\n",
2338 i
+1, chains
[i
].pdba
->nr
, chains
[i
].pdba
->nres
);
2340 for (int j
= 0; (j
< chains
[i
].pdba
->nr
); j
++)
2342 atoms
->atom
[k
] = chains
[i
].pdba
->atom
[j
];
2343 atoms
->atom
[k
].resind
+= l
; /* l is processed nr of residues */
2344 atoms
->atomname
[k
] = chains
[i
].pdba
->atomname
[j
];
2345 atoms
->resinfo
[atoms
->atom
[k
].resind
].chainid
= chains
[i
].chainid
;
2346 x
.push_back(chains
[i
].x
[j
]);
2349 for (int j
= 0; (j
< chains
[i
].pdba
->nres
); j
++)
2351 atoms
->resinfo
[l
] = chains
[i
].pdba
->resinfo
[j
];
2354 atoms
->resinfo
[l
].name
= atoms
->resinfo
[l
].rtp
;
2358 done_atom(chains
[i
].pdba
);
2363 fprintf(stderr
, "Now there are %d atoms and %d residues\n", k
, l
);
2364 print_sums(atoms
, true);
2368 fprintf(stderr
, "\nWriting coordinate file...\n");
2369 clear_rvec(box_space
);
2372 make_new_box(atoms
->nr
, gmx::as_rvec_array(x
.data()), box
, box_space
, false);
2374 write_sto_conf(outputConfFile_
.c_str(), title
, atoms
, gmx::as_rvec_array(x
.data()), nullptr, ePBC
, box
);
2376 done_symtab(&symtab
);
2377 done_atom(&pdba_all
);
2379 for (int chain
= 0; chain
< numChains
; chain
++)
2381 sfree(sortAtoms
[chain
]);
2382 sfree(localAtoms
[chain
]);
2389 printf("\t\t--------- PLEASE NOTE ------------\n");
2390 printf("You have successfully generated a topology from: %s.\n",
2391 inputConfFile_
.c_str());
2392 if (watermodel_
!= nullptr)
2394 printf("The %s force field and the %s water model are used.\n",
2395 ffname_
, watermodel_
);
2400 printf("The %s force field is used.\n",
2403 printf("\t\t--------- ETON ESAELP ------------\n");
2410 const char pdb2gmxInfo::name
[] = "pdb2gmx";
2411 const char pdb2gmxInfo::shortDescription
[] =
2412 "Convert coordinate files to topology and FF-compliant coordinate files";
2413 ICommandLineOptionsModulePointer
pdb2gmxInfo::create()
2415 return std::make_unique
<pdb2gmx
>();