Refactor preprocessing atom types.
[gromacs.git] / src / gromacs / gmxpreprocess / resall.cpp
blob37e6024d6f9cdc9f30f7ee98014faad900172e39
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "resall.h"
41 #include <cctype>
42 #include <cstdlib>
43 #include <cstring>
45 #include <algorithm>
46 #include <string>
47 #include <vector>
49 #include "gromacs/gmxpreprocess/fflibutil.h"
50 #include "gromacs/gmxpreprocess/gpp_atomtype.h"
51 #include "gromacs/gmxpreprocess/grompp_impl.h"
52 #include "gromacs/gmxpreprocess/notset.h"
53 #include "gromacs/gmxpreprocess/pgutil.h"
54 #include "gromacs/topology/atoms.h"
55 #include "gromacs/topology/symtab.h"
56 #include "gromacs/utility/cstringutil.h"
57 #include "gromacs/utility/fatalerror.h"
58 #include "gromacs/utility/futil.h"
59 #include "gromacs/utility/smalloc.h"
60 #include "gromacs/utility/strdb.h"
62 #include "hackblock.h"
64 PreprocessingAtomTypes read_atype(const char *ffdir, t_symtab *tab)
66 FILE *in;
67 char buf[STRLEN], name[STRLEN];
68 double m;
69 int nratt = 0;
70 t_atom *a;
71 t_param *nb;
73 std::vector<std::string> files = fflib_search_file_end(ffdir, ".atp", TRUE);
74 snew(a, 1);
75 snew(nb, 1);
76 PreprocessingAtomTypes at;
78 for (const auto &filename : files)
80 in = fflib_open(filename);
81 while (!feof(in))
83 /* Skip blank or comment-only lines */
86 if (fgets2(buf, STRLEN, in) != nullptr)
88 strip_comment(buf);
89 trim(buf);
92 while ((feof(in) == 0) && strlen(buf) == 0);
94 if (sscanf(buf, "%s%lf", name, &m) == 2)
96 a->m = m;
97 at.addType(tab, *a, name, nb, 0, 0);
98 fprintf(stderr, "\rAtomtype %d", ++nratt);
99 fflush(stderr);
101 else
103 fprintf(stderr, "\nInvalid format: %s\n", buf);
106 gmx_ffclose(in);
108 fprintf(stderr, "\n");
109 sfree(a);
110 return at;
113 static void print_resatoms(FILE *out, const PreprocessingAtomTypes &atype, const PreprocessResidue &rtpDBEntry)
115 fprintf(out, "[ %s ]\n", rtpDBEntry.resname.c_str());
116 fprintf(out, " [ atoms ]\n");
118 for (int j = 0; (j < rtpDBEntry.natom()); j++)
120 int tp = rtpDBEntry.atom[j].type;
121 const char *tpnm = atype.atomNameFromAtomType(tp);
122 if (tpnm == nullptr)
124 gmx_fatal(FARGS, "Incorrect atomtype (%d)", tp);
126 fprintf(out, "%6s %6s %8.3f %6d\n",
127 *(rtpDBEntry.atomname[j]), tpnm, rtpDBEntry.atom[j].q, rtpDBEntry.cgnr[j]);
131 static bool read_atoms(FILE *in, char *line,
132 PreprocessResidue *r0, t_symtab *tab, PreprocessingAtomTypes *atype)
134 int cg;
135 char buf[256], buf1[256];
136 double q;
138 /* Read Atoms */
139 r0->atom.clear();
140 r0->atomname.clear();
141 r0->cgnr.clear();
142 while (get_a_line(in, line, STRLEN) && (strchr(line, '[') == nullptr))
144 if (sscanf(line, "%s%s%lf%d", buf, buf1, &q, &cg) != 4)
146 return FALSE;
148 r0->atomname.push_back(put_symtab(tab, buf));
149 r0->atom.emplace_back();
150 r0->atom.back().q = q;
151 r0->cgnr.push_back(cg);
152 int j = atype->atomTypeFromName(buf1);
153 if (j == NOTSET)
155 gmx_fatal(FARGS, "Atom type %s (residue %s) not found in atomtype "
156 "database", buf1, r0->resname.c_str());
158 r0->atom.back().type = j;
159 r0->atom.back().m = atype->atomMassAFromAtomType(j);
162 return TRUE;
165 static bool read_bondeds(int bt, FILE *in, char *line, PreprocessResidue *rtpDBEntry)
167 char str[STRLEN];
169 while (get_a_line(in, line, STRLEN) && (strchr(line, '[') == nullptr))
171 int n = 0;
172 int ni;
173 rtpDBEntry->rb[bt].b.emplace_back();
174 BondedInteraction *newBond = &rtpDBEntry->rb[bt].b.back();
175 for (int j = 0; j < btsNiatoms[bt]; j++)
177 if (sscanf(line+n, "%s%n", str, &ni) == 1)
179 newBond->a[j] = str;
181 else
183 return FALSE;
185 n += ni;
187 while (isspace(line[n]))
189 n++;
191 rtrim(line+n);
192 newBond->s = line+n;
195 return TRUE;
198 static void print_resbondeds(FILE *out, int bt, const PreprocessResidue &rtpDBEntry)
200 if (!rtpDBEntry.rb[bt].b.empty())
202 fprintf(out, " [ %s ]\n", btsNames[bt]);
204 for (const auto &b : rtpDBEntry.rb[bt].b)
206 for (int j = 0; j < btsNiatoms[bt]; j++)
208 fprintf(out, "%6s ", b.a[j].c_str());
210 if (!b.s.empty())
212 fprintf(out, " %s", b.s.c_str());
214 fprintf(out, "\n");
219 static void check_rtp(gmx::ArrayRef<const PreprocessResidue> rtpDBEntry, const std::string &libfn)
221 /* check for double entries, assuming list is already sorted */
222 for (auto it = rtpDBEntry.begin() + 1; it != rtpDBEntry.end(); it++)
224 auto prev = it-1;
225 if (gmx::equalCaseInsensitive(prev->resname, it->resname))
227 fprintf(stderr, "WARNING double entry %s in file %s\n",
228 it->resname.c_str(), libfn.c_str());
233 static int get_bt(char* header)
235 int i;
237 for (i = 0; i < ebtsNR; i++)
239 if (gmx_strcasecmp(btsNames[i], header) == 0)
241 return i;
244 return NOTSET;
247 /* print all the ebtsNR type numbers */
248 static void print_resall_header(FILE *out, gmx::ArrayRef<const PreprocessResidue> rtpDBEntry)
250 fprintf(out, "[ bondedtypes ]\n");
251 fprintf(out, "; bonds angles dihedrals impropers all_dihedrals nr_exclusions HH14 remove_dih\n");
252 fprintf(out, " %5d %6d %9d %9d %14d %14d %14d %14d\n\n",
253 rtpDBEntry[0].rb[0].type,
254 rtpDBEntry[0].rb[1].type,
255 rtpDBEntry[0].rb[2].type,
256 rtpDBEntry[0].rb[3].type,
257 static_cast<int>(rtpDBEntry[0].bKeepAllGeneratedDihedrals),
258 rtpDBEntry[0].nrexcl,
259 static_cast<int>(rtpDBEntry[0].bGenerateHH14Interactions),
260 static_cast<int>(rtpDBEntry[0].bRemoveDihedralIfWithImproper));
263 void print_resall(FILE *out, gmx::ArrayRef<const PreprocessResidue> rtpDBEntry,
264 const PreprocessingAtomTypes &atype)
266 if (rtpDBEntry.empty())
268 return;
271 print_resall_header(out, rtpDBEntry);
273 for (const auto &r : rtpDBEntry)
275 if (r.natom() > 0)
277 print_resatoms(out, atype, r);
278 for (int bt = 0; bt < ebtsNR; bt++)
280 print_resbondeds(out, bt, r);
286 void readResidueDatabase(const std::string &rrdb, std::vector<PreprocessResidue> *rtpDBEntry,
287 PreprocessingAtomTypes *atype, t_symtab *tab,
288 bool bAllowOverrideRTP)
290 FILE *in;
291 char filebase[STRLEN], line[STRLEN], header[STRLEN];
292 int bt, nparam;
293 int dum1, dum2, dum3;
294 bool bNextResidue, bError;
296 fflib_filename_base(rrdb.c_str(), filebase, STRLEN);
298 in = fflib_open(rrdb);
300 PreprocessResidue header_settings;
302 /* these bonded parameters will overwritten be when *
303 * there is a [ bondedtypes ] entry in the .rtp file */
304 header_settings.rb[ebtsBONDS].type = 1; /* normal bonds */
305 header_settings.rb[ebtsANGLES].type = 1; /* normal angles */
306 header_settings.rb[ebtsPDIHS].type = 1; /* normal dihedrals */
307 header_settings.rb[ebtsIDIHS].type = 2; /* normal impropers */
308 header_settings.rb[ebtsEXCLS].type = 1; /* normal exclusions */
309 header_settings.rb[ebtsCMAP].type = 1; /* normal cmap torsions */
311 header_settings.bKeepAllGeneratedDihedrals = FALSE;
312 header_settings.nrexcl = 3;
313 header_settings.bGenerateHH14Interactions = TRUE;
314 header_settings.bRemoveDihedralIfWithImproper = TRUE;
316 /* Column 5 & 6 aren't really bonded types, but we include
317 * them here to avoid introducing a new section:
318 * Column 5 : This controls the generation of dihedrals from the bonding.
319 * All possible dihedrals are generated automatically. A value of
320 * 1 here means that all these are retained. A value of
321 * 0 here requires generated dihedrals be removed if
322 * * there are any dihedrals on the same central atoms
323 * specified in the residue topology, or
324 * * there are other identical generated dihedrals
325 * sharing the same central atoms, or
326 * * there are other generated dihedrals sharing the
327 * same central bond that have fewer hydrogen atoms
328 * Column 6: Number of bonded neighbors to exclude.
329 * Column 7: Generate 1,4 interactions between two hydrogen atoms
330 * Column 8: Remove proper dihedrals if centered on the same bond
331 * as an improper dihedral
333 get_a_line(in, line, STRLEN);
334 if (!get_header(line, header))
336 gmx_fatal(FARGS, "in .rtp file at line:\n%s\n", line);
338 if (gmx_strncasecmp("bondedtypes", header, 5) == 0)
340 get_a_line(in, line, STRLEN);
341 if ((nparam = sscanf(line, "%d %d %d %d %d %d %d %d",
342 &header_settings.rb[ebtsBONDS].type, &header_settings.rb[ebtsANGLES].type,
343 &header_settings.rb[ebtsPDIHS].type, &header_settings.rb[ebtsIDIHS].type,
344 &dum1, &header_settings.nrexcl, &dum2, &dum3)) < 4)
346 gmx_fatal(FARGS, "need 4 to 8 parameters in the header of .rtp file %s at line:\n%s\n", rrdb.c_str(), line);
348 header_settings.bKeepAllGeneratedDihedrals = (dum1 != 0);
349 header_settings.bGenerateHH14Interactions = (dum2 != 0);
350 header_settings.bRemoveDihedralIfWithImproper = (dum3 != 0);
351 get_a_line(in, line, STRLEN);
352 if (nparam < 5)
354 fprintf(stderr, "Using default: not generating all possible dihedrals\n");
355 header_settings.bKeepAllGeneratedDihedrals = FALSE;
357 if (nparam < 6)
359 fprintf(stderr, "Using default: excluding 3 bonded neighbors\n");
360 header_settings.nrexcl = 3;
362 if (nparam < 7)
364 fprintf(stderr, "Using default: generating 1,4 H--H interactions\n");
365 header_settings.bGenerateHH14Interactions = TRUE;
367 if (nparam < 8)
369 fprintf(stderr, "Using default: removing proper dihedrals found on the same bond as a proper dihedral\n");
370 header_settings.bRemoveDihedralIfWithImproper = TRUE;
373 else
375 fprintf(stderr,
376 "Reading .rtp file without '[ bondedtypes ]' directive,\n"
377 "Will proceed as if the entry was:\n");
378 print_resall_header(stderr, gmx::arrayRefFromArray(&header_settings, 1));
380 /* We don't know the current size of rrtp, but simply realloc immediately */
381 auto oldArrayEnd = rtpDBEntry->end() - 1;
382 while (!feof(in))
384 /* Initialise rtp entry structure */
385 rtpDBEntry->push_back(header_settings);
386 PreprocessResidue *res = &rtpDBEntry->back();
387 if (!get_header(line, header))
389 gmx_fatal(FARGS, "in .rtp file at line:\n%s\n", line);
391 res->resname = header;
392 res->filebase = filebase;
394 get_a_line(in, line, STRLEN);
395 bError = FALSE;
396 bNextResidue = FALSE;
399 if (!get_header(line, header))
401 bError = TRUE;
403 else
405 bt = get_bt(header);
406 if (bt != NOTSET)
408 /* header is an bonded directive */
409 bError = !read_bondeds(bt, in, line, res);
411 else if (gmx_strncasecmp("atoms", header, 5) == 0)
413 /* header is the atoms directive */
414 bError = !read_atoms(in, line, res, tab, atype);
416 else
418 /* else header must be a residue name */
419 bNextResidue = TRUE;
422 if (bError)
424 gmx_fatal(FARGS, "in .rtp file in residue %s at line:\n%s\n",
425 res->resname.c_str(), line);
428 while ((feof(in) == 0) && !bNextResidue);
430 if (res->natom() == 0)
432 gmx_fatal(FARGS, "No atoms found in .rtp file in residue %s\n",
433 res->resname.c_str());
436 auto found = std::find_if(rtpDBEntry->begin(), rtpDBEntry->end()-1,
437 [&res](const PreprocessResidue &entry)
438 { return gmx::equalCaseInsensitive(res->resname, entry.resname); });
440 if (found == rtpDBEntry->end() - 1)
442 int size = rtpDBEntry->size() - 1;
443 fprintf(stderr, "\rResidue %d", size);
444 fflush(stderr);
446 else
448 if (found >= oldArrayEnd)
450 gmx_fatal(FARGS, "Found a second entry for '%s' in '%s'",
451 res->resname.c_str(), rrdb.c_str());
453 if (bAllowOverrideRTP)
455 fprintf(stderr, "\n");
456 fprintf(stderr, "Found another rtp entry for '%s' in '%s', ignoring this entry and keeping the one from '%s.rtp'\n",
457 res->resname.c_str(), rrdb.c_str(), found->filebase.c_str());
458 /* We should free all the data for this entry.
459 * The current code gives a lot of dangling pointers.
461 rtpDBEntry->erase(rtpDBEntry->end() - 1);
463 else
465 gmx_fatal(FARGS, "Found rtp entries for '%s' in both '%s' and '%s'. If you want the first definition to override the second one, set the -rtpo option of pdb2gmx.", res->resname.c_str(), found->filebase.c_str(), rrdb.c_str());
469 gmx_ffclose(in);
471 fprintf(stderr, "\nSorting it all out...\n");
472 std::sort(rtpDBEntry->begin(), rtpDBEntry->end(), []
473 (const PreprocessResidue &a,
474 const PreprocessResidue &b)
475 {return std::lexicographical_compare(
476 a.resname.begin(), a.resname.end(),
477 b.resname.begin(), b.resname.end(),
478 [](const char &c1, const char &c2)
479 { return std::toupper(c1) < std::toupper(c2); }); });
481 check_rtp(*rtpDBEntry, rrdb);
484 /************************************************************
486 * SEARCH ROUTINES
488 ***********************************************************/
489 static bool is_sign(char c)
491 return (c == '+' || c == '-');
494 /* Compares if the strins match except for a sign at the end
495 * of (only) one of the two.
497 static int neq_str_sign(const char *a1, const char *a2)
499 int l1, l2, lm;
501 l1 = static_cast<int>(strlen(a1));
502 l2 = static_cast<int>(strlen(a2));
503 lm = std::min(l1, l2);
505 if (lm >= 1 &&
506 ((l1 == l2+1 && is_sign(a1[l1-1])) ||
507 (l2 == l1+1 && is_sign(a2[l2-1]))) &&
508 gmx_strncasecmp(a1, a2, lm) == 0)
510 return lm;
512 else
514 return 0;
518 std::string searchResidueDatabase(const std::string &key, gmx::ArrayRef<const PreprocessResidue> rtpDBEntry)
520 int nbest, best, besti;
521 std::string bestbuf;
523 nbest = 0;
524 besti = -1;
525 /* We want to match at least one character */
526 best = 1;
527 for (auto it = rtpDBEntry.begin(); it != rtpDBEntry.end(); it++)
529 if (gmx::equalCaseInsensitive(key, it->resname))
531 besti = std::distance(rtpDBEntry.begin(), it);
532 nbest = 1;
533 break;
535 else
537 /* Allow a mismatch of at most a sign character (with warning) */
538 int n = neq_str_sign(key.c_str(), it->resname.c_str());
539 if (n >= best &&
540 n+1 >= gmx::index(key.length()) &&
541 n+1 >= gmx::index(it->resname.length()))
543 if (n == best)
545 if (nbest == 1)
547 bestbuf = rtpDBEntry[besti].resname;
549 if (nbest >= 1)
551 bestbuf.append(" or ");
552 bestbuf.append(it->resname);
555 else
557 nbest = 0;
559 besti = std::distance(rtpDBEntry.begin(), it);
560 best = n;
561 nbest++;
565 if (nbest > 1)
567 gmx_fatal(FARGS, "Residue '%s' not found in residue topology database, looks a bit like %s", key.c_str(), bestbuf.c_str());
569 else if (besti == -1)
571 gmx_fatal(FARGS, "Residue '%s' not found in residue topology database", key.c_str());
573 if (!gmx::equalCaseInsensitive(rtpDBEntry[besti].resname, key))
575 fprintf(stderr,
576 "\nWARNING: '%s' not found in residue topology database, "
577 "trying to use '%s'\n\n", key.c_str(), rtpDBEntry[besti].resname.c_str());
580 return rtpDBEntry[besti].resname;
583 gmx::ArrayRef<const PreprocessResidue>::const_iterator
584 getDatabaseEntry(const std::string &rtpname, gmx::ArrayRef<const PreprocessResidue> rtpDBEntry)
586 auto found = std::find_if(rtpDBEntry.begin(), rtpDBEntry.end(),
587 [&rtpname](const PreprocessResidue &entry)
588 { return gmx::equalCaseInsensitive(rtpname, entry.resname); });
589 if (found == rtpDBEntry.end())
591 /* This should never happen, since searchResidueDatabase should have been called
592 * before calling getDatabaseEntry.
594 gmx_fatal(FARGS, "Residue type '%s' not found in residue topology database", rtpname.c_str());
597 return found;