Refactor preprocessing atom types.
[gromacs.git] / src / gromacs / gmxpreprocess / insert_molecules.cpp
blob8f7df5bbbce7ffc533f485cd36be65b0f8e33dde
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include "insert_molecules.h"
41 #include <algorithm>
42 #include <memory>
43 #include <set>
44 #include <string>
45 #include <vector>
47 #include "gromacs/commandline/cmdlineoptionsmodule.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/filetypes.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxlib/conformation_utilities.h"
52 #include "gromacs/gmxpreprocess/makeexclusiondistances.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/options/basicoptions.h"
57 #include "gromacs/options/filenameoption.h"
58 #include "gromacs/options/ioptionscontainer.h"
59 #include "gromacs/pbcutil/pbc.h"
60 #include "gromacs/random/threefry.h"
61 #include "gromacs/random/uniformrealdistribution.h"
62 #include "gromacs/selection/nbsearch.h"
63 #include "gromacs/selection/selection.h"
64 #include "gromacs/selection/selectioncollection.h"
65 #include "gromacs/selection/selectionoption.h"
66 #include "gromacs/selection/selectionoptionbehavior.h"
67 #include "gromacs/topology/atomprop.h"
68 #include "gromacs/topology/atoms.h"
69 #include "gromacs/topology/atomsbuilder.h"
70 #include "gromacs/topology/mtop_util.h"
71 #include "gromacs/topology/symtab.h"
72 #include "gromacs/topology/topology.h"
73 #include "gromacs/trajectory/trajectoryframe.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
79 using gmx::RVec;
81 /* enum for random rotations of inserted solutes */
82 enum RotationType {
83 en_rotXYZ, en_rotZ, en_rotNone
85 const char *const cRotationEnum[] = {"xyz", "z", "none"};
87 static void center_molecule(gmx::ArrayRef<RVec> x)
89 rvec center = {0};
90 for (auto &xi : x)
92 rvec_inc(center, xi);
94 svmul(1.0/x.size(), center, center);
95 for (auto &xi : x)
97 rvec_dec(xi, center);
101 static void generate_trial_conf(gmx::ArrayRef<RVec> xin,
102 const rvec offset, RotationType enum_rot,
103 gmx::DefaultRandomEngine * rng,
104 std::vector<RVec> *xout)
106 gmx::UniformRealDistribution<real> dist(0, 2.0*M_PI);
107 xout->assign(xin.begin(), xin.end());
109 real alfa = 0.0, beta = 0.0, gamma = 0.0;
110 switch (enum_rot)
112 case en_rotXYZ:
113 alfa = dist(*rng);
114 beta = dist(*rng);
115 gamma = dist(*rng);
116 break;
117 case en_rotZ:
118 alfa = beta = 0.;
119 gamma = dist(*rng);
120 break;
121 case en_rotNone:
122 alfa = beta = gamma = 0.;
123 break;
125 if (enum_rot == en_rotXYZ || enum_rot == en_rotZ)
127 rotate_conf(xout->size(), as_rvec_array(xout->data()), nullptr, alfa, beta, gamma);
129 for (size_t i = 0; i < xout->size(); ++i)
131 rvec_inc((*xout)[i], offset);
135 static bool isInsertionAllowed(gmx::AnalysisNeighborhoodSearch *search,
136 const std::vector<real> &exclusionDistances,
137 const std::vector<RVec> &x,
138 const std::vector<real> &exclusionDistances_insrt,
139 const t_atoms &atoms,
140 const std::set<int> &removableAtoms,
141 gmx::AtomsRemover *remover)
143 gmx::AnalysisNeighborhoodPositions pos(x);
144 gmx::AnalysisNeighborhoodPairSearch pairSearch = search->startPairSearch(pos);
145 gmx::AnalysisNeighborhoodPair pair;
146 while (pairSearch.findNextPair(&pair))
148 const real r1 = exclusionDistances[pair.refIndex()];
149 const real r2 = exclusionDistances_insrt[pair.testIndex()];
150 if (pair.distance2() < gmx::square(r1 + r2))
152 if (removableAtoms.count(pair.refIndex()) == 0)
154 return false;
156 // TODO: If molecule information is available, this should ideally
157 // use it to remove whole molecules.
158 remover->markResidue(atoms, pair.refIndex(), true);
161 return true;
164 static void insert_mols(int nmol_insrt, int ntry, int seed,
165 real defaultDistance, real scaleFactor,
166 t_atoms *atoms, t_symtab *symtab, std::vector<RVec> *x,
167 const std::set<int> &removableAtoms,
168 const t_atoms &atoms_insrt, gmx::ArrayRef<RVec> x_insrt,
169 int ePBC, matrix box,
170 const std::string &posfn, const rvec deltaR,
171 RotationType enum_rot)
173 fprintf(stderr, "Initialising inter-atomic distances...\n");
174 AtomProperties aps;
175 std::vector<real> exclusionDistances(
176 makeExclusionDistances(atoms, &aps, defaultDistance, scaleFactor));
177 const std::vector<real> exclusionDistances_insrt(
178 makeExclusionDistances(&atoms_insrt, &aps, defaultDistance, scaleFactor));
180 const real maxInsertRadius
181 = *std::max_element(exclusionDistances_insrt.begin(),
182 exclusionDistances_insrt.end());
183 real maxRadius = maxInsertRadius;
184 if (!exclusionDistances.empty())
186 const real maxExistingRadius
187 = *std::max_element(exclusionDistances.begin(),
188 exclusionDistances.end());
189 maxRadius = std::max(maxInsertRadius, maxExistingRadius);
192 // TODO: Make all of this exception-safe.
193 gmx::AnalysisNeighborhood nb;
194 nb.setCutoff(maxInsertRadius + maxRadius);
197 if (seed == 0)
199 seed = static_cast<int>(gmx::makeRandomSeed());
201 fprintf(stderr, "Using random seed %d\n", seed);
203 gmx::DefaultRandomEngine rng(seed);
205 t_pbc pbc;
206 set_pbc(&pbc, ePBC, box);
208 /* With -ip, take nmol_insrt from file posfn */
209 double **rpos = nullptr;
210 const bool insertAtPositions = !posfn.empty();
211 if (insertAtPositions)
213 int ncol;
214 nmol_insrt = read_xvg(posfn.c_str(), &rpos, &ncol);
215 if (ncol != 3)
217 gmx_fatal(FARGS, "Expected 3 columns (x/y/z coordinates) in file %s\n",
218 posfn.c_str());
220 fprintf(stderr, "Read %d positions from file %s\n\n",
221 nmol_insrt, posfn.c_str());
224 gmx::AtomsBuilder builder(atoms, symtab);
225 gmx::AtomsRemover remover(*atoms);
227 const int finalAtomCount = atoms->nr + nmol_insrt * atoms_insrt.nr;
228 const int finalResidueCount = atoms->nres + nmol_insrt * atoms_insrt.nres;
229 builder.reserve(finalAtomCount, finalResidueCount);
230 x->reserve(finalAtomCount);
231 exclusionDistances.reserve(finalAtomCount);
234 std::vector<RVec> x_n(x_insrt.size());
236 int mol = 0;
237 int trial = 0;
238 int firstTrial = 0;
239 int failed = 0;
240 gmx::UniformRealDistribution<real> dist;
242 while (mol < nmol_insrt && trial < ntry*nmol_insrt)
244 rvec offset_x;
245 if (!insertAtPositions)
247 // Insert at random positions.
248 offset_x[XX] = box[XX][XX] * dist(rng);
249 offset_x[YY] = box[YY][YY] * dist(rng);
250 offset_x[ZZ] = box[ZZ][ZZ] * dist(rng);
252 else
254 // Skip a position if ntry trials were not successful.
255 if (trial >= firstTrial + ntry)
257 fprintf(stderr, " skipped position (%.3f, %.3f, %.3f)\n",
258 rpos[XX][mol], rpos[YY][mol], rpos[ZZ][mol]);
259 ++mol;
260 ++failed;
261 firstTrial = trial;
262 continue;
264 // Insert at positions taken from option -ip file.
265 offset_x[XX] = rpos[XX][mol] + deltaR[XX]*(2 * dist(rng)-1);
266 offset_x[YY] = rpos[YY][mol] + deltaR[YY]*(2 * dist(rng)-1);
267 offset_x[ZZ] = rpos[ZZ][mol] + deltaR[ZZ]*(2 * dist(rng)-1);
269 fprintf(stderr, "\rTry %d", ++trial);
270 fflush(stderr);
272 generate_trial_conf(x_insrt, offset_x, enum_rot, &rng, &x_n);
273 gmx::AnalysisNeighborhoodPositions pos(*x);
274 gmx::AnalysisNeighborhoodSearch search = nb.initSearch(&pbc, pos);
275 if (isInsertionAllowed(&search, exclusionDistances, x_n, exclusionDistances_insrt,
276 *atoms, removableAtoms, &remover))
278 x->insert(x->end(), x_n.begin(), x_n.end());
279 exclusionDistances.insert(exclusionDistances.end(),
280 exclusionDistances_insrt.begin(),
281 exclusionDistances_insrt.end());
282 builder.mergeAtoms(atoms_insrt);
283 ++mol;
284 firstTrial = trial;
285 fprintf(stderr, " success (now %d atoms)!\n", builder.currentAtomCount());
289 fprintf(stderr, "\n");
290 /* print number of molecules added */
291 fprintf(stderr, "Added %d molecules (out of %d requested)\n",
292 mol - failed, nmol_insrt);
294 const int originalAtomCount = atoms->nr;
295 const int originalResidueCount = atoms->nres;
296 remover.refreshAtomCount(*atoms);
297 remover.removeMarkedElements(x);
298 remover.removeMarkedAtoms(atoms);
299 if (atoms->nr < originalAtomCount)
301 fprintf(stderr, "Replaced %d residues (%d atoms)\n",
302 originalResidueCount - atoms->nres,
303 originalAtomCount - atoms->nr);
306 if (rpos != nullptr)
308 for (int i = 0; i < DIM; ++i)
310 sfree(rpos[i]);
312 sfree(rpos);
316 namespace gmx
319 namespace
322 class InsertMolecules : public ICommandLineOptionsModule, public ITopologyProvider
324 public:
325 InsertMolecules()
326 : bBox_(false), nmolIns_(0), nmolTry_(10), seed_(0),
327 defaultDistance_(0.105), scaleFactor_(0.57), enumRot_(en_rotXYZ)
329 clear_rvec(newBox_);
330 clear_rvec(deltaR_);
331 clear_mat(box_);
334 // From ITopologyProvider
335 gmx_mtop_t *getTopology(bool /*required*/) override { return &top_; }
336 int getAtomCount() override { return 0; }
338 // From ICommandLineOptionsModule
339 void init(CommandLineModuleSettings * /*settings*/) override
342 void initOptions(IOptionsContainer *options,
343 ICommandLineOptionsModuleSettings *settings) override;
344 void optionsFinished() override;
345 int run() override;
347 private:
348 SelectionCollection selections_;
350 std::string inputConfFile_;
351 std::string insertConfFile_;
352 std::string positionFile_;
353 std::string outputConfFile_;
354 rvec newBox_;
355 bool bBox_;
356 int nmolIns_;
357 int nmolTry_;
358 int seed_;
359 real defaultDistance_;
360 real scaleFactor_;
361 rvec deltaR_;
362 RotationType enumRot_;
363 Selection replaceSel_;
365 gmx_mtop_t top_;
366 std::vector<RVec> x_;
367 matrix box_;
368 int ePBC_;
371 void InsertMolecules::initOptions(IOptionsContainer *options,
372 ICommandLineOptionsModuleSettings *settings)
374 const char *const desc[] = {
375 "[THISMODULE] inserts [TT]-nmol[tt] copies of the system specified in",
376 "the [TT]-ci[tt] input file. The insertions take place either into",
377 "vacant space in the solute conformation given with [TT]-f[tt], or",
378 "into an empty box given by [TT]-box[tt]. Specifying both [TT]-f[tt]",
379 "and [TT]-box[tt] behaves like [TT]-f[tt], but places a new box",
380 "around the solute before insertions. Any velocities present are",
381 "discarded.",
383 "It is possible to also insert into a solvated configuration and",
384 "replace solvent atoms with the inserted atoms. To do this, use",
385 "[TT]-replace[tt] to specify a selection that identifies the atoms",
386 "that can be replaced. The tool assumes that all molecules in this",
387 "selection consist of single residues: each residue from this",
388 "selection that overlaps with the inserted molecules will be removed",
389 "instead of preventing insertion.",
391 "By default, the insertion positions are random (with initial seed",
392 "specified by [TT]-seed[tt]). The program iterates until [TT]-nmol[tt]",
393 "molecules have been inserted in the box. Molecules are not inserted",
394 "where the distance between any existing atom and any atom of the",
395 "inserted molecule is less than the sum based on the van der Waals",
396 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
397 "Waals radii is read by the program, and the resulting radii scaled",
398 "by [TT]-scale[tt]. If radii are not found in the database, those",
399 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].",
400 "Note that the usefulness of those radii depends on the atom names,",
401 "and thus varies widely with force field.",
403 "A total of [TT]-nmol[tt] * [TT]-try[tt] insertion attempts are made",
404 "before giving up. Increase [TT]-try[tt] if you have several small",
405 "holes to fill. Option [TT]-rot[tt] specifies whether the insertion",
406 "molecules are randomly oriented before insertion attempts.",
408 "Alternatively, the molecules can be inserted only at positions defined in",
409 "positions.dat ([TT]-ip[tt]). That file should have 3 columns (x,y,z),",
410 "that give the displacements compared to the input molecule position",
411 "([TT]-ci[tt]). Hence, if that file should contain the absolute",
412 "positions, the molecule must be centered on (0,0,0) before using",
413 "[THISMODULE] (e.g. from [gmx-editconf] [TT]-center[tt]).",
414 "Comments in that file starting with # are ignored. Option [TT]-dr[tt]",
415 "defines the maximally allowed displacements during insertial trials.",
416 "[TT]-try[tt] and [TT]-rot[tt] work as in the default mode (see above)."
419 settings->setHelpText(desc);
421 std::shared_ptr<SelectionOptionBehavior> selectionOptionBehavior(
422 new SelectionOptionBehavior(&selections_, this));
423 settings->addOptionsBehavior(selectionOptionBehavior);
425 // TODO: Replace use of legacyType.
426 options->addOption(FileNameOption("f")
427 .legacyType(efSTX).inputFile()
428 .store(&inputConfFile_)
429 .defaultBasename("protein")
430 .description("Existing configuration to insert into"));
431 options->addOption(FileNameOption("ci")
432 .legacyType(efSTX).inputFile().required()
433 .store(&insertConfFile_)
434 .defaultBasename("insert")
435 .description("Configuration to insert"));
436 options->addOption(FileNameOption("ip")
437 .filetype(eftGenericData).inputFile()
438 .store(&positionFile_)
439 .defaultBasename("positions")
440 .description("Predefined insertion trial positions"));
441 options->addOption(FileNameOption("o")
442 .legacyType(efSTO).outputFile().required()
443 .store(&outputConfFile_)
444 .defaultBasename("out")
445 .description("Output configuration after insertion"));
447 options->addOption(SelectionOption("replace").onlyAtoms()
448 .store(&replaceSel_)
449 .description("Atoms that can be removed if overlapping"));
450 selectionOptionBehavior->initOptions(options);
452 options->addOption(RealOption("box").vector()
453 .store(newBox_).storeIsSet(&bBox_)
454 .description("Box size (in nm)"));
455 options->addOption(IntegerOption("nmol")
456 .store(&nmolIns_)
457 .description("Number of extra molecules to insert"));
458 options->addOption(IntegerOption("try")
459 .store(&nmolTry_)
460 .description("Try inserting [TT]-nmol[tt] times [TT]-try[tt] times"));
461 options->addOption(IntegerOption("seed")
462 .store(&seed_)
463 .description("Random generator seed (0 means generate)"));
464 options->addOption(RealOption("radius")
465 .store(&defaultDistance_)
466 .description("Default van der Waals distance"));
467 options->addOption(RealOption("scale")
468 .store(&scaleFactor_)
469 .description("Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water."));
470 options->addOption(RealOption("dr").vector()
471 .store(deltaR_)
472 .description("Allowed displacement in x/y/z from positions in [TT]-ip[tt] file"));
473 options->addOption(EnumOption<RotationType>("rot").enumValue(cRotationEnum)
474 .store(&enumRot_)
475 .description("Rotate inserted molecules randomly"));
478 void InsertMolecules::optionsFinished()
480 if (nmolIns_ <= 0 && positionFile_.empty())
482 GMX_THROW(InconsistentInputError("Either -nmol must be larger than 0, "
483 "or positions must be given with -ip."));
485 if (inputConfFile_.empty() && !bBox_)
487 GMX_THROW(InconsistentInputError("When no solute (-f) is specified, "
488 "a box size (-box) must be specified."));
490 if (replaceSel_.isValid() && inputConfFile_.empty())
492 GMX_THROW(InconsistentInputError("Replacement (-replace) only makes sense "
493 "together with an existing configuration (-f)."));
496 if (!inputConfFile_.empty())
498 bool bTprFileWasRead;
499 rvec *temporaryX = nullptr;
500 fprintf(stderr, "Reading solute configuration\n");
501 readConfAndTopology(inputConfFile_.c_str(), &bTprFileWasRead, &top_,
502 &ePBC_, &temporaryX, nullptr, box_);
503 x_.assign(temporaryX, temporaryX + top_.natoms);
504 sfree(temporaryX);
505 if (top_.natoms == 0)
507 fprintf(stderr, "Note: no atoms in %s\n", inputConfFile_.c_str());
512 int InsertMolecules::run()
514 std::set<int> removableAtoms;
515 if (replaceSel_.isValid())
517 t_pbc pbc;
518 set_pbc(&pbc, ePBC_, box_);
519 t_trxframe *fr;
520 snew(fr, 1);
521 fr->natoms = x_.size();
522 fr->bX = TRUE;
523 fr->x = as_rvec_array(x_.data());
524 selections_.evaluate(fr, &pbc);
525 sfree(fr);
526 removableAtoms.insert(replaceSel_.atomIndices().begin(),
527 replaceSel_.atomIndices().end());
528 // TODO: It could be nice to check that removableAtoms contains full
529 // residues, since we anyways remove whole residues instead of
530 // individual atoms.
533 int ePBCForOutput = ePBC_;
534 if (bBox_)
536 ePBCForOutput = epbcXYZ;
537 clear_mat(box_);
538 box_[XX][XX] = newBox_[XX];
539 box_[YY][YY] = newBox_[YY];
540 box_[ZZ][ZZ] = newBox_[ZZ];
542 if (det(box_) == 0)
544 gmx_fatal(FARGS, "Undefined solute box.\nCreate one with gmx editconf "
545 "or give explicit -box command line option");
548 gmx_mtop_t topInserted;
549 t_atoms atomsInserted;
550 std::vector<RVec> xInserted;
552 bool bTprFileWasRead;
553 int ePBC_dummy;
554 matrix box_dummy;
555 rvec *temporaryX;
556 readConfAndTopology(insertConfFile_.c_str(), &bTprFileWasRead, &topInserted,
557 &ePBC_dummy, &temporaryX, nullptr, box_dummy);
558 xInserted.assign(temporaryX, temporaryX + topInserted.natoms);
559 sfree(temporaryX);
560 atomsInserted = gmx_mtop_global_atoms(&topInserted);
561 if (atomsInserted.nr == 0)
563 gmx_fatal(FARGS, "No molecule in %s, please check your input",
564 insertConfFile_.c_str());
566 if (top_.name == nullptr)
568 top_.name = topInserted.name;
570 if (positionFile_.empty())
572 center_molecule(xInserted);
576 t_atoms atoms = gmx_mtop_global_atoms(&top_);
578 /* add nmol_ins molecules of atoms_ins
579 in random orientation at random place */
580 insert_mols(nmolIns_, nmolTry_, seed_, defaultDistance_, scaleFactor_,
581 &atoms, &top_.symtab, &x_, removableAtoms, atomsInserted, xInserted,
582 ePBCForOutput, box_, positionFile_, deltaR_, enumRot_);
584 /* write new configuration to file confout */
585 fprintf(stderr, "Writing generated configuration to %s\n",
586 outputConfFile_.c_str());
587 write_sto_conf(outputConfFile_.c_str(), *top_.name, &atoms,
588 as_rvec_array(x_.data()), nullptr, ePBCForOutput, box_);
590 /* print size of generated configuration */
591 fprintf(stderr, "\nOutput configuration contains %d atoms in %d residues\n",
592 atoms.nr, atoms.nres);
594 done_atom(&atoms);
595 done_atom(&atomsInserted);
597 return 0;
600 } // namespace
603 const char* InsertMoleculesInfo::name()
605 static const char* name = "insert-molecules";
606 return name;
609 const char* InsertMoleculesInfo::shortDescription()
611 static const char* shortDescription =
612 "Insert molecules into existing vacancies";
613 return shortDescription;
616 ICommandLineOptionsModulePointer InsertMoleculesInfo::create()
618 return ICommandLineOptionsModulePointer(new InsertMolecules());
621 } // namespace gmx