2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017,2018,2019, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
39 #include "insert_molecules.h"
47 #include "gromacs/commandline/cmdlineoptionsmodule.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/filetypes.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxlib/conformation_utilities.h"
52 #include "gromacs/gmxpreprocess/makeexclusiondistances.h"
53 #include "gromacs/math/functions.h"
54 #include "gromacs/math/utilities.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/options/basicoptions.h"
57 #include "gromacs/options/filenameoption.h"
58 #include "gromacs/options/ioptionscontainer.h"
59 #include "gromacs/pbcutil/pbc.h"
60 #include "gromacs/random/threefry.h"
61 #include "gromacs/random/uniformrealdistribution.h"
62 #include "gromacs/selection/nbsearch.h"
63 #include "gromacs/selection/selection.h"
64 #include "gromacs/selection/selectioncollection.h"
65 #include "gromacs/selection/selectionoption.h"
66 #include "gromacs/selection/selectionoptionbehavior.h"
67 #include "gromacs/topology/atomprop.h"
68 #include "gromacs/topology/atoms.h"
69 #include "gromacs/topology/atomsbuilder.h"
70 #include "gromacs/topology/mtop_util.h"
71 #include "gromacs/topology/symtab.h"
72 #include "gromacs/topology/topology.h"
73 #include "gromacs/trajectory/trajectoryframe.h"
74 #include "gromacs/utility/cstringutil.h"
75 #include "gromacs/utility/exceptions.h"
76 #include "gromacs/utility/fatalerror.h"
77 #include "gromacs/utility/smalloc.h"
81 /* enum for random rotations of inserted solutes */
83 en_rotXYZ
, en_rotZ
, en_rotNone
85 const char *const cRotationEnum
[] = {"xyz", "z", "none"};
87 static void center_molecule(gmx::ArrayRef
<RVec
> x
)
94 svmul(1.0/x
.size(), center
, center
);
101 static void generate_trial_conf(gmx::ArrayRef
<RVec
> xin
,
102 const rvec offset
, RotationType enum_rot
,
103 gmx::DefaultRandomEngine
* rng
,
104 std::vector
<RVec
> *xout
)
106 gmx::UniformRealDistribution
<real
> dist(0, 2.0*M_PI
);
107 xout
->assign(xin
.begin(), xin
.end());
109 real alfa
= 0.0, beta
= 0.0, gamma
= 0.0;
122 alfa
= beta
= gamma
= 0.;
125 if (enum_rot
== en_rotXYZ
|| enum_rot
== en_rotZ
)
127 rotate_conf(xout
->size(), as_rvec_array(xout
->data()), nullptr, alfa
, beta
, gamma
);
129 for (size_t i
= 0; i
< xout
->size(); ++i
)
131 rvec_inc((*xout
)[i
], offset
);
135 static bool isInsertionAllowed(gmx::AnalysisNeighborhoodSearch
*search
,
136 const std::vector
<real
> &exclusionDistances
,
137 const std::vector
<RVec
> &x
,
138 const std::vector
<real
> &exclusionDistances_insrt
,
139 const t_atoms
&atoms
,
140 const std::set
<int> &removableAtoms
,
141 gmx::AtomsRemover
*remover
)
143 gmx::AnalysisNeighborhoodPositions
pos(x
);
144 gmx::AnalysisNeighborhoodPairSearch pairSearch
= search
->startPairSearch(pos
);
145 gmx::AnalysisNeighborhoodPair pair
;
146 while (pairSearch
.findNextPair(&pair
))
148 const real r1
= exclusionDistances
[pair
.refIndex()];
149 const real r2
= exclusionDistances_insrt
[pair
.testIndex()];
150 if (pair
.distance2() < gmx::square(r1
+ r2
))
152 if (removableAtoms
.count(pair
.refIndex()) == 0)
156 // TODO: If molecule information is available, this should ideally
157 // use it to remove whole molecules.
158 remover
->markResidue(atoms
, pair
.refIndex(), true);
164 static void insert_mols(int nmol_insrt
, int ntry
, int seed
,
165 real defaultDistance
, real scaleFactor
,
166 t_atoms
*atoms
, t_symtab
*symtab
, std::vector
<RVec
> *x
,
167 const std::set
<int> &removableAtoms
,
168 const t_atoms
&atoms_insrt
, gmx::ArrayRef
<RVec
> x_insrt
,
169 int ePBC
, matrix box
,
170 const std::string
&posfn
, const rvec deltaR
,
171 RotationType enum_rot
)
173 fprintf(stderr
, "Initialising inter-atomic distances...\n");
175 std::vector
<real
> exclusionDistances(
176 makeExclusionDistances(atoms
, &aps
, defaultDistance
, scaleFactor
));
177 const std::vector
<real
> exclusionDistances_insrt(
178 makeExclusionDistances(&atoms_insrt
, &aps
, defaultDistance
, scaleFactor
));
180 const real maxInsertRadius
181 = *std::max_element(exclusionDistances_insrt
.begin(),
182 exclusionDistances_insrt
.end());
183 real maxRadius
= maxInsertRadius
;
184 if (!exclusionDistances
.empty())
186 const real maxExistingRadius
187 = *std::max_element(exclusionDistances
.begin(),
188 exclusionDistances
.end());
189 maxRadius
= std::max(maxInsertRadius
, maxExistingRadius
);
192 // TODO: Make all of this exception-safe.
193 gmx::AnalysisNeighborhood nb
;
194 nb
.setCutoff(maxInsertRadius
+ maxRadius
);
199 seed
= static_cast<int>(gmx::makeRandomSeed());
201 fprintf(stderr
, "Using random seed %d\n", seed
);
203 gmx::DefaultRandomEngine
rng(seed
);
206 set_pbc(&pbc
, ePBC
, box
);
208 /* With -ip, take nmol_insrt from file posfn */
209 double **rpos
= nullptr;
210 const bool insertAtPositions
= !posfn
.empty();
211 if (insertAtPositions
)
214 nmol_insrt
= read_xvg(posfn
.c_str(), &rpos
, &ncol
);
217 gmx_fatal(FARGS
, "Expected 3 columns (x/y/z coordinates) in file %s\n",
220 fprintf(stderr
, "Read %d positions from file %s\n\n",
221 nmol_insrt
, posfn
.c_str());
224 gmx::AtomsBuilder
builder(atoms
, symtab
);
225 gmx::AtomsRemover
remover(*atoms
);
227 const int finalAtomCount
= atoms
->nr
+ nmol_insrt
* atoms_insrt
.nr
;
228 const int finalResidueCount
= atoms
->nres
+ nmol_insrt
* atoms_insrt
.nres
;
229 builder
.reserve(finalAtomCount
, finalResidueCount
);
230 x
->reserve(finalAtomCount
);
231 exclusionDistances
.reserve(finalAtomCount
);
234 std::vector
<RVec
> x_n(x_insrt
.size());
240 gmx::UniformRealDistribution
<real
> dist
;
242 while (mol
< nmol_insrt
&& trial
< ntry
*nmol_insrt
)
245 if (!insertAtPositions
)
247 // Insert at random positions.
248 offset_x
[XX
] = box
[XX
][XX
] * dist(rng
);
249 offset_x
[YY
] = box
[YY
][YY
] * dist(rng
);
250 offset_x
[ZZ
] = box
[ZZ
][ZZ
] * dist(rng
);
254 // Skip a position if ntry trials were not successful.
255 if (trial
>= firstTrial
+ ntry
)
257 fprintf(stderr
, " skipped position (%.3f, %.3f, %.3f)\n",
258 rpos
[XX
][mol
], rpos
[YY
][mol
], rpos
[ZZ
][mol
]);
264 // Insert at positions taken from option -ip file.
265 offset_x
[XX
] = rpos
[XX
][mol
] + deltaR
[XX
]*(2 * dist(rng
)-1);
266 offset_x
[YY
] = rpos
[YY
][mol
] + deltaR
[YY
]*(2 * dist(rng
)-1);
267 offset_x
[ZZ
] = rpos
[ZZ
][mol
] + deltaR
[ZZ
]*(2 * dist(rng
)-1);
269 fprintf(stderr
, "\rTry %d", ++trial
);
272 generate_trial_conf(x_insrt
, offset_x
, enum_rot
, &rng
, &x_n
);
273 gmx::AnalysisNeighborhoodPositions
pos(*x
);
274 gmx::AnalysisNeighborhoodSearch search
= nb
.initSearch(&pbc
, pos
);
275 if (isInsertionAllowed(&search
, exclusionDistances
, x_n
, exclusionDistances_insrt
,
276 *atoms
, removableAtoms
, &remover
))
278 x
->insert(x
->end(), x_n
.begin(), x_n
.end());
279 exclusionDistances
.insert(exclusionDistances
.end(),
280 exclusionDistances_insrt
.begin(),
281 exclusionDistances_insrt
.end());
282 builder
.mergeAtoms(atoms_insrt
);
285 fprintf(stderr
, " success (now %d atoms)!\n", builder
.currentAtomCount());
289 fprintf(stderr
, "\n");
290 /* print number of molecules added */
291 fprintf(stderr
, "Added %d molecules (out of %d requested)\n",
292 mol
- failed
, nmol_insrt
);
294 const int originalAtomCount
= atoms
->nr
;
295 const int originalResidueCount
= atoms
->nres
;
296 remover
.refreshAtomCount(*atoms
);
297 remover
.removeMarkedElements(x
);
298 remover
.removeMarkedAtoms(atoms
);
299 if (atoms
->nr
< originalAtomCount
)
301 fprintf(stderr
, "Replaced %d residues (%d atoms)\n",
302 originalResidueCount
- atoms
->nres
,
303 originalAtomCount
- atoms
->nr
);
308 for (int i
= 0; i
< DIM
; ++i
)
322 class InsertMolecules
: public ICommandLineOptionsModule
, public ITopologyProvider
326 : bBox_(false), nmolIns_(0), nmolTry_(10), seed_(0),
327 defaultDistance_(0.105), scaleFactor_(0.57), enumRot_(en_rotXYZ
)
334 // From ITopologyProvider
335 gmx_mtop_t
*getTopology(bool /*required*/) override
{ return &top_
; }
336 int getAtomCount() override
{ return 0; }
338 // From ICommandLineOptionsModule
339 void init(CommandLineModuleSettings
* /*settings*/) override
342 void initOptions(IOptionsContainer
*options
,
343 ICommandLineOptionsModuleSettings
*settings
) override
;
344 void optionsFinished() override
;
348 SelectionCollection selections_
;
350 std::string inputConfFile_
;
351 std::string insertConfFile_
;
352 std::string positionFile_
;
353 std::string outputConfFile_
;
359 real defaultDistance_
;
362 RotationType enumRot_
;
363 Selection replaceSel_
;
366 std::vector
<RVec
> x_
;
371 void InsertMolecules::initOptions(IOptionsContainer
*options
,
372 ICommandLineOptionsModuleSettings
*settings
)
374 const char *const desc
[] = {
375 "[THISMODULE] inserts [TT]-nmol[tt] copies of the system specified in",
376 "the [TT]-ci[tt] input file. The insertions take place either into",
377 "vacant space in the solute conformation given with [TT]-f[tt], or",
378 "into an empty box given by [TT]-box[tt]. Specifying both [TT]-f[tt]",
379 "and [TT]-box[tt] behaves like [TT]-f[tt], but places a new box",
380 "around the solute before insertions. Any velocities present are",
383 "It is possible to also insert into a solvated configuration and",
384 "replace solvent atoms with the inserted atoms. To do this, use",
385 "[TT]-replace[tt] to specify a selection that identifies the atoms",
386 "that can be replaced. The tool assumes that all molecules in this",
387 "selection consist of single residues: each residue from this",
388 "selection that overlaps with the inserted molecules will be removed",
389 "instead of preventing insertion.",
391 "By default, the insertion positions are random (with initial seed",
392 "specified by [TT]-seed[tt]). The program iterates until [TT]-nmol[tt]",
393 "molecules have been inserted in the box. Molecules are not inserted",
394 "where the distance between any existing atom and any atom of the",
395 "inserted molecule is less than the sum based on the van der Waals",
396 "radii of both atoms. A database ([TT]vdwradii.dat[tt]) of van der",
397 "Waals radii is read by the program, and the resulting radii scaled",
398 "by [TT]-scale[tt]. If radii are not found in the database, those",
399 "atoms are assigned the (pre-scaled) distance [TT]-radius[tt].",
400 "Note that the usefulness of those radii depends on the atom names,",
401 "and thus varies widely with force field.",
403 "A total of [TT]-nmol[tt] * [TT]-try[tt] insertion attempts are made",
404 "before giving up. Increase [TT]-try[tt] if you have several small",
405 "holes to fill. Option [TT]-rot[tt] specifies whether the insertion",
406 "molecules are randomly oriented before insertion attempts.",
408 "Alternatively, the molecules can be inserted only at positions defined in",
409 "positions.dat ([TT]-ip[tt]). That file should have 3 columns (x,y,z),",
410 "that give the displacements compared to the input molecule position",
411 "([TT]-ci[tt]). Hence, if that file should contain the absolute",
412 "positions, the molecule must be centered on (0,0,0) before using",
413 "[THISMODULE] (e.g. from [gmx-editconf] [TT]-center[tt]).",
414 "Comments in that file starting with # are ignored. Option [TT]-dr[tt]",
415 "defines the maximally allowed displacements during insertial trials.",
416 "[TT]-try[tt] and [TT]-rot[tt] work as in the default mode (see above)."
419 settings
->setHelpText(desc
);
421 std::shared_ptr
<SelectionOptionBehavior
> selectionOptionBehavior(
422 new SelectionOptionBehavior(&selections_
, this));
423 settings
->addOptionsBehavior(selectionOptionBehavior
);
425 // TODO: Replace use of legacyType.
426 options
->addOption(FileNameOption("f")
427 .legacyType(efSTX
).inputFile()
428 .store(&inputConfFile_
)
429 .defaultBasename("protein")
430 .description("Existing configuration to insert into"));
431 options
->addOption(FileNameOption("ci")
432 .legacyType(efSTX
).inputFile().required()
433 .store(&insertConfFile_
)
434 .defaultBasename("insert")
435 .description("Configuration to insert"));
436 options
->addOption(FileNameOption("ip")
437 .filetype(eftGenericData
).inputFile()
438 .store(&positionFile_
)
439 .defaultBasename("positions")
440 .description("Predefined insertion trial positions"));
441 options
->addOption(FileNameOption("o")
442 .legacyType(efSTO
).outputFile().required()
443 .store(&outputConfFile_
)
444 .defaultBasename("out")
445 .description("Output configuration after insertion"));
447 options
->addOption(SelectionOption("replace").onlyAtoms()
449 .description("Atoms that can be removed if overlapping"));
450 selectionOptionBehavior
->initOptions(options
);
452 options
->addOption(RealOption("box").vector()
453 .store(newBox_
).storeIsSet(&bBox_
)
454 .description("Box size (in nm)"));
455 options
->addOption(IntegerOption("nmol")
457 .description("Number of extra molecules to insert"));
458 options
->addOption(IntegerOption("try")
460 .description("Try inserting [TT]-nmol[tt] times [TT]-try[tt] times"));
461 options
->addOption(IntegerOption("seed")
463 .description("Random generator seed (0 means generate)"));
464 options
->addOption(RealOption("radius")
465 .store(&defaultDistance_
)
466 .description("Default van der Waals distance"));
467 options
->addOption(RealOption("scale")
468 .store(&scaleFactor_
)
469 .description("Scale factor to multiply Van der Waals radii from the database in share/gromacs/top/vdwradii.dat. The default value of 0.57 yields density close to 1000 g/l for proteins in water."));
470 options
->addOption(RealOption("dr").vector()
472 .description("Allowed displacement in x/y/z from positions in [TT]-ip[tt] file"));
473 options
->addOption(EnumOption
<RotationType
>("rot").enumValue(cRotationEnum
)
475 .description("Rotate inserted molecules randomly"));
478 void InsertMolecules::optionsFinished()
480 if (nmolIns_
<= 0 && positionFile_
.empty())
482 GMX_THROW(InconsistentInputError("Either -nmol must be larger than 0, "
483 "or positions must be given with -ip."));
485 if (inputConfFile_
.empty() && !bBox_
)
487 GMX_THROW(InconsistentInputError("When no solute (-f) is specified, "
488 "a box size (-box) must be specified."));
490 if (replaceSel_
.isValid() && inputConfFile_
.empty())
492 GMX_THROW(InconsistentInputError("Replacement (-replace) only makes sense "
493 "together with an existing configuration (-f)."));
496 if (!inputConfFile_
.empty())
498 bool bTprFileWasRead
;
499 rvec
*temporaryX
= nullptr;
500 fprintf(stderr
, "Reading solute configuration\n");
501 readConfAndTopology(inputConfFile_
.c_str(), &bTprFileWasRead
, &top_
,
502 &ePBC_
, &temporaryX
, nullptr, box_
);
503 x_
.assign(temporaryX
, temporaryX
+ top_
.natoms
);
505 if (top_
.natoms
== 0)
507 fprintf(stderr
, "Note: no atoms in %s\n", inputConfFile_
.c_str());
512 int InsertMolecules::run()
514 std::set
<int> removableAtoms
;
515 if (replaceSel_
.isValid())
518 set_pbc(&pbc
, ePBC_
, box_
);
521 fr
->natoms
= x_
.size();
523 fr
->x
= as_rvec_array(x_
.data());
524 selections_
.evaluate(fr
, &pbc
);
526 removableAtoms
.insert(replaceSel_
.atomIndices().begin(),
527 replaceSel_
.atomIndices().end());
528 // TODO: It could be nice to check that removableAtoms contains full
529 // residues, since we anyways remove whole residues instead of
533 int ePBCForOutput
= ePBC_
;
536 ePBCForOutput
= epbcXYZ
;
538 box_
[XX
][XX
] = newBox_
[XX
];
539 box_
[YY
][YY
] = newBox_
[YY
];
540 box_
[ZZ
][ZZ
] = newBox_
[ZZ
];
544 gmx_fatal(FARGS
, "Undefined solute box.\nCreate one with gmx editconf "
545 "or give explicit -box command line option");
548 gmx_mtop_t topInserted
;
549 t_atoms atomsInserted
;
550 std::vector
<RVec
> xInserted
;
552 bool bTprFileWasRead
;
556 readConfAndTopology(insertConfFile_
.c_str(), &bTprFileWasRead
, &topInserted
,
557 &ePBC_dummy
, &temporaryX
, nullptr, box_dummy
);
558 xInserted
.assign(temporaryX
, temporaryX
+ topInserted
.natoms
);
560 atomsInserted
= gmx_mtop_global_atoms(&topInserted
);
561 if (atomsInserted
.nr
== 0)
563 gmx_fatal(FARGS
, "No molecule in %s, please check your input",
564 insertConfFile_
.c_str());
566 if (top_
.name
== nullptr)
568 top_
.name
= topInserted
.name
;
570 if (positionFile_
.empty())
572 center_molecule(xInserted
);
576 t_atoms atoms
= gmx_mtop_global_atoms(&top_
);
578 /* add nmol_ins molecules of atoms_ins
579 in random orientation at random place */
580 insert_mols(nmolIns_
, nmolTry_
, seed_
, defaultDistance_
, scaleFactor_
,
581 &atoms
, &top_
.symtab
, &x_
, removableAtoms
, atomsInserted
, xInserted
,
582 ePBCForOutput
, box_
, positionFile_
, deltaR_
, enumRot_
);
584 /* write new configuration to file confout */
585 fprintf(stderr
, "Writing generated configuration to %s\n",
586 outputConfFile_
.c_str());
587 write_sto_conf(outputConfFile_
.c_str(), *top_
.name
, &atoms
,
588 as_rvec_array(x_
.data()), nullptr, ePBCForOutput
, box_
);
590 /* print size of generated configuration */
591 fprintf(stderr
, "\nOutput configuration contains %d atoms in %d residues\n",
592 atoms
.nr
, atoms
.nres
);
595 done_atom(&atomsInserted
);
603 const char* InsertMoleculesInfo::name()
605 static const char* name
= "insert-molecules";
609 const char* InsertMoleculesInfo::shortDescription()
611 static const char* shortDescription
=
612 "Insert molecules into existing vacancies";
613 return shortDescription
;
616 ICommandLineOptionsModulePointer
InsertMoleculesInfo::create()
618 return ICommandLineOptionsModulePointer(new InsertMolecules());