/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
#include "gromacs/domdec/dlbtiming.h"
#include "gromacs/domdec/domdec.h"
#include "gromacs/domdec/domdec_struct.h"
#include "gromacs/ewald/ewald.h"
#include "gromacs/ewald/long_range_correction.h"
#include "gromacs/ewald/pme.h"
#include "gromacs/gmxlib/network.h"
#include "gromacs/gmxlib/nrnb.h"
#include "gromacs/math/vec.h"
#include "gromacs/math/vecdump.h"
#include "gromacs/mdlib/forcerec_threading.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/mdtypes/enerdata.h"
#include "gromacs/mdtypes/forceoutput.h"
#include "gromacs/mdtypes/forcerec.h"
#include "gromacs/mdtypes/inputrec.h"
#include "gromacs/mdtypes/interaction_const.h"
#include "gromacs/mdtypes/md_enums.h"
#include "gromacs/mdtypes/mdatom.h"
#include "gromacs/mdtypes/simulation_workload.h"
#include "gromacs/pbcutil/ishift.h"
#include "gromacs/pbcutil/pbc.h"
#include "gromacs/timing/wallcycle.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/smalloc.h"
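
/*! \brief Zero the accumulation fields of a per-thread Ewald correction buffer */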
static void clearEwaldThreadOutput(ewald_corr_thread_t* ewc_t)
{
    ewc_t->Vcorr_q        = 0;
    ewc_t->Vcorr_lj       = 0;
    ewc_t->dvdl[efptCOUL] = 0;
    ewc_t->dvdl[efptVDW]  = 0;
    clear_mat(ewc_t->vir_q);
    clear_mat(ewc_t->vir_lj);
}
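
/*! \brief Reduce the Ewald correction output of \p nthreads threads into element 0 of \p ewc_t */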
static void reduceEwaldThreadOutput(int nthreads, ewald_corr_thread_t* ewc_t)
{
    ewald_corr_thread_t& dest = ewc_t[0];

    for (int t = 1; t < nthreads; t++)
    {
        dest.Vcorr_q += ewc_t[t].Vcorr_q;
        dest.Vcorr_lj += ewc_t[t].Vcorr_lj;
        dest.dvdl[efptCOUL] += ewc_t[t].dvdl[efptCOUL];
        dest.dvdl[efptVDW] += ewc_t[t].dvdl[efptVDW];
        m_add(dest.vir_q, ewc_t[t].vir_q, dest.vir_q);
        m_add(dest.vir_lj, ewc_t[t].vir_lj, dest.vir_lj);
    }
}
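
/*! \brief Compute the long-range (reciprocal-space) Ewald/PME Coulomb and LJ-PME
 * contributions on the CPU, including the Ewald surface and net-charge corrections,
 * and accumulate the resulting energies, virials and dV/dlambda into \p enerd and
 * \p forceWithVirial.
 */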
void calculateLongRangeNonbondeds(t_forcerec*                   fr,
                                  const t_inputrec*             ir,
                                  const t_commrec*              cr,
                                  t_nrnb*                       nrnb,
                                  gmx_wallcycle_t               wcycle,
                                  const t_mdatoms*              md,
                                  gmx::ArrayRef<const RVec>     coordinates,
                                  gmx::ForceWithVirial*         forceWithVirial,
                                  gmx_enerdata_t*               enerd,
                                  const matrix                  box,
                                  const real*                   lambda,
                                  const rvec*                   mu_tot,
                                  const gmx::StepWorkload&      stepWork,
                                  const DDBalanceRegionHandler& ddBalanceRegionHandler)
{
    const bool computePmeOnCpu = (EEL_PME(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
                                 && thisRankHasDuty(cr, DUTY_PME)
                                 && (pme_run_mode(fr->pmedata) == PmeRunMode::CPU);
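
    /* An Ewald surface contribution is present with slab (3DC) Ewald geometry or a
     * non-zero epsilon_surface (dipole correction); haveEwaldSurfaceContribution()
     * encapsulates that check.
     */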
    const bool haveEwaldSurfaceTerm = haveEwaldSurfaceContribution(*ir);

    /* Do long-range electrostatics and/or LJ-PME
     * and compute PME surface terms when necessary.
     */
    if ((computePmeOnCpu || fr->ic->eeltype == eelEWALD || haveEwaldSurfaceTerm)
        && stepWork.computeNonbondedForces)
    {
        real Vlr_q = 0, Vlr_lj = 0;

        /* We reduce all virial, dV/dlambda and energy contributions, except
         * for the reciprocal energies (Vlr_q, Vlr_lj) into the same struct.
         */
        ewald_corr_thread_t& ewaldOutput = fr->ewc_t[0];
        clearEwaldThreadOutput(&ewaldOutput);

        if (EEL_PME_EWALD(fr->ic->eeltype) || EVDW_PME(fr->ic->vdwtype))
        {
            /* Calculate the Ewald surface force and energy contributions, when necessary */
            if (haveEwaldSurfaceTerm)
            {
                wallcycle_sub_start(wcycle, ewcsEWALD_CORRECTION);

                int nthreads = fr->nthread_ewc;
#pragma omp parallel for num_threads(nthreads) schedule(static)
                for (int t = 0; t < nthreads; t++)
                {
                    try
                    {
                        ewald_corr_thread_t& ewc_t = fr->ewc_t[t];
                        if (t > 0)
                        {
                            clearEwaldThreadOutput(&ewc_t);
                        }

                        /* Threading is only supported with the Verlet cut-off
                         * scheme and then only single particle forces (no
                         * exclusion forces) are calculated, so we can store
                         * the forces in the normal, single forceWithVirial->force_ array.
                         */
                        const rvec* x = as_rvec_array(coordinates.data());
                        ewald_LRcorrection(md->homenr, cr, nthreads, t, *fr, *ir, md->chargeA,
                                           md->chargeB, (md->nChargePerturbed != 0), x, box, mu_tot,
                                           as_rvec_array(forceWithVirial->force_.data()),
                                           &ewc_t.Vcorr_q, lambda[efptCOUL], &ewc_t.dvdl[efptCOUL]);
                    }
                    GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
                }
                if (nthreads > 1)
                {
                    reduceEwaldThreadOutput(nthreads, fr->ewc_t);
                }

                wallcycle_sub_stop(wcycle, ewcsEWALD_CORRECTION);
            }
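
            /* Systems with a net charge get an energy correction for the implicit
             * neutralizing background; this also contributes to the virial and dV/dlambda.
             */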
            if (EEL_PME_EWALD(fr->ic->eeltype) && fr->n_tpi == 0)
            {
                /* This is not in a subcounter because it takes a
                   negligible and constant-sized amount of time */
                ewaldOutput.Vcorr_q += ewald_charge_correction(
                        cr, fr, lambda[efptCOUL], box, &ewaldOutput.dvdl[efptCOUL], ewaldOutput.vir_q);
            }

            if (computePmeOnCpu)
            {
                /* Do reciprocal PME for Coulomb and/or LJ. */
                assert(fr->n_tpi >= 0);
                if (fr->n_tpi == 0 || stepWork.stateChanged)
                {
                    /* With domain decomposition we close the CPU side load
                     * balancing region here, because PME does global
                     * communication that acts as a global barrier.
                     */
                    ddBalanceRegionHandler.closeAfterForceComputationCpu();

                    wallcycle_start(wcycle, ewcPMEMESH);
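                    /* The CPU PME mesh calculation returns the reciprocal-space Coulomb
                     * and LJ-PME energies in Vlr_q and Vlr_lj, with the matching virial
                     * and dV/dlambda contributions going into ewaldOutput.
                     */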
                    const int status = gmx_pme_do(
                            fr->pmedata,
                            gmx::constArrayRefFromArray(coordinates.data(), md->homenr - fr->n_tpi),
                            forceWithVirial->force_, md->chargeA, md->chargeB, md->sqrt_c6A,
                            md->sqrt_c6B, md->sigmaA, md->sigmaB, box, cr,
                            DOMAINDECOMP(cr) ? dd_pme_maxshift_x(cr->dd) : 0,
                            DOMAINDECOMP(cr) ? dd_pme_maxshift_y(cr->dd) : 0, nrnb, wcycle,
                            ewaldOutput.vir_q, ewaldOutput.vir_lj, &Vlr_q, &Vlr_lj,
                            lambda[efptCOUL], lambda[efptVDW], &ewaldOutput.dvdl[efptCOUL],
                            &ewaldOutput.dvdl[efptVDW], stepWork);
                    wallcycle_stop(wcycle, ewcPMEMESH);
                    if (status != 0)
                    {
                        gmx_fatal(FARGS, "Error %d in reciprocal PME routine", status);
                    }

                    /* We should try to do as little computation after
                     * this as possible, because parallel PME synchronizes
                     * the nodes, so we want all load imbalance of the
                     * rest of the force calculation to be before the PME
                     * call. DD load balancing is done on the whole time
                     * of the force call (without PME).
                     */
                }

                if (fr->n_tpi > 0)
                {
                    /* Determine the PME grid energy of the test molecule
                     * with the PME grid potential of the other charges.
                     */
                    gmx_pme_calc_energy(
                            fr->pmedata, coordinates.subArray(md->homenr - fr->n_tpi, fr->n_tpi),
                            gmx::arrayRefFromArray(md->chargeA + md->homenr - fr->n_tpi, fr->n_tpi),
                            &Vlr_q);
                }
            }
        }

        if (fr->ic->eeltype == eelEWALD)
        {
            const rvec* x = as_rvec_array(coordinates.data());
            Vlr_q = do_ewald(ir, x, as_rvec_array(forceWithVirial->force_.data()), md->chargeA,
                             md->chargeB, box, cr, md->homenr, ewaldOutput.vir_q, fr->ic->ewaldcoeff_q,
                             lambda[efptCOUL], &ewaldOutput.dvdl[efptCOUL], fr->ewald_table);
        }

        /* Note that with separate PME nodes we get the real energies later */
        // TODO it would be simpler if we just accumulated a single
        // long-range virial contribution.
        forceWithVirial->addVirialContribution(ewaldOutput.vir_q);
        forceWithVirial->addVirialContribution(ewaldOutput.vir_lj);
        enerd->dvdl_lin[efptCOUL] += ewaldOutput.dvdl[efptCOUL];
        enerd->dvdl_lin[efptVDW] += ewaldOutput.dvdl[efptVDW];
        enerd->term[F_COUL_RECIP] = Vlr_q + ewaldOutput.Vcorr_q;
        enerd->term[F_LJ_RECIP]   = Vlr_lj + ewaldOutput.Vcorr_lj;
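
        /* Optional debug output of the reciprocal-space energies, corrections and virials */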
        if (debug)
        {
            fprintf(debug, "Vlr_q = %g, Vcorr_q = %g, Vlr_corr_q = %g\n", Vlr_q,
                    ewaldOutput.Vcorr_q, enerd->term[F_COUL_RECIP]);
            pr_rvecs(debug, 0, "vir_el_recip after corr", ewaldOutput.vir_q, DIM);
            fprintf(debug, "Vlr_lj: %g, Vcorr_lj = %g, Vlr_corr_lj = %g\n", Vlr_lj,
                    ewaldOutput.Vcorr_lj, enerd->term[F_LJ_RECIP]);
            pr_rvecs(debug, 0, "vir_lj_recip after corr", ewaldOutput.vir_lj, DIM);
        }
    }

    if (debug)
    {
        print_nrnb(debug, nrnb);
    }
}