From 4fb0842dea2696182c6e281eb99d92827493d791 Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Sat, 3 Oct 2015 19:36:13 +0200 Subject: [PATCH] Added try/catch statements in OpenMP regions and tMPI start OpenMP cannot handle exceptions when they are thrown inside an OpenMP region, but caught outside of it. Instead we catch these exceptions inside the region and exit with a fatal error. Not beautiful, but at least we will know what happened, and exceptions should only occur in catastrophic scenarios in parallel regions (where we already call the fata error handler). For trivial OpenMP blocks that should never through there are comments noting this, so we remember to add a try/catch pair if necessary in the future. There is also a new catch-all in the tMPI entry point function. Refs #1828. Change-Id: I52103a076b7f0a5e8ac1f740160e4de0fa97f638 --- src/gromacs/correlationfunctions/crosscorr.cpp | 13 +- .../correlationfunctions/manyautocorrelation.cpp | 93 ++++--- src/gromacs/domdec/domdec.cpp | 141 +++++----- src/gromacs/domdec/domdec_constraints.cpp | 67 ++--- src/gromacs/domdec/domdec_topology.cpp | 133 ++++----- src/gromacs/ewald/pme-grid.cpp | 2 + src/gromacs/ewald/pme-redistribute.cpp | 11 +- src/gromacs/ewald/pme-solve.cpp | 7 +- src/gromacs/ewald/pme-spread.cpp | 115 ++++---- src/gromacs/ewald/pme.cpp | 265 +++++++++--------- src/gromacs/fft/fft5d.cpp | 21 +- src/gromacs/gmxana/gmx_hbond.cpp | 298 ++++++++++++--------- src/gromacs/gmxana/gmx_wham.cpp | 185 +++++++------ src/gromacs/gmxana/nsfactor.cpp | 21 +- src/gromacs/gmxlib/gmx_thread_affinity.cpp | 47 ++-- src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp | 3 + src/gromacs/listed-forces/listed-forces.cpp | 117 ++++---- src/gromacs/listed-forces/manage-threading.cpp | 33 ++- src/gromacs/mdlib/calcmu.cpp | 2 + src/gromacs/mdlib/clincs.cpp | 133 +++++---- src/gromacs/mdlib/constr.cpp | 81 +++--- src/gromacs/mdlib/coupling.cpp | 1 + src/gromacs/mdlib/force.cpp | 91 ++++--- src/gromacs/mdlib/mdatoms.cpp | 295 ++++++++++---------- src/gromacs/mdlib/minimize.cpp | 30 ++- src/gromacs/mdlib/nbnxn_atomdata.cpp | 291 ++++++++++---------- src/gromacs/mdlib/nbnxn_grid.cpp | 109 ++++---- .../nbnxn_kernel_simd_template.cpp.pre | 2 + src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c | 2 + .../simd_2xnn/nbnxn_kernel_simd_2xnn.cpp | 2 + .../simd_4xn/nbnxn_kernel_simd_4xn.cpp | 2 + src/gromacs/mdlib/nbnxn_search.cpp | 193 +++++++------ src/gromacs/mdlib/sim_util.cpp | 17 +- src/gromacs/mdlib/tgroup.cpp | 29 +- src/gromacs/mdlib/update.cpp | 252 +++++++++-------- src/gromacs/mdlib/vsite.cpp | 65 +++-- src/gromacs/pbcutil/pbc.cpp | 13 +- src/programs/mdrun/runner.cpp | 51 ++-- 38 files changed, 1787 insertions(+), 1446 deletions(-) diff --git a/src/gromacs/correlationfunctions/crosscorr.cpp b/src/gromacs/correlationfunctions/crosscorr.cpp index ec3878ad96..37998b5f75 100644 --- a/src/gromacs/correlationfunctions/crosscorr.cpp +++ b/src/gromacs/correlationfunctions/crosscorr.cpp @@ -45,6 +45,7 @@ #include "crosscorr.h" #include "gromacs/fft/fft.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/smalloc.h" /*! \brief @@ -157,10 +158,14 @@ void many_cross_corr(int nFunc, int * nData, real ** f, real ** g, real ** corr) #pragma omp for for (i = 0; i < nFunc; i++) { - gmx_fft_t fft; - gmx_fft_init_1d(&fft, zeroPaddingSize(nData[i]), GMX_FFT_FLAG_CONSERVATIVE); - cross_corr_low( nData[i], f[i], g[i], corr[i], fft); - gmx_fft_destroy(fft); + try + { + gmx_fft_t fft; + gmx_fft_init_1d(&fft, zeroPaddingSize(nData[i]), GMX_FFT_FLAG_CONSERVATIVE); + cross_corr_low( nData[i], f[i], g[i], corr[i], fft); + gmx_fft_destroy(fft); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } gmx_fft_cleanup(); diff --git a/src/gromacs/correlationfunctions/manyautocorrelation.cpp b/src/gromacs/correlationfunctions/manyautocorrelation.cpp index 33bdc057e3..71869f43ec 100644 --- a/src/gromacs/correlationfunctions/manyautocorrelation.cpp +++ b/src/gromacs/correlationfunctions/manyautocorrelation.cpp @@ -51,6 +51,7 @@ #include #include "gromacs/fft/fft.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" @@ -58,59 +59,63 @@ int many_auto_correl(int nfunc, int ndata, int nfft, real **c) { #pragma omp parallel { - typedef real complex[2]; - int i, j; - gmx_fft_t fft1; - complex *in, *out; - int i0, i1; - int nthreads, thread_id; - - nthreads = gmx_omp_get_max_threads(); - thread_id = gmx_omp_get_thread_num(); - if ((0 == thread_id)) + try { - // fprintf(stderr, "There are %d threads for correlation functions\n", nthreads); - } - i0 = thread_id*nfunc/nthreads; - i1 = std::min(nfunc, (thread_id+1)*nfunc/nthreads); + typedef real complex[2]; + int i, j; + gmx_fft_t fft1; + complex *in, *out; + int i0, i1; + int nthreads, thread_id; - gmx_fft_init_1d(&fft1, nfft, GMX_FFT_FLAG_CONSERVATIVE); - /* Allocate temporary arrays */ - snew(in, nfft); - snew(out, nfft); - for (i = i0; (i < i1); i++) - { - for (j = 0; j < ndata; j++) - { - in[j][0] = c[i][j]; - in[j][1] = 0; - } - for (; (j < nfft); j++) + nthreads = gmx_omp_get_max_threads(); + thread_id = gmx_omp_get_thread_num(); + if ((0 == thread_id)) { - in[j][0] = in[j][1] = 0; + // fprintf(stderr, "There are %d threads for correlation functions\n", nthreads); } + i0 = thread_id*nfunc/nthreads; + i1 = std::min(nfunc, (thread_id+1)*nfunc/nthreads); - gmx_fft_1d(fft1, GMX_FFT_BACKWARD, (void *)in, (void *)out); - for (j = 0; j < nfft; j++) + gmx_fft_init_1d(&fft1, nfft, GMX_FFT_FLAG_CONSERVATIVE); + /* Allocate temporary arrays */ + snew(in, nfft); + snew(out, nfft); + for (i = i0; (i < i1); i++) { - in[j][0] = (out[j][0]*out[j][0] + out[j][1]*out[j][1])/nfft; - in[j][1] = 0; - } - for (; (j < nfft); j++) - { - in[j][0] = in[j][1] = 0; - } + for (j = 0; j < ndata; j++) + { + in[j][0] = c[i][j]; + in[j][1] = 0; + } + for (; (j < nfft); j++) + { + in[j][0] = in[j][1] = 0; + } - gmx_fft_1d(fft1, GMX_FFT_FORWARD, (void *)in, (void *)out); - for (j = 0; (j < nfft); j++) - { - c[i][j] = out[j][0]/ndata; + gmx_fft_1d(fft1, GMX_FFT_BACKWARD, (void *)in, (void *)out); + for (j = 0; j < nfft; j++) + { + in[j][0] = (out[j][0]*out[j][0] + out[j][1]*out[j][1])/nfft; + in[j][1] = 0; + } + for (; (j < nfft); j++) + { + in[j][0] = in[j][1] = 0; + } + + gmx_fft_1d(fft1, GMX_FFT_FORWARD, (void *)in, (void *)out); + for (j = 0; (j < nfft); j++) + { + c[i][j] = out[j][0]/ndata; + } } + /* Free the memory */ + gmx_fft_destroy(fft1); + sfree(in); + sfree(out); } - /* Free the memory */ - gmx_fft_destroy(fft1); - sfree(in); - sfree(out); + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } // gmx_fft_cleanup(); return 0; diff --git a/src/gromacs/domdec/domdec.cpp b/src/gromacs/domdec/domdec.cpp index ef4db07ee0..350cac73da 100644 --- a/src/gromacs/domdec/domdec.cpp +++ b/src/gromacs/domdec/domdec.cpp @@ -99,6 +99,7 @@ #include "gromacs/utility/basedefinitions.h" #include "gromacs/utility/basenetwork.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxmpi.h" #include "gromacs/utility/qsort_threadsafe.h" @@ -4460,13 +4461,17 @@ static void dd_redistribute_cg(FILE *fplog, gmx_int64_t step, #pragma omp parallel for num_threads(nthread) schedule(static) for (thread = 0; thread < nthread; thread++) { - calc_cg_move(fplog, step, dd, state, tric_dir, tcm, - cell_x0, cell_x1, limitd, limit0, limit1, - cgindex, - ( thread *dd->ncg_home)/nthread, - ((thread+1)*dd->ncg_home)/nthread, - fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x, - move); + try + { + calc_cg_move(fplog, step, dd, state, tric_dir, tcm, + cell_x0, cell_x1, limitd, limit0, limit1, + cgindex, + ( thread *dd->ncg_home)/nthread, + ((thread+1)*dd->ncg_home)/nthread, + fr->cutoff_scheme == ecutsGROUP ? cg_cm : state->x, + move); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } for (cg = 0; cg < dd->ncg_home; cg++) @@ -8114,68 +8119,72 @@ static void setup_dd_communication(gmx_domdec_t *dd, #pragma omp parallel for num_threads(comm->nth) schedule(static) for (th = 0; th < comm->nth; th++) { - gmx_domdec_ind_t *ind_p; - int **ibuf_p, *ibuf_nalloc_p; - vec_rvec_t *vbuf_p; - int *nsend_p, *nat_p; - int *nsend_zone_p; - int cg0_th, cg1_th; - - if (th == 0) - { - /* Thread 0 writes in the comm buffers */ - ind_p = ind; - ibuf_p = &comm->buf_int; - ibuf_nalloc_p = &comm->nalloc_int; - vbuf_p = &comm->vbuf; - nsend_p = &nsend; - nat_p = &nat; - nsend_zone_p = &ind->nsend[zone]; - } - else + try { - /* Other threads write into temp buffers */ - ind_p = &comm->dth[th].ind; - ibuf_p = &comm->dth[th].ibuf; - ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc; - vbuf_p = &comm->dth[th].vbuf; - nsend_p = &comm->dth[th].nsend; - nat_p = &comm->dth[th].nat; - nsend_zone_p = &comm->dth[th].nsend_zone; - - comm->dth[th].nsend = 0; - comm->dth[th].nat = 0; - comm->dth[th].nsend_zone = 0; - } + gmx_domdec_ind_t *ind_p; + int **ibuf_p, *ibuf_nalloc_p; + vec_rvec_t *vbuf_p; + int *nsend_p, *nat_p; + int *nsend_zone_p; + int cg0_th, cg1_th; + + if (th == 0) + { + /* Thread 0 writes in the comm buffers */ + ind_p = ind; + ibuf_p = &comm->buf_int; + ibuf_nalloc_p = &comm->nalloc_int; + vbuf_p = &comm->vbuf; + nsend_p = &nsend; + nat_p = &nat; + nsend_zone_p = &ind->nsend[zone]; + } + else + { + /* Other threads write into temp buffers */ + ind_p = &comm->dth[th].ind; + ibuf_p = &comm->dth[th].ibuf; + ibuf_nalloc_p = &comm->dth[th].ibuf_nalloc; + vbuf_p = &comm->dth[th].vbuf; + nsend_p = &comm->dth[th].nsend; + nat_p = &comm->dth[th].nat; + nsend_zone_p = &comm->dth[th].nsend_zone; + + comm->dth[th].nsend = 0; + comm->dth[th].nat = 0; + comm->dth[th].nsend_zone = 0; + } - if (comm->nth == 1) - { - cg0_th = cg0; - cg1_th = cg1; - } - else - { - cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth; - cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth; - } + if (comm->nth == 1) + { + cg0_th = cg0; + cg1_th = cg1; + } + else + { + cg0_th = cg0 + ((cg1 - cg0)* th )/comm->nth; + cg1_th = cg0 + ((cg1 - cg0)*(th+1))/comm->nth; + } - /* Get the cg's for this pulse in this zone */ - get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th, - index_gl, cgindex, - dim, dim_ind, dim0, dim1, dim2, - r_comm2, r_bcomm2, - box, tric_dist, - normal, skew_fac2_d, skew_fac_01, - v_d, v_0, v_1, &corners, sf2_round, - bDistBonded, bBondComm, - bDist2B, bDistMB, - cg_cm, fr->cginfo, - ind_p, - ibuf_p, ibuf_nalloc_p, - vbuf_p, - nsend_p, nat_p, - nsend_zone_p); - } + /* Get the cg's for this pulse in this zone */ + get_zone_pulse_cgs(dd, zonei, zone, cg0_th, cg1_th, + index_gl, cgindex, + dim, dim_ind, dim0, dim1, dim2, + r_comm2, r_bcomm2, + box, tric_dist, + normal, skew_fac2_d, skew_fac_01, + v_d, v_0, v_1, &corners, sf2_round, + bDistBonded, bBondComm, + bDist2B, bDistMB, + cg_cm, fr->cginfo, + ind_p, + ibuf_p, ibuf_nalloc_p, + vbuf_p, + nsend_p, nat_p, + nsend_zone_p); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; + } // END /* Append data of threads>=1 to the communication buffers */ for (th = 1; th < comm->nth; th++) diff --git a/src/gromacs/domdec/domdec_constraints.cpp b/src/gromacs/domdec/domdec_constraints.cpp index d6cb485a1c..e920ee4071 100644 --- a/src/gromacs/domdec/domdec_constraints.cpp +++ b/src/gromacs/domdec/domdec_constraints.cpp @@ -59,6 +59,7 @@ #include "gromacs/mdlib/constr.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/smalloc.h" @@ -528,44 +529,48 @@ int dd_make_local_constraints(gmx_domdec_t *dd, int at_start, #pragma omp parallel for num_threads(dc->nthread) schedule(static) for (thread = 0; thread < dc->nthread; thread++) { - if (at2con_mt && thread == 0) + try { - atoms_to_constraints(dd, mtop, cginfo, at2con_mt, nrec, - ilc_local, ireq); - } - - if (thread >= t0_set) - { - int cg0, cg1; - t_ilist *ilst; - ind_req_t *ireqt; - - /* Distribute the settle check+assignments over - * dc->nthread or dc->nthread-1 threads. - */ - cg0 = (dd->ncg_home*(thread-t0_set ))/(dc->nthread-t0_set); - cg1 = (dd->ncg_home*(thread-t0_set+1))/(dc->nthread-t0_set); - - if (thread == t0_set) + if (at2con_mt && thread == 0) { - ilst = ils_local; + atoms_to_constraints(dd, mtop, cginfo, at2con_mt, nrec, + ilc_local, ireq); } - else - { - ilst = &dc->ils[thread]; - } - ilst->nr = 0; - ireqt = &dd->constraint_comm->ireq[thread]; - if (thread > 0) + if (thread >= t0_set) { - ireqt->n = 0; - } + int cg0, cg1; + t_ilist *ilst; + ind_req_t *ireqt; + + /* Distribute the settle check+assignments over + * dc->nthread or dc->nthread-1 threads. + */ + cg0 = (dd->ncg_home*(thread-t0_set ))/(dc->nthread-t0_set); + cg1 = (dd->ncg_home*(thread-t0_set+1))/(dc->nthread-t0_set); - atoms_to_settles(dd, mtop, cginfo, at2settle_mt, - cg0, cg1, - ilst, ireqt); + if (thread == t0_set) + { + ilst = ils_local; + } + else + { + ilst = &dc->ils[thread]; + } + ilst->nr = 0; + + ireqt = &dd->constraint_comm->ireq[thread]; + if (thread > 0) + { + ireqt->n = 0; + } + + atoms_to_settles(dd, mtop, cginfo, at2settle_mt, + cg0, cg1, + ilst, ireqt); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Combine the generate settles and requested indices */ diff --git a/src/gromacs/domdec/domdec_topology.cpp b/src/gromacs/domdec/domdec_topology.cpp index b43abd9d41..860f444a40 100644 --- a/src/gromacs/domdec/domdec_topology.cpp +++ b/src/gromacs/domdec/domdec_topology.cpp @@ -69,6 +69,7 @@ #include "gromacs/topology/mtop_util.h" #include "gromacs/topology/topsort.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/smalloc.h" @@ -1985,88 +1986,92 @@ static int make_local_bondeds_excls(gmx_domdec_t *dd, #pragma omp parallel for num_threads(rt->nthread) schedule(static) for (thread = 0; thread < rt->nthread; thread++) { - int cg0t, cg1t; - t_idef *idef_t; - int **vsite_pbc; - int *vsite_pbc_nalloc; - t_blocka *excl_t; - - cg0t = cg0 + ((cg1 - cg0)* thread )/rt->nthread; - cg1t = cg0 + ((cg1 - cg0)*(thread+1))/rt->nthread; - - if (thread == 0) - { - idef_t = idef; - } - else + try { - idef_t = &rt->th_work[thread].idef; - clear_idef(idef_t); - } + int cg0t, cg1t; + t_idef *idef_t; + int **vsite_pbc; + int *vsite_pbc_nalloc; + t_blocka *excl_t; + + cg0t = cg0 + ((cg1 - cg0)* thread )/rt->nthread; + cg1t = cg0 + ((cg1 - cg0)*(thread+1))/rt->nthread; - if (vsite && vsite->bHaveChargeGroups && vsite->n_intercg_vsite > 0) - { if (thread == 0) { - vsite_pbc = vsite->vsite_pbc_loc; - vsite_pbc_nalloc = vsite->vsite_pbc_loc_nalloc; + idef_t = idef; } else { - vsite_pbc = rt->th_work[thread].vsite_pbc; - vsite_pbc_nalloc = rt->th_work[thread].vsite_pbc_nalloc; + idef_t = &rt->th_work[thread].idef; + clear_idef(idef_t); } - } - else - { - vsite_pbc = NULL; - vsite_pbc_nalloc = NULL; - } - rt->th_work[thread].nbonded = - make_bondeds_zone(dd, zones, - mtop->molblock, - bRCheckMB, rcheck, bRCheck2B, rc2, - la2lc, pbc_null, cg_cm, idef->iparams, - idef_t, - vsite_pbc, vsite_pbc_nalloc, - izone, - dd->cgindex[cg0t], dd->cgindex[cg1t]); - - if (izone < nzone_excl) - { - if (thread == 0) + if (vsite && vsite->bHaveChargeGroups && vsite->n_intercg_vsite > 0) { - excl_t = lexcls; + if (thread == 0) + { + vsite_pbc = vsite->vsite_pbc_loc; + vsite_pbc_nalloc = vsite->vsite_pbc_loc_nalloc; + } + else + { + vsite_pbc = rt->th_work[thread].vsite_pbc; + vsite_pbc_nalloc = rt->th_work[thread].vsite_pbc_nalloc; + } } else { - excl_t = &rt->th_work[thread].excl; - excl_t->nr = 0; - excl_t->nra = 0; + vsite_pbc = NULL; + vsite_pbc_nalloc = NULL; } - if (dd->cgindex[dd->ncg_tot] == dd->ncg_tot && - !rt->bExclRequired) - { - /* No charge groups and no distance check required */ - make_exclusions_zone(dd, zones, - mtop->moltype, cginfo, - excl_t, - izone, - cg0t, cg1t); - } - else + rt->th_work[thread].nbonded = + make_bondeds_zone(dd, zones, + mtop->molblock, + bRCheckMB, rcheck, bRCheck2B, rc2, + la2lc, pbc_null, cg_cm, idef->iparams, + idef_t, + vsite_pbc, vsite_pbc_nalloc, + izone, + dd->cgindex[cg0t], dd->cgindex[cg1t]); + + if (izone < nzone_excl) { - rt->th_work[thread].excl_count = - make_exclusions_zone_cg(dd, zones, - mtop->moltype, bRCheck2B, rc2, - la2lc, pbc_null, cg_cm, cginfo, - excl_t, - izone, - cg0t, cg1t); + if (thread == 0) + { + excl_t = lexcls; + } + else + { + excl_t = &rt->th_work[thread].excl; + excl_t->nr = 0; + excl_t->nra = 0; + } + + if (dd->cgindex[dd->ncg_tot] == dd->ncg_tot && + !rt->bExclRequired) + { + /* No charge groups and no distance check required */ + make_exclusions_zone(dd, zones, + mtop->moltype, cginfo, + excl_t, + izone, + cg0t, cg1t); + } + else + { + rt->th_work[thread].excl_count = + make_exclusions_zone_cg(dd, zones, + mtop->moltype, bRCheck2B, rc2, + la2lc, pbc_null, cg_cm, cginfo, + excl_t, + izone, + cg0t, cg1t); + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (rt->nthread > 1) diff --git a/src/gromacs/ewald/pme-grid.cpp b/src/gromacs/ewald/pme-grid.cpp index 1a5fdd890f..0c5cea2bb8 100644 --- a/src/gromacs/ewald/pme-grid.cpp +++ b/src/gromacs/ewald/pme-grid.cpp @@ -468,6 +468,7 @@ void unwrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid) #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (ix = 0; ix < pme->pmegrid_nx; ix++) { + // Trivial OpenMP region that does not throw, no need for try/catch int iy, iz; for (iy = 0; iy < overlap; iy++) @@ -485,6 +486,7 @@ void unwrap_periodic_pmegrid(struct gmx_pme_t *pme, real *pmegrid) #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (ix = 0; ix < pme->pmegrid_nx; ix++) { + // Trivial OpenMP region that does not throw, no need for try/catch int iy, iz; for (iy = 0; iy < pme->pmegrid_ny; iy++) diff --git a/src/gromacs/ewald/pme-redistribute.cpp b/src/gromacs/ewald/pme-redistribute.cpp index 28b1d78397..70b4c3d527 100644 --- a/src/gromacs/ewald/pme-redistribute.cpp +++ b/src/gromacs/ewald/pme-redistribute.cpp @@ -44,6 +44,7 @@ #include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/math/vec.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxmpi.h" #include "gromacs/utility/smalloc.h" @@ -118,9 +119,13 @@ static void pme_calc_pidx_wrapper(int natoms, matrix recipbox, rvec x[], #pragma omp parallel for num_threads(nthread) schedule(static) for (thread = 0; thread < nthread; thread++) { - pme_calc_pidx(natoms* thread /nthread, - natoms*(thread+1)/nthread, - recipbox, x, atc, atc->count_thread[thread]); + try + { + pme_calc_pidx(natoms* thread /nthread, + natoms*(thread+1)/nthread, + recipbox, x, atc, atc->count_thread[thread]); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Non-parallel reduction, since nslab is small */ diff --git a/src/gromacs/ewald/pme-solve.cpp b/src/gromacs/ewald/pme-solve.cpp index 2a14b3b48f..60936bb666 100644 --- a/src/gromacs/ewald/pme-solve.cpp +++ b/src/gromacs/ewald/pme-solve.cpp @@ -45,6 +45,7 @@ #include "gromacs/math/vec.h" #include "gromacs/simd/simd.h" #include "gromacs/simd/simd_math.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/smalloc.h" #include "pme-internal.h" @@ -125,7 +126,11 @@ void pme_init_all_work(struct pme_solve_work_t **work, int nthread, int nkx) #pragma omp parallel for num_threads(nthread) schedule(static) for (thread = 0; thread < nthread; thread++) { - realloc_work(&((*work)[thread]), nkx); + try + { + realloc_work(&((*work)[thread]), nkx); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } diff --git a/src/gromacs/ewald/pme-spread.cpp b/src/gromacs/ewald/pme-spread.cpp index 2364a157a0..1459dcd1ba 100644 --- a/src/gromacs/ewald/pme-spread.cpp +++ b/src/gromacs/ewald/pme-spread.cpp @@ -46,6 +46,7 @@ #include "gromacs/ewald/pme.h" #include "gromacs/simd/simd.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" @@ -874,15 +875,19 @@ void spread_on_grid(struct gmx_pme_t *pme, #pragma omp parallel for num_threads(nthread) schedule(static) for (thread = 0; thread < nthread; thread++) { - int start, end; + try + { + int start, end; - start = atc->n* thread /nthread; - end = atc->n*(thread+1)/nthread; + start = atc->n* thread /nthread; + end = atc->n*(thread+1)/nthread; - /* Compute fftgrid index for all atoms, - * with help of some extra variables. - */ - calc_interpolation_idx(pme, atc, start, grid_index, end, thread); + /* Compute fftgrid index for all atoms, + * with help of some extra variables. + */ + calc_interpolation_idx(pme, atc, start, grid_index, end, thread); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } #ifdef PME_TIME_THREADS @@ -896,62 +901,66 @@ void spread_on_grid(struct gmx_pme_t *pme, #pragma omp parallel for num_threads(nthread) schedule(static) for (thread = 0; thread < nthread; thread++) { - splinedata_t *spline; - pmegrid_t *grid = NULL; - - /* make local bsplines */ - if (grids == NULL || !pme->bUseThreads) + try { - spline = &atc->spline[0]; - - spline->n = atc->n; + splinedata_t *spline; + pmegrid_t *grid = NULL; - if (bSpread) + /* make local bsplines */ + if (grids == NULL || !pme->bUseThreads) { - grid = &grids->grid; - } - } - else - { - spline = &atc->spline[thread]; + spline = &atc->spline[0]; - if (grids->nthread == 1) - { - /* One thread, we operate on all coefficients */ spline->n = atc->n; + + if (bSpread) + { + grid = &grids->grid; + } } else { - /* Get the indices our thread should operate on */ - make_thread_local_ind(atc, thread, spline); - } + spline = &atc->spline[thread]; - grid = &grids->grid_th[thread]; - } + if (grids->nthread == 1) + { + /* One thread, we operate on all coefficients */ + spline->n = atc->n; + } + else + { + /* Get the indices our thread should operate on */ + make_thread_local_ind(atc, thread, spline); + } - if (bCalcSplines) - { - make_bsplines(spline->theta, spline->dtheta, pme->pme_order, - atc->fractx, spline->n, spline->ind, atc->coefficient, bDoSplines); - } + grid = &grids->grid_th[thread]; + } - if (bSpread) - { - /* put local atoms on grid. */ + if (bCalcSplines) + { + make_bsplines(spline->theta, spline->dtheta, pme->pme_order, + atc->fractx, spline->n, spline->ind, atc->coefficient, bDoSplines); + } + + if (bSpread) + { + /* put local atoms on grid. */ #ifdef PME_TIME_SPREAD - ct1a = omp_cyc_start(); + ct1a = omp_cyc_start(); #endif - spread_coefficients_bsplines_thread(grid, atc, spline, pme->spline_work); + spread_coefficients_bsplines_thread(grid, atc, spline, pme->spline_work); - if (pme->bUseThreads) - { - copy_local_grid(pme, grids, grid_index, thread, fftgrid); - } + if (pme->bUseThreads) + { + copy_local_grid(pme, grids, grid_index, thread, fftgrid); + } #ifdef PME_TIME_SPREAD - ct1a = omp_cyc_end(ct1a); - cs1a[thread] += (double)ct1a; + ct1a = omp_cyc_end(ct1a); + cs1a[thread] += (double)ct1a; #endif + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } #ifdef PME_TIME_THREADS c2 = omp_cyc_end(c2); @@ -966,11 +975,15 @@ void spread_on_grid(struct gmx_pme_t *pme, #pragma omp parallel for num_threads(grids->nthread) schedule(static) for (thread = 0; thread < grids->nthread; thread++) { - reduce_threadgrid_overlap(pme, grids, thread, - fftgrid, - pme->overlap[0].sendbuf, - pme->overlap[1].sendbuf, - grid_index); + try + { + reduce_threadgrid_overlap(pme, grids, thread, + fftgrid, + pme->overlap[0].sendbuf, + pme->overlap[1].sendbuf, + grid_index); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } #ifdef PME_TIME_THREADS c3 = omp_cyc_end(c3); diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp index c872e52ca6..fa2131a72d 100644 --- a/src/gromacs/ewald/pme.cpp +++ b/src/gromacs/ewald/pme.cpp @@ -98,6 +98,7 @@ #include "gromacs/timing/wallcycle.h" #include "gromacs/timing/walltime_accounting.h" #include "gromacs/utility/basedefinitions.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxmpi.h" #include "gromacs/utility/gmxomp.h" @@ -1088,85 +1089,88 @@ int gmx_pme_do(struct gmx_pme_t *pme, /* Here we start a large thread parallel region */ #pragma omp parallel num_threads(pme->nthread) private(thread) { - thread = gmx_omp_get_thread_num(); - if (flags & GMX_PME_SOLVE) + try { - int loop_count; - - /* do 3d-fft */ - if (thread == 0) - { - wallcycle_start(wcycle, ewcPME_FFT); - } - gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, - thread, wcycle); - if (thread == 0) - { - wallcycle_stop(wcycle, ewcPME_FFT); - } - where(); - - /* solve in k-space for our local cells */ - if (thread == 0) - { - wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); - } - if (grid_index < DO_Q) - { - loop_count = - solve_pme_yzx(pme, cfftgrid, ewaldcoeff_q, - box[XX][XX]*box[YY][YY]*box[ZZ][ZZ], - bCalcEnerVir, - pme->nthread, thread); - } - else + thread = gmx_omp_get_thread_num(); + if (flags & GMX_PME_SOLVE) { - loop_count = - solve_pme_lj_yzx(pme, &cfftgrid, FALSE, ewaldcoeff_lj, - box[XX][XX]*box[YY][YY]*box[ZZ][ZZ], - bCalcEnerVir, - pme->nthread, thread); - } + int loop_count; - if (thread == 0) - { - wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); + /* do 3d-fft */ + if (thread == 0) + { + wallcycle_start(wcycle, ewcPME_FFT); + } + gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, + thread, wcycle); + if (thread == 0) + { + wallcycle_stop(wcycle, ewcPME_FFT); + } where(); - inc_nrnb(nrnb, eNR_SOLVEPME, loop_count); - } - } - if (bCalcF) - { - /* do 3d-invfft */ - if (thread == 0) - { - where(); - wallcycle_start(wcycle, ewcPME_FFT); + /* solve in k-space for our local cells */ + if (thread == 0) + { + wallcycle_start(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); + } + if (grid_index < DO_Q) + { + loop_count = + solve_pme_yzx(pme, cfftgrid, ewaldcoeff_q, + box[XX][XX]*box[YY][YY]*box[ZZ][ZZ], + bCalcEnerVir, + pme->nthread, thread); + } + else + { + loop_count = + solve_pme_lj_yzx(pme, &cfftgrid, FALSE, ewaldcoeff_lj, + box[XX][XX]*box[YY][YY]*box[ZZ][ZZ], + bCalcEnerVir, + pme->nthread, thread); + } + + if (thread == 0) + { + wallcycle_stop(wcycle, (grid_index < DO_Q ? ewcPME_SOLVE : ewcLJPME)); + where(); + inc_nrnb(nrnb, eNR_SOLVEPME, loop_count); + } } - gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, - thread, wcycle); - if (thread == 0) + + if (bCalcF) { - wallcycle_stop(wcycle, ewcPME_FFT); + /* do 3d-invfft */ + if (thread == 0) + { + where(); + wallcycle_start(wcycle, ewcPME_FFT); + } + gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, + thread, wcycle); + if (thread == 0) + { + wallcycle_stop(wcycle, ewcPME_FFT); - where(); + where(); - if (pme->nodeid == 0) - { - real ntot = pme->nkx*pme->nky*pme->nkz; - npme = static_cast(ntot*log(ntot)/log(2.0)); - inc_nrnb(nrnb, eNR_FFT, 2*npme); + if (pme->nodeid == 0) + { + real ntot = pme->nkx*pme->nky*pme->nkz; + npme = static_cast(ntot*log(ntot)/log(2.0)); + inc_nrnb(nrnb, eNR_FFT, 2*npme); + } + + /* Note: this wallcycle region is closed below + outside an OpenMP region, so take care if + refactoring code here. */ + wallcycle_start(wcycle, ewcPME_SPREADGATHER); } - /* Note: this wallcycle region is closed below - outside an OpenMP region, so take care if - refactoring code here. */ - wallcycle_start(wcycle, ewcPME_SPREADGATHER); + copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread); } - - copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread); - } + } GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* End of thread parallel section. * With MPI we have to synchronize here before gmx_sum_qgrid_dd. @@ -1198,9 +1202,13 @@ int gmx_pme_do(struct gmx_pme_t *pme, #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (thread = 0; thread < pme->nthread; thread++) { - gather_f_bsplines(pme, grid, bClearF, atc, - &atc->spline[thread], - pme->bFEP ? (grid_index % 2 == 0 ? 1.0-lambda : lambda) : 1.0); + try + { + gather_f_bsplines(pme, grid, bClearF, atc, + &atc->spline[thread], + pme->bFEP ? (grid_index % 2 == 0 ? 1.0-lambda : lambda) : 1.0); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } where(); @@ -1346,23 +1354,27 @@ int gmx_pme_do(struct gmx_pme_t *pme, /*Here we start a large thread parallel region*/ #pragma omp parallel num_threads(pme->nthread) private(thread) { - thread = gmx_omp_get_thread_num(); - if (flags & GMX_PME_SOLVE) + try { - /* do 3d-fft */ - if (thread == 0) + thread = gmx_omp_get_thread_num(); + if (flags & GMX_PME_SOLVE) { - wallcycle_start(wcycle, ewcPME_FFT); - } + /* do 3d-fft */ + if (thread == 0) + { + wallcycle_start(wcycle, ewcPME_FFT); + } - gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, - thread, wcycle); - if (thread == 0) - { - wallcycle_stop(wcycle, ewcPME_FFT); + gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_REAL_TO_COMPLEX, + thread, wcycle); + if (thread == 0) + { + wallcycle_stop(wcycle, ewcPME_FFT); + } + where(); } - where(); } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } bFirst = FALSE; } @@ -1371,24 +1383,28 @@ int gmx_pme_do(struct gmx_pme_t *pme, /* solve in k-space for our local cells */ #pragma omp parallel num_threads(pme->nthread) private(thread) { - int loop_count; - thread = gmx_omp_get_thread_num(); - if (thread == 0) + try { - wallcycle_start(wcycle, ewcLJPME); - } + int loop_count; + thread = gmx_omp_get_thread_num(); + if (thread == 0) + { + wallcycle_start(wcycle, ewcLJPME); + } - loop_count = - solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj, - box[XX][XX]*box[YY][YY]*box[ZZ][ZZ], - bCalcEnerVir, - pme->nthread, thread); - if (thread == 0) - { - wallcycle_stop(wcycle, ewcLJPME); - where(); - inc_nrnb(nrnb, eNR_SOLVEPME, loop_count); + loop_count = + solve_pme_lj_yzx(pme, &pme->cfftgrid[2], TRUE, ewaldcoeff_lj, + box[XX][XX]*box[YY][YY]*box[ZZ][ZZ], + bCalcEnerVir, + pme->nthread, thread); + if (thread == 0) + { + wallcycle_stop(wcycle, ewcLJPME); + where(); + inc_nrnb(nrnb, eNR_SOLVEPME, loop_count); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -1415,33 +1431,36 @@ int gmx_pme_do(struct gmx_pme_t *pme, where(); #pragma omp parallel num_threads(pme->nthread) private(thread) { - thread = gmx_omp_get_thread_num(); - /* do 3d-invfft */ - if (thread == 0) + try { - where(); - wallcycle_start(wcycle, ewcPME_FFT); - } + thread = gmx_omp_get_thread_num(); + /* do 3d-invfft */ + if (thread == 0) + { + where(); + wallcycle_start(wcycle, ewcPME_FFT); + } - gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, - thread, wcycle); - if (thread == 0) - { - wallcycle_stop(wcycle, ewcPME_FFT); + gmx_parallel_3dfft_execute(pfft_setup, GMX_FFT_COMPLEX_TO_REAL, + thread, wcycle); + if (thread == 0) + { + wallcycle_stop(wcycle, ewcPME_FFT); - where(); + where(); - if (pme->nodeid == 0) - { - real ntot = pme->nkx*pme->nky*pme->nkz; - npme = static_cast(ntot*log(ntot)/log(2.0)); - inc_nrnb(nrnb, eNR_FFT, 2*npme); + if (pme->nodeid == 0) + { + real ntot = pme->nkx*pme->nky*pme->nkz; + npme = static_cast(ntot*log(ntot)/log(2.0)); + inc_nrnb(nrnb, eNR_FFT, 2*npme); + } + wallcycle_start(wcycle, ewcPME_SPREADGATHER); } - wallcycle_start(wcycle, ewcPME_SPREADGATHER); - } - - copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread); + copy_fftgrid_to_pmegrid(pme, fftgrid, grid, grid_index, pme->nthread, thread); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /*#pragma omp parallel*/ /* distribute local grid to all nodes */ @@ -1463,9 +1482,13 @@ int gmx_pme_do(struct gmx_pme_t *pme, #pragma omp parallel for num_threads(pme->nthread) schedule(static) for (thread = 0; thread < pme->nthread; thread++) { - gather_f_bsplines(pme, grid, bClearF, &pme->atc[0], - &pme->atc[0].spline[thread], - scale); + try + { + gather_f_bsplines(pme, grid, bClearF, &pme->atc[0], + &pme->atc[0].spline[thread], + scale); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } where(); diff --git a/src/gromacs/fft/fft5d.cpp b/src/gromacs/fft/fft5d.cpp index 4982dfbf59..da103c0139 100644 --- a/src/gromacs/fft/fft5d.cpp +++ b/src/gromacs/fft/fft5d.cpp @@ -47,6 +47,7 @@ #include +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxmpi.h" #include "gromacs/utility/smalloc.h" @@ -600,16 +601,20 @@ fft5d_plan fft5d_plan_3d(int NG, int MG, int KG, MPI_Comm comm[2], int flags, t_ { #pragma omp ordered { - int tsize = ((t+1)*pM[s]*pK[s]/nthreads)-(t*pM[s]*pK[s]/nthreads); - - if ((flags&FFT5D_REALCOMPLEX) && ((!(flags&FFT5D_BACKWARD) && s == 0) || ((flags&FFT5D_BACKWARD) && s == 2))) - { - gmx_fft_init_many_1d_real( &plan->p1d[s][t], rC[s], tsize, (flags&FFT5D_NOMEASURE) ? GMX_FFT_FLAG_CONSERVATIVE : 0 ); - } - else + try { - gmx_fft_init_many_1d ( &plan->p1d[s][t], C[s], tsize, (flags&FFT5D_NOMEASURE) ? GMX_FFT_FLAG_CONSERVATIVE : 0 ); + int tsize = ((t+1)*pM[s]*pK[s]/nthreads)-(t*pM[s]*pK[s]/nthreads); + + if ((flags&FFT5D_REALCOMPLEX) && ((!(flags&FFT5D_BACKWARD) && s == 0) || ((flags&FFT5D_BACKWARD) && s == 2))) + { + gmx_fft_init_many_1d_real( &plan->p1d[s][t], rC[s], tsize, (flags&FFT5D_NOMEASURE) ? GMX_FFT_FLAG_CONSERVATIVE : 0 ); + } + else + { + gmx_fft_init_many_1d ( &plan->p1d[s][t], C[s], tsize, (flags&FFT5D_NOMEASURE) ? GMX_FFT_FLAG_CONSERVATIVE : 0 ); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } } diff --git a/src/gromacs/gmxana/gmx_hbond.cpp b/src/gromacs/gmxana/gmx_hbond.cpp index f01313a7d7..f415d04203 100644 --- a/src/gromacs/gmxana/gmx_hbond.cpp +++ b/src/gromacs/gmxana/gmx_hbond.cpp @@ -65,6 +65,7 @@ #include "gromacs/topology/index.h" #include "gromacs/utility/arraysize.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/futil.h" #include "gromacs/utility/gmxomp.h" @@ -499,13 +500,17 @@ static void add_hbond(t_hbdata *hb, int d, int a, int h, int grpd, int grpa, #pragma omp critical { - if (hb->hbmap[id][ia] == NULL) + try { - snew(hb->hbmap[id][ia], 1); - snew(hb->hbmap[id][ia]->h, hb->maxhydro); - snew(hb->hbmap[id][ia]->g, hb->maxhydro); + if (hb->hbmap[id][ia] == NULL) + { + snew(hb->hbmap[id][ia], 1); + snew(hb->hbmap[id][ia]->h, hb->maxhydro); + snew(hb->hbmap[id][ia]->g, hb->maxhydro); + } + add_ff(hb, id, k, ia, frame, ihb); } - add_ff(hb, id, k, ia, frame, ihb); + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -2877,26 +2882,34 @@ int gmx_hbond(int argc, char *argv[]) if (bOMP) { - sync_hbdata(p_hb[threadNr], nframes); + try + { + sync_hbdata(p_hb[threadNr], nframes); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } #pragma omp single { - build_grid(hb, x, x[shatom], bBox, box, hbox, (rcut > r2cut) ? rcut : r2cut, - rshell, ngrid, grid); - reset_nhbonds(&(hb->d)); - - if (debug && bDebug) + try { - dump_grid(debug, ngrid, grid); - } + build_grid(hb, x, x[shatom], bBox, box, hbox, (rcut > r2cut) ? rcut : r2cut, + rshell, ngrid, grid); + reset_nhbonds(&(hb->d)); - add_frames(hb, nframes); - init_hbframe(hb, nframes, output_env_conv_time(oenv, t)); + if (debug && bDebug) + { + dump_grid(debug, ngrid, grid); + } - if (hb->bDAnr) - { - count_da_grid(ngrid, grid, hb->danr[nframes]); + add_frames(hb, nframes); + init_hbframe(hb, nframes, output_env_conv_time(oenv, t)); + + if (hb->bDAnr) + { + count_da_grid(ngrid, grid, hb->danr[nframes]); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* omp single */ if (bOMP) @@ -2909,23 +2922,27 @@ int gmx_hbond(int argc, char *argv[]) #pragma omp single { - /* Do not parallelize this just yet. */ - /* int ii; */ - for (ii = 0; (ii < nsel); ii++) + try { - int dd = index[0][i]; - int aa = index[0][i+2]; - /* int */ hh = index[0][i+1]; - ihb = is_hbond(hb, ii, ii, dd, aa, rcut, r2cut, ccut, x, bBox, box, - hbox, &dist, &ang, bDA, &h, bContact, bMerge); - - if (ihb) + /* Do not parallelize this just yet. */ + /* int ii; */ + for (ii = 0; (ii < nsel); ii++) { - /* add to index if not already there */ - /* Add a hbond */ - add_hbond(hb, dd, aa, hh, ii, ii, nframes, bMerge, ihb, bContact); + int dd = index[0][i]; + int aa = index[0][i+2]; + /* int */ hh = index[0][i+1]; + ihb = is_hbond(hb, ii, ii, dd, aa, rcut, r2cut, ccut, x, bBox, box, + hbox, &dist, &ang, bDA, &h, bContact, bMerge); + + if (ihb) + { + /* add to index if not already there */ + /* Add a hbond */ + add_hbond(hb, dd, aa, hh, ii, ii, nframes, bMerge, ihb, bContact); + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* omp single */ } /* if (bSelected) */ else @@ -2934,108 +2951,112 @@ int gmx_hbond(int argc, char *argv[]) #pragma omp for schedule(dynamic) for (xi = 0; xi < ngrid[XX]; xi++) { - for (yi = 0; (yi < ngrid[YY]); yi++) + try { - for (zi = 0; (zi < ngrid[ZZ]); zi++) + for (yi = 0; (yi < ngrid[YY]); yi++) { - - /* loop over donor groups gr0 (always) and gr1 (if necessary) */ - for (grp = gr0; (grp <= (bTwo ? gr1 : gr0)); grp++) + for (zi = 0; (zi < ngrid[ZZ]); zi++) { - icell = &(grid[zi][yi][xi].d[grp]); - if (bTwo) - { - ogrp = 1-grp; - } - else + /* loop over donor groups gr0 (always) and gr1 (if necessary) */ + for (grp = gr0; (grp <= (bTwo ? gr1 : gr0)); grp++) { - ogrp = grp; - } + icell = &(grid[zi][yi][xi].d[grp]); - /* loop over all hydrogen atoms from group (grp) - * in this gridcell (icell) - */ - for (ai = 0; (ai < icell->nr); ai++) - { - i = icell->atoms[ai]; + if (bTwo) + { + ogrp = 1-grp; + } + else + { + ogrp = grp; + } - /* loop over all adjacent gridcells (xj,yj,zj) */ - for (zjj = grid_loop_begin(ngrid[ZZ], zi, bTric, FALSE); - zjj <= grid_loop_end(ngrid[ZZ], zi, bTric, FALSE); - zjj++) + /* loop over all hydrogen atoms from group (grp) + * in this gridcell (icell) + */ + for (ai = 0; (ai < icell->nr); ai++) { - zj = grid_mod(zjj, ngrid[ZZ]); - bEdge_yjj = (zj == 0) || (zj == ngrid[ZZ] - 1); - for (yjj = grid_loop_begin(ngrid[YY], yi, bTric, bEdge_yjj); - yjj <= grid_loop_end(ngrid[YY], yi, bTric, bEdge_yjj); - yjj++) + i = icell->atoms[ai]; + + /* loop over all adjacent gridcells (xj,yj,zj) */ + for (zjj = grid_loop_begin(ngrid[ZZ], zi, bTric, FALSE); + zjj <= grid_loop_end(ngrid[ZZ], zi, bTric, FALSE); + zjj++) { - yj = grid_mod(yjj, ngrid[YY]); - bEdge_xjj = - (yj == 0) || (yj == ngrid[YY] - 1) || - (zj == 0) || (zj == ngrid[ZZ] - 1); - for (xjj = grid_loop_begin(ngrid[XX], xi, bTric, bEdge_xjj); - xjj <= grid_loop_end(ngrid[XX], xi, bTric, bEdge_xjj); - xjj++) + zj = grid_mod(zjj, ngrid[ZZ]); + bEdge_yjj = (zj == 0) || (zj == ngrid[ZZ] - 1); + for (yjj = grid_loop_begin(ngrid[YY], yi, bTric, bEdge_yjj); + yjj <= grid_loop_end(ngrid[YY], yi, bTric, bEdge_yjj); + yjj++) { - xj = grid_mod(xjj, ngrid[XX]); - jcell = &(grid[zj][yj][xj].a[ogrp]); - /* loop over acceptor atoms from other group (ogrp) - * in this adjacent gridcell (jcell) - */ - for (aj = 0; (aj < jcell->nr); aj++) + yj = grid_mod(yjj, ngrid[YY]); + bEdge_xjj = + (yj == 0) || (yj == ngrid[YY] - 1) || + (zj == 0) || (zj == ngrid[ZZ] - 1); + for (xjj = grid_loop_begin(ngrid[XX], xi, bTric, bEdge_xjj); + xjj <= grid_loop_end(ngrid[XX], xi, bTric, bEdge_xjj); + xjj++) { - j = jcell->atoms[aj]; - - /* check if this once was a h-bond */ - ihb = is_hbond(__HBDATA, grp, ogrp, i, j, rcut, r2cut, ccut, x, bBox, box, - hbox, &dist, &ang, bDA, &h, bContact, bMerge); - - if (ihb) + xj = grid_mod(xjj, ngrid[XX]); + jcell = &(grid[zj][yj][xj].a[ogrp]); + /* loop over acceptor atoms from other group (ogrp) + * in this adjacent gridcell (jcell) + */ + for (aj = 0; (aj < jcell->nr); aj++) { - /* add to index if not already there */ - /* Add a hbond */ - add_hbond(__HBDATA, i, j, h, grp, ogrp, nframes, bMerge, ihb, bContact); + j = jcell->atoms[aj]; + + /* check if this once was a h-bond */ + ihb = is_hbond(__HBDATA, grp, ogrp, i, j, rcut, r2cut, ccut, x, bBox, box, + hbox, &dist, &ang, bDA, &h, bContact, bMerge); - /* make angle and distance distributions */ - if (ihb == hbHB && !bContact) + if (ihb) { - if (dist > rcut) - { - gmx_fatal(FARGS, "distance is higher than what is allowed for an hbond: %f", dist); - } - ang *= RAD2DEG; - __ADIST[static_cast( ang/abin)]++; - __RDIST[static_cast(dist/rbin)]++; - if (!bTwo) + /* add to index if not already there */ + /* Add a hbond */ + add_hbond(__HBDATA, i, j, h, grp, ogrp, nframes, bMerge, ihb, bContact); + + /* make angle and distance distributions */ + if (ihb == hbHB && !bContact) { - if (donor_index(&hb->d, grp, i) == NOTSET) + if (dist > rcut) { - gmx_fatal(FARGS, "Invalid donor %d", i); + gmx_fatal(FARGS, "distance is higher than what is allowed for an hbond: %f", dist); } - if (acceptor_index(&hb->a, ogrp, j) == NOTSET) + ang *= RAD2DEG; + __ADIST[static_cast( ang/abin)]++; + __RDIST[static_cast(dist/rbin)]++; + if (!bTwo) { - gmx_fatal(FARGS, "Invalid acceptor %d", j); + if (donor_index(&hb->d, grp, i) == NOTSET) + { + gmx_fatal(FARGS, "Invalid donor %d", i); + } + if (acceptor_index(&hb->a, ogrp, j) == NOTSET) + { + gmx_fatal(FARGS, "Invalid acceptor %d", j); + } + resdist = std::abs(top.atoms.atom[i].resind-top.atoms.atom[j].resind); + if (resdist >= max_hx) + { + resdist = max_hx-1; + } + __HBDATA->nhx[nframes][resdist]++; } - resdist = std::abs(top.atoms.atom[i].resind-top.atoms.atom[j].resind); - if (resdist >= max_hx) - { - resdist = max_hx-1; - } - __HBDATA->nhx[nframes][resdist]++; } - } - } - } /* for aj */ - } /* for xjj */ - } /* for yjj */ - } /* for zjj */ - } /* for ai */ - } /* for grp */ - } /* for xi,yi,zi */ + } + } /* for aj */ + } /* for xjj */ + } /* for yjj */ + } /* for zjj */ + } /* for ai */ + } /* for grp */ + } /* for xi,yi,zi */ + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } /* if (bSelected) {...} else */ @@ -3045,16 +3066,20 @@ int gmx_hbond(int argc, char *argv[]) #pragma omp barrier #pragma omp critical { - /* Sum up histograms and counts from p_hb[] into hb */ - if (bOMP) + try { - hb->nhb[k] += p_hb[threadNr]->nhb[k]; - hb->ndist[k] += p_hb[threadNr]->ndist[k]; - for (j = 0; j < max_hx; j++) + /* Sum up histograms and counts from p_hb[] into hb */ + if (bOMP) { - hb->nhx[k][j] += p_hb[threadNr]->nhx[k][j]; + hb->nhb[k] += p_hb[threadNr]->nhb[k]; + hb->ndist[k] += p_hb[threadNr]->ndist[k]; + for (j = 0; j < max_hx; j++) + { + hb->nhx[k][j] += p_hb[threadNr]->nhx[k][j]; + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Here are a handful of single constructs @@ -3065,21 +3090,33 @@ int gmx_hbond(int argc, char *argv[]) #pragma omp barrier #pragma omp single { - analyse_donor_properties(donor_properties, hb, k, t); + try + { + analyse_donor_properties(donor_properties, hb, k, t); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } #pragma omp single { - if (fpnhb) + try { - do_nhb_dist(fpnhb, hb, t); + if (fpnhb) + { + do_nhb_dist(fpnhb, hb, t); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } #pragma omp single { - trrStatus = (read_next_x(oenv, status, &t, x, box)); - nframes++; + try + { + trrStatus = (read_next_x(oenv, status, &t, x, box)); + nframes++; + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } #pragma omp barrier @@ -3093,6 +3130,7 @@ int gmx_hbond(int argc, char *argv[]) hb->nrhb += p_hb[threadNr]->nrhb; hb->nrdist += p_hb[threadNr]->nrdist; } + /* Free parallel datastructures */ sfree(p_hb[threadNr]->nhb); sfree(p_hb[threadNr]->ndist); @@ -3101,19 +3139,27 @@ int gmx_hbond(int argc, char *argv[]) #pragma omp for for (i = 0; i < nabin; i++) { - for (j = 0; j < actual_nThreads; j++) + try { + for (j = 0; j < actual_nThreads; j++) + { - adist[i] += p_adist[j][i]; + adist[i] += p_adist[j][i]; + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } #pragma omp for for (i = 0; i <= nrbin; i++) { - for (j = 0; j < actual_nThreads; j++) + try { - rdist[i] += p_rdist[j][i]; + for (j = 0; j < actual_nThreads; j++) + { + rdist[i] += p_rdist[j][i]; + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } sfree(p_adist[threadNr]); diff --git a/src/gromacs/gmxana/gmx_wham.cpp b/src/gromacs/gmxana/gmx_wham.cpp index 5e290121d4..67c6210518 100644 --- a/src/gromacs/gmxana/gmx_wham.cpp +++ b/src/gromacs/gmxana/gmx_wham.cpp @@ -64,6 +64,7 @@ #include "gromacs/random/random.h" #include "gromacs/utility/arraysize.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/futil.h" #include "gromacs/utility/gmxomp.h" @@ -930,55 +931,59 @@ void calc_profile(double *profile, t_UmbrellaWindow * window, int nWindows, #pragma omp parallel { - int nthreads = gmx_omp_get_max_threads(); - int thread_id = gmx_omp_get_thread_num(); - int i; - int i0 = thread_id*opt->bins/nthreads; - int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads); - - for (i = i0; i < i1; ++i) + try { - int j, k; - double num, denom, invg, temp = 0, distance, U = 0; - num = denom = 0.; - for (j = 0; j < nWindows; ++j) + int nthreads = gmx_omp_get_max_threads(); + int thread_id = gmx_omp_get_thread_num(); + int i; + int i0 = thread_id*opt->bins/nthreads; + int i1 = std::min(opt->bins, ((thread_id+1)*opt->bins)/nthreads); + + for (i = i0; i < i1; ++i) { - for (k = 0; k < window[j].nPull; ++k) + int j, k; + double num, denom, invg, temp = 0, distance, U = 0; + num = denom = 0.; + for (j = 0; j < nWindows; ++j) { - invg = 1.0/window[j].g[k] * window[j].bsWeight[k]; - temp = (1.0*i+0.5)*dz+min; - num += invg*window[j].Histo[k][i]; - - if (!(bExact || window[j].bContrib[k][i])) + for (k = 0; k < window[j].nPull; ++k) { - continue; - } - distance = temp - window[j].pos[k]; /* distance to umbrella center */ - if (opt->bCycl) - { /* in cyclic wham: */ - if (distance > ztot_half) /* |distance| < ztot_half */ + invg = 1.0/window[j].g[k] * window[j].bsWeight[k]; + temp = (1.0*i+0.5)*dz+min; + num += invg*window[j].Histo[k][i]; + + if (!(bExact || window[j].bContrib[k][i])) { - distance -= ztot; + continue; } - else if (distance < -ztot_half) - { - distance += ztot; + distance = temp - window[j].pos[k]; /* distance to umbrella center */ + if (opt->bCycl) + { /* in cyclic wham: */ + if (distance > ztot_half) /* |distance| < ztot_half */ + { + distance -= ztot; + } + else if (distance < -ztot_half) + { + distance += ztot; + } } - } - if (!opt->bTab) - { - U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */ - } - else - { - U = tabulated_pot(distance, opt); /* Use tabulated potential */ + if (!opt->bTab) + { + U = 0.5*window[j].k[k]*sqr(distance); /* harmonic potential assumed. */ + } + else + { + U = tabulated_pot(distance, opt); /* Use tabulated potential */ + } + denom += invg*window[j].N[k]*std::exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]); } - denom += invg*window[j].N[k]*std::exp(-U/(8.314e-3*opt->Temperature) + window[j].z[k]); } + profile[i] = num/denom; } - profile[i] = num/denom; } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -994,79 +999,83 @@ double calc_z(double * profile, t_UmbrellaWindow * window, int nWindows, #pragma omp parallel { - int nthreads = gmx_omp_get_max_threads(); - int thread_id = gmx_omp_get_thread_num(); - int i; - int i0 = thread_id*nWindows/nthreads; - int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads); - double maxloc = -1e20; - - for (i = i0; i < i1; ++i) + try { - double total = 0, temp, distance, U = 0; - int j, k; + int nthreads = gmx_omp_get_max_threads(); + int thread_id = gmx_omp_get_thread_num(); + int i; + int i0 = thread_id*nWindows/nthreads; + int i1 = std::min(nWindows, ((thread_id+1)*nWindows)/nthreads); + double maxloc = -1e20; - for (j = 0; j < window[i].nPull; ++j) + for (i = i0; i < i1; ++i) { - total = 0; - for (k = 0; k < window[i].nBin; ++k) + double total = 0, temp, distance, U = 0; + int j, k; + + for (j = 0; j < window[i].nPull; ++j) { - if (!(bExact || window[i].bContrib[j][k])) + total = 0; + for (k = 0; k < window[i].nBin; ++k) { - continue; - } - temp = (1.0*k+0.5)*dz+min; - distance = temp - window[i].pos[j]; /* distance to umbrella center */ - if (opt->bCycl) - { /* in cyclic wham: */ - if (distance > ztot_half) /* |distance| < ztot_half */ + if (!(bExact || window[i].bContrib[j][k])) { - distance -= ztot; + continue; } - else if (distance < -ztot_half) + temp = (1.0*k+0.5)*dz+min; + distance = temp - window[i].pos[j]; /* distance to umbrella center */ + if (opt->bCycl) + { /* in cyclic wham: */ + if (distance > ztot_half) /* |distance| < ztot_half */ + { + distance -= ztot; + } + else if (distance < -ztot_half) + { + distance += ztot; + } + } + + if (!opt->bTab) { - distance += ztot; + U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */ } + else + { + U = tabulated_pot(distance, opt); /* Use tabulated potential */ + } + total += profile[k]*std::exp(-U/(8.314e-3*opt->Temperature)); } - - if (!opt->bTab) + /* Avoid floating point exception if window is far outside min and max */ + if (total != 0.0) { - U = 0.5*window[i].k[j]*sqr(distance); /* harmonic potential assumed. */ + total = -std::log(total); } else { - U = tabulated_pot(distance, opt); /* Use tabulated potential */ + total = 1000.0; } - total += profile[k]*std::exp(-U/(8.314e-3*opt->Temperature)); - } - /* Avoid floating point exception if window is far outside min and max */ - if (total != 0.0) - { - total = -std::log(total); - } - else - { - total = 1000.0; - } - temp = std::abs(total - window[i].z[j]); - if (temp > maxloc) - { - maxloc = temp; + temp = std::abs(total - window[i].z[j]); + if (temp > maxloc) + { + maxloc = temp; + } + window[i].z[j] = total; } - window[i].z[j] = total; } - } - /* Now get maximum maxloc from the threads and put in maxglob */ - if (maxloc > maxglob) - { -#pragma omp critical + /* Now get maximum maxloc from the threads and put in maxglob */ + if (maxloc > maxglob) { - if (maxloc > maxglob) +#pragma omp critical { - maxglob = maxloc; + if (maxloc > maxglob) + { + maxglob = maxloc; + } } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } return maxglob; diff --git a/src/gromacs/gmxana/nsfactor.cpp b/src/gromacs/gmxana/nsfactor.cpp index 4a7e05a713..6da3be50ce 100644 --- a/src/gromacs/gmxana/nsfactor.cpp +++ b/src/gromacs/gmxana/nsfactor.cpp @@ -47,6 +47,7 @@ #include "gromacs/topology/atom_id.h" #include "gromacs/topology/topology.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/futil.h" #include "gromacs/utility/gmxomp.h" @@ -259,12 +260,16 @@ gmx_radial_distribution_histogram_t *calc_radial_distribution_histogram ( #pragma omp for for (mc = 0; mc < mc_max; mc++) { - i = static_cast(std::floor(gmx_rng_uniform_real(trng[tid])*isize)); - j = static_cast(std::floor(gmx_rng_uniform_real(trng[tid])*isize)); - if (i != j) + try { - tgr[tid][static_cast(std::floor(std::sqrt(distance2(x[index[i]], x[index[j]]))/binwidth))] += gsans->slength[index[i]]*gsans->slength[index[j]]; + i = static_cast(std::floor(gmx_rng_uniform_real(trng[tid])*isize)); + j = static_cast(std::floor(gmx_rng_uniform_real(trng[tid])*isize)); + if (i != j) + { + tgr[tid][static_cast(std::floor(std::sqrt(distance2(x[index[i]], x[index[j]]))/binwidth))] += gsans->slength[index[i]]*gsans->slength[index[j]]; + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } /* collecting data from threads */ @@ -313,10 +318,14 @@ gmx_radial_distribution_histogram_t *calc_radial_distribution_histogram ( #pragma omp for for (i = 0; i < isize; i++) { - for (j = 0; j < i; j++) + try { - tgr[tid][static_cast(std::floor(std::sqrt(distance2(x[index[i]], x[index[j]]))/binwidth))] += gsans->slength[index[i]]*gsans->slength[index[j]]; + for (j = 0; j < i; j++) + { + tgr[tid][static_cast(std::floor(std::sqrt(distance2(x[index[i]], x[index[j]]))/binwidth))] += gsans->slength[index[i]]*gsans->slength[index[j]]; + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } /* collecating data for pr->gr */ diff --git a/src/gromacs/gmxlib/gmx_thread_affinity.cpp b/src/gromacs/gmxlib/gmx_thread_affinity.cpp index ae1ba4bf5c..94e9f29c50 100644 --- a/src/gromacs/gmxlib/gmx_thread_affinity.cpp +++ b/src/gromacs/gmxlib/gmx_thread_affinity.cpp @@ -58,6 +58,7 @@ #include "gromacs/legacyheaders/types/hw_info.h" #include "gromacs/utility/basenetwork.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/gmxomp.h" @@ -295,32 +296,36 @@ gmx_set_thread_affinity(FILE *fplog, nth_affinity_set = 0; #pragma omp parallel num_threads(nthread_local) reduction(+:nth_affinity_set) { - int thread_id, thread_id_node; - int index, core; - gmx_bool setaffinity_ret; - - thread_id = gmx_omp_get_thread_num(); - thread_id_node = thread0_id_node + thread_id; - index = offset + thread_id_node*core_pinning_stride; - if (locality_order != NULL) + try { - core = locality_order[index]; - } - else - { - core = index; - } + int thread_id, thread_id_node; + int index, core; + gmx_bool setaffinity_ret; + + thread_id = gmx_omp_get_thread_num(); + thread_id_node = thread0_id_node + thread_id; + index = offset + thread_id_node*core_pinning_stride; + if (locality_order != NULL) + { + core = locality_order[index]; + } + else + { + core = index; + } - setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core); + setaffinity_ret = tMPI_Thread_setaffinity_single(tMPI_Thread_self(), core); - /* store the per-thread success-values of the setaffinity */ - nth_affinity_set += (setaffinity_ret == 0); + /* store the per-thread success-values of the setaffinity */ + nth_affinity_set += (setaffinity_ret == 0); - if (debug) - { - fprintf(debug, "On rank %2d, thread %2d, index %2d, core %2d the affinity setting returned %d\n", - cr->nodeid, gmx_omp_get_thread_num(), index, core, setaffinity_ret); + if (debug) + { + fprintf(debug, "On rank %2d, thread %2d, index %2d, core %2d the affinity setting returned %d\n", + cr->nodeid, gmx_omp_get_thread_num(), index, core, setaffinity_ret); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (nth_affinity_set > nthread_local) diff --git a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp index 3185246499..ef9043470e 100644 --- a/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp +++ b/src/gromacs/gmxlib/nonbonded/nb_free_energy.cpp @@ -872,6 +872,9 @@ gmx_nb_free_energy_kernel(const t_nblist * gmx_restrict nlist, /* OpenMP atomics are expensive, but this kernels is also * expensive, so we can take this hit, instead of using * thread-local output buffers and extra reduction. + * + * All the OpenMP regions in this file are trivial and should + * not throw, so no need for try/catch. */ #pragma omp atomic f[j3] -= tx; diff --git a/src/gromacs/listed-forces/listed-forces.cpp b/src/gromacs/listed-forces/listed-forces.cpp index 44cd2368c9..2504df8f77 100644 --- a/src/gromacs/listed-forces/listed-forces.cpp +++ b/src/gromacs/listed-forces/listed-forces.cpp @@ -67,6 +67,7 @@ #include "gromacs/pbcutil/pbc.h" #include "gromacs/simd/simd.h" #include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" @@ -159,33 +160,37 @@ reduce_thread_force_buffer(int n, rvec *f, #pragma omp parallel for num_threads(nthreads) schedule(static) for (b = 0; b < nblock; b++) { - rvec *fp[MAX_BONDED_THREADS]; - int nfb, ft, fb; - int a0, a1, a; - - /* Determine which threads contribute to this block */ - nfb = 0; - for (ft = 1; ft < nthreads; ft++) + try { - if (bitmask_is_set(f_t[ft].red_mask, b)) + rvec *fp[MAX_BONDED_THREADS]; + int nfb, ft, fb; + int a0, a1, a; + + /* Determine which threads contribute to this block */ + nfb = 0; + for (ft = 1; ft < nthreads; ft++) { - fp[nfb++] = f_t[ft].f; + if (bitmask_is_set(f_t[ft].red_mask, b)) + { + fp[nfb++] = f_t[ft].f; + } } - } - if (nfb > 0) - { - /* Reduce force buffers for threads that contribute */ - a0 = b *block_size; - a1 = (b+1)*block_size; - a1 = std::min(a1, n); - for (a = a0; a < a1; a++) + if (nfb > 0) { - for (fb = 0; fb < nfb; fb++) + /* Reduce force buffers for threads that contribute */ + a0 = b *block_size; + a1 = (b+1)*block_size; + a1 = std::min(a1, n); + for (a = a0; a < a1; a++) { - rvec_inc(f[a], fp[fb][a]); + for (fb = 0; fb < nfb; fb++) + { + rvec_inc(f[a], fp[fb][a]); + } } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -490,45 +495,49 @@ void calc_listed(const struct gmx_multisim_t *ms, #pragma omp parallel for num_threads(bt->nthreads) schedule(static) for (thread = 0; thread < bt->nthreads; thread++) { - int ftype; - real *epot, v; - /* thread stuff */ - rvec *ft, *fshift; - real *dvdlt; - gmx_grppairener_t *grpp; - - if (thread == 0) + try { - ft = f; - fshift = fr->fshift; - epot = enerd->term; - grpp = &enerd->grpp; - dvdlt = dvdl; - } - else - { - zero_thread_forces(&bt->f_t[thread], fr->natoms_force, - bt->red_nblock, 1<red_ashift); - - ft = bt->f_t[thread].f; - fshift = bt->f_t[thread].fshift; - epot = bt->f_t[thread].ener; - grpp = &bt->f_t[thread].grpp; - dvdlt = bt->f_t[thread].dvdl; - } - /* Loop over all bonded force types to calculate the bonded forces */ - for (ftype = 0; (ftype < F_NRE); ftype++) - { - if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype)) + int ftype; + real *epot, v; + /* thread stuff */ + rvec *ft, *fshift; + real *dvdlt; + gmx_grppairener_t *grpp; + + if (thread == 0) { - v = calc_one_bond(thread, ftype, idef, x, - ft, fshift, fr, pbc_null, g, grpp, - nrnb, lambda, dvdlt, - md, fcd, bCalcEnerVir, - global_atom_index); - epot[ftype] += v; + ft = f; + fshift = fr->fshift; + epot = enerd->term; + grpp = &enerd->grpp; + dvdlt = dvdl; + } + else + { + zero_thread_forces(&bt->f_t[thread], fr->natoms_force, + bt->red_nblock, 1<red_ashift); + + ft = bt->f_t[thread].f; + fshift = bt->f_t[thread].fshift; + epot = bt->f_t[thread].ener; + grpp = &bt->f_t[thread].grpp; + dvdlt = bt->f_t[thread].dvdl; + } + /* Loop over all bonded force types to calculate the bonded forces */ + for (ftype = 0; (ftype < F_NRE); ftype++) + { + if (idef->il[ftype].nr > 0 && ftype_is_bonded_potential(ftype)) + { + v = calc_one_bond(thread, ftype, idef, x, + ft, fshift, fr, pbc_null, g, grpp, + nrnb, lambda, dvdlt, + md, fcd, bCalcEnerVir, + global_atom_index); + epot[ftype] += v; + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } wallcycle_sub_stop(wcycle, ewcsLISTED); diff --git a/src/gromacs/listed-forces/manage-threading.cpp b/src/gromacs/listed-forces/manage-threading.cpp index f6a1aa08a4..d76a53c19e 100644 --- a/src/gromacs/listed-forces/manage-threading.cpp +++ b/src/gromacs/listed-forces/manage-threading.cpp @@ -55,6 +55,7 @@ #include "gromacs/legacyheaders/types/ifunc.h" #include "gromacs/listed-forces/listed-forces.h" #include "gromacs/pbcutil/ishift.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" #include "gromacs/utility/stringutil.h" @@ -382,8 +383,12 @@ void setup_bonded_threading(t_forcerec *fr, t_idef *idef) #pragma omp parallel for num_threads(bt->nthreads) schedule(static) for (t = 1; t < bt->nthreads; t++) { - calc_bonded_reduction_mask(&bt->f_t[t].red_mask, - idef, bt->red_ashift, t, bt->nthreads); + try + { + calc_bonded_reduction_mask(&bt->f_t[t].red_mask, + idef, bt->red_ashift, t, bt->nthreads); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Determine the maximum number of blocks we need to reduce over */ @@ -442,20 +447,24 @@ void init_bonded_threading(FILE *fplog, int nenergrp, #pragma omp parallel for num_threads(bt->nthreads) schedule(static) for (t = 0; t < bt->nthreads; t++) { - /* Thread 0 uses the global force and energy arrays */ - if (t > 0) + try { - int i; - - bt->f_t[t].f = NULL; - bt->f_t[t].f_nalloc = 0; - snew(bt->f_t[t].fshift, SHIFTS); - bt->f_t[t].grpp.nener = nenergrp*nenergrp; - for (i = 0; i < egNR; i++) + /* Thread 0 uses the global force and energy arrays */ + if (t > 0) { - snew(bt->f_t[t].grpp.ener[i], bt->f_t[t].grpp.nener); + int i; + + bt->f_t[t].f = NULL; + bt->f_t[t].f_nalloc = 0; + snew(bt->f_t[t].fshift, SHIFTS); + bt->f_t[t].grpp.nener = nenergrp*nenergrp; + for (i = 0; i < egNR; i++) + { + snew(bt->f_t[t].grpp.ener[i], bt->f_t[t].grpp.nener); + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* The optimal value after which to switch from uniform to localized diff --git a/src/gromacs/mdlib/calcmu.cpp b/src/gromacs/mdlib/calcmu.cpp index 63cb5c54ee..00472646f6 100644 --- a/src/gromacs/mdlib/calcmu.cpp +++ b/src/gromacs/mdlib/calcmu.cpp @@ -62,6 +62,7 @@ void calc_mu(int start, int homenr, rvec x[], real q[], real qB[], num_threads(gmx_omp_nthreads_get(emntDefault)) for (i = start; i < end; i++) { + // Trivial OpenMP region that cannot throw mu_x += q[i]*x[i][XX]; mu_y += q[i]*x[i][YY]; mu_z += q[i]*x[i][ZZ]; @@ -82,6 +83,7 @@ void calc_mu(int start, int homenr, rvec x[], real q[], real qB[], num_threads(gmx_omp_nthreads_get(emntDefault)) for (i = start; i < end; i++) { + // Trivial OpenMP region that cannot throw mu_x += qB[i]*x[i][XX]; mu_y += qB[i]*x[i][YY]; mu_z += qB[i]*x[i][ZZ]; diff --git a/src/gromacs/mdlib/clincs.cpp b/src/gromacs/mdlib/clincs.cpp index b0215739ff..5a57aa8d44 100644 --- a/src/gromacs/mdlib/clincs.cpp +++ b/src/gromacs/mdlib/clincs.cpp @@ -63,6 +63,7 @@ #include "gromacs/topology/mtop_util.h" #include "gromacs/utility/bitmask.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" @@ -1457,8 +1458,12 @@ void set_lincs_matrix(struct gmx_lincsdata *li, real *invmass, real lambda) #pragma omp parallel for reduction(+: ntriangle, ncc_triangle) num_threads(li->ntask) schedule(static) for (th = 0; th < li->ntask; th++) { - set_lincs_matrix_task(li, &li->task[th], invmass, &ncc_triangle); - ntriangle = li->task[th].ntriangle; + try + { + set_lincs_matrix_task(li, &li->task[th], invmass, &ncc_triangle); + ntriangle = li->task[th].ntriangle; + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } li->ntriangle = ntriangle; li->ncc_triangle = ncc_triangle; @@ -1673,9 +1678,13 @@ gmx_lincsdata_t init_lincs(FILE *fplog, const gmx_mtop_t *mtop, #pragma omp parallel for num_threads(li->ntask) for (th = 0; th < li->ntask; th++) { - /* Per thread SIMD load buffer for loading 2 simd_width rvecs */ - snew_aligned(li->task[th].simd_buf, 2*simd_width*DIM, - align_bytes); + try + { + /* Per thread SIMD load buffer for loading 2 simd_width rvecs */ + snew_aligned(li->task[th].simd_buf, 2*simd_width*DIM, + align_bytes); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (bPLINCS || li->ncg_triangle > 0) @@ -1752,40 +1761,44 @@ static void lincs_thread_setup(struct gmx_lincsdata *li, int natoms) #pragma omp parallel for num_threads(li->ntask) schedule(static) for (th = 0; th < li->ntask; th++) { - lincs_task_t *li_task; - gmx_bitmask_t mask; - int b; - - li_task = &li->task[th]; - - if (li_task->b1 - li_task->b0 > li_task->ind_nalloc) + try { - li_task->ind_nalloc = over_alloc_large(li_task->b1-li_task->b0); - srenew(li_task->ind, li_task->ind_nalloc); - srenew(li_task->ind_r, li_task->ind_nalloc); - } + lincs_task_t *li_task; + gmx_bitmask_t mask; + int b; - bitmask_init_low_bits(&mask, th); + li_task = &li->task[th]; - li_task->nind = 0; - li_task->nind_r = 0; - for (b = li_task->b0; b < li_task->b1; b++) - { - /* We let the constraint with the lowest thread index - * operate on atoms with constraints from multiple threads. - */ - if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) && - bitmask_is_disjoint(atf[li->bla[b*2+1]], mask)) + if (li_task->b1 - li_task->b0 > li_task->ind_nalloc) { - /* Add the constraint to the local atom update index */ - li_task->ind[li_task->nind++] = b; + li_task->ind_nalloc = over_alloc_large(li_task->b1-li_task->b0); + srenew(li_task->ind, li_task->ind_nalloc); + srenew(li_task->ind_r, li_task->ind_nalloc); } - else + + bitmask_init_low_bits(&mask, th); + + li_task->nind = 0; + li_task->nind_r = 0; + for (b = li_task->b0; b < li_task->b1; b++) { - /* Add the constraint to the rest block */ - li_task->ind_r[li_task->nind_r++] = b; + /* We let the constraint with the lowest thread index + * operate on atoms with constraints from multiple threads. + */ + if (bitmask_is_disjoint(atf[li->bla[b*2]], mask) && + bitmask_is_disjoint(atf[li->bla[b*2+1]], mask)) + { + /* Add the constraint to the local atom update index */ + li_task->ind[li_task->nind++] = b; + } + else + { + /* Add the constraint to the rest block */ + li_task->ind_r[li_task->nind_r++] = b; + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* We need to copy all constraints which have not be assigned @@ -2308,20 +2321,24 @@ void set_lincs(const t_idef *idef, #pragma omp parallel for num_threads(li->ntask) schedule(static) for (th = 0; th < li->ntask; th++) { - lincs_task_t *li_task; + try + { + lincs_task_t *li_task; - li_task = &li->task[th]; + li_task = &li->task[th]; - if (li->ncg_triangle > 0 && - li_task->b1 - li_task->b0 > li_task->tri_alloc) - { - /* This is allocating too much, but it is difficult to improve */ - li_task->tri_alloc = over_alloc_dd(li_task->b1 - li_task->b0); - srenew(li_task->triangle, li_task->tri_alloc); - srenew(li_task->tri_bits, li_task->tri_alloc); - } + if (li->ncg_triangle > 0 && + li_task->b1 - li_task->b0 > li_task->tri_alloc) + { + /* This is allocating too much, but it is difficult to improve */ + li_task->tri_alloc = over_alloc_dd(li_task->b1 - li_task->b0); + srenew(li_task->triangle, li_task->tri_alloc); + srenew(li_task->tri_bits, li_task->tri_alloc); + } - set_matrix_indices(li, li_task, &at2con, bSortMatrix); + set_matrix_indices(li, li_task, &at2con, bSortMatrix); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } done_blocka(&at2con); @@ -2603,16 +2620,20 @@ gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner, /* The OpenMP parallel region of constrain_lincs for coords */ #pragma omp parallel num_threads(lincsd->ntask) { - int th = gmx_omp_get_thread_num(); + try + { + int th = gmx_omp_get_thread_num(); - clear_mat(lincsd->task[th].vir_r_m_dr); + clear_mat(lincsd->task[th].vir_r_m_dr); - do_lincs(x, xprime, box, pbc, lincsd, th, - md->invmass, cr, - bCalcDHDL, - ir->LincsWarnAngle, &bWarn, - invdt, v, bCalcVir, - th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr); + do_lincs(x, xprime, box, pbc, lincsd, th, + md->invmass, cr, + bCalcDHDL, + ir->LincsWarnAngle, &bWarn, + invdt, v, bCalcVir, + th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (bLog && fplog && lincsd->nc > 0) @@ -2704,11 +2725,15 @@ gmx_bool constrain_lincs(FILE *fplog, gmx_bool bLog, gmx_bool bEner, /* The OpenMP parallel region of constrain_lincs for derivatives */ #pragma omp parallel num_threads(lincsd->ntask) { - int th = gmx_omp_get_thread_num(); + try + { + int th = gmx_omp_get_thread_num(); - do_lincsp(x, xprime, min_proj, pbc, lincsd, th, - md->invmass, econq, bCalcDHDL, - bCalcVir, th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr); + do_lincsp(x, xprime, min_proj, pbc, lincsd, th, + md->invmass, econq, bCalcDHDL, + bCalcVir, th == 0 ? vir_r_m_dr : lincsd->task[th].vir_r_m_dr); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } diff --git a/src/gromacs/mdlib/constr.cpp b/src/gromacs/mdlib/constr.cpp index dc29cbd80e..f002a5fdc0 100644 --- a/src/gromacs/mdlib/constr.cpp +++ b/src/gromacs/mdlib/constr.cpp @@ -64,6 +64,7 @@ #include "gromacs/topology/block.h" #include "gromacs/topology/invblock.h" #include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" @@ -457,26 +458,30 @@ gmx_bool constrain(FILE *fplog, gmx_bool bLog, gmx_bool bEner, #pragma omp parallel for num_threads(nth) schedule(static) for (th = 0; th < nth; th++) { - int start_th, end_th; - - if (th > 0) - { - clear_mat(constr->vir_r_m_dr_th[th]); - } - - start_th = (nsettle* th )/nth; - end_th = (nsettle*(th+1))/nth; - if (start_th >= 0 && end_th - start_th > 0) + try { - csettle(constr->settled, - end_th-start_th, - settle->iatoms+start_th*(1+NRAL(F_SETTLE)), - pbc_null, - x[0], xprime[0], - invdt, v ? v[0] : NULL, calcvir_atom_end, - th == 0 ? vir_r_m_dr : constr->vir_r_m_dr_th[th], - th == 0 ? &settle_error : &constr->settle_error[th]); + int start_th, end_th; + + if (th > 0) + { + clear_mat(constr->vir_r_m_dr_th[th]); + } + + start_th = (nsettle* th )/nth; + end_th = (nsettle*(th+1))/nth; + if (start_th >= 0 && end_th - start_th > 0) + { + csettle(constr->settled, + end_th-start_th, + settle->iatoms+start_th*(1+NRAL(F_SETTLE)), + pbc_null, + x[0], xprime[0], + invdt, v ? v[0] : NULL, calcvir_atom_end, + th == 0 ? vir_r_m_dr : constr->vir_r_m_dr_th[th], + th == 0 ? &settle_error : &constr->settle_error[th]); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } inc_nrnb(nrnb, eNR_SETTLE, nsettle); if (v != NULL) @@ -495,26 +500,30 @@ gmx_bool constrain(FILE *fplog, gmx_bool bLog, gmx_bool bEner, #pragma omp parallel for num_threads(nth) schedule(static) for (th = 0; th < nth; th++) { - int start_th, end_th; - - if (th > 0) + try { - clear_mat(constr->vir_r_m_dr_th[th]); - } - - start_th = (nsettle* th )/nth; - end_th = (nsettle*(th+1))/nth; - - if (start_th >= 0 && end_th - start_th > 0) - { - settle_proj(constr->settled, econq, - end_th-start_th, - settle->iatoms+start_th*(1+NRAL(F_SETTLE)), - pbc_null, - x, - xprime, min_proj, calcvir_atom_end, - th == 0 ? vir_r_m_dr : constr->vir_r_m_dr_th[th]); + int start_th, end_th; + + if (th > 0) + { + clear_mat(constr->vir_r_m_dr_th[th]); + } + + start_th = (nsettle* th )/nth; + end_th = (nsettle*(th+1))/nth; + + if (start_th >= 0 && end_th - start_th > 0) + { + settle_proj(constr->settled, econq, + end_th-start_th, + settle->iatoms+start_th*(1+NRAL(F_SETTLE)), + pbc_null, + x, + xprime, min_proj, calcvir_atom_end, + th == 0 ? vir_r_m_dr : constr->vir_r_m_dr_th[th]); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* This is an overestimate */ inc_nrnb(nrnb, eNR_SETTLE, nsettle); diff --git a/src/gromacs/mdlib/coupling.cpp b/src/gromacs/mdlib/coupling.cpp index 797265ed2e..ab3397e40d 100644 --- a/src/gromacs/mdlib/coupling.cpp +++ b/src/gromacs/mdlib/coupling.cpp @@ -663,6 +663,7 @@ void berendsen_pscale(t_inputrec *ir, matrix mu, #pragma omp parallel for num_threads(nthreads) schedule(static) for (n = start; n < start+nr_atoms; n++) { + // Trivial OpenMP region that does not throw int g; if (cFREEZE == NULL) diff --git a/src/gromacs/mdlib/force.cpp b/src/gromacs/mdlib/force.cpp index 6481ebebe1..1e3475c92c 100644 --- a/src/gromacs/mdlib/force.cpp +++ b/src/gromacs/mdlib/force.cpp @@ -68,6 +68,7 @@ #include "gromacs/pbcutil/pbc.h" #include "gromacs/timing/wallcycle.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" @@ -432,51 +433,55 @@ void do_force_lowlevel(t_forcerec *fr, t_inputrec *ir, #pragma omp parallel for num_threads(nthreads) schedule(static) for (t = 0; t < nthreads; t++) { - tensor *vir_q, *vir_lj; - real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; - if (t == 0) + try { - vir_q = &fr->vir_el_recip; - vir_lj = &fr->vir_lj_recip; - Vcorrt_q = &Vcorr_q; - Vcorrt_lj = &Vcorr_lj; - dvdlt_q = &dvdl_long_range_correction_q; - dvdlt_lj = &dvdl_long_range_correction_lj; + tensor *vir_q, *vir_lj; + real *Vcorrt_q, *Vcorrt_lj, *dvdlt_q, *dvdlt_lj; + if (t == 0) + { + vir_q = &fr->vir_el_recip; + vir_lj = &fr->vir_lj_recip; + Vcorrt_q = &Vcorr_q; + Vcorrt_lj = &Vcorr_lj; + dvdlt_q = &dvdl_long_range_correction_q; + dvdlt_lj = &dvdl_long_range_correction_lj; + } + else + { + vir_q = &fr->ewc_t[t].vir_q; + vir_lj = &fr->ewc_t[t].vir_lj; + Vcorrt_q = &fr->ewc_t[t].Vcorr_q; + Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj; + dvdlt_q = &fr->ewc_t[t].dvdl[efptCOUL]; + dvdlt_lj = &fr->ewc_t[t].dvdl[efptVDW]; + clear_mat(*vir_q); + clear_mat(*vir_lj); + } + *dvdlt_q = 0; + *dvdlt_lj = 0; + + /* Threading is only supported with the Verlet cut-off + * scheme and then only single particle forces (no + * exclusion forces) are calculated, so we can store + * the forces in the normal, single fr->f_novirsum array. + */ + ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], + cr, t, fr, + md->chargeA, md->chargeB, + md->sqrt_c6A, md->sqrt_c6B, + md->sigmaA, md->sigmaB, + md->sigma3A, md->sigma3B, + md->nChargePerturbed || md->nTypePerturbed, + ir->cutoff_scheme != ecutsVERLET, + excl, x, bSB ? boxs : box, mu_tot, + ir->ewald_geometry, + ir->epsilon_surface, + fr->f_novirsum, *vir_q, *vir_lj, + Vcorrt_q, Vcorrt_lj, + lambda[efptCOUL], lambda[efptVDW], + dvdlt_q, dvdlt_lj); } - else - { - vir_q = &fr->ewc_t[t].vir_q; - vir_lj = &fr->ewc_t[t].vir_lj; - Vcorrt_q = &fr->ewc_t[t].Vcorr_q; - Vcorrt_lj = &fr->ewc_t[t].Vcorr_lj; - dvdlt_q = &fr->ewc_t[t].dvdl[efptCOUL]; - dvdlt_lj = &fr->ewc_t[t].dvdl[efptVDW]; - clear_mat(*vir_q); - clear_mat(*vir_lj); - } - *dvdlt_q = 0; - *dvdlt_lj = 0; - - /* Threading is only supported with the Verlet cut-off - * scheme and then only single particle forces (no - * exclusion forces) are calculated, so we can store - * the forces in the normal, single fr->f_novirsum array. - */ - ewald_LRcorrection(fr->excl_load[t], fr->excl_load[t+1], - cr, t, fr, - md->chargeA, md->chargeB, - md->sqrt_c6A, md->sqrt_c6B, - md->sigmaA, md->sigmaB, - md->sigma3A, md->sigma3B, - md->nChargePerturbed || md->nTypePerturbed, - ir->cutoff_scheme != ecutsVERLET, - excl, x, bSB ? boxs : box, mu_tot, - ir->ewald_geometry, - ir->epsilon_surface, - fr->f_novirsum, *vir_q, *vir_lj, - Vcorrt_q, Vcorrt_lj, - lambda[efptCOUL], lambda[efptVDW], - dvdlt_q, dvdlt_lj); + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (nthreads > 1) { diff --git a/src/gromacs/mdlib/mdatoms.cpp b/src/gromacs/mdlib/mdatoms.cpp index e972cca6a7..aebdec1175 100644 --- a/src/gromacs/mdlib/mdatoms.cpp +++ b/src/gromacs/mdlib/mdatoms.cpp @@ -44,6 +44,7 @@ #include "gromacs/legacyheaders/typedefs.h" #include "gromacs/mdlib/qmmm.h" #include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/smalloc.h" #define ALMOST_ZERO 1e-30 @@ -231,190 +232,194 @@ void atoms2md(const gmx_mtop_t *mtop, const t_inputrec *ir, #pragma omp parallel for num_threads(nthreads) schedule(static) for (i = 0; i < md->nr; i++) { - int g, ag; - real mA, mB, fac; - real c6, c12; - t_atom *atom; - - if (index == NULL) - { - ag = i; - } - else + try { - ag = index[i]; - } - gmx_mtop_atomnr_to_atom(alook, ag, &atom); + int g, ag; + real mA, mB, fac; + real c6, c12; + t_atom *atom; - if (md->cFREEZE) - { - md->cFREEZE[i] = ggrpnr(groups, egcFREEZE, ag); - } - if (EI_ENERGY_MINIMIZATION(ir->eI)) - { - /* Displacement is proportional to F, masses used for constraints */ - mA = 1.0; - mB = 1.0; - } - else if (ir->eI == eiBD) - { - /* With BD the physical masses are irrelevant. - * To keep the code simple we use most of the normal MD code path - * for BD. Thus for constraining the masses should be proportional - * to the friction coefficient. We set the absolute value such that - * m/2<(dx/dt)^2> = m/2*2kT/fric*dt = kT/2 => m=fric*dt/2 - * Then if we set the (meaningless) velocity to v=dx/dt, we get the - * correct kinetic energy and temperature using the usual code path. - * Thus with BD v*dt will give the displacement and the reported - * temperature can signal bad integration (too large time step). - */ - if (ir->bd_fric > 0) + if (index == NULL) { - mA = 0.5*ir->bd_fric*ir->delta_t; - mB = 0.5*ir->bd_fric*ir->delta_t; + ag = i; } else { - /* The friction coefficient is mass/tau_t */ - fac = ir->delta_t/opts->tau_t[md->cTC ? groups->grpnr[egcTC][ag] : 0]; - mA = 0.5*atom->m*fac; - mB = 0.5*atom->mB*fac; + ag = index[i]; } - } - else - { - mA = atom->m; - mB = atom->mB; - } - if (md->nMassPerturbed) - { - md->massA[i] = mA; - md->massB[i] = mB; - } - md->massT[i] = mA; - if (mA == 0.0) - { - md->invmass[i] = 0; - } - else if (md->cFREEZE) - { - g = md->cFREEZE[i]; - if (opts->nFreeze[g][XX] && opts->nFreeze[g][YY] && opts->nFreeze[g][ZZ]) + gmx_mtop_atomnr_to_atom(alook, ag, &atom); + + if (md->cFREEZE) + { + md->cFREEZE[i] = ggrpnr(groups, egcFREEZE, ag); + } + if (EI_ENERGY_MINIMIZATION(ir->eI)) + { + /* Displacement is proportional to F, masses used for constraints */ + mA = 1.0; + mB = 1.0; + } + else if (ir->eI == eiBD) { - /* Set the mass of completely frozen particles to ALMOST_ZERO iso 0 - * to avoid div by zero in lincs or shake. - * Note that constraints can still move a partially frozen particle. + /* With BD the physical masses are irrelevant. + * To keep the code simple we use most of the normal MD code path + * for BD. Thus for constraining the masses should be proportional + * to the friction coefficient. We set the absolute value such that + * m/2<(dx/dt)^2> = m/2*2kT/fric*dt = kT/2 => m=fric*dt/2 + * Then if we set the (meaningless) velocity to v=dx/dt, we get the + * correct kinetic energy and temperature using the usual code path. + * Thus with BD v*dt will give the displacement and the reported + * temperature can signal bad integration (too large time step). */ - md->invmass[i] = ALMOST_ZERO; + if (ir->bd_fric > 0) + { + mA = 0.5*ir->bd_fric*ir->delta_t; + mB = 0.5*ir->bd_fric*ir->delta_t; + } + else + { + /* The friction coefficient is mass/tau_t */ + fac = ir->delta_t/opts->tau_t[md->cTC ? groups->grpnr[egcTC][ag] : 0]; + mA = 0.5*atom->m*fac; + mB = 0.5*atom->mB*fac; + } } else { - md->invmass[i] = 1.0/mA; + mA = atom->m; + mB = atom->mB; } - } - else - { - md->invmass[i] = 1.0/mA; - } - md->chargeA[i] = atom->q; - md->typeA[i] = atom->type; - if (bLJPME) - { - c6 = mtop->ffparams.iparams[atom->type*(mtop->ffparams.atnr+1)].lj.c6; - c12 = mtop->ffparams.iparams[atom->type*(mtop->ffparams.atnr+1)].lj.c12; - md->sqrt_c6A[i] = sqrt(c6); - if (c6 == 0.0 || c12 == 0) + if (md->nMassPerturbed) + { + md->massA[i] = mA; + md->massB[i] = mB; + } + md->massT[i] = mA; + if (mA == 0.0) { - md->sigmaA[i] = 1.0; + md->invmass[i] = 0; + } + else if (md->cFREEZE) + { + g = md->cFREEZE[i]; + if (opts->nFreeze[g][XX] && opts->nFreeze[g][YY] && opts->nFreeze[g][ZZ]) + { + /* Set the mass of completely frozen particles to ALMOST_ZERO iso 0 + * to avoid div by zero in lincs or shake. + * Note that constraints can still move a partially frozen particle. + */ + md->invmass[i] = ALMOST_ZERO; + } + else + { + md->invmass[i] = 1.0/mA; + } } else { - md->sigmaA[i] = pow(c12/c6, oneOverSix); + md->invmass[i] = 1.0/mA; } - md->sigma3A[i] = 1/(md->sigmaA[i]*md->sigmaA[i]*md->sigmaA[i]); - } - if (md->nPerturbed) - { - md->bPerturbed[i] = PERTURBED(*atom); - md->chargeB[i] = atom->qB; - md->typeB[i] = atom->typeB; + md->chargeA[i] = atom->q; + md->typeA[i] = atom->type; if (bLJPME) { - c6 = mtop->ffparams.iparams[atom->typeB*(mtop->ffparams.atnr+1)].lj.c6; - c12 = mtop->ffparams.iparams[atom->typeB*(mtop->ffparams.atnr+1)].lj.c12; - md->sqrt_c6B[i] = sqrt(c6); + c6 = mtop->ffparams.iparams[atom->type*(mtop->ffparams.atnr+1)].lj.c6; + c12 = mtop->ffparams.iparams[atom->type*(mtop->ffparams.atnr+1)].lj.c12; + md->sqrt_c6A[i] = sqrt(c6); if (c6 == 0.0 || c12 == 0) { - md->sigmaB[i] = 1.0; + md->sigmaA[i] = 1.0; } else { - md->sigmaB[i] = pow(c12/c6, oneOverSix); + md->sigmaA[i] = pow(c12/c6, oneOverSix); } - md->sigma3B[i] = 1/(md->sigmaB[i]*md->sigmaB[i]*md->sigmaB[i]); + md->sigma3A[i] = 1/(md->sigmaA[i]*md->sigmaA[i]*md->sigmaA[i]); + } + if (md->nPerturbed) + { + md->bPerturbed[i] = PERTURBED(*atom); + md->chargeB[i] = atom->qB; + md->typeB[i] = atom->typeB; + if (bLJPME) + { + c6 = mtop->ffparams.iparams[atom->typeB*(mtop->ffparams.atnr+1)].lj.c6; + c12 = mtop->ffparams.iparams[atom->typeB*(mtop->ffparams.atnr+1)].lj.c12; + md->sqrt_c6B[i] = sqrt(c6); + if (c6 == 0.0 || c12 == 0) + { + md->sigmaB[i] = 1.0; + } + else + { + md->sigmaB[i] = pow(c12/c6, oneOverSix); + } + md->sigma3B[i] = 1/(md->sigmaB[i]*md->sigmaB[i]*md->sigmaB[i]); + } + } + md->ptype[i] = atom->ptype; + if (md->cTC) + { + md->cTC[i] = groups->grpnr[egcTC][ag]; + } + md->cENER[i] = + (groups->grpnr[egcENER] ? groups->grpnr[egcENER][ag] : 0); + if (md->cACC) + { + md->cACC[i] = groups->grpnr[egcACC][ag]; + } + if (md->cVCM) + { + md->cVCM[i] = groups->grpnr[egcVCM][ag]; + } + if (md->cORF) + { + md->cORF[i] = groups->grpnr[egcORFIT][ag]; } - } - md->ptype[i] = atom->ptype; - if (md->cTC) - { - md->cTC[i] = groups->grpnr[egcTC][ag]; - } - md->cENER[i] = - (groups->grpnr[egcENER] ? groups->grpnr[egcENER][ag] : 0); - if (md->cACC) - { - md->cACC[i] = groups->grpnr[egcACC][ag]; - } - if (md->cVCM) - { - md->cVCM[i] = groups->grpnr[egcVCM][ag]; - } - if (md->cORF) - { - md->cORF[i] = groups->grpnr[egcORFIT][ag]; - } - - if (md->cU1) - { - md->cU1[i] = groups->grpnr[egcUser1][ag]; - } - if (md->cU2) - { - md->cU2[i] = groups->grpnr[egcUser2][ag]; - } - if (ir->bQMMM) - { - if (groups->grpnr[egcQMMM] == 0 || - groups->grpnr[egcQMMM][ag] < groups->grps[egcQMMM].nr-1) + if (md->cU1) { - md->bQM[i] = TRUE; + md->cU1[i] = groups->grpnr[egcUser1][ag]; } - else + if (md->cU2) { - md->bQM[i] = FALSE; + md->cU2[i] = groups->grpnr[egcUser2][ag]; } - } - /* Initialize AdResS weighting functions to adressw */ - if (ir->bAdress) - { - md->wf[i] = 1.0; - /* if no tf table groups specified, use default table */ - md->tf_table_index[i] = DEFAULT_TF_TABLE; - if (ir->adress->n_tf_grps > 0) + + if (ir->bQMMM) + { + if (groups->grpnr[egcQMMM] == 0 || + groups->grpnr[egcQMMM][ag] < groups->grps[egcQMMM].nr-1) + { + md->bQM[i] = TRUE; + } + else + { + md->bQM[i] = FALSE; + } + } + /* Initialize AdResS weighting functions to adressw */ + if (ir->bAdress) { - /* if tf table groups specified, tf is only applied to thoose energy groups*/ - md->tf_table_index[i] = NO_TF_TABLE; - /* check wether atom is in one of the relevant energy groups and assign a table index */ - for (g = 0; g < ir->adress->n_tf_grps; g++) + md->wf[i] = 1.0; + /* if no tf table groups specified, use default table */ + md->tf_table_index[i] = DEFAULT_TF_TABLE; + if (ir->adress->n_tf_grps > 0) { - if (md->cENER[i] == ir->adress->tf_table_index[g]) + /* if tf table groups specified, tf is only applied to thoose energy groups*/ + md->tf_table_index[i] = NO_TF_TABLE; + /* check wether atom is in one of the relevant energy groups and assign a table index */ + for (g = 0; g < ir->adress->n_tf_grps; g++) { - md->tf_table_index[i] = g; + if (md->cENER[i] == ir->adress->tf_table_index[g]) + { + md->tf_table_index[i] = g; + } } } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } gmx_mtop_atomlookup_destroy(alook); diff --git a/src/gromacs/mdlib/minimize.cpp b/src/gromacs/mdlib/minimize.cpp index d8676b1ed8..8f8e85672f 100644 --- a/src/gromacs/mdlib/minimize.cpp +++ b/src/gromacs/mdlib/minimize.cpp @@ -90,6 +90,7 @@ #include "gromacs/timing/walltime_accounting.h" #include "gromacs/topology/mtop_util.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" @@ -634,21 +635,25 @@ static void do_em_step(t_commrec *cr, t_inputrec *ir, t_mdatoms *md, #pragma omp for schedule(static) nowait for (i = start; i < end; i++) { - if (md->cFREEZE) + try { - gf = md->cFREEZE[i]; - } - for (m = 0; m < DIM; m++) - { - if (ir->opts.nFreeze[gf][m]) + if (md->cFREEZE) { - x2[i][m] = x1[i][m]; + gf = md->cFREEZE[i]; } - else + for (m = 0; m < DIM; m++) { - x2[i][m] = x1[i][m] + a*f[i][m]; + if (ir->opts.nFreeze[gf][m]) + { + x2[i][m] = x1[i][m]; + } + else + { + x2[i][m] = x1[i][m] + a*f[i][m]; + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (s2->flags & (1<cg_gl_nalloc = s1->cg_gl_nalloc; - srenew(s2->cg_gl, s2->cg_gl_nalloc); + try + { + srenew(s2->cg_gl, s2->cg_gl_nalloc); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; #pragma omp barrier } s2->ncg_gl = s1->ncg_gl; diff --git a/src/gromacs/mdlib/nbnxn_atomdata.cpp b/src/gromacs/mdlib/nbnxn_atomdata.cpp index 20bb7b6e2c..af0195394d 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.cpp +++ b/src/gromacs/mdlib/nbnxn_atomdata.cpp @@ -59,6 +59,7 @@ #include "gromacs/mdlib/nbnxn_util.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/simd/simd.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" @@ -1169,41 +1170,45 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nth) schedule(static) for (th = 0; th < nth; th++) { - for (int g = g0; g < g1; g++) + try { - const nbnxn_grid_t *grid; - int cxy0, cxy1; + for (int g = g0; g < g1; g++) + { + const nbnxn_grid_t *grid; + int cxy0, cxy1; - grid = &nbs->grid[g]; + grid = &nbs->grid[g]; - cxy0 = (grid->ncx*grid->ncy* th +nth-1)/nth; - cxy1 = (grid->ncx*grid->ncy*(th+1)+nth-1)/nth; + cxy0 = (grid->ncx*grid->ncy* th +nth-1)/nth; + cxy1 = (grid->ncx*grid->ncy*(th+1)+nth-1)/nth; - for (int cxy = cxy0; cxy < cxy1; cxy++) - { - int na, ash, na_fill; + for (int cxy = cxy0; cxy < cxy1; cxy++) + { + int na, ash, na_fill; - na = grid->cxy_na[cxy]; - ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + na = grid->cxy_na[cxy]; + ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; - if (g == 0 && FillLocal) - { - na_fill = - (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc; - } - else - { - /* We fill only the real particle locations. - * We assume the filling entries at the end have been - * properly set before during ns. - */ - na_fill = na; + if (g == 0 && FillLocal) + { + na_fill = + (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc; + } + else + { + /* We fill only the real particle locations. + * We assume the filling entries at the end have been + * properly set before during ns. + */ + na_fill = na; + } + copy_rvec_to_nbat_real(nbs->a+ash, na, na_fill, x, + nbat->XFormat, nbat->x, ash, + 0, 0, 0); } - copy_rvec_to_nbat_real(nbs->a+ash, na, na_fill, x, - nbat->XFormat, nbat->x, ash, - 0, 0, 0); } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -1420,112 +1425,116 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nb #pragma omp parallel num_threads(nth) { - int b0, b1, b; - int i0, i1; - int group_size, th; - - th = gmx_omp_get_thread_num(); - - for (group_size = 2; group_size < 2*next_pow2; group_size *= 2) + try { - int index[2], group_pos, partner_pos, wu; - int partner_th = th ^ (group_size/2); + int b0, b1, b; + int i0, i1; + int group_size, th; - if (group_size > 2) + th = gmx_omp_get_thread_num(); + + for (group_size = 2; group_size < 2*next_pow2; group_size *= 2) { + int index[2], group_pos, partner_pos, wu; + int partner_th = th ^ (group_size/2); + + if (group_size > 2) + { #ifdef TMPI_ATOMICS - /* wait on partner thread - replaces full barrier */ - int sync_th, sync_group_size; + /* wait on partner thread - replaces full barrier */ + int sync_th, sync_group_size; - tMPI_Atomic_memory_barrier(); /* gurantee data is saved before marking work as done */ - tMPI_Atomic_set(&(nbat->syncStep[th]), group_size/2); /* mark previous step as completed */ + tMPI_Atomic_memory_barrier(); /* gurantee data is saved before marking work as done */ + tMPI_Atomic_set(&(nbat->syncStep[th]), group_size/2); /* mark previous step as completed */ - /* find thread to sync with. Equal to partner_th unless nth is not a power of two. */ - for (sync_th = partner_th, sync_group_size = group_size; sync_th >= nth && sync_group_size > 2; sync_group_size /= 2) - { - sync_th &= ~(sync_group_size/4); - } - if (sync_th < nth) /* otherwise nothing to sync index[1] will be >=nout */ - { - /* wait on the thread which computed input data in previous step */ - while (tMPI_Atomic_get((volatile tMPI_Atomic_t*)&(nbat->syncStep[sync_th])) < group_size/2) + /* find thread to sync with. Equal to partner_th unless nth is not a power of two. */ + for (sync_th = partner_th, sync_group_size = group_size; sync_th >= nth && sync_group_size > 2; sync_group_size /= 2) { - gmx_pause(); + sync_th &= ~(sync_group_size/4); } - /* guarantee that no later load happens before wait loop is finisehd */ - tMPI_Atomic_memory_barrier(); - } -#else /* TMPI_ATOMICS */ + if (sync_th < nth) /* otherwise nothing to sync index[1] will be >=nout */ + { + /* wait on the thread which computed input data in previous step */ + while (tMPI_Atomic_get((volatile tMPI_Atomic_t*)&(nbat->syncStep[sync_th])) < group_size/2) + { + gmx_pause(); + } + /* guarantee that no later load happens before wait loop is finisehd */ + tMPI_Atomic_memory_barrier(); + } +#else /* TMPI_ATOMICS */ #pragma omp barrier #endif - } + } - /* Calculate buffers to sum (result goes into first buffer) */ - group_pos = th % group_size; - index[0] = th - group_pos; - index[1] = index[0] + group_size/2; + /* Calculate buffers to sum (result goes into first buffer) */ + group_pos = th % group_size; + index[0] = th - group_pos; + index[1] = index[0] + group_size/2; - /* If no second buffer, nothing to do */ - if (index[1] >= nbat->nout && group_size > 2) - { - continue; - } + /* If no second buffer, nothing to do */ + if (index[1] >= nbat->nout && group_size > 2) + { + continue; + } #if NBNXN_BUFFERFLAG_MAX_THREADS > 256 #error reverse_bits assumes max 256 threads #endif - /* Position is permuted so that one of the 2 vectors being added was computed on the same thread in the previous step. - This improves locality and enables to sync with just a single thread between steps (=the levels in the btree). - The permutation which allows this corresponds to reversing the bits of the group position. - */ - group_pos = reverse_bits(group_pos)/(256/group_size); + /* Position is permuted so that one of the 2 vectors being added was computed on the same thread in the previous step. + This improves locality and enables to sync with just a single thread between steps (=the levels in the btree). + The permutation which allows this corresponds to reversing the bits of the group position. + */ + group_pos = reverse_bits(group_pos)/(256/group_size); - partner_pos = group_pos ^ 1; + partner_pos = group_pos ^ 1; - /* loop over two work-units (own and partner) */ - for (wu = 0; wu < 2; wu++) - { - if (wu == 1) + /* loop over two work-units (own and partner) */ + for (wu = 0; wu < 2; wu++) { - if (partner_th < nth) - { - break; /* partner exists we don't have to do his work */ - } - else + if (wu == 1) { - group_pos = partner_pos; + if (partner_th < nth) + { + break; /* partner exists we don't have to do his work */ + } + else + { + group_pos = partner_pos; + } } - } - /* Calculate the cell-block range for our thread */ - b0 = (flags->nflag* group_pos )/group_size; - b1 = (flags->nflag*(group_pos+1))/group_size; + /* Calculate the cell-block range for our thread */ + b0 = (flags->nflag* group_pos )/group_size; + b1 = (flags->nflag*(group_pos+1))/group_size; - for (b = b0; b < b1; b++) - { - i0 = b *NBNXN_BUFFERFLAG_SIZE*nbat->fstride; - i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; - - if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2) + for (b = b0; b < b1; b++) { + i0 = b *NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + + if (bitmask_is_set(flags->flag[b], index[1]) || group_size > 2) + { #ifdef GMX_NBNXN_SIMD - nbnxn_atomdata_reduce_reals_simd + nbnxn_atomdata_reduce_reals_simd #else - nbnxn_atomdata_reduce_reals + nbnxn_atomdata_reduce_reals #endif - (nbat->out[index[0]].f, - bitmask_is_set(flags->flag[b], index[0]) || group_size > 2, - &(nbat->out[index[1]].f), 1, i0, i1); - - } - else if (!bitmask_is_set(flags->flag[b], index[0])) - { - nbnxn_atomdata_clear_reals(nbat->out[index[0]].f, - i0, i1); + (nbat->out[index[0]].f, + bitmask_is_set(flags->flag[b], index[0]) || group_size > 2, + &(nbat->out[index[1]].f), 1, i0, i1); + + } + else if (!bitmask_is_set(flags->flag[b], index[0])) + { + nbnxn_atomdata_clear_reals(nbat->out[index[0]].f, + i0, i1); + } } } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -1536,47 +1545,51 @@ static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nba #pragma omp parallel for num_threads(nth) schedule(static) for (int th = 0; th < nth; th++) { - const nbnxn_buffer_flags_t *flags; - int nfptr; - real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; + try + { + const nbnxn_buffer_flags_t *flags; + int nfptr; + real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; - flags = &nbat->buffer_flags; + flags = &nbat->buffer_flags; - /* Calculate the cell-block range for our thread */ - int b0 = (flags->nflag* th )/nth; - int b1 = (flags->nflag*(th+1))/nth; + /* Calculate the cell-block range for our thread */ + int b0 = (flags->nflag* th )/nth; + int b1 = (flags->nflag*(th+1))/nth; - for (int b = b0; b < b1; b++) - { - int i0 = b *NBNXN_BUFFERFLAG_SIZE*nbat->fstride; - int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; - - nfptr = 0; - for (int out = 1; out < nbat->nout; out++) + for (int b = b0; b < b1; b++) { - if (bitmask_is_set(flags->flag[b], out)) + int i0 = b *NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + + nfptr = 0; + for (int out = 1; out < nbat->nout; out++) { - fptr[nfptr++] = nbat->out[out].f; + if (bitmask_is_set(flags->flag[b], out)) + { + fptr[nfptr++] = nbat->out[out].f; + } } - } - if (nfptr > 0) - { + if (nfptr > 0) + { #ifdef GMX_NBNXN_SIMD - nbnxn_atomdata_reduce_reals_simd + nbnxn_atomdata_reduce_reals_simd #else - nbnxn_atomdata_reduce_reals + nbnxn_atomdata_reduce_reals #endif - (nbat->out[0].f, - bitmask_is_set(flags->flag[b], 0), - fptr, nfptr, - i0, i1); - } - else if (!bitmask_is_set(flags->flag[b], 0)) - { - nbnxn_atomdata_clear_reals(nbat->out[0].f, - i0, i1); + (nbat->out[0].f, + bitmask_is_set(flags->flag[b], 0), + fptr, nfptr, + i0, i1); + } + else if (!bitmask_is_set(flags->flag[b], 0)) + { + nbnxn_atomdata_clear_reals(nbat->out[0].f, + i0, i1); + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -1630,12 +1643,16 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nth) schedule(static) for (int th = 0; th < nth; th++) { - nbnxn_atomdata_add_nbat_f_to_f_part(nbs, nbat, - nbat->out, - 1, - a0+((th+0)*na)/nth, - a0+((th+1)*na)/nth, - f); + try + { + nbnxn_atomdata_add_nbat_f_to_f_part(nbs, nbat, + nbat->out, + 1, + a0+((th+0)*na)/nth, + a0+((th+1)*na)/nth, + f); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } nbs_cycle_stop(&nbs->cc[enbsCCreducef]); diff --git a/src/gromacs/mdlib/nbnxn_grid.cpp b/src/gromacs/mdlib/nbnxn_grid.cpp index ec81b416a8..1e4507a192 100644 --- a/src/gromacs/mdlib/nbnxn_grid.cpp +++ b/src/gromacs/mdlib/nbnxn_grid.cpp @@ -59,6 +59,7 @@ #include "gromacs/mdlib/nbnxn_util.h" #include "gromacs/simd/simd.h" #include "gromacs/simd/vector_operations.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/smalloc.h" struct gmx_domdec_zones_t; @@ -1254,8 +1255,12 @@ static void calc_cell_indices(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nthread) schedule(static) for (int thread = 0; thread < nthread; thread++) { - calc_column_indices(grid, a0, a1, x, dd_zone, move, thread, nthread, - nbs->cell, nbs->work[thread].cxy_na); + try + { + calc_column_indices(grid, a0, a1, x, dd_zone, move, thread, nthread, + nbs->cell, nbs->work[thread].cxy_na); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Make the cell index as a function of x and y */ @@ -1357,20 +1362,24 @@ static void calc_cell_indices(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nthread) schedule(static) for (int thread = 0; thread < nthread; thread++) { - if (grid->bSimple) - { - sort_columns_simple(nbs, dd_zone, grid, a0, a1, atinfo, x, nbat, - ((thread+0)*grid->ncx*grid->ncy)/nthread, - ((thread+1)*grid->ncx*grid->ncy)/nthread, - nbs->work[thread].sort_work); - } - else + try { - sort_columns_supersub(nbs, dd_zone, grid, a0, a1, atinfo, x, nbat, - ((thread+0)*grid->ncx*grid->ncy)/nthread, - ((thread+1)*grid->ncx*grid->ncy)/nthread, - nbs->work[thread].sort_work); + if (grid->bSimple) + { + sort_columns_simple(nbs, dd_zone, grid, a0, a1, atinfo, x, nbat, + ((thread+0)*grid->ncx*grid->ncy)/nthread, + ((thread+1)*grid->ncx*grid->ncy)/nthread, + nbs->work[thread].sort_work); + } + else + { + sort_columns_supersub(nbs, dd_zone, grid, a0, a1, atinfo, x, nbat, + ((thread+0)*grid->ncx*grid->ncy)/nthread, + ((thread+1)*grid->ncx*grid->ncy)/nthread, + nbs->work[thread].sort_work); + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (grid->bSimple && nbat->XFormat == nbatX8) @@ -1611,47 +1620,51 @@ void nbnxn_grid_add_simple(nbnxn_search_t nbs, #pragma omp parallel for num_threads(nthreads) schedule(static) for (int sc = 0; sc < grid->nc; sc++) { - for (int c = 0; c < ncd; c++) + try { - int tx = sc*ncd + c; - int na = NBNXN_CPU_CLUSTER_I_SIZE; - while (na > 0 && - nbat->type[tx*NBNXN_CPU_CLUSTER_I_SIZE+na-1] == nbat->ntype-1) - { - na--; - } - - if (na > 0) + for (int c = 0; c < ncd; c++) { - switch (nbat->XFormat) + int tx = sc*ncd + c; + int na = NBNXN_CPU_CLUSTER_I_SIZE; + while (na > 0 && + nbat->type[tx*NBNXN_CPU_CLUSTER_I_SIZE+na-1] == nbat->ntype-1) { - case nbatX4: - /* PACK_X4==NBNXN_CPU_CLUSTER_I_SIZE, so this is simple */ - calc_bounding_box_x_x4(na, nbat->x+tx*STRIDE_P4, - bb+tx); - break; - case nbatX8: - /* PACK_X8>NBNXN_CPU_CLUSTER_I_SIZE, more complicated */ - calc_bounding_box_x_x8(na, nbat->x+X8_IND_A(tx*NBNXN_CPU_CLUSTER_I_SIZE), - bb+tx); - break; - default: - calc_bounding_box(na, nbat->xstride, - nbat->x+tx*NBNXN_CPU_CLUSTER_I_SIZE*nbat->xstride, - bb+tx); - break; + na--; } - bbcz[tx*NNBSBB_D+0] = bb[tx].lower[BB_Z]; - bbcz[tx*NNBSBB_D+1] = bb[tx].upper[BB_Z]; - /* No interaction optimization yet here */ - grid->flags_simple[tx] = NBNXN_CI_DO_LJ(0) | NBNXN_CI_DO_COUL(0); - } - else - { - grid->flags_simple[tx] = 0; + if (na > 0) + { + switch (nbat->XFormat) + { + case nbatX4: + /* PACK_X4==NBNXN_CPU_CLUSTER_I_SIZE, so this is simple */ + calc_bounding_box_x_x4(na, nbat->x+tx*STRIDE_P4, + bb+tx); + break; + case nbatX8: + /* PACK_X8>NBNXN_CPU_CLUSTER_I_SIZE, more complicated */ + calc_bounding_box_x_x8(na, nbat->x+X8_IND_A(tx*NBNXN_CPU_CLUSTER_I_SIZE), + bb+tx); + break; + default: + calc_bounding_box(na, nbat->xstride, + nbat->x+tx*NBNXN_CPU_CLUSTER_I_SIZE*nbat->xstride, + bb+tx); + break; + } + bbcz[tx*NNBSBB_D+0] = bb[tx].lower[BB_Z]; + bbcz[tx*NNBSBB_D+1] = bb[tx].upper[BB_Z]; + + /* No interaction optimization yet here */ + grid->flags_simple[tx] = NBNXN_CI_DO_LJ(0) | NBNXN_CI_DO_COUL(0); + } + else + { + grid->flags_simple[tx] = 0; + } } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (grid->bSimple && nbat->XFormat == nbatX8) diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.cpp.pre b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.cpp.pre index 2571a05348..5656f48599 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.cpp.pre +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_file_generator/nbnxn_kernel_simd_template.cpp.pre @@ -216,6 +216,8 @@ void #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) {{ + // Presently, the kernels do not call C++ code that can throw, so + // no need for a try/catch pair in this OpenMP region. nbnxn_atomdata_output_t *out; real *fshift_p; diff --git a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c index 84f0d8f259..a9d454be48 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c +++ b/src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.c @@ -247,6 +247,8 @@ nbnxn_kernel_ref(const nbnxn_pairlist_set_t *nbl_list, #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { + // Presently, the kernels do not call C++ code that can throw, so + // no need for a try/catch pair in this OpenMP region. nbnxn_atomdata_output_t *out; real *fshift_p; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp index 5faa3785fa..46ee21b830 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn.cpp @@ -352,6 +352,8 @@ nbnxn_kernel_simd_2xnn(nbnxn_pairlist_set_t gmx_unused *nbl_list, #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { + // Presently, the kernels do not call C++ code that can throw, so + // no need for a try/catch pair in this OpenMP region. nbnxn_atomdata_output_t *out; real *fshift_p; diff --git a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp index 6ed1911480..9b19281c15 100644 --- a/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp +++ b/src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn.cpp @@ -352,6 +352,8 @@ nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t gmx_unused *nbl_list, #pragma omp parallel for schedule(static) num_threads(nthreads) for (nb = 0; nb < nnbl; nb++) { + // Presently, the kernels do not call C++ code that can throw, so + // no need for a try/catch pair in this OpenMP region. nbnxn_atomdata_output_t *out; real *fshift_p; diff --git a/src/gromacs/mdlib/nbnxn_search.cpp b/src/gromacs/mdlib/nbnxn_search.cpp index 9429ddf89d..9fa72185b8 100644 --- a/src/gromacs/mdlib/nbnxn_search.cpp +++ b/src/gromacs/mdlib/nbnxn_search.cpp @@ -64,6 +64,7 @@ #include "gromacs/pbcutil/pbc.h" #include "gromacs/simd/simd.h" #include "gromacs/simd/vector_operations.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/smalloc.h" @@ -833,23 +834,27 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list, #pragma omp parallel for num_threads(nbl_list->nnbl) schedule(static) for (int i = 0; i < nbl_list->nnbl; i++) { - /* Allocate the nblist data structure locally on each thread - * to optimize memory access for NUMA architectures. - */ - snew(nbl_list->nbl[i], 1); - - /* Only list 0 is used on the GPU, use normal allocation for i>0 */ - if (i == 0) + try { - nbnxn_init_pairlist(nbl_list->nbl[i], nbl_list->bSimple, alloc, free); - } - else - { - nbnxn_init_pairlist(nbl_list->nbl[i], nbl_list->bSimple, NULL, NULL); - } + /* Allocate the nblist data structure locally on each thread + * to optimize memory access for NUMA architectures. + */ + snew(nbl_list->nbl[i], 1); - snew(nbl_list->nbl_fep[i], 1); - nbnxn_init_pairlist_fep(nbl_list->nbl_fep[i]); + /* Only list 0 is used on the GPU, use normal allocation for i>0 */ + if (i == 0) + { + nbnxn_init_pairlist(nbl_list->nbl[i], nbl_list->bSimple, alloc, free); + } + else + { + nbnxn_init_pairlist(nbl_list->nbl[i], nbl_list->bSimple, NULL, NULL); + } + + snew(nbl_list->nbl_fep[i], 1); + nbnxn_init_pairlist_fep(nbl_list->nbl_fep[i]); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } @@ -2774,43 +2779,47 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, #pragma omp parallel for num_threads(nthreads) schedule(static) for (int n = 0; n < nnbl; n++) { - int sci_offset; - int cj4_offset; - int excl_offset; - const nbnxn_pairlist_t *nbli; + try + { + int sci_offset; + int cj4_offset; + int excl_offset; + const nbnxn_pairlist_t *nbli; - /* Determine the offset in the combined data for our thread */ - sci_offset = nblc->nsci; - cj4_offset = nblc->ncj4; - excl_offset = nblc->nexcl; + /* Determine the offset in the combined data for our thread */ + sci_offset = nblc->nsci; + cj4_offset = nblc->ncj4; + excl_offset = nblc->nexcl; - for (int i = 0; i < n; i++) - { - sci_offset += nbl[i]->nsci; - cj4_offset += nbl[i]->ncj4; - excl_offset += nbl[i]->nexcl; - } + for (int i = 0; i < n; i++) + { + sci_offset += nbl[i]->nsci; + cj4_offset += nbl[i]->ncj4; + excl_offset += nbl[i]->nexcl; + } - nbli = nbl[n]; + nbli = nbl[n]; - for (int i = 0; i < nbli->nsci; i++) - { - nblc->sci[sci_offset+i] = nbli->sci[i]; - nblc->sci[sci_offset+i].cj4_ind_start += cj4_offset; - nblc->sci[sci_offset+i].cj4_ind_end += cj4_offset; - } + for (int i = 0; i < nbli->nsci; i++) + { + nblc->sci[sci_offset+i] = nbli->sci[i]; + nblc->sci[sci_offset+i].cj4_ind_start += cj4_offset; + nblc->sci[sci_offset+i].cj4_ind_end += cj4_offset; + } - for (int j4 = 0; j4 < nbli->ncj4; j4++) - { - nblc->cj4[cj4_offset+j4] = nbli->cj4[j4]; - nblc->cj4[cj4_offset+j4].imei[0].excl_ind += excl_offset; - nblc->cj4[cj4_offset+j4].imei[1].excl_ind += excl_offset; - } + for (int j4 = 0; j4 < nbli->ncj4; j4++) + { + nblc->cj4[cj4_offset+j4] = nbli->cj4[j4]; + nblc->cj4[cj4_offset+j4].imei[0].excl_ind += excl_offset; + nblc->cj4[cj4_offset+j4].imei[1].excl_ind += excl_offset; + } - for (int j4 = 0; j4 < nbli->nexcl; j4++) - { - nblc->excl[excl_offset+j4] = nbli->excl[j4]; + for (int j4 = 0; j4 < nbli->nexcl; j4++) + { + nblc->excl[excl_offset+j4] = nbli->excl[j4]; + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } for (int n = 0; n < nnbl; n++) @@ -2854,26 +2863,30 @@ static void balance_fep_lists(const nbnxn_search_t nbs, #pragma omp parallel for schedule(static) num_threads(nnbl) for (int th = 0; th < nnbl; th++) { - t_nblist *nbl; + try + { + t_nblist *nbl; - nbl = nbs->work[th].nbl_fep; + nbl = nbs->work[th].nbl_fep; - /* Note that here we allocate for the total size, instead of - * a per-thread esimate (which is hard to obtain). - */ - if (nri_tot > nbl->maxnri) - { - nbl->maxnri = over_alloc_large(nri_tot); - reallocate_nblist(nbl); - } - if (nri_tot > nbl->maxnri || nrj_tot > nbl->maxnrj) - { - nbl->maxnrj = over_alloc_small(nrj_tot); - srenew(nbl->jjnr, nbl->maxnrj); - srenew(nbl->excl_fep, nbl->maxnrj); - } + /* Note that here we allocate for the total size, instead of + * a per-thread esimate (which is hard to obtain). + */ + if (nri_tot > nbl->maxnri) + { + nbl->maxnri = over_alloc_large(nri_tot); + reallocate_nblist(nbl); + } + if (nri_tot > nbl->maxnri || nrj_tot > nbl->maxnrj) + { + nbl->maxnrj = over_alloc_small(nrj_tot); + srenew(nbl->jjnr, nbl->maxnrj); + srenew(nbl->excl_fep, nbl->maxnrj); + } - clear_pairlist_fep(nbl); + clear_pairlist_fep(nbl); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Loop over the source lists and assign and copy i-entries */ @@ -3990,32 +4003,36 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nnbl) schedule(static) for (int th = 0; th < nnbl; th++) { - /* Re-init the thread-local work flag data before making - * the first list (not an elegant conditional). - */ - if (nbat->bUseBufferFlags && ((zi == 0 && zj == 0) || - (bGPUCPU && zi == 0 && zj == 1))) + try { - init_buffer_flags(&nbs->work[th].buffer_flags, nbat->natoms); - } + /* Re-init the thread-local work flag data before making + * the first list (not an elegant conditional). + */ + if (nbat->bUseBufferFlags && ((zi == 0 && zj == 0) || + (bGPUCPU && zi == 0 && zj == 1))) + { + init_buffer_flags(&nbs->work[th].buffer_flags, nbat->natoms); + } - if (CombineNBLists && th > 0) - { - clear_pairlist(nbl[th]); - } + if (CombineNBLists && th > 0) + { + clear_pairlist(nbl[th]); + } - /* Divide the i super cell equally over the nblists */ - nbnxn_make_pairlist_part(nbs, gridi, gridj, - &nbs->work[th], nbat, excl, - rlist, - nb_kernel_type, - ci_block, - nbat->bUseBufferFlags, - nsubpair_target, - progBal, nsubpair_tot_est, - th, nnbl, - nbl[th], - nbl_list->nbl_fep[th]); + /* Divide the i super cell equally over the nblists */ + nbnxn_make_pairlist_part(nbs, gridi, gridj, + &nbs->work[th], nbat, excl, + rlist, + nb_kernel_type, + ci_block, + nbat->bUseBufferFlags, + nsubpair_target, + progBal, nsubpair_tot_est, + th, nnbl, + nbl[th], + nbl_list->nbl_fep[th]); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } nbs_cycle_stop(&nbs->cc[enbsCCsearch]); @@ -4066,7 +4083,11 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nnbl) schedule(static) for (int th = 0; th < nnbl; th++) { - sort_sci(nbl[th]); + try + { + sort_sci(nbl[th]); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } } diff --git a/src/gromacs/mdlib/sim_util.cpp b/src/gromacs/mdlib/sim_util.cpp index 8e9f363d0f..22e2d85fc4 100644 --- a/src/gromacs/mdlib/sim_util.cpp +++ b/src/gromacs/mdlib/sim_util.cpp @@ -92,6 +92,7 @@ #include "gromacs/timing/wallcycle.h" #include "gromacs/timing/walltime_accounting.h" #include "gromacs/utility/cstringutil.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/gmxmpi.h" @@ -666,8 +667,12 @@ static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists, #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl) for (th = 0; th < nbl_lists->nnbl; th++) { - gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], - x, f, fr, mdatoms, &kernel_data, nrnb); + try + { + gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], + x, f, fr, mdatoms, &kernel_data, nrnb); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (fepvals->sc_alpha != 0) @@ -702,8 +707,12 @@ static void do_nb_verlet_fep(nbnxn_pairlist_set_t *nbl_lists, #pragma omp parallel for schedule(static) num_threads(nbl_lists->nnbl) for (th = 0; th < nbl_lists->nnbl; th++) { - gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], - x, f, fr, mdatoms, &kernel_data, nrnb); + try + { + gmx_nb_free_energy_kernel(nbl_lists->nbl_fep[th], + x, f, fr, mdatoms, &kernel_data, nrnb); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } sum_epot(&(enerd->foreign_grpp), enerd->foreign_term); diff --git a/src/gromacs/mdlib/tgroup.cpp b/src/gromacs/mdlib/tgroup.cpp index af62dfca1f..8f31bbe3f2 100644 --- a/src/gromacs/mdlib/tgroup.cpp +++ b/src/gromacs/mdlib/tgroup.cpp @@ -49,6 +49,7 @@ #include "gromacs/mdlib/rbin.h" #include "gromacs/mdlib/update.h" #include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/futil.h" #include "gromacs/utility/smalloc.h" @@ -131,20 +132,24 @@ void init_ekindata(FILE gmx_unused *log, gmx_mtop_t *mtop, t_grpopts *opts, #pragma omp parallel for num_threads(nthread) schedule(static) for (thread = 0; thread < nthread; thread++) { + try + { #define EKIN_WORK_BUFFER_SIZE 2 - /* Allocate 2 extra elements on both sides, so in single - * precision we have - * EKIN_WORK_BUFFER_SIZE*DIM*DIM*sizeof(real) = 72/144 bytes - * buffer on both sides to avoid cache pollution. - */ - snew(ekind->ekin_work_alloc[thread], ekind->ngtc+2*EKIN_WORK_BUFFER_SIZE); - ekind->ekin_work[thread] = ekind->ekin_work_alloc[thread] + EKIN_WORK_BUFFER_SIZE; - /* Nasty hack so we can have the per-thread accumulation - * variable for dekindl in the same thread-local cache lines - * as the per-thread accumulation tensors for ekin[fh], - * because they are accumulated in the same loop. */ - ekind->dekindl_work[thread] = &(ekind->ekin_work[thread][ekind->ngtc][0][0]); + /* Allocate 2 extra elements on both sides, so in single + * precision we have + * EKIN_WORK_BUFFER_SIZE*DIM*DIM*sizeof(real) = 72/144 bytes + * buffer on both sides to avoid cache pollution. + */ + snew(ekind->ekin_work_alloc[thread], ekind->ngtc+2*EKIN_WORK_BUFFER_SIZE); + ekind->ekin_work[thread] = ekind->ekin_work_alloc[thread] + EKIN_WORK_BUFFER_SIZE; + /* Nasty hack so we can have the per-thread accumulation + * variable for dekindl in the same thread-local cache lines + * as the per-thread accumulation tensors for ekin[fh], + * because they are accumulated in the same loop. */ + ekind->dekindl_work[thread] = &(ekind->ekin_work[thread][ekind->ngtc][0][0]); #undef EKIN_WORK_BUFFER_SIZE + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } ekind->ngacc = opts->ngacc; diff --git a/src/gromacs/mdlib/update.cpp b/src/gromacs/mdlib/update.cpp index 13dc2e8a96..1ad0507bde 100644 --- a/src/gromacs/mdlib/update.cpp +++ b/src/gromacs/mdlib/update.cpp @@ -65,6 +65,7 @@ #include "gromacs/pulling/pull.h" #include "gromacs/random/random.h" #include "gromacs/timing/wallcycle.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/futil.h" #include "gromacs/utility/gmxomp.h" @@ -991,6 +992,9 @@ static void calc_ke_part_normal(rvec v[], t_grpopts *opts, t_mdatoms *md, #pragma omp parallel for num_threads(nthread) schedule(static) for (thread = 0; thread < nthread; thread++) { + // This OpenMP only loops over arrays and does not call any functions + // or memory allocation. It should not be able to throw, so for now + // we do not need a try/catch wrapper. int start_t, end_t, n; int ga, gt; rvec v_corrt; @@ -1635,25 +1639,28 @@ void update_constraints(FILE *fplog, nth = gmx_omp_nthreads_get(emntUpdate); #pragma omp parallel for num_threads(nth) schedule(static) - for (th = 0; th < nth; th++) { - int start_th, end_th; + try + { + int start_th, end_th; - start_th = start + ((nrend-start)* th )/nth; - end_th = start + ((nrend-start)*(th+1))/nth; + start_th = start + ((nrend-start)* th )/nth; + end_th = start + ((nrend-start)*(th+1))/nth; - /* The second part of the SD integration */ - do_update_sd1(upd->sd, - start_th, end_th, dt, - inputrec->opts.acc, inputrec->opts.nFreeze, - md->invmass, md->ptype, - md->cFREEZE, md->cACC, md->cTC, - state->x, xprime, state->v, force, - inputrec->opts.ngtc, inputrec->opts.ref_t, - bDoConstr, FALSE, - step, inputrec->ld_seed, - DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + /* The second part of the SD integration */ + do_update_sd1(upd->sd, + start_th, end_th, dt, + inputrec->opts.acc, inputrec->opts.nFreeze, + md->invmass, md->ptype, + md->cFREEZE, md->cACC, md->cTC, + state->x, xprime, state->v, force, + inputrec->opts.ngtc, inputrec->opts.ref_t, + bDoConstr, FALSE, + step, inputrec->ld_seed, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } inc_nrnb(nrnb, eNR_UPDATE, homenr); wallcycle_stop(wcycle, ewcUPDATE); @@ -1684,21 +1691,25 @@ void update_constraints(FILE *fplog, #pragma omp parallel for num_threads(nth) schedule(static) for (th = 0; th < nth; th++) { - int start_th, end_th; + try + { + int start_th, end_th; - start_th = start + ((nrend-start)* th )/nth; - end_th = start + ((nrend-start)*(th+1))/nth; + start_th = start + ((nrend-start)* th )/nth; + end_th = start + ((nrend-start)*(th+1))/nth; - /* The second part of the SD integration */ - do_update_sd2(upd->sd, - FALSE, start_th, end_th, - inputrec->opts.acc, inputrec->opts.nFreeze, - md->invmass, md->ptype, - md->cFREEZE, md->cACC, md->cTC, - state->x, xprime, state->v, force, state->sd_X, - inputrec->opts.tau_t, - FALSE, step, inputrec->ld_seed, - DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + /* The second part of the SD integration */ + do_update_sd2(upd->sd, + FALSE, start_th, end_th, + inputrec->opts.acc, inputrec->opts.nFreeze, + md->invmass, md->ptype, + md->cFREEZE, md->cACC, md->cTC, + state->x, xprime, state->v, force, state->sd_X, + inputrec->opts.tau_t, + FALSE, step, inputrec->ld_seed, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } inc_nrnb(nrnb, eNR_UPDATE, homenr); wallcycle_stop(wcycle, ewcUPDATE); @@ -1751,6 +1762,7 @@ void update_constraints(FILE *fplog, #pragma omp parallel for num_threads(nth) schedule(static) for (i = start; i < nrend; i++) { + // Trivial statement, does not throw copy_rvec(upd->xp[i], state->x[i]); } } @@ -1960,99 +1972,103 @@ void update_coords(FILE *fplog, #pragma omp parallel for num_threads(nth) schedule(static) private(alpha) for (th = 0; th < nth; th++) { - int start_th, end_th; + try + { + int start_th, end_th; - start_th = start + ((nrend-start)* th )/nth; - end_th = start + ((nrend-start)*(th+1))/nth; + start_th = start + ((nrend-start)* th )/nth; + end_th = start + ((nrend-start)*(th+1))/nth; - switch (inputrec->eI) - { - case (eiMD): - if (ekind->cosacc.cos_accel == 0) - { - do_update_md(start_th, end_th, dt, - ekind->tcstat, state->nosehoover_vxi, - ekind->bNEMD, ekind->grpstat, inputrec->opts.acc, - inputrec->opts.nFreeze, - md->invmass, md->ptype, - md->cFREEZE, md->cACC, md->cTC, - state->x, xprime, state->v, force, M, - bNH, bPR); - } - else - { - do_update_visc(start_th, end_th, dt, - ekind->tcstat, state->nosehoover_vxi, - md->invmass, md->ptype, - md->cTC, state->x, xprime, state->v, force, M, - state->box, - ekind->cosacc.cos_accel, - ekind->cosacc.vcos, - bNH, bPR); - } - break; - case (eiSD1): - /* With constraints, the SD1 update is done in 2 parts */ - do_update_sd1(upd->sd, - start_th, end_th, dt, - inputrec->opts.acc, inputrec->opts.nFreeze, - md->invmass, md->ptype, - md->cFREEZE, md->cACC, md->cTC, - state->x, xprime, state->v, force, - inputrec->opts.ngtc, inputrec->opts.ref_t, - bDoConstr, TRUE, - step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); - break; - case (eiSD2): - /* The SD2 update is always done in 2 parts, - * because an extra constraint step is needed - */ - do_update_sd2(upd->sd, - bInitStep, start_th, end_th, - inputrec->opts.acc, inputrec->opts.nFreeze, - md->invmass, md->ptype, - md->cFREEZE, md->cACC, md->cTC, - state->x, xprime, state->v, force, state->sd_X, - inputrec->opts.tau_t, - TRUE, step, inputrec->ld_seed, - DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); - break; - case (eiBD): - do_update_bd(start_th, end_th, dt, - inputrec->opts.nFreeze, md->invmass, md->ptype, - md->cFREEZE, md->cTC, - state->x, xprime, state->v, force, - inputrec->bd_fric, - upd->sd->bd_rf, - step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); - break; - case (eiVV): - case (eiVVAK): - alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */ - switch (UpdatePart) - { - case etrtVELOCITY1: - case etrtVELOCITY2: - do_update_vv_vel(start_th, end_th, dt, - inputrec->opts.acc, inputrec->opts.nFreeze, - md->invmass, md->ptype, - md->cFREEZE, md->cACC, - state->v, force, - (bNH || bPR), state->veta, alpha); - break; - case etrtPOSITION: - do_update_vv_pos(start_th, end_th, dt, - inputrec->opts.nFreeze, - md->ptype, md->cFREEZE, - state->x, xprime, state->v, - (bNH || bPR), state->veta); - break; - } - break; - default: - gmx_fatal(FARGS, "Don't know how to update coordinates"); - break; + switch (inputrec->eI) + { + case (eiMD): + if (ekind->cosacc.cos_accel == 0) + { + do_update_md(start_th, end_th, dt, + ekind->tcstat, state->nosehoover_vxi, + ekind->bNEMD, ekind->grpstat, inputrec->opts.acc, + inputrec->opts.nFreeze, + md->invmass, md->ptype, + md->cFREEZE, md->cACC, md->cTC, + state->x, xprime, state->v, force, M, + bNH, bPR); + } + else + { + do_update_visc(start_th, end_th, dt, + ekind->tcstat, state->nosehoover_vxi, + md->invmass, md->ptype, + md->cTC, state->x, xprime, state->v, force, M, + state->box, + ekind->cosacc.cos_accel, + ekind->cosacc.vcos, + bNH, bPR); + } + break; + case (eiSD1): + /* With constraints, the SD1 update is done in 2 parts */ + do_update_sd1(upd->sd, + start_th, end_th, dt, + inputrec->opts.acc, inputrec->opts.nFreeze, + md->invmass, md->ptype, + md->cFREEZE, md->cACC, md->cTC, + state->x, xprime, state->v, force, + inputrec->opts.ngtc, inputrec->opts.ref_t, + bDoConstr, TRUE, + step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + break; + case (eiSD2): + /* The SD2 update is always done in 2 parts, + * because an extra constraint step is needed + */ + do_update_sd2(upd->sd, + bInitStep, start_th, end_th, + inputrec->opts.acc, inputrec->opts.nFreeze, + md->invmass, md->ptype, + md->cFREEZE, md->cACC, md->cTC, + state->x, xprime, state->v, force, state->sd_X, + inputrec->opts.tau_t, + TRUE, step, inputrec->ld_seed, + DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + break; + case (eiBD): + do_update_bd(start_th, end_th, dt, + inputrec->opts.nFreeze, md->invmass, md->ptype, + md->cFREEZE, md->cTC, + state->x, xprime, state->v, force, + inputrec->bd_fric, + upd->sd->bd_rf, + step, inputrec->ld_seed, DOMAINDECOMP(cr) ? cr->dd->gatindex : NULL); + break; + case (eiVV): + case (eiVVAK): + alpha = 1.0 + DIM/((double)inputrec->opts.nrdf[0]); /* assuming barostat coupled to group 0. */ + switch (UpdatePart) + { + case etrtVELOCITY1: + case etrtVELOCITY2: + do_update_vv_vel(start_th, end_th, dt, + inputrec->opts.acc, inputrec->opts.nFreeze, + md->invmass, md->ptype, + md->cFREEZE, md->cACC, + state->v, force, + (bNH || bPR), state->veta, alpha); + break; + case etrtPOSITION: + do_update_vv_pos(start_th, end_th, dt, + inputrec->opts.nFreeze, + md->ptype, md->cFREEZE, + state->x, xprime, state->v, + (bNH || bPR), state->veta); + break; + } + break; + default: + gmx_fatal(FARGS, "Don't know how to update coordinates"); + break; + } } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } diff --git a/src/gromacs/mdlib/vsite.cpp b/src/gromacs/mdlib/vsite.cpp index 005ae91988..4cb0dc8816 100644 --- a/src/gromacs/mdlib/vsite.cpp +++ b/src/gromacs/mdlib/vsite.cpp @@ -53,6 +53,7 @@ #include "gromacs/pbcutil/mshift.h" #include "gromacs/pbcutil/pbc.h" #include "gromacs/topology/mtop_util.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" @@ -523,10 +524,14 @@ void construct_vsites(const gmx_vsite_t *vsite, { #pragma omp parallel num_threads(vsite->nthreads) { - construct_vsites_thread(vsite, - x, dt, v, - ip, vsite->tdata[gmx_omp_get_thread_num()].ilist, - pbc_null); + try + { + construct_vsites_thread(vsite, + x, dt, v, + ip, vsite->tdata[gmx_omp_get_thread_num()].ilist, + pbc_null); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Now we can construct the vsites that might depend on other vsites */ construct_vsites_thread(vsite, @@ -1462,33 +1467,37 @@ void spread_vsite_f(gmx_vsite_t *vsite, #pragma omp parallel num_threads(vsite->nthreads) { - int thread; - rvec *fshift_t; - - thread = gmx_omp_get_thread_num(); - - if (thread == 0 || fshift == NULL) + try { - fshift_t = fshift; - } - else - { - int i; + int thread; + rvec *fshift_t; - fshift_t = vsite->tdata[thread].fshift; + thread = gmx_omp_get_thread_num(); - for (i = 0; i < SHIFTS; i++) + if (thread == 0 || fshift == NULL) { - clear_rvec(fshift_t[i]); + fshift_t = fshift; } - } + else + { + int i; + + fshift_t = vsite->tdata[thread].fshift; - spread_vsite_f_thread(vsite, - x, f, fshift_t, - VirCorr, vsite->tdata[thread].dxdf, - idef->iparams, - vsite->tdata[thread].ilist, - g, pbc_null); + for (i = 0; i < SHIFTS; i++) + { + clear_rvec(fshift_t[i]); + } + } + + spread_vsite_f_thread(vsite, + x, f, fshift_t, + VirCorr, vsite->tdata[thread].dxdf, + idef->iparams, + vsite->tdata[thread].ilist, + g, pbc_null); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } if (fshift != NULL) @@ -1861,7 +1870,11 @@ void split_vsites_over_threads(const t_ilist *ilist, #pragma omp parallel for num_threads(vsite->nthreads) schedule(static) for (th = 0; th < vsite->nthreads; th++) { - prepare_vsite_thread(ilist, &vsite->tdata[th]); + try + { + prepare_vsite_thread(ilist, &vsite->tdata[th]); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } /* Master threads does the (potential) overlap vsites */ prepare_vsite_thread(ilist, &vsite->tdata[vsite->nthreads]); diff --git a/src/gromacs/pbcutil/pbc.cpp b/src/gromacs/pbcutil/pbc.cpp index 0997097295..740d57b8fa 100644 --- a/src/gromacs/pbcutil/pbc.cpp +++ b/src/gromacs/pbcutil/pbc.cpp @@ -58,6 +58,7 @@ #include "gromacs/math/vec.h" #include "gromacs/pbcutil/ishift.h" #include "gromacs/pbcutil/mshift.h" +#include "gromacs/utility/exceptions.h" #include "gromacs/utility/fatalerror.h" #include "gromacs/utility/gmxassert.h" #include "gromacs/utility/smalloc.h" @@ -1514,11 +1515,15 @@ void put_atoms_in_box_omp(int ePBC, matrix box, int natoms, rvec x[]) #pragma omp parallel for num_threads(nth) schedule(static) for (t = 0; t < nth; t++) { - int offset, len; + try + { + int offset, len; - offset = (natoms*t )/nth; - len = (natoms*(t + 1))/nth - offset; - put_atoms_in_box(ePBC, box, len, x + offset); + offset = (natoms*t )/nth; + len = (natoms*(t + 1))/nth - offset; + put_atoms_in_box(ePBC, box, len, x + offset); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } } diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp index 9e395d347f..e50cde2a2f 100644 --- a/src/programs/mdrun/runner.cpp +++ b/src/programs/mdrun/runner.cpp @@ -169,35 +169,40 @@ struct mdrunner_arglist a commrec. */ static void mdrunner_start_fn(void *arg) { - struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg; - struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure - that it's thread-local. This doesn't - copy pointed-to items, of course, - but those are all const. */ - t_commrec *cr; /* we need a local version of this */ - FILE *fplog = NULL; - t_filenm *fnm; + try + { + struct mdrunner_arglist *mda = (struct mdrunner_arglist*)arg; + struct mdrunner_arglist mc = *mda; /* copy the arg list to make sure + that it's thread-local. This doesn't + copy pointed-to items, of course, + but those are all const. */ + t_commrec *cr; /* we need a local version of this */ + FILE *fplog = NULL; + t_filenm *fnm; - fnm = dup_tfn(mc.nfile, mc.fnm); + fnm = dup_tfn(mc.nfile, mc.fnm); - cr = reinitialize_commrec_for_this_thread(mc.cr); + cr = reinitialize_commrec_for_this_thread(mc.cr); - if (MASTER(cr)) - { - fplog = mc.fplog; - } + if (MASTER(cr)) + { + fplog = mc.fplog; + } - gmx::mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, - mc.bVerbose, mc.bCompact, mc.nstglobalcomm, - mc.ddxyz, mc.dd_node_order, mc.rdd, - mc.rconstr, mc.dddlb_opt, mc.dlb_scale, - mc.ddcsx, mc.ddcsy, mc.ddcsz, - mc.nbpu_opt, mc.nstlist_cmdline, - mc.nsteps_cmdline, mc.nstepout, mc.resetstep, - mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, - mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags); + gmx::mdrunner(&mc.hw_opt, fplog, cr, mc.nfile, fnm, mc.oenv, + mc.bVerbose, mc.bCompact, mc.nstglobalcomm, + mc.ddxyz, mc.dd_node_order, mc.rdd, + mc.rconstr, mc.dddlb_opt, mc.dlb_scale, + mc.ddcsx, mc.ddcsy, mc.ddcsz, + mc.nbpu_opt, mc.nstlist_cmdline, + mc.nsteps_cmdline, mc.nstepout, mc.resetstep, + mc.nmultisim, mc.repl_ex_nst, mc.repl_ex_nex, mc.repl_ex_seed, mc.pforce, + mc.cpt_period, mc.max_hours, mc.imdport, mc.Flags); + } + GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR; } + /* called by mdrunner() to start a specific number of threads (including the main thread) for thread-parallel runs. This in turn calls mdrunner() for each thread. -- 2.11.4.GIT